| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581 |
- // Copyright (C) 2014 The Syncthing Authors.
- //
- // This Source Code Form is subject to the terms of the Mozilla Public
- // License, v. 2.0. If a copy of the MPL was not distributed with this file,
- // You can obtain one at https://mozilla.org/MPL/2.0/.
- package main
- import (
- "bufio"
- "context"
- "fmt"
- "io"
- "os"
- "os/exec"
- "os/signal"
- "path/filepath"
- "runtime"
- "strings"
- "syscall"
- "time"
- "github.com/syncthing/syncthing/lib/events"
- "github.com/syncthing/syncthing/lib/fs"
- "github.com/syncthing/syncthing/lib/locations"
- "github.com/syncthing/syncthing/lib/osutil"
- "github.com/syncthing/syncthing/lib/protocol"
- "github.com/syncthing/syncthing/lib/sync"
- "github.com/syncthing/syncthing/lib/syncthing"
- )
- var (
- stdoutFirstLines []string // The first 10 lines of stdout
- stdoutLastLines []string // The last 50 lines of stdout
- stdoutMut = sync.NewMutex()
- )
- const (
- countRestarts = 4
- loopThreshold = 60 * time.Second
- logFileAutoCloseDelay = 5 * time.Second
- logFileMaxOpenTime = time.Minute
- panicUploadMaxWait = 30 * time.Second
- panicUploadNoticeWait = 10 * time.Second
- )
- func monitorMain(runtimeOptions RuntimeOptions) {
- l.SetPrefix("[monitor] ")
- var dst io.Writer = os.Stdout
- logFile := runtimeOptions.logFile
- if logFile != "-" {
- if expanded, err := fs.ExpandTilde(logFile); err == nil {
- logFile = expanded
- }
- var fileDst io.Writer
- if runtimeOptions.logMaxSize > 0 {
- open := func(name string) (io.WriteCloser, error) {
- return newAutoclosedFile(name, logFileAutoCloseDelay, logFileMaxOpenTime), nil
- }
- fileDst = newRotatedFile(logFile, open, int64(runtimeOptions.logMaxSize), runtimeOptions.logMaxFiles)
- } else {
- fileDst = newAutoclosedFile(logFile, logFileAutoCloseDelay, logFileMaxOpenTime)
- }
- if runtime.GOOS == "windows" {
- // Translate line breaks to Windows standard
- fileDst = osutil.ReplacingWriter{
- Writer: fileDst,
- From: '\n',
- To: []byte{'\r', '\n'},
- }
- }
- // Log to both stdout and file.
- dst = io.MultiWriter(dst, fileDst)
- l.Infof(`Log output saved to file "%s"`, logFile)
- }
- args := os.Args
- var restarts [countRestarts]time.Time
- stopSign := make(chan os.Signal, 1)
- signal.Notify(stopSign, os.Interrupt, sigTerm)
- restartSign := make(chan os.Signal, 1)
- sigHup := syscall.Signal(1)
- signal.Notify(restartSign, sigHup)
- childEnv := childEnv()
- first := true
- for {
- maybeReportPanics()
- if t := time.Since(restarts[0]); t < loopThreshold {
- l.Warnf("%d restarts in %v; not retrying further", countRestarts, t)
- os.Exit(syncthing.ExitError.AsInt())
- }
- copy(restarts[0:], restarts[1:])
- restarts[len(restarts)-1] = time.Now()
- cmd := exec.Command(args[0], args[1:]...)
- cmd.Env = childEnv
- stderr, err := cmd.StderrPipe()
- if err != nil {
- panic(err)
- }
- stdout, err := cmd.StdoutPipe()
- if err != nil {
- panic(err)
- }
- l.Debugln("Starting syncthing")
- err = cmd.Start()
- if err != nil {
- l.Warnln("Error starting the main Syncthing process:", err)
- panic("Error starting the main Syncthing process")
- }
- stdoutMut.Lock()
- stdoutFirstLines = make([]string, 0, 10)
- stdoutLastLines = make([]string, 0, 50)
- stdoutMut.Unlock()
- wg := sync.NewWaitGroup()
- wg.Add(1)
- go func() {
- copyStderr(stderr, dst)
- wg.Done()
- }()
- wg.Add(1)
- go func() {
- copyStdout(stdout, dst)
- wg.Done()
- }()
- exit := make(chan error)
- go func() {
- wg.Wait()
- exit <- cmd.Wait()
- }()
- stopped := false
- select {
- case s := <-stopSign:
- l.Infof("Signal %d received; exiting", s)
- cmd.Process.Signal(sigTerm)
- err = <-exit
- stopped = true
- case s := <-restartSign:
- l.Infof("Signal %d received; restarting", s)
- cmd.Process.Signal(sigHup)
- err = <-exit
- case err = <-exit:
- }
- if err == nil {
- // Successful exit indicates an intentional shutdown
- os.Exit(syncthing.ExitSuccess.AsInt())
- }
- if exiterr, ok := err.(*exec.ExitError); ok {
- exitCode := exiterr.ExitCode()
- if stopped || runtimeOptions.noRestart {
- os.Exit(exitCode)
- }
- if exitCode == syncthing.ExitUpgrade.AsInt() {
- // Restart the monitor process to release the .old
- // binary as part of the upgrade process.
- l.Infoln("Restarting monitor...")
- if err = restartMonitor(args); err != nil {
- l.Warnln("Restart:", err)
- }
- os.Exit(exitCode)
- }
- }
- if runtimeOptions.noRestart {
- os.Exit(syncthing.ExitError.AsInt())
- }
- l.Infoln("Syncthing exited:", err)
- time.Sleep(1 * time.Second)
- if first {
- // Let the next child process know that this is not the first time
- // it's starting up.
- childEnv = append(childEnv, "STRESTART=yes")
- first = false
- }
- }
- }
- func copyStderr(stderr io.Reader, dst io.Writer) {
- br := bufio.NewReader(stderr)
- var panicFd *os.File
- defer func() {
- if panicFd != nil {
- _ = panicFd.Close()
- maybeReportPanics()
- }
- }()
- for {
- line, err := br.ReadString('\n')
- if err != nil {
- return
- }
- if panicFd == nil {
- dst.Write([]byte(line))
- if strings.Contains(line, "SIGILL") {
- l.Warnln(`
- *******************************************************************************
- * Crash due to illegal instruction detected. This is most likely due to a CPU *
- * incompatibility with the high performance hashing package. Switching to the *
- * standard hashing package instead. Please report this issue at: *
- * *
- * https://github.com/syncthing/syncthing/issues *
- * *
- * Include the details of your CPU. *
- *******************************************************************************
- `)
- os.Setenv("STHASHING", "standard")
- return
- }
- if strings.HasPrefix(line, "panic:") || strings.HasPrefix(line, "fatal error:") {
- panicFd, err = os.Create(locations.GetTimestamped(locations.PanicLog))
- if err != nil {
- l.Warnln("Create panic log:", err)
- continue
- }
- l.Warnf("Panic detected, writing to \"%s\"", panicFd.Name())
- if strings.Contains(line, "leveldb") && strings.Contains(line, "corrupt") {
- l.Warnln(`
- *********************************************************************************
- * Crash due to corrupt database. *
- * *
- * This crash usually occurs due to one of the following reasons: *
- * - Syncthing being stopped abruptly (killed/loss of power) *
- * - Bad hardware (memory/disk issues) *
- * - Software that affects disk writes (SSD caching software and simillar) *
- * *
- * Please see the following URL for instructions on how to recover: *
- * https://docs.syncthing.net/users/faq.html#my-syncthing-database-is-corrupt *
- *********************************************************************************
- `)
- } else {
- l.Warnln("Please check for existing issues with similar panic message at https://github.com/syncthing/syncthing/issues/")
- l.Warnln("If no issue with similar panic message exists, please create a new issue with the panic log attached")
- }
- stdoutMut.Lock()
- for _, line := range stdoutFirstLines {
- panicFd.WriteString(line)
- }
- panicFd.WriteString("...\n")
- for _, line := range stdoutLastLines {
- panicFd.WriteString(line)
- }
- stdoutMut.Unlock()
- }
- panicFd.WriteString("Panic at " + time.Now().Format(time.RFC3339) + "\n")
- }
- if panicFd != nil {
- panicFd.WriteString(line)
- }
- }
- }
- func copyStdout(stdout io.Reader, dst io.Writer) {
- br := bufio.NewReader(stdout)
- for {
- line, err := br.ReadString('\n')
- if err != nil {
- return
- }
- stdoutMut.Lock()
- if len(stdoutFirstLines) < cap(stdoutFirstLines) {
- stdoutFirstLines = append(stdoutFirstLines, line)
- } else {
- if l := len(stdoutLastLines); l == cap(stdoutLastLines) {
- stdoutLastLines = stdoutLastLines[:l-1]
- }
- stdoutLastLines = append(stdoutLastLines, line)
- }
- stdoutMut.Unlock()
- dst.Write([]byte(line))
- }
- }
- func restartMonitor(args []string) error {
- if runtime.GOOS != "windows" {
- // syscall.Exec is the cleanest way to restart on Unixes as it
- // replaces the current process with the new one, keeping the pid and
- // controlling terminal and so on
- return restartMonitorUnix(args)
- }
- // but it isn't supported on Windows, so there we start a normal
- // exec.Command and return.
- return restartMonitorWindows(args)
- }
- func restartMonitorUnix(args []string) error {
- if !strings.ContainsRune(args[0], os.PathSeparator) {
- // The path to the binary doesn't contain a slash, so it should be
- // found in $PATH.
- binary, err := exec.LookPath(args[0])
- if err != nil {
- return err
- }
- args[0] = binary
- }
- return syscall.Exec(args[0], args, os.Environ())
- }
- func restartMonitorWindows(args []string) error {
- cmd := exec.Command(args[0], args[1:]...)
- // Retain the standard streams
- cmd.Stderr = os.Stderr
- cmd.Stdout = os.Stdout
- cmd.Stdin = os.Stdin
- return cmd.Start()
- }
- // rotatedFile keeps a set of rotating logs. There will be the base file plus up
- // to maxFiles rotated ones, each ~ maxSize bytes large.
- type rotatedFile struct {
- name string
- create createFn
- maxSize int64 // bytes
- maxFiles int
- currentFile io.WriteCloser
- currentSize int64
- }
- // the createFn should act equivalently to os.Create
- type createFn func(name string) (io.WriteCloser, error)
- func newRotatedFile(name string, create createFn, maxSize int64, maxFiles int) *rotatedFile {
- return &rotatedFile{
- name: name,
- create: create,
- maxSize: maxSize,
- maxFiles: maxFiles,
- }
- }
- func (r *rotatedFile) Write(bs []byte) (int, error) {
- // Check if we're about to exceed the max size, and if so close this
- // file so we'll start on a new one.
- if r.currentSize+int64(len(bs)) > r.maxSize {
- r.currentFile.Close()
- r.currentFile = nil
- r.currentSize = 0
- }
- // If we have no current log, rotate old files out of the way and create
- // a new one.
- if r.currentFile == nil {
- r.rotate()
- fd, err := r.create(r.name)
- if err != nil {
- return 0, err
- }
- r.currentFile = fd
- }
- n, err := r.currentFile.Write(bs)
- r.currentSize += int64(n)
- return n, err
- }
- func (r *rotatedFile) rotate() {
- // The files are named "name", "name.0", "name.1", ...
- // "name.(r.maxFiles-1)". Increase the numbers on the
- // suffixed ones.
- for i := r.maxFiles - 1; i > 0; i-- {
- from := numberedFile(r.name, i-1)
- to := numberedFile(r.name, i)
- err := os.Rename(from, to)
- if err != nil && !os.IsNotExist(err) {
- fmt.Println("LOG: Rotating logs:", err)
- }
- }
- // Rename the base to base.0
- err := os.Rename(r.name, numberedFile(r.name, 0))
- if err != nil && !os.IsNotExist(err) {
- fmt.Println("LOG: Rotating logs:", err)
- }
- }
- // numberedFile adds the number between the file name and the extension.
- func numberedFile(name string, num int) string {
- ext := filepath.Ext(name) // contains the dot
- withoutExt := name[:len(name)-len(ext)]
- return fmt.Sprintf("%s.%d%s", withoutExt, num, ext)
- }
- // An autoclosedFile is an io.WriteCloser that opens itself for appending on
- // Write() and closes itself after an interval of no writes (closeDelay) or
- // when the file has been open for too long (maxOpenTime). A call to Write()
- // will return any error that happens on the resulting Open() call too. Errors
- // on automatic Close() calls are silently swallowed...
- type autoclosedFile struct {
- name string // path to write to
- closeDelay time.Duration // close after this long inactivity
- maxOpenTime time.Duration // or this long after opening
- fd io.WriteCloser // underlying WriteCloser
- opened time.Time // timestamp when the file was last opened
- closed chan struct{} // closed on Close(), stops the closerLoop
- closeTimer *time.Timer // fires closeDelay after a write
- mut sync.Mutex
- }
- func newAutoclosedFile(name string, closeDelay, maxOpenTime time.Duration) *autoclosedFile {
- f := &autoclosedFile{
- name: name,
- closeDelay: closeDelay,
- maxOpenTime: maxOpenTime,
- mut: sync.NewMutex(),
- closed: make(chan struct{}),
- closeTimer: time.NewTimer(time.Minute),
- }
- go f.closerLoop()
- return f
- }
- func (f *autoclosedFile) Write(bs []byte) (int, error) {
- f.mut.Lock()
- defer f.mut.Unlock()
- // Make sure the file is open for appending
- if err := f.ensureOpen(); err != nil {
- return 0, err
- }
- // If we haven't run into the maxOpenTime, postpone close for another
- // closeDelay
- if time.Since(f.opened) < f.maxOpenTime {
- f.closeTimer.Reset(f.closeDelay)
- }
- return f.fd.Write(bs)
- }
- func (f *autoclosedFile) Close() error {
- f.mut.Lock()
- defer f.mut.Unlock()
- // Stop the timer and closerLoop() routine
- f.closeTimer.Stop()
- close(f.closed)
- // Close the file, if it's open
- if f.fd != nil {
- return f.fd.Close()
- }
- return nil
- }
- // Must be called with f.mut held!
- func (f *autoclosedFile) ensureOpen() error {
- if f.fd != nil {
- // File is already open
- return nil
- }
- // We open the file for write only, and create it if it doesn't exist.
- flags := os.O_WRONLY | os.O_CREATE
- if f.opened.IsZero() {
- // This is the first time we are opening the file. We should truncate
- // it to better emulate an os.Create() call.
- flags |= os.O_TRUNC
- } else {
- // The file was already opened once, so we should append to it.
- flags |= os.O_APPEND
- }
- fd, err := os.OpenFile(f.name, flags, 0644)
- if err != nil {
- return err
- }
- f.fd = fd
- f.opened = time.Now()
- return nil
- }
- func (f *autoclosedFile) closerLoop() {
- for {
- select {
- case <-f.closeTimer.C:
- // Close the file when the timer expires.
- f.mut.Lock()
- if f.fd != nil {
- f.fd.Close() // errors, schmerrors
- f.fd = nil
- }
- f.mut.Unlock()
- case <-f.closed:
- return
- }
- }
- }
- // Returns the desired child environment, properly filtered and added to.
- func childEnv() []string {
- var env []string
- for _, str := range os.Environ() {
- if strings.HasPrefix(str, "STNORESTART=") {
- continue
- }
- if strings.HasPrefix(str, "STMONITORED=") {
- continue
- }
- env = append(env, str)
- }
- env = append(env, "STMONITORED=yes")
- return env
- }
- // maybeReportPanics tries to figure out if crash reporting is on or off,
- // and reports any panics it can find if it's enabled. We spend at most
- // panicUploadMaxWait uploading panics...
- func maybeReportPanics() {
- // Try to get a config to see if/where panics should be reported.
- cfg, err := loadOrDefaultConfig(protocol.EmptyDeviceID, events.NoopLogger)
- if err != nil {
- l.Warnln("Couldn't load config; not reporting crash")
- return
- }
- // Bail if we're not supposed to report panics.
- opts := cfg.Options()
- if !opts.CREnabled {
- return
- }
- // Set up a timeout on the whole operation.
- ctx, cancel := context.WithTimeout(context.Background(), panicUploadMaxWait)
- defer cancel()
- // Print a notice if the upload takes a long time.
- go func() {
- select {
- case <-ctx.Done():
- return
- case <-time.After(panicUploadNoticeWait):
- l.Warnln("Uploading crash reports is taking a while, please wait...")
- }
- }()
- // Report the panics.
- dir := locations.GetBaseDir(locations.ConfigBaseDir)
- uploadPanicLogs(ctx, opts.CRURL, dir)
- }
|