Bladeren bron

Use a monitor process to handle panics and restarts (fixes #586)

Jakob Borg 11 jaren geleden
bovenliggende
commit
10f0713257
4 gewijzigde bestanden met toevoegingen van 189 en 91 verwijderingen
  1. 2 1
      cmd/syncthing/heapprof.go
  2. 42 89
      cmd/syncthing/main.go
  3. 144 0
      cmd/syncthing/monitor.go
  4. 1 1
      cmd/syncthing/perfstats_unix.go

+ 2 - 1
cmd/syncthing/heapprof.go

@@ -14,7 +14,8 @@ import (
 )
 
 func init() {
-	if os.Getenv("STHEAPPROFILE") != "" {
+	if innerProcess && os.Getenv("STHEAPPROFILE") != "" {
+		l.Debugln("Starting heap profiling")
 		go saveHeapProfiles()
 	}
 }

+ 42 - 89
cmd/syncthing/main.go

@@ -16,7 +16,6 @@ import (
 	"net/http"
 	_ "net/http/pprof"
 	"os"
-	"os/exec"
 	"path/filepath"
 	"regexp"
 	"runtime"
@@ -52,7 +51,15 @@ var (
 	GoArchExtra string // "", "v5", "v6", "v7"
 )
 
+const (
+	exitSuccess            = 0
+	exitError              = 1
+	exitNoUpgradeAvailable = 2
+	exitRestarting         = 3
+)
+
 var l = logger.DefaultLogger
+var innerProcess = os.Getenv("STNORESTART") != ""
 
 func init() {
 	if Version != "unknown-dev" {
@@ -80,10 +87,8 @@ var (
 	confDir      string
 	logFlags     int = log.Ltime
 	rateBucket   *ratelimit.Bucket
-	stop         = make(chan bool)
+	stop         = make(chan int)
 	discoverer   *discover.Discoverer
-	lockConn     *net.TCPListener
-	lockPort     int
 	externalPort int
 	cert         tls.Certificate
 )
@@ -152,16 +157,20 @@ func init() {
 	rand.Seed(time.Now().UnixNano())
 }
 
+// Command line options
+var (
+	reset             bool
+	showVersion       bool
+	doUpgrade         bool
+	doUpgradeCheck    bool
+	noBrowser         bool
+	generateDir       string
+	guiAddress        string
+	guiAuthentication string
+	guiAPIKey         string
+)
+
 func main() {
-	var reset bool
-	var showVersion bool
-	var doUpgrade bool
-	var doUpgradeCheck bool
-	var noBrowser bool
-	var generateDir string
-	var guiAddress string
-	var guiAuthentication string
-	var guiAPIKey string
 	flag.StringVar(&confDir, "home", getDefaultConfDir(), "Set configuration directory")
 	flag.BoolVar(&reset, "reset", false, "Prepare to resync from cluster")
 	flag.BoolVar(&showVersion, "version", false, "Show version")
@@ -216,7 +225,7 @@ func main() {
 
 		if upgrade.CompareVersions(rel.Tag, Version) <= 0 {
 			l.Infof("No upgrade available (current %q >= latest %q).", Version, rel.Tag)
-			os.Exit(2)
+			os.Exit(exitNoUpgradeAvailable)
 		}
 
 		l.Infof("Upgrade available (current %q < latest %q)", Version, rel.Tag)
@@ -233,12 +242,21 @@ func main() {
 		}
 	}
 
-	var err error
-	lockPort, err = getLockPort()
-	if err != nil {
-		l.Fatalln("Opening lock port:", err)
+	if reset {
+		resetRepositories()
+		return
 	}
 
+	if os.Getenv("STNORESTART") != "" {
+		syncthingMain()
+	} else {
+		monitorMain()
+	}
+}
+
+func syncthingMain() {
+	var err error
+
 	if len(os.Getenv("GOGC")) == 0 {
 		debug.SetGCPercent(25)
 	}
@@ -251,7 +269,7 @@ func main() {
 
 	events.Default.Log(events.Starting, map[string]string{"home": confDir})
 
-	if _, err := os.Stat(confDir); err != nil && confDir == getDefaultConfDir() {
+	if _, err = os.Stat(confDir); err != nil && confDir == getDefaultConfDir() {
 		// We are supposed to use the default configuration directory. It
 		// doesn't exist. In the past our default has been ~/.syncthing, so if
 		// that directory exists we move it to the new default location and
@@ -346,15 +364,6 @@ func main() {
 		l.Infof("Edit %s to taste or use the GUI\n", cfgFile)
 	}
 
-	if reset {
-		resetRepositories()
-		return
-	}
-
-	if len(os.Getenv("STRESTART")) > 0 {
-		waitForParentExit()
-	}
-
 	if profiler := os.Getenv("STPROFILER"); len(profiler) > 0 {
 		go func() {
 			l.Debugln("Starting profiler on", profiler)
@@ -585,9 +594,10 @@ nextRepo:
 	events.Default.Log(events.StartupComplete, nil)
 	go generateEvents()
 
-	<-stop
+	code := <-stop
 
 	l.Okln("Exiting")
+	os.Exit(code)
 }
 
 func generateEvents() {
@@ -597,25 +607,6 @@ func generateEvents() {
 	}
 }
 
-func waitForParentExit() {
-	l.Infoln("Waiting for parent to exit...")
-	lockPortStr := os.Getenv("STRESTART")
-	lockPort, err := strconv.Atoi(lockPortStr)
-	if err != nil {
-		l.Warnln("Invalid lock port %q: %v", lockPortStr, err)
-	}
-	// Wait for the listen address to become free, indicating that the parent has exited.
-	for {
-		ln, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", lockPort))
-		if err == nil {
-			ln.Close()
-			break
-		}
-		time.Sleep(250 * time.Millisecond)
-	}
-	l.Infoln("Continuing")
-}
-
 func setupUPnP() {
 	if len(cfg.Options.ListenAddress) == 1 {
 		_, portStr, err := net.SplitHostPort(cfg.Options.ListenAddress[0])
@@ -742,40 +733,12 @@ func archiveLegacyConfig() {
 
 func restart() {
 	l.Infoln("Restarting")
-	if os.Getenv("SMF_FMRI") != "" || os.Getenv("STNORESTART") != "" {
-		// Solaris SMF
-		l.Infoln("Service manager detected; exit instead of restart")
-		stop <- true
-		return
-	}
-
-	env := os.Environ()
-	newEnv := make([]string, 0, len(env))
-	for _, s := range env {
-		if !strings.HasPrefix(s, "STRESTART=") {
-			newEnv = append(newEnv, s)
-		}
-	}
-	newEnv = append(newEnv, fmt.Sprintf("STRESTART=%d", lockPort))
-
-	pgm, err := exec.LookPath(os.Args[0])
-	if err != nil {
-		l.Warnln("Cannot restart:", err)
-		return
-	}
-	proc, err := os.StartProcess(pgm, os.Args, &os.ProcAttr{
-		Env:   newEnv,
-		Files: []*os.File{os.Stdin, os.Stdout, os.Stderr},
-	})
-	if err != nil {
-		l.Fatalln(err)
-	}
-	proc.Release()
-	stop <- true
+	stop <- exitRestarting
 }
 
 func shutdown() {
-	stop <- true
+	l.Infoln("Shutting down")
+	stop <- exitSuccess
 }
 
 var saveConfigCh = make(chan struct{})
@@ -1129,16 +1092,6 @@ func getFreePort(host string, ports ...int) (int, error) {
 	return addr.Port, nil
 }
 
-func getLockPort() (int, error) {
-	var err error
-	lockConn, err = net.ListenTCP("tcp", &net.TCPAddr{IP: net.IP{127, 0, 0, 1}})
-	if err != nil {
-		return 0, err
-	}
-	addr := lockConn.Addr().(*net.TCPAddr)
-	return addr.Port, nil
-}
-
 func overrideGUIConfig(originalCfg config.GUIConfiguration, address, authentication, apikey string) config.GUIConfiguration {
 	// Make a copy of the config
 	cfg := originalCfg

+ 144 - 0
cmd/syncthing/monitor.go

@@ -0,0 +1,144 @@
+package main
+
+import (
+	"bufio"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"sync"
+	"time"
+)
+
+// Buffered copies of the child's stdout, kept so that they can be included
+// in a panic log. monitorMain replaces both slices (capacities 10 and 50)
+// every time it starts a child; copyStdout appends to them and copyStderr
+// reads them when it creates a panic log.
+var (
+	stdoutFirstLines []string // The first 10 lines of stdout
+	stdoutLastLines  []string // The last 50 lines of stdout
+	// stdoutMut guards stdoutFirstLines and stdoutLastLines, which are
+	// accessed from the monitor loop and from both copier goroutines.
+	stdoutMut        sync.Mutex
+)
+
+// Restart loop protection used by monitorMain: it remembers the start times
+// of the countRestarts most recent children and refuses to start another one
+// when the oldest of those starts is less than loopThreshold ago.
+const (
+	countRestarts = 5
+	loopThreshold = 15 * time.Second
+)
+
+// monitorMain runs syncthing as a supervised child process. It sets
+// STNORESTART in the environment (inherited by the child), starts the child
+// with our own command line, relays its stdout and stderr through copyStdout
+// and copyStderr, and starts it again whenever it exits with an error. A
+// clean exit of the child makes monitorMain return. If countRestarts children
+// have been started within loopThreshold, the monitor gives up and exits
+// with exitError.
+func monitorMain() {
+	os.Setenv("STNORESTART", "yes")
+	l.SetPrefix("[monitor] ")
+
+	args := os.Args
+	// Start times of the most recent children, oldest first.
+	var restarts [countRestarts]time.Time
+
+	for {
+		// restarts[0] stays at the zero time until countRestarts children
+		// have been started, so this cannot trigger before then.
+		if t := time.Since(restarts[0]); t < loopThreshold {
+			l.Warnf("%d restarts in %v; not retrying further", countRestarts, t)
+			os.Exit(exitError)
+		}
+
+		// Shift the history left by one and record this start at the end.
+		copy(restarts[0:], restarts[1:])
+		restarts[len(restarts)-1] = time.Now()
+
+		cmd := exec.Command(args[0], args[1:]...)
+
+		stderr, err := cmd.StderrPipe()
+		if err != nil {
+			l.Fatalln(err)
+		}
+
+		stdout, err := cmd.StdoutPipe()
+		if err != nil {
+			l.Fatalln(err)
+		}
+
+		l.Infoln("Starting syncthing")
+		err = cmd.Start()
+		if err != nil {
+			l.Fatalln(err)
+		}
+
+		// Fresh stdout buffers for this child, so that a panic log only
+		// contains output from the process that actually panicked.
+		stdoutMut.Lock()
+		stdoutFirstLines = make([]string, 0, 10)
+		stdoutLastLines = make([]string, 0, 50)
+		stdoutMut.Unlock()
+
+		var wg sync.WaitGroup
+		wg.Add(2)
+		go func() {
+			defer wg.Done()
+			copyStderr(stderr)
+		}()
+		go func() {
+			defer wg.Done()
+			copyStdout(stdout)
+		}()
+
+		// cmd.Wait closes both pipes as soon as the child has exited, so
+		// calling it while the copiers are still reading could cut off the
+		// end of the output (typically the tail of a panic trace). Let the
+		// copiers run to EOF first.
+		wg.Wait()
+
+		err = cmd.Wait()
+		if err == nil {
+			// Successful exit indicates an intentional shutdown
+			return
+		}
+
+		l.Infoln("Syncthing exited:", err)
+		time.Sleep(1 * time.Second)
+	}
+}
+
+// copyStderr relays the child's stderr to our own stderr, line by line,
+// until the stream ends or a read fails. When a line starting with "panic:"
+// or "fatal error:" is seen, a panic log is opened (see openPanicLog); that
+// line and every line after it are written to the log, and the lines after
+// it are no longer echoed to stderr. The log file is closed when the
+// function returns.
+func copyStderr(stderr io.ReadCloser) {
+	br := bufio.NewReader(stderr)
+
+	var panicFd *os.File
+	defer func() {
+		if panicFd != nil {
+			panicFd.Close()
+		}
+	}()
+
+	for {
+		// ReadString returns whatever was read before an error, so a final
+		// line without a trailing newline is still handled below before we
+		// act on the error.
+		line, err := br.ReadString('\n')
+
+		if line != "" {
+			if panicFd == nil {
+				os.Stderr.WriteString(line)
+
+				if strings.HasPrefix(line, "panic:") || strings.HasPrefix(line, "fatal error:") {
+					// Stays nil if the log could not be created; we then
+					// simply keep echoing to stderr.
+					panicFd = openPanicLog()
+				}
+			}
+
+			if panicFd != nil {
+				panicFd.WriteString(line)
+			}
+		}
+
+		if err != nil {
+			if err != io.EOF {
+				l.Warnln("stderr:", err)
+			}
+			return
+		}
+	}
+}
+
+// openPanicLog creates panic-<timestamp>.log in confDir and seeds it with the
+// buffered stdout lines: the first lines, a "..." separator and the last
+// lines. It returns the open file, or nil (after logging a warning) if the
+// file could not be created.
+func openPanicLog() *os.File {
+	fd, err := os.Create(filepath.Join(confDir, time.Now().Format("panic-20060102-150405.log")))
+	if err != nil {
+		l.Warnln("Create panic log:", err)
+		return nil
+	}
+
+	l.Warnf("Panic detected, writing to \"%s\"", fd.Name())
+	l.Warnln("Please create an issue at https://github.com/syncthing/syncthing/issues/ with the panic log attached")
+
+	// Hold the lock only while copying the buffers; it must be released
+	// again or copyStdout and the next restart would block forever.
+	stdoutMut.Lock()
+	for _, out := range stdoutFirstLines {
+		fd.WriteString(out)
+	}
+	fd.WriteString("...\n")
+	for _, out := range stdoutLastLines {
+		fd.WriteString(out)
+	}
+	stdoutMut.Unlock()
+
+	return fd
+}
+
+// copyStdout relays the child's stdout to our own stdout, line by line,
+// until the stream ends or a read fails. Along the way it records the
+// output in the shared buffers: lines go into stdoutFirstLines until that
+// slice is at capacity, and after that into stdoutLastLines, which is kept
+// as a sliding window of the most recent lines.
+func copyStdout(stdout io.ReadCloser) {
+	br := bufio.NewReader(stdout)
+	for {
+		// ReadString returns whatever was read before an error, so a final
+		// line without a trailing newline is still relayed before we act on
+		// the error.
+		line, err := br.ReadString('\n')
+
+		if line != "" {
+			stdoutMut.Lock()
+			if len(stdoutFirstLines) < cap(stdoutFirstLines) {
+				stdoutFirstLines = append(stdoutFirstLines, line)
+			} else {
+				if n := len(stdoutLastLines); n > 0 && n == cap(stdoutLastLines) {
+					// Window is full: shift everything down one slot so the
+					// oldest line is dropped, keeping the backing array (and
+					// thereby the capacity) unchanged.
+					copy(stdoutLastLines, stdoutLastLines[1:])
+					stdoutLastLines = stdoutLastLines[:n-1]
+				}
+				stdoutLastLines = append(stdoutLastLines, line)
+			}
+			stdoutMut.Unlock()
+
+			os.Stdout.WriteString(line)
+		}
+
+		if err != nil {
+			if err != io.EOF {
+				l.Warnln("stdout:", err)
+			}
+			return
+		}
+	}
+}

+ 1 - 1
cmd/syncthing/perfstats_unix.go

@@ -15,7 +15,7 @@ import (
 )
 
 func init() {
-	if os.Getenv("STPERFSTATS") != "" {
+	if innerProcess && os.Getenv("STPERFSTATS") != "" {
 		go savePerfStats(fmt.Sprintf("perfstats-%d.csv", syscall.Getpid()))
 	}
 }