Browse Source

lib/rc: Fix hangups when Syncthing process ends unexpectedly (#5383)

Simon Frei 6 năm trước cách đây
mục cha
commit
2626143fc5
1 tập tin đã thay đổi với 51 bổ sung45 xóa
  1. 51 45
      lib/rc/rc.go

+ 51 - 45
lib/rc/rc.go

@@ -22,7 +22,6 @@ import (
 	"os/exec"
 	"path/filepath"
 	"strconv"
-	stdsync "sync"
 	"time"
 
 	"github.com/syncthing/syncthing/lib/config"
@@ -40,14 +39,14 @@ type Process struct {
 	addr string
 
 	// Set by eventLoop()
-	eventMut          sync.Mutex
-	id                protocol.DeviceID
-	folders           []string
-	startComplete     bool
-	startCompleteCond *stdsync.Cond
-	stop              bool
-	sequence          map[string]map[string]int64 // Folder ID => Device ID => Sequence
-	done              map[string]bool             // Folder ID => 100%
+	eventMut      sync.Mutex
+	id            protocol.DeviceID
+	folders       []string
+	startComplete chan struct{}
+	stopped       chan struct{}
+	stopErr       error
+	sequence      map[string]map[string]int64 // Folder ID => Device ID => Sequence
+	done          map[string]bool             // Folder ID => 100%
 
 	cmd   *exec.Cmd
 	logfd *os.File
@@ -57,12 +56,13 @@ type Process struct {
 // Example: NewProcess("127.0.0.1:8082")
 func NewProcess(addr string) *Process {
 	p := &Process{
-		addr:     addr,
-		sequence: make(map[string]map[string]int64),
-		done:     make(map[string]bool),
-		eventMut: sync.NewMutex(),
+		addr:          addr,
+		sequence:      make(map[string]map[string]int64),
+		done:          make(map[string]bool),
+		eventMut:      sync.NewMutex(),
+		startComplete: make(chan struct{}),
+		stopped:       make(chan struct{}),
 	}
-	p.startCompleteCond = stdsync.NewCond(p.eventMut)
 	return p
 }
 
@@ -108,19 +108,30 @@ func (p *Process) Start(bin string, args ...string) error {
 
 	p.cmd = cmd
 	go p.eventLoop()
+	go p.wait()
 
 	return nil
 }
 
+func (p *Process) wait() {
+	p.cmd.Wait()
+
+	if p.logfd != nil {
+		p.stopErr = p.checkForProblems(p.logfd)
+	}
+
+	close(p.stopped)
+}
+
 // AwaitStartup waits for the Syncthing process to start and perform initial
 // scans of all folders.
 func (p *Process) AwaitStartup() {
-	p.eventMut.Lock()
-	for !p.startComplete {
-		p.startCompleteCond.Wait()
+	fmt.Println("awaiting startup")
+	select {
+	case <-p.startComplete:
+	case <-p.stopped:
 	}
-	p.eventMut.Unlock()
-	return
+	fmt.Println("awaited startup")
 }
 
 // Stop stops the running Syncthing process. If the process was logging to a
@@ -128,27 +139,21 @@ func (p *Process) AwaitStartup() {
 // panics and data races. The presence of either will be signalled in the form
 // of a returned error.
 func (p *Process) Stop() (*os.ProcessState, error) {
-	p.eventMut.Lock()
-	if p.stop {
-		p.eventMut.Unlock()
-		return p.cmd.ProcessState, nil
+	select {
+	case <-p.stopped:
+		return p.cmd.ProcessState, p.stopErr
+	default:
 	}
-	p.stop = true
-	p.eventMut.Unlock()
 
 	if _, err := p.Post("/rest/system/shutdown", nil); err != nil && err != io.ErrUnexpectedEOF {
 		// Unexpected EOF is somewhat expected here, as we may exit before
 		// returning something sensible.
 		return nil, err
 	}
-	p.cmd.Wait()
 
-	var err error
-	if p.logfd != nil {
-		err = p.checkForProblems(p.logfd)
-	}
+	<-p.stopped
 
-	return p.cmd.ProcessState, err
+	return p.cmd.ProcessState, p.stopErr
 }
 
 // Get performs an HTTP GET and returns the bytes and/or an error. Any non-200
@@ -403,7 +408,11 @@ func (p *Process) checkForProblems(logfd *os.File) error {
 	raceConditionStart := []byte("WARNING: DATA RACE")
 	raceConditionSep := []byte("==================")
 	panicConditionStart := []byte("panic:")
-	panicConditionSep := []byte(p.id.String()[:5])
+	p.eventMut.Lock()
+	panicConditionSep := []byte("[") // fallback if we don't already know our ID
+	if p.id.String() != "" {
+		panicConditionSep = []byte(p.id.String()[:5])
+	}
 	sc := bufio.NewScanner(fd)
 	race := false
 	_panic := false
@@ -442,12 +451,11 @@ func (p *Process) eventLoop() {
 	notScanned := make(map[string]struct{})
 	start := time.Now()
 	for {
-		p.eventMut.Lock()
-		if p.stop {
-			p.eventMut.Unlock()
+		select {
+		case <-p.stopped:
 			return
+		default:
 		}
-		p.eventMut.Unlock()
 
 		events, err := p.Events(since)
 		if err != nil {
@@ -457,12 +465,11 @@ func (p *Process) eventLoop() {
 			}
 
 			// If we're stopping, no need to print the error.
-			p.eventMut.Lock()
-			if p.stop {
-				p.eventMut.Unlock()
+			select {
+			case <-p.stopped:
 				return
+			default:
 			}
-			p.eventMut.Unlock()
 
 			log.Println("eventLoop: events:", err)
 			continue
@@ -511,17 +518,16 @@ func (p *Process) eventLoop() {
 					panic("race, or lost startup event")
 				}
 
-				if !p.startComplete {
+				select {
+				case <-p.startComplete:
+				default:
 					data := ev.Data.(map[string]interface{})
 					to := data["to"].(string)
 					if to == "idle" {
 						folder := data["folder"].(string)
 						delete(notScanned, folder)
 						if len(notScanned) == 0 {
-							p.eventMut.Lock()
-							p.startComplete = true
-							p.startCompleteCond.Broadcast()
-							p.eventMut.Unlock()
+							close(p.startComplete)
 						}
 					}
 				}