
Update suture for data race bug

Jakob Borg 10 years ago
parent commit 3b81d4b8a5

+ 4 - 1
Godeps/Godeps.json

@@ -43,10 +43,12 @@
 		},
 		{
 			"ImportPath": "github.com/syncthing/relaysrv/client",
+			"Comment": "v0.12.0",
 			"Rev": "6e126fb97e2ff566d35f8d8824e86793d22b2147"
 		},
 		{
 			"ImportPath": "github.com/syncthing/relaysrv/protocol",
+			"Comment": "v0.12.0",
 			"Rev": "6e126fb97e2ff566d35f8d8824e86793d22b2147"
 		},
 		{
@@ -55,7 +57,8 @@
 		},
 		{
 			"ImportPath": "github.com/thejerf/suture",
-			"Rev": "860b44045335c64a6d54ac7eed22a3aedfc687c9"
+			"Comment": "v1.0.1",
+			"Rev": "99c1f2d613756768fc4299acd9dc621e11ed3fd7"
 		},
 		{
 			"ImportPath": "github.com/vitrun/qart/coding",

+ 11 - 0
Godeps/_workspace/src/github.com/thejerf/suture/README.md

@@ -37,3 +37,14 @@ easily fit into the OTP paradigm. It ought to someday be considered a good
 idea to distribute libraries that provide some sort of supervisor tree
 functionality out of the box. It is possible to provide this functionality
 without explicitly depending on the Suture library.
+
+Changelog
+---------
+
+suture uses semantic versioning.
+
+1. 1.0.0
+  * Initial release.
+2. 1.0.1
+  * Fixed data race on the .state variable.
+

+ 23 - 4
Godeps/_workspace/src/github.com/thejerf/suture/suture.go

@@ -59,6 +59,7 @@ import (
 	"log"
 	"math"
 	"runtime"
+	"sync"
 	"sync/atomic"
 	"time"
 )
@@ -123,7 +124,6 @@ type Supervisor struct {
 	lastFail         time.Time
 	failures         float64
 	restartQueue     []serviceID
-	state            uint8
 	serviceCounter   serviceID
 	control          chan supervisorMessage
 	resumeTimer      <-chan time.Time
@@ -143,6 +143,9 @@ type Supervisor struct {
 	// a minimal chunk.
 	getNow    func() time.Time
 	getResume func(time.Duration) <-chan time.Time
+
+	sync.Mutex
+	state uint8
 }
 
 // Spec is used to pass arguments to the New function to create a
@@ -373,6 +376,7 @@ func (s *Supervisor) Add(service Service) ServiceToken {
 		supervisor.logBackoff = s.logBackoff
 	}
 
+	s.Lock()
 	if s.state == notRunning {
 		id := s.serviceCounter
 		s.serviceCounter++
@@ -380,8 +384,10 @@ func (s *Supervisor) Add(service Service) ServiceToken {
 		s.services[id] = service
 		s.restartQueue = append(s.restartQueue, id)
 
+		s.Unlock()
 		return ServiceToken{uint64(s.id)<<32 | uint64(id)}
 	}
+	s.Unlock()
 
 	response := make(chan serviceID)
 	s.control <- addService{service, response}
@@ -408,16 +414,19 @@ func (s *Supervisor) Serve() {
 	}
 
 	defer func() {
+		s.Lock()
 		s.state = notRunning
+		s.Unlock()
 	}()
 
+	s.Lock()
 	if s.state != notRunning {
-		// FIXME: Don't explain why I don't need a semaphore, just use one
-		// This doesn't use a semaphore because it's just a sanity check.
+		s.Unlock()
 		panic("Running a supervisor while it is already running?")
 	}
 
 	s.state = normal
+	s.Unlock()
 
 	// for all the services I currently know about, start them
 	for _, id := range s.restartQueue {
@@ -472,7 +481,9 @@ func (s *Supervisor) Serve() {
 			// excessive thrashing
 			// FIXME: Ought to permit some spacing of these functions, rather
 			// than simply hammering through them
+			s.Lock()
 			s.state = normal
+			s.Unlock()
 			s.failures = 0
 			s.logBackoff(s, false)
 			for _, id := range s.restartQueue {
@@ -499,7 +510,9 @@ func (s *Supervisor) handleFailedService(id serviceID, err interface{}, stacktra
 	}
 
 	if s.failures > s.failureThreshold {
+		s.Lock()
 		s.state = paused
+		s.Unlock()
 		s.logBackoff(s, true)
 		s.resumeTimer = s.getResume(s.failureBackoff)
 	}
@@ -511,7 +524,13 @@ func (s *Supervisor) handleFailedService(id serviceID, err interface{}, stacktra
 	// It is possible for a service to be no longer monitored
 	// by the time we get here. In that case, just ignore it.
 	if monitored {
-		if s.state == normal {
+		// this may look dangerous because the state could change, but this
+		// code is only ever run in the one goroutine that is permitted to
+		// change the state, so nothing else will.
+		s.Lock()
+		curState := s.state
+		s.Unlock()
+		if curState == normal {
 			s.runService(failedService, id)
 			s.logFailure(s, failedService, s.failures, s.failureThreshold, true, err, stacktrace)
 		} else {
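
The essence of the fix is visible above: state moves out of the plain field list and is regrouped under an embedded sync.Mutex, and every read and write of it now happens while holding the lock (embedding the mutex is also what lets the patch call s.Lock() directly). A minimal sketch of the same embedded-mutex pattern outside of suture (names are illustrative, not suture's actual API):

package main

import (
	"fmt"
	"sync"
)

const (
	notRunning uint8 = iota
	normal
	paused
)

// supervisor demonstrates the pattern from the fix: the embedded mutex
// guards the field declared below it, and no access bypasses the lock.
type supervisor struct {
	sync.Mutex       // guards state
	state      uint8
}

func (s *supervisor) setState(v uint8) {
	s.Lock()
	s.state = v
	s.Unlock()
}

func (s *supervisor) getState() uint8 {
	s.Lock()
	defer s.Unlock()
	return s.state
}

func main() {
	s := &supervisor{}
	s.setState(normal)
	fmt.Println(s.getState() == normal) // true
}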

+ 1 - 1
Godeps/_workspace/src/github.com/thejerf/suture/suture_simple_test.go

@@ -17,7 +17,7 @@ func (i *Incrementor) Serve() {
 	for {
 		select {
 		case i.next <- i.current:
-			i.current += 1
+			i.current++
 		case <-i.stop:
 			// We sync here just to guarantee the output of "Stopping the service",
 			// so this passes the test reliably.

+ 20 - 4
Godeps/_workspace/src/github.com/thejerf/suture/suture_test.go

@@ -478,6 +478,22 @@ func TestNilSupervisorAdd(t *testing.T) {
 	s.Add(s)
 }
 
+// https://github.com/thejerf/suture/issues/11
+//
+// The purpose of this test is to verify that it does not cause data races,
+// so there are no obvious assertions.
+func TestIssue11(t *testing.T) {
+	t.Parallel()
+
+	s := NewSimple("main")
+	s.ServeBackground()
+
+	subsuper := NewSimple("sub")
+	s.Add(subsuper)
+
+	subsuper.Add(NewService("may cause data race"))
+}
+
 // http://golangtutorials.blogspot.com/2011/10/gotest-unit-testing-and-benchmarking-go.html
 // claims test function are run in the same order as the source file...
 // I'm not sure if this is part of the contract, though. Especially in the
@@ -509,7 +525,7 @@ func (s *FailableService) Serve() {
 		everMultistarted = true
 		panic("Multi-started the same service! " + s.name)
 	}
-	s.existing += 1
+	s.existing++
 
 	s.started <- true
 
@@ -522,13 +538,13 @@ func (s *FailableService) Serve() {
 			case Happy:
 				// Do nothing on purpose. Life is good!
 			case Fail:
-				s.existing -= 1
+				s.existing--
 				if useStopChan {
 					s.stop <- true
 				}
 				return
 			case Panic:
-				s.existing -= 1
+				s.existing--
 				panic("Panic!")
 			case Hang:
 				// or more specifically, "hang until I release you"
@@ -537,7 +553,7 @@ func (s *FailableService) Serve() {
 				useStopChan = true
 			}
 		case <-s.shutdown:
-			s.existing -= 1
+			s.existing--
 			if useStopChan {
 				s.stop <- true
 			}
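
TestIssue11 deliberately has no assertions: the failure it guards against is caught by Go's race detector, so the test is only meaningful when run under go test -race. For reference, a self-contained sketch of the kind of unsynchronized access that v1.0.1 fixes; running it with go run -race should report a data race (illustrative only, not suture code):

package main

import (
	"fmt"
	"time"
)

type racy struct {
	state uint8 // unguarded, as in suture v1.0.0
}

func main() {
	r := &racy{}

	go func() {
		r.state = 1 // concurrent writer, like Serve() setting s.state
	}()

	observed := r.state // concurrent reader, like Add() checking s.state

	time.Sleep(10 * time.Millisecond) // keep main alive so both accesses happen
	fmt.Println(observed)
}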