
fix(protocol): avoid deadlock with concurrent connection start and close (#10140)

Jakob Borg, 4 months ago
parent
commit 3bd2bff23b
2 changed files with 41 additions and 11 deletions
  1. lib/model/model.go (+25 -9)
  2. lib/protocol/protocol.go (+16 -2)
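
The deadlock this commit removes is a lock-order inversion: promoteConnections() called conn.Start() while holding the model mutex, while a concurrent Close() on the same connection held the connection's start/stop mutex and called back into the model, which needs the model mutex. A minimal, self-contained sketch of that cycle, using hypothetical stand-in types rather than Syncthing's real model and rawConnection:

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

type Model struct {
	mut sync.Mutex
}

type Conn struct {
	startStopMut sync.Mutex
	model        *Model
}

func (c *Conn) Start() {
	c.startStopMut.Lock() // blocks while Close holds the mutex
	defer c.startStopMut.Unlock()
	// ... launch reader/writer loops ...
}

func (c *Conn) Close() {
	c.startStopMut.Lock()
	defer c.startStopMut.Unlock()
	time.Sleep(10 * time.Millisecond) // widen the race window for the demo
	c.model.Closed()                  // callback into the model while holding startStopMut
}

func (m *Model) Closed() {
	m.mut.Lock() // blocks while promoteConnections holds the mutex
	defer m.mut.Unlock()
	// ... clean up connection state ...
}

func (m *Model) promoteConnections(c *Conn) {
	m.mut.Lock()
	defer m.mut.Unlock()
	time.Sleep(10 * time.Millisecond) // widen the race window for the demo
	c.Start()                         // takes startStopMut while holding m.mut
}

func main() {
	m := &Model{}
	c := &Conn{model: m}
	go c.Close()               // lock order: startStopMut -> m.mut
	go m.promoteConnections(c) // lock order: m.mut -> startStopMut
	time.Sleep(time.Second)
	fmt.Println("both goroutines are now blocked on each other's mutex")
}
```

Breaking either edge of the cycle would suffice; the commit breaks both, as the two diffs below show: the model releases its mutex before touching the connection, and the connection releases its mutex before calling back into the model.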

+ 25 - 9
lib/model/model.go

@@ -2384,8 +2384,14 @@ func (m *model) scheduleConnectionPromotion() {
 // be called after adding new connections, and after closing a primary
 // device connection.
 func (m *model) promoteConnections() {
+	// Slice of actions to take on connections after releasing the main
+	// mutex. We do this so that we do not perform blocking network actions
+	// inside the loop, and also to avoid a possible deadlock with calling
+	// Start() on connections that are already executing a Close() with a
+	// callback into the model...
+	var postLockActions []func()
+
 	m.mut.Lock()
-	defer m.mut.Unlock()
 
 	for deviceID, connIDs := range m.deviceConnIDs {
 		cm, passwords := m.generateClusterConfigRLocked(deviceID)
@@ -2398,11 +2404,13 @@ func (m *model) promoteConnections() {
 			// on where we get ClusterConfigs from the peer.)
 			conn := m.connections[connIDs[0]]
 			l.Debugf("Promoting connection to %s at %s", deviceID.Short(), conn)
-			if conn.Statistics().StartedAt.IsZero() {
-				conn.SetFolderPasswords(passwords)
-				conn.Start()
-			}
-			conn.ClusterConfig(cm)
+			postLockActions = append(postLockActions, func() {
+				if conn.Statistics().StartedAt.IsZero() {
+					conn.SetFolderPasswords(passwords)
+					conn.Start()
+				}
+				conn.ClusterConfig(cm)
+			})
 			m.promotedConnID[deviceID] = connIDs[0]
 		}
 
@@ -2411,12 +2419,20 @@ func (m *model) promoteConnections() {
 		for _, connID := range connIDs[1:] {
 			conn := m.connections[connID]
 			if conn.Statistics().StartedAt.IsZero() {
-				conn.SetFolderPasswords(passwords)
-				conn.Start()
-				conn.ClusterConfig(&protocol.ClusterConfig{Secondary: true})
+				postLockActions = append(postLockActions, func() {
+					conn.SetFolderPasswords(passwords)
+					conn.Start()
+					conn.ClusterConfig(&protocol.ClusterConfig{Secondary: true})
+				})
 			}
 		}
 	}
+
+	m.mut.Unlock()
+
+	for _, action := range postLockActions {
+		action()
+	}
 }
 
 func (m *model) DownloadProgress(conn protocol.Connection, p *protocol.DownloadProgress) error {
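
The model-side half of the fix is the "collect closures under the lock, run them after unlock" pattern. A condensed, runnable illustration of the same shape; Model.items and process() here are hypothetical stand-ins for the connection bookkeeping above:

```go
package main

import (
	"fmt"
	"sync"
)

type Model struct {
	mut   sync.Mutex
	items []string
}

// process stands in for the blocking calls (Start, ClusterConfig) that
// must not run under m.mut.
func process(s string) { fmt.Println("processing", s) }

func (m *Model) flush() {
	var postLockActions []func()

	m.mut.Lock()
	for _, it := range m.items {
		it := it // per-iteration copy, required before Go 1.22
		postLockActions = append(postLockActions, func() {
			process(it) // may block or re-enter the model; safe without the lock
		})
	}
	m.mut.Unlock()

	for _, action := range postLockActions {
		action()
	}
}

func main() {
	m := &Model{items: []string{"a", "b"}}
	m.flush()
}
```

The deferred actions then run against a snapshot taken under the lock; the protocol-side change below makes this safe by turning Start() on an already-closed connection into a no-op.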

+ 16 - 2
lib/protocol/protocol.go

@@ -269,6 +269,15 @@ func newRawConnection(deviceID DeviceID, reader io.Reader, writer io.Writer, clo
 func (c *rawConnection) Start() {
 	c.startStopMut.Lock()
 	defer c.startStopMut.Unlock()
+
+	select {
+	case <-c.closed:
+		// we have already closed the connection before starting processing
+		// on it.
+		return
+	default:
+	}
+
 	c.loopWG.Add(5)
 	go func() {
 		c.readerLoop()
@@ -291,6 +300,7 @@ func (c *rawConnection) Start() {
 		c.pingReceiver()
 		c.loopWG.Done()
 	}()
+
 	c.startTime = time.Now().Truncate(time.Second)
 	close(c.started)
 }
@@ -950,9 +960,9 @@ func (c *rawConnection) Close(err error) {
 
 // internalClose is called if there is an unexpected error during normal operation.
 func (c *rawConnection) internalClose(err error) {
-	c.startStopMut.Lock()
-	defer c.startStopMut.Unlock()
 	c.closeOnce.Do(func() {
+		c.startStopMut.Lock()
+
 		l.Debugf("close connection to %s at %s due to %v", c.deviceID.Short(), c.ConnectionInfo, err)
 		if cerr := c.closer.Close(); cerr != nil {
 			l.Debugf("failed to close underlying conn %s at %s %v:", c.deviceID.Short(), c.ConnectionInfo, cerr)
@@ -974,6 +984,10 @@ func (c *rawConnection) internalClose(err error) {
 			<-c.dispatcherLoopStopped
 		}
 
+		c.startStopMut.Unlock()
+
+		// We don't want to call into the model while holding the
+		// startStopMut.
 		c.model.Closed(err)
 	})
 }
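
The protocol-side half has two parts: Start() now bails out once the closed channel is closed, and internalClose() releases startStopMut before the model callback. A self-contained sketch of the same guard, with a stand-in type rather than the real rawConnection:

```go
package main

import (
	"fmt"
	"sync"
)

type conn struct {
	startStopMut sync.Mutex
	closed       chan struct{}
	closeOnce    sync.Once
	started      bool
}

func newConn() *conn { return &conn{closed: make(chan struct{})} }

func (c *conn) Start() {
	c.startStopMut.Lock()
	defer c.startStopMut.Unlock()

	// Non-blocking receive: if Close already ran, starting now would
	// launch loops on a dead connection, so return instead.
	select {
	case <-c.closed:
		return
	default:
	}
	c.started = true // stands in for launching the loop goroutines
}

func (c *conn) Close() {
	c.closeOnce.Do(func() {
		c.startStopMut.Lock()
		close(c.closed) // any later Start() returns immediately
		c.startStopMut.Unlock()
		// Callbacks that take other locks (the model callback in the
		// real code) belong here, after startStopMut is released.
	})
}

func main() {
	c := newConn()
	c.Close()
	c.Start()
	fmt.Println("started after close:", c.started) // false
}
```

Holding startStopMut around close(c.closed) but not around the callback keeps Start and Close serialized with respect to each other while keeping the model callback outside the mutex, which is exactly what breaks the cycle sketched at the top.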