Browse Source

Handle relay connect timeout (fixes #8749) (#8755)

This makes sure the service manager doesn't interpret timeout errors, or any other error, as a signal to stop the service instead of restarting it.

I added it directly to our service utility function, as it may help catch other instances of the same problem... We would typically want timeouts etc. to be retryable errors, unless it is the top-level context that has timed out, and we check for that specifically.
Jakob Borg 2 years ago
parent
commit
abdac2caa2
1 changed file with 22 additions and 2 deletions
  1. 22 2
      lib/svcutil/svcutil.go

+ 22 - 2
lib/svcutil/svcutil.go

@@ -122,7 +122,12 @@ func (s *service) Serve(ctx context.Context) error {
 	s.err = nil
 	s.mut.Unlock()
 
-	err := s.serve(ctx)
+	// The error returned by serve() may well be a network timeout, which as
+	// of Go 1.19 is a context.DeadlineExceeded, which Suture interprets as
+	// a signal to stop the service instead of restarting it. This typically
+	// isn't what we want, so we make sure to remove the context specific
+	// error types unless *our* context is actually cancelled.
+	err := asNonContextError(ctx, s.serve(ctx))
 
 	s.mut.Lock()
 	s.err = err
@@ -139,7 +144,6 @@ func (s *service) Error() error {
 
 func (s *service) String() string {
 	return fmt.Sprintf("Service@%p created by %v", s, s.creator)
-
 }
 
 type doneService func()
@@ -203,3 +207,19 @@ func infoEventHook(l logger.Logger) suture.EventHook {
 		}
 	}
 }
+
+// asNonContextError returns ctx.Err() when ctx is already done, whatever err
+// is (nil included). Otherwise an err that matches context.Canceled or
+// context.DeadlineExceeded via errors.Is is replaced by a plain error carrying
+// the same text plus " (non-context)"; any other err is returned unchanged.
+func asNonContextError(ctx context.Context, err error) error {
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	default:
+	}
+	if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
+		return fmt.Errorf("%s (non-context)", err.Error())
+	}
+	return err
+}