svcutil.go 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. // Copyright (C) 2016 The Syncthing Authors.
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this file,
  5. // You can obtain one at https://mozilla.org/MPL/2.0/.
  6. package svcutil
  7. import (
  8. "context"
  9. "errors"
  10. "fmt"
  11. "log/slog"
  12. "sync"
  13. "time"
  14. "github.com/syncthing/syncthing/internal/slogutil"
  15. "github.com/thejerf/suture/v4"
  16. )
  17. const ServiceTimeout = 10 * time.Second
  18. type FatalErr struct {
  19. Err error
  20. Status ExitStatus
  21. }
  22. // AsFatalErr wraps the given error creating a FatalErr. If the given error
  23. // already is of type FatalErr, it is not wrapped again.
  24. func AsFatalErr(err error, status ExitStatus) *FatalErr {
  25. var ferr *FatalErr
  26. if errors.As(err, &ferr) {
  27. return ferr
  28. }
  29. return &FatalErr{
  30. Err: err,
  31. Status: status,
  32. }
  33. }
  34. func IsFatal(err error) bool {
  35. ferr := &FatalErr{}
  36. return errors.As(err, &ferr)
  37. }
  38. func (e *FatalErr) Error() string {
  39. return e.Err.Error()
  40. }
  41. func (e *FatalErr) Unwrap() error {
  42. return e.Err
  43. }
  44. func (*FatalErr) Is(target error) bool {
  45. return target == suture.ErrTerminateSupervisorTree
  46. }
  47. // NoRestartErr wraps the given error err (which may be nil) to make sure that
  48. // `errors.Is(err, suture.ErrDoNotRestart) == true`.
  49. func NoRestartErr(err error) error {
  50. if err == nil {
  51. return suture.ErrDoNotRestart
  52. }
  53. return &noRestartErr{err}
  54. }
  55. type noRestartErr struct {
  56. err error
  57. }
  58. func (e *noRestartErr) Error() string {
  59. return e.err.Error()
  60. }
  61. func (e *noRestartErr) Unwrap() error {
  62. return e.err
  63. }
  64. func (*noRestartErr) Is(target error) bool {
  65. return target == suture.ErrDoNotRestart
  66. }
  67. type ExitStatus int
  68. const (
  69. ExitSuccess ExitStatus = 0
  70. ExitError ExitStatus = 1
  71. ExitNoUpgradeAvailable ExitStatus = 2
  72. ExitRestart ExitStatus = 3
  73. ExitUpgrade ExitStatus = 4
  74. )
  75. func (s ExitStatus) AsInt() int {
  76. return int(s)
  77. }
  78. type ServiceWithError interface {
  79. suture.Service
  80. fmt.Stringer
  81. Error() error
  82. }
  83. // AsService wraps the given function to implement suture.Service. In addition
  84. // it keeps track of the returned error and allows querying that error.
  85. func AsService(fn func(ctx context.Context) error, creator string) ServiceWithError {
  86. return &service{
  87. creator: creator,
  88. serve: fn,
  89. }
  90. }
  91. type service struct {
  92. creator string
  93. serve func(ctx context.Context) error
  94. err error
  95. mut sync.Mutex
  96. }
  97. func (s *service) Serve(ctx context.Context) error {
  98. s.mut.Lock()
  99. s.err = nil
  100. s.mut.Unlock()
  101. // The error returned by serve() may well be a network timeout, which as
  102. // of Go 1.19 is a context.DeadlineExceeded, which Suture interprets as
  103. // a signal to stop the service instead of restarting it. This typically
  104. // isn't what we want, so we make sure to remove the context specific
  105. // error types unless *our* context is actually cancelled.
  106. err := asNonContextError(ctx, s.serve(ctx))
  107. s.mut.Lock()
  108. s.err = err
  109. s.mut.Unlock()
  110. return err
  111. }
  112. func (s *service) Error() error {
  113. s.mut.Lock()
  114. defer s.mut.Unlock()
  115. return s.err
  116. }
  117. func (s *service) String() string {
  118. return fmt.Sprintf("Service@%p created by %v", s, s.creator)
  119. }
  120. type doneService func()
  121. func (fn doneService) Serve(ctx context.Context) error {
  122. <-ctx.Done()
  123. fn()
  124. return nil
  125. }
  126. // OnSupervisorDone calls fn when sup is done.
  127. func OnSupervisorDone(sup *suture.Supervisor, fn func()) {
  128. sup.Add(doneService(fn))
  129. }
  130. func SpecWithDebugLogger() suture.Spec {
  131. return spec(func(e suture.Event) { slog.Debug(e.String()) })
  132. }
  133. func SpecWithInfoLogger() suture.Spec {
  134. return spec(infoEventHook())
  135. }
  136. func spec(eventHook suture.EventHook) suture.Spec {
  137. return suture.Spec{
  138. EventHook: eventHook,
  139. Timeout: ServiceTimeout,
  140. PassThroughPanics: true,
  141. DontPropagateTermination: false,
  142. }
  143. }
  144. // infoEventHook prints service failures and failures to stop services at level
  145. // info. All other events and identical, consecutive failures are logged at
  146. // debug only.
  147. func infoEventHook() suture.EventHook {
  148. var prevTerminate suture.EventServiceTerminate
  149. return func(ei suture.Event) {
  150. m := ei.Map()
  151. l := slog.Default().With("supervisor", m["supervisor_name"], "service", m["service_name"])
  152. switch e := ei.(type) {
  153. case suture.EventStopTimeout:
  154. l.Warn("Service failed to terminate in a timely manner")
  155. case suture.EventServicePanic:
  156. l.Error("Caught a service panic, which shouldn't happen")
  157. l.Warn(e.String()) //nolint:sloglint
  158. case suture.EventServiceTerminate:
  159. if e.ServiceName == prevTerminate.ServiceName && e.Err == prevTerminate.Err {
  160. l.Debug("Service failed repeatedly", slogutil.Error(e.Err))
  161. } else {
  162. l.Warn("Service failed", slogutil.Error(e.Err))
  163. }
  164. prevTerminate = e
  165. l.Debug(e.String()) // Contains some backoff statistics
  166. case suture.EventBackoff:
  167. l.Debug("Exiting the backoff state")
  168. case suture.EventResume:
  169. l.Debug("Too many service failures - entering the backoff state")
  170. default:
  171. l.Warn("Unknown suture supervisor event", slog.Any("type", e.Type()))
  172. l.Warn(e.String()) //nolint:sloglint
  173. }
  174. }
  175. }
  176. // asNonContextError returns err, except if it is context.Canceled or
  177. // context.DeadlineExceeded in which case the error will be a simple string
  178. // representation instead. The given context is checked for cancellation,
  179. // and if it is cancelled then that error is returned instead of err.
  180. func asNonContextError(ctx context.Context, err error) error {
  181. select {
  182. case <-ctx.Done():
  183. return ctx.Err()
  184. default:
  185. }
  186. if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
  187. return fmt.Errorf("%s (non-context)", err.Error())
  188. }
  189. return err
  190. }
  191. func CallWithContext(ctx context.Context, fn func() error) error {
  192. var err error
  193. done := make(chan struct{})
  194. go func() {
  195. err = fn()
  196. close(done)
  197. }()
  198. select {
  199. case <-done:
  200. return err
  201. case <-ctx.Done():
  202. return ctx.Err()
  203. }
  204. }