failurereporting.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. // Copyright (C) 2020 The Syncthing Authors.
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this file,
  5. // You can obtain one at https://mozilla.org/MPL/2.0/.
  6. package ur
  7. import (
  8. "bytes"
  9. "context"
  10. "encoding/json"
  11. "net/http"
  12. "time"
  13. "github.com/syncthing/syncthing/lib/build"
  14. "github.com/syncthing/syncthing/lib/config"
  15. "github.com/syncthing/syncthing/lib/dialer"
  16. "github.com/syncthing/syncthing/lib/events"
  17. "github.com/syncthing/syncthing/lib/svcutil"
  18. "github.com/thejerf/suture/v4"
  19. )
  20. var (
  21. // When a specific failure first occurs, it is delayed by minDelay. If
  22. // more of the same failures occurs those are further delayed and
  23. // aggregated for maxDelay.
  24. minDelay = 10 * time.Second
  25. maxDelay = time.Minute
  26. sendTimeout = time.Minute
  27. finalSendTimeout = svcutil.ServiceTimeout / 2
  28. evChanClosed = "failure event channel closed"
  29. invalidEventDataType = "failure event data is not a string"
  30. )
  31. type FailureReport struct {
  32. Description string
  33. Count int
  34. Version string
  35. }
  36. type FailureHandler interface {
  37. suture.Service
  38. config.Committer
  39. }
  40. func NewFailureHandler(cfg config.Wrapper, evLogger events.Logger) FailureHandler {
  41. return &failureHandler{
  42. cfg: cfg,
  43. evLogger: evLogger,
  44. optsChan: make(chan config.OptionsConfiguration),
  45. buf: make(map[string]*failureStat),
  46. }
  47. }
  48. type failureHandler struct {
  49. cfg config.Wrapper
  50. evLogger events.Logger
  51. optsChan chan config.OptionsConfiguration
  52. buf map[string]*failureStat
  53. }
  54. type failureStat struct {
  55. first, last time.Time
  56. count int
  57. }
  58. func (h *failureHandler) Serve(ctx context.Context) error {
  59. cfg := h.cfg.Subscribe(h)
  60. defer h.cfg.Unsubscribe(h)
  61. url, sub, evChan := h.applyOpts(cfg.Options, nil)
  62. var err error
  63. timer := time.NewTimer(minDelay)
  64. resetTimer := make(chan struct{})
  65. for err == nil {
  66. select {
  67. case opts := <-h.optsChan:
  68. url, sub, evChan = h.applyOpts(opts, sub)
  69. case e, ok := <-evChan:
  70. if !ok {
  71. // Just to be safe - shouldn't ever happen, as
  72. // evChan is set to nil when unsubscribing.
  73. h.addReport(evChanClosed, time.Now())
  74. evChan = nil
  75. continue
  76. }
  77. descr, ok := e.Data.(string)
  78. if !ok {
  79. // Same here, shouldn't ever happen.
  80. h.addReport(invalidEventDataType, time.Now())
  81. continue
  82. }
  83. h.addReport(descr, e.Time)
  84. case <-timer.C:
  85. reports := make([]FailureReport, 0, len(h.buf))
  86. now := time.Now()
  87. for descr, stat := range h.buf {
  88. if now.Sub(stat.last) > minDelay || now.Sub(stat.first) > maxDelay {
  89. reports = append(reports, newFailureReport(descr, stat.count))
  90. delete(h.buf, descr)
  91. }
  92. }
  93. if len(reports) > 0 {
  94. // Lets keep process events/configs while it might be timing out for a while
  95. go func() {
  96. sendFailureReports(ctx, reports, url)
  97. select {
  98. case resetTimer <- struct{}{}:
  99. case <-ctx.Done():
  100. }
  101. }()
  102. } else {
  103. timer.Reset(minDelay)
  104. }
  105. case <-resetTimer:
  106. timer.Reset(minDelay)
  107. case <-ctx.Done():
  108. err = ctx.Err()
  109. }
  110. }
  111. if sub != nil {
  112. sub.Unsubscribe()
  113. reports := make([]FailureReport, 0, len(h.buf))
  114. for descr, stat := range h.buf {
  115. reports = append(reports, newFailureReport(descr, stat.count))
  116. }
  117. timeout, cancel := context.WithTimeout(context.Background(), finalSendTimeout)
  118. defer cancel()
  119. sendFailureReports(timeout, reports, url)
  120. }
  121. return err
  122. }
  123. func (h *failureHandler) applyOpts(opts config.OptionsConfiguration, sub events.Subscription) (string, events.Subscription, <-chan events.Event) {
  124. // Sub nil checks just for safety - config updates can be racy.
  125. url := opts.CRURL + "/failure"
  126. if opts.URAccepted > 0 {
  127. if sub == nil {
  128. sub = h.evLogger.Subscribe(events.Failure)
  129. }
  130. return url, sub, sub.C()
  131. }
  132. if sub != nil {
  133. sub.Unsubscribe()
  134. }
  135. return url, nil, nil
  136. }
  137. func (h *failureHandler) addReport(descr string, evTime time.Time) {
  138. if stat, ok := h.buf[descr]; ok {
  139. stat.last = evTime
  140. stat.count++
  141. return
  142. }
  143. h.buf[descr] = &failureStat{
  144. first: evTime,
  145. last: evTime,
  146. count: 1,
  147. }
  148. }
  149. func (h *failureHandler) VerifyConfiguration(_, _ config.Configuration) error {
  150. return nil
  151. }
  152. func (h *failureHandler) CommitConfiguration(from, to config.Configuration) bool {
  153. if from.Options.CREnabled != to.Options.CREnabled || from.Options.CRURL != to.Options.CRURL {
  154. h.optsChan <- to.Options
  155. }
  156. return true
  157. }
  158. func (h *failureHandler) String() string {
  159. return "FailureHandler"
  160. }
  161. func sendFailureReports(ctx context.Context, reports []FailureReport, url string) {
  162. var b bytes.Buffer
  163. if err := json.NewEncoder(&b).Encode(reports); err != nil {
  164. panic(err)
  165. }
  166. client := &http.Client{
  167. Transport: &http.Transport{
  168. DialContext: dialer.DialContext,
  169. Proxy: http.ProxyFromEnvironment,
  170. },
  171. }
  172. reqCtx, reqCancel := context.WithTimeout(ctx, sendTimeout)
  173. defer reqCancel()
  174. req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, url, &b)
  175. if err != nil {
  176. l.Infoln("Failed to send failure report:", err)
  177. return
  178. }
  179. req.Header.Set("Content-Type", "application/json")
  180. resp, err := client.Do(req)
  181. if err != nil {
  182. l.Infoln("Failed to send failure report:", err)
  183. return
  184. }
  185. resp.Body.Close()
  186. return
  187. }
  188. func newFailureReport(descr string, count int) FailureReport {
  189. return FailureReport{
  190. Description: descr,
  191. Count: count,
  192. Version: build.LongVersion,
  193. }
  194. }