service.go 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440
  1. // Copyright (C) 2015 The Syncthing Authors.
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this file,
  5. // You can obtain one at https://mozilla.org/MPL/2.0/.
  6. //go:generate go tool counterfeiter -o mocks/service.go --fake-name Service . Service
  7. package connections
  8. import (
  9. "context"
  10. "crypto/rand"
  11. "crypto/tls"
  12. "crypto/x509"
  13. "encoding/base32"
  14. "encoding/binary"
  15. "errors"
  16. "fmt"
  17. "io"
  18. "log/slog"
  19. "math"
  20. "net"
  21. "net/url"
  22. "slices"
  23. "strings"
  24. "sync"
  25. "time"
  26. "github.com/thejerf/suture/v4"
  27. "github.com/syncthing/syncthing/internal/slogutil"
  28. "github.com/syncthing/syncthing/lib/build"
  29. "github.com/syncthing/syncthing/lib/config"
  30. "github.com/syncthing/syncthing/lib/connections/registry"
  31. "github.com/syncthing/syncthing/lib/discover"
  32. "github.com/syncthing/syncthing/lib/events"
  33. "github.com/syncthing/syncthing/lib/nat"
  34. "github.com/syncthing/syncthing/lib/osutil"
  35. "github.com/syncthing/syncthing/lib/protocol"
  36. "github.com/syncthing/syncthing/lib/semaphore"
  37. "github.com/syncthing/syncthing/lib/sliceutil"
  38. "github.com/syncthing/syncthing/lib/stringutil"
  39. "github.com/syncthing/syncthing/lib/svcutil"
  40. // Registers NAT service providers
  41. _ "github.com/syncthing/syncthing/lib/pmp"
  42. _ "github.com/syncthing/syncthing/lib/upnp"
  43. )
  44. var (
  45. dialers = make(map[string]dialerFactory)
  46. listeners = make(map[string]listenerFactory)
  47. )
  // errUnsupported (or an error wrapping it) is returned by dialers and
  // listeners that are intentionally out of service due to configuration,
  // build, etc. This is not logged loudly.
  var errUnsupported = errors.New("unsupported protocol")

  // Specific explanations for errUnsupported. Both wrap it, so
  // errors.Is(err, errUnsupported) holds for either.
  var (
  	errDisabled   = fmt.Errorf("%w: disabled by configuration", errUnsupported)
  	errDeprecated = fmt.Errorf("%w: deprecated", errUnsupported)
  )

  // Various reasons to reject a connection.
  var (
  	errNetworkNotAllowed      = errors.New("network not allowed")
  	errDeviceAlreadyConnected = errors.New("already connected to this device")
  	errDeviceIgnored          = errors.New("device is ignored")
  	errConnLimitReached       = errors.New("connection limit reached")
  	errDevicePaused           = errors.New("device is paused")
  )

  // errReplacingConnection signals that a connection is being closed to make
  // space for better ones.
  var errReplacingConnection = errors.New("replacing connection")
  // Timing parameters.
  const (
  	perDeviceWarningIntv = 15 * time.Minute
  	tlsHandshakeTimeout  = 10 * time.Second

  	// Lower and upper bounds for the sleep between rounds of the
  	// connection loop.
  	minConnectionLoopSleep = 5 * time.Second
  	stdConnectionLoopSleep = time.Minute

  	recentlySeenCutoff = 7 * 24 * time.Hour

  	// A device whose last connection was shorter than this is flagged as
  	// short-lived when the dial queue is built.
  	shortLivedConnectionThreshold = 5 * time.Second
  )

  // Priorities and limits.
  const (
  	// The worst (numerically highest) dialer priority; used as the
  	// starting point when searching for the best one.
  	worstDialerPriority = math.MaxInt32

  	dialMaxParallel          = 64
  	dialMaxParallelPerDevice = 8

  	// The maximum number of connections we maintain to any given device.
  	maxNumConnections = 128
  )
  // tlsCipherSuiteNames maps TLS cipher suite IDs to their names. The set of
  // suites mirrors go/src/crypto/tls/cipher_suites.go; the keys are written
  // using the crypto/tls constants, which carry the same numeric IDs.
  var tlsCipherSuiteNames = map[uint16]string{
  	// TLS 1.2, RSA key exchange
  	tls.TLS_RSA_WITH_RC4_128_SHA:        "TLS_RSA_WITH_RC4_128_SHA",
  	tls.TLS_RSA_WITH_3DES_EDE_CBC_SHA:   "TLS_RSA_WITH_3DES_EDE_CBC_SHA",
  	tls.TLS_RSA_WITH_AES_128_CBC_SHA:    "TLS_RSA_WITH_AES_128_CBC_SHA",
  	tls.TLS_RSA_WITH_AES_256_CBC_SHA:    "TLS_RSA_WITH_AES_256_CBC_SHA",
  	tls.TLS_RSA_WITH_AES_128_CBC_SHA256: "TLS_RSA_WITH_AES_128_CBC_SHA256",
  	tls.TLS_RSA_WITH_AES_128_GCM_SHA256: "TLS_RSA_WITH_AES_128_GCM_SHA256",
  	tls.TLS_RSA_WITH_AES_256_GCM_SHA384: "TLS_RSA_WITH_AES_256_GCM_SHA384",

  	// TLS 1.2, ECDHE key exchange
  	tls.TLS_ECDHE_ECDSA_WITH_RC4_128_SHA:        "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA",
  	tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA:    "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA",
  	tls.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA:    "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA",
  	tls.TLS_ECDHE_RSA_WITH_RC4_128_SHA:          "TLS_ECDHE_RSA_WITH_RC4_128_SHA",
  	tls.TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA:     "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA",
  	tls.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA:      "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA",
  	tls.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA:      "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA",
  	tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256: "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256",
  	tls.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256:   "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256",
  	tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256:   "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",
  	tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256: "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",
  	tls.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384:   "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",
  	tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384: "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",
  	tls.TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305:    "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305",
  	tls.TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305:  "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305",

  	// TLS 1.3
  	tls.TLS_AES_128_GCM_SHA256:       "TLS_AES_128_GCM_SHA256",
  	tls.TLS_AES_256_GCM_SHA384:       "TLS_AES_256_GCM_SHA384",
  	tls.TLS_CHACHA20_POLY1305_SHA256: "TLS_CHACHA20_POLY1305_SHA256",
  }
  // tlsVersionNames maps TLS protocol version numbers to short names. Only
  // TLS 1.2 and TLS 1.3 have entries; a lookup for any other version yields
  // the empty string.
  var tlsVersionNames = map[uint16]string{
  	tls.VersionTLS13: "TLS1.3",
  	tls.VersionTLS12: "TLS1.2",
  }
  111. // Service listens and dials all configured unconnected devices, via supported
  112. // dialers. Successful connections are handed to the model.
  113. type Service interface {
  114. suture.Service
  115. discover.AddressLister
  116. ListenerStatus() map[string]ListenerStatusEntry
  117. ConnectionStatus() map[string]ConnectionStatusEntry
  118. NATType() string
  119. }
  // ListenerStatusEntry is the JSON-serialisable status of one listener, as
  // returned by Service.ListenerStatus.
  type ListenerStatusEntry struct {
  	// Error is serialised as JSON null when nil.
  	Error *string `json:"error"`

  	LANAddresses []string `json:"lanAddresses"`
  	WANAddresses []string `json:"wanAddresses"`
  }
  // ConnectionStatusEntry is the JSON-serialisable status of one connection
  // attempt, as returned by Service.ConnectionStatus.
  type ConnectionStatusEntry struct {
  	When time.Time `json:"when"`

  	// Error is serialised as JSON null when nil.
  	Error *string `json:"error"`
  }
  129. type connWithHello struct {
  130. c internalConn
  131. hello protocol.Hello
  132. err error
  133. remoteID protocol.DeviceID
  134. remoteCert *x509.Certificate
  135. }
  136. type service struct {
  137. *suture.Supervisor
  138. connectionStatusHandler
  139. deviceConnectionTracker
  140. cfg config.Wrapper
  141. myID protocol.DeviceID
  142. model Model
  143. tlsCfg *tls.Config
  144. discoverer discover.Finder
  145. conns chan internalConn
  146. hellos chan *connWithHello
  147. bepProtocolName string
  148. tlsDefaultCommonName string
  149. limiter *limiter
  150. natService *nat.Service
  151. evLogger events.Logger
  152. registry *registry.Registry
  153. keyGen *protocol.KeyGenerator
  154. lanChecker *lanChecker
  155. dialNow chan struct{}
  156. dialNowDevices map[protocol.DeviceID]struct{}
  157. dialNowDevicesMut sync.Mutex
  158. listenersMut sync.RWMutex
  159. listeners map[string]genericListener
  160. listenerTokens map[string]suture.ServiceToken
  161. }
  162. func NewService(cfg config.Wrapper, myID protocol.DeviceID, mdl Model, tlsCfg *tls.Config, discoverer discover.Finder, bepProtocolName string, tlsDefaultCommonName string, evLogger events.Logger, registry *registry.Registry, keyGen *protocol.KeyGenerator) Service {
  163. spec := svcutil.SpecWithInfoLogger()
  164. service := &service{
  165. Supervisor: suture.New("connections.Service", spec),
  166. connectionStatusHandler: newConnectionStatusHandler(),
  167. cfg: cfg,
  168. myID: myID,
  169. model: mdl,
  170. tlsCfg: tlsCfg,
  171. discoverer: discoverer,
  172. conns: make(chan internalConn),
  173. hellos: make(chan *connWithHello),
  174. bepProtocolName: bepProtocolName,
  175. tlsDefaultCommonName: tlsDefaultCommonName,
  176. limiter: newLimiter(myID, cfg),
  177. natService: nat.NewService(myID, cfg),
  178. evLogger: evLogger,
  179. registry: registry,
  180. keyGen: keyGen,
  181. lanChecker: &lanChecker{cfg},
  182. dialNow: make(chan struct{}, 1),
  183. dialNowDevices: make(map[protocol.DeviceID]struct{}),
  184. listeners: make(map[string]genericListener),
  185. listenerTokens: make(map[string]suture.ServiceToken),
  186. }
  187. cfg.Subscribe(service)
  188. raw := cfg.RawCopy()
  189. // Actually starts the listeners and NAT service
  190. // Need to start this before service.connect so that any dials that
  191. // try punch through already have a listener to cling on.
  192. service.CommitConfiguration(raw, raw)
  193. // There are several moving parts here; one routine per listening address
  194. // (handled in configuration changing) to handle incoming connections,
  195. // one routine to periodically attempt outgoing connections, one routine to
  196. // the common handling regardless of whether the connection was
  197. // incoming or outgoing.
  198. service.Add(svcutil.AsService(service.connect, fmt.Sprintf("%s/connect", service)))
  199. service.Add(svcutil.AsService(service.handleConns, fmt.Sprintf("%s/handleConns", service)))
  200. service.Add(svcutil.AsService(service.handleHellos, fmt.Sprintf("%s/handleHellos", service)))
  201. service.Add(service.natService)
  202. svcutil.OnSupervisorDone(service.Supervisor, func() {
  203. service.cfg.Unsubscribe(service.limiter)
  204. service.cfg.Unsubscribe(service)
  205. })
  206. return service
  207. }
  208. func (s *service) handleConns(ctx context.Context) error {
  209. for {
  210. var c internalConn
  211. select {
  212. case <-ctx.Done():
  213. return ctx.Err()
  214. case c = <-s.conns:
  215. }
  216. cs := c.ConnectionState()
  217. // We should have negotiated the next level protocol "bep/1.0" as part
  218. // of the TLS handshake. Unfortunately this can't be a hard error,
  219. // because there are implementations out there that don't support
  220. // protocol negotiation (iOS for one...).
  221. if cs.NegotiatedProtocol != s.bepProtocolName {
  222. slog.WarnContext(ctx, "Peer at did not negotiate bep/1.0", slogutil.Address(c.RemoteAddr()))
  223. }
  224. // We should have received exactly one certificate from the other
  225. // side. If we didn't, they don't have a device ID and we drop the
  226. // connection.
  227. certs := cs.PeerCertificates
  228. if cl := len(certs); cl != 1 {
  229. slog.WarnContext(ctx, "Got peer certificate list of incorrect length", slog.Int("length", cl), slogutil.Address(c.RemoteAddr()))
  230. c.Close()
  231. continue
  232. }
  233. remoteCert := certs[0]
  234. remoteID := protocol.NewDeviceID(remoteCert.Raw)
  235. // The device ID should not be that of ourselves. It can happen
  236. // though, especially in the presence of NAT hairpinning, multiple
  237. // clients between the same NAT gateway, and global discovery.
  238. if remoteID == s.myID {
  239. slog.DebugContext(ctx, "Connected to myself", "id", remoteID, "addr", c)
  240. c.Close()
  241. continue
  242. }
  243. if err := s.connectionCheckEarly(remoteID, c); err != nil {
  244. slog.DebugContext(ctx, "Connection rejected", remoteID.LogAttr(), slogutil.Address(c.RemoteAddr()), slog.String("type", c.Type()), slogutil.Error(err))
  245. c.Close()
  246. continue
  247. }
  248. _ = c.SetDeadline(time.Now().Add(20 * time.Second))
  249. go func() {
  250. // Exchange Hello messages with the peer.
  251. outgoing := s.helloForDevice(remoteID)
  252. incoming, err := protocol.ExchangeHello(c, outgoing)
  253. // The timestamps are used to create the connection ID.
  254. c.connectionID = newConnectionID(outgoing.Timestamp, incoming.Timestamp)
  255. select {
  256. case s.hellos <- &connWithHello{c, incoming, err, remoteID, remoteCert}:
  257. case <-ctx.Done():
  258. }
  259. }()
  260. }
  261. }
  262. func (s *service) helloForDevice(remoteID protocol.DeviceID) protocol.Hello {
  263. hello := protocol.Hello{
  264. ClientName: "syncthing",
  265. ClientVersion: build.Version,
  266. Timestamp: time.Now().UnixNano(),
  267. }
  268. if cfg, ok := s.cfg.Device(remoteID); ok {
  269. hello.NumConnections = cfg.NumConnections()
  270. // Set our name (from the config of our device ID) only if we
  271. // already know about the other side device ID.
  272. if myCfg, ok := s.cfg.Device(s.myID); ok {
  273. hello.DeviceName = myCfg.Name
  274. }
  275. }
  276. return hello
  277. }
  278. func (s *service) connectionCheckEarly(remoteID protocol.DeviceID, c internalConn) error {
  279. if s.cfg.IgnoredDevice(remoteID) {
  280. return errDeviceIgnored
  281. }
  282. if max := s.cfg.Options().ConnectionLimitMax; max > 0 && s.numConnectedDevices() >= max {
  283. // We're not allowed to accept any more connections.
  284. return errConnLimitReached
  285. }
  286. cfg, ok := s.cfg.Device(remoteID)
  287. if !ok {
  288. // We do go ahead exchanging hello messages to get information about the device.
  289. return nil
  290. }
  291. if cfg.Paused {
  292. return errDevicePaused
  293. }
  294. if len(cfg.AllowedNetworks) > 0 && !IsAllowedNetwork(c.RemoteAddr().String(), cfg.AllowedNetworks) {
  295. // The connection is not from an allowed network.
  296. return errNetworkNotAllowed
  297. }
  298. currentConns := s.numConnectionsForDevice(cfg.DeviceID)
  299. desiredConns := s.desiredConnectionsToDevice(cfg.DeviceID)
  300. worstPrio := s.worstConnectionPriority(remoteID)
  301. ourUpgradeThreshold := c.priority + s.cfg.Options().ConnectionPriorityUpgradeThreshold
  302. if currentConns >= desiredConns && ourUpgradeThreshold >= worstPrio {
  303. l.Debugf("Not accepting connection to %s at %s: already have %d connections, desire %d", remoteID, c, currentConns, desiredConns)
  304. return errDeviceAlreadyConnected
  305. }
  306. return nil
  307. }
  308. func (s *service) handleHellos(ctx context.Context) error {
  309. for {
  310. var c internalConn
  311. var hello protocol.Hello
  312. var err error
  313. var remoteID protocol.DeviceID
  314. var remoteCert *x509.Certificate
  315. select {
  316. case <-ctx.Done():
  317. return ctx.Err()
  318. case withHello := <-s.hellos:
  319. c = withHello.c
  320. hello = withHello.hello
  321. err = withHello.err
  322. remoteID = withHello.remoteID
  323. remoteCert = withHello.remoteCert
  324. }
  325. if err != nil {
  326. if protocol.IsVersionMismatch(err) {
  327. slog.WarnContext(ctx, "Remote device is too old", remoteID.LogAttr(), slogutil.Address(c.RemoteAddr()), slogutil.Error(err))
  328. } else {
  329. // It's something else - connection reset or whatever
  330. slog.WarnContext(ctx, "Failed to exchange Hello messages", remoteID.LogAttr(), slogutil.Address(c.RemoteAddr()), slogutil.Error(err))
  331. }
  332. c.Close()
  333. continue
  334. }
  335. _ = c.SetDeadline(time.Time{})
  336. // The Model will return an error for devices that we don't want to
  337. // have a connection with for whatever reason, for example unknown devices.
  338. if err := s.model.OnHello(remoteID, c.RemoteAddr(), hello); err != nil {
  339. slog.WarnContext(ctx, "Connection rejected", remoteID.LogAttr(), slogutil.Address(c.RemoteAddr()), slog.Any("type", c.Type()), slogutil.Error(err))
  340. c.Close()
  341. continue
  342. }
  343. deviceCfg, ok := s.cfg.Device(remoteID)
  344. if !ok {
  345. slog.WarnContext(ctx, "Device removed from config during connection attempt", remoteID.LogAttr(), slogutil.Address(c.RemoteAddr()))
  346. c.Close()
  347. continue
  348. }
  349. // Verify the name on the certificate. By default we set it to
  350. // "syncthing" when generating, but the user may have replaced
  351. // the certificate and used another name.
  352. certName := deviceCfg.CertName
  353. if certName == "" {
  354. certName = s.tlsDefaultCommonName
  355. }
  356. if remoteCert.Subject.CommonName == certName {
  357. // All good. We do this check because our old style certificates
  358. // have "syncthing" in the CommonName field and no SANs, which
  359. // is not accepted by VerifyHostname() any more as of Go 1.15.
  360. } else if err := remoteCert.VerifyHostname(certName); err != nil {
  361. // Incorrect certificate name is something the user most
  362. // likely wants to know about, since it's an advanced
  363. // config. Warn instead of Info.
  364. slog.ErrorContext(ctx, "Bad certificate from remote", remoteID.LogAttr(), slogutil.Address(c.RemoteAddr()), slogutil.Error(err))
  365. c.Close()
  366. continue
  367. }
  368. // Wrap the connection in rate limiters. The limiter itself will
  369. // keep up with config changes to the rate and whether or not LAN
  370. // connections are limited.
  371. rd, wr := s.limiter.getLimiters(remoteID, c, c.IsLocal())
  372. protoConn := protocol.NewConnection(remoteID, rd, wr, c, s.model, c, deviceCfg.Compression.ToProtocol(), s.keyGen)
  373. s.accountAddedConnection(protoConn, hello, s.cfg.Options().ConnectionPriorityUpgradeThreshold)
  374. go func() {
  375. <-protoConn.Closed()
  376. s.accountRemovedConnection(protoConn)
  377. s.dialNowDevicesMut.Lock()
  378. s.dialNowDevices[remoteID] = struct{}{}
  379. s.scheduleDialNow()
  380. s.dialNowDevicesMut.Unlock()
  381. }()
  382. slog.InfoContext(ctx, "Established secure connection", remoteID.LogAttr(), slog.Any("connection", c))
  383. s.model.AddConnection(protoConn, hello)
  384. continue
  385. }
  386. }
  387. func (s *service) connect(ctx context.Context) error {
  388. // Map of when to earliest dial each given device + address again
  389. nextDialAt := make(nextDialRegistry)
  390. // Used as delay for the first few connection attempts (adjusted up to
  391. // minConnectionLoopSleep), increased exponentially until it reaches
  392. // stdConnectionLoopSleep, at which time the normal sleep mechanism
  393. // kicks in.
  394. initialRampup := time.Second
  395. for {
  396. cfg := s.cfg.RawCopy()
  397. bestDialerPriority := s.bestDialerPriority(cfg)
  398. isInitialRampup := initialRampup < stdConnectionLoopSleep
  399. slog.DebugContext(ctx, "Connection loop")
  400. if isInitialRampup {
  401. slog.DebugContext(ctx, "Connection loop in initial rampup")
  402. }
  403. // Used for consistency throughout this loop run, as time passes
  404. // while we try connections etc.
  405. now := time.Now()
  406. // Attempt to dial all devices that are unconnected or can be connection-upgraded
  407. s.dialDevices(ctx, now, cfg, bestDialerPriority, nextDialAt, isInitialRampup)
  408. var sleep time.Duration
  409. if isInitialRampup {
  410. // We are in the initial rampup time, so we slowly, statically
  411. // increase the sleep time.
  412. sleep = initialRampup
  413. initialRampup *= 2
  414. } else {
  415. // The sleep time is until the next dial scheduled in nextDialAt,
  416. // clamped by stdConnectionLoopSleep as we don't want to sleep too
  417. // long (config changes might happen).
  418. sleep = nextDialAt.sleepDurationAndCleanup(now)
  419. }
  420. // ... while making sure not to loop too quickly either.
  421. if sleep < minConnectionLoopSleep {
  422. sleep = minConnectionLoopSleep
  423. }
  424. l.Debugln("Next connection loop in", sleep)
  425. timeout := time.NewTimer(sleep)
  426. select {
  427. case <-s.dialNow:
  428. // Remove affected devices from nextDialAt to dial immediately,
  429. // regardless of when we last dialed it (there's cool down in the
  430. // registry for too many repeat dials).
  431. s.dialNowDevicesMut.Lock()
  432. for device := range s.dialNowDevices {
  433. nextDialAt.redialDevice(device, now)
  434. }
  435. s.dialNowDevices = make(map[protocol.DeviceID]struct{})
  436. s.dialNowDevicesMut.Unlock()
  437. timeout.Stop()
  438. case <-timeout.C:
  439. case <-ctx.Done():
  440. return ctx.Err()
  441. }
  442. }
  443. }
  444. func (s *service) bestDialerPriority(cfg config.Configuration) int {
  445. bestDialerPriority := worstDialerPriority
  446. for _, df := range dialers {
  447. if df.Valid(cfg) != nil {
  448. continue
  449. }
  450. prio := df.New(cfg.Options, s.tlsCfg, s.registry, s.lanChecker).Priority("127.0.0.1")
  451. if prio < bestDialerPriority {
  452. bestDialerPriority = prio
  453. }
  454. }
  455. return bestDialerPriority
  456. }
  457. func (s *service) dialDevices(ctx context.Context, now time.Time, cfg config.Configuration, bestDialerPriority int, nextDialAt nextDialRegistry, initial bool) {
  458. // Figure out current connection limits up front to see if there's any
  459. // point in resolving devices and such at all.
  460. allowAdditional := 0 // no limit
  461. connectionLimit := cfg.Options.LowestConnectionLimit()
  462. if connectionLimit > 0 {
  463. current := s.numConnectedDevices()
  464. allowAdditional = connectionLimit - current
  465. if allowAdditional <= 0 {
  466. l.Debugf("Skipping dial because we've reached the connection limit, current %d >= limit %d", current, connectionLimit)
  467. return
  468. }
  469. }
  470. // Get device statistics for the last seen time of each device. This
  471. // isn't critical, so ignore the potential error.
  472. stats, _ := s.model.DeviceStatistics()
  473. queue := make(dialQueue, 0, len(cfg.Devices))
  474. for _, deviceCfg := range cfg.Devices {
  475. // Don't attempt to connect to ourselves...
  476. if deviceCfg.DeviceID == s.myID {
  477. continue
  478. }
  479. // Don't attempt to connect to paused devices...
  480. if deviceCfg.Paused {
  481. continue
  482. }
  483. // See if we are already connected and, if so, what our cutoff is
  484. // for dialer priority.
  485. priorityCutoff := worstDialerPriority
  486. if currentConns := s.numConnectionsForDevice(deviceCfg.DeviceID); currentConns > 0 {
  487. // Set the priority cutoff to the current connection's priority,
  488. // so that we don't attempt any dialers with worse priority.
  489. priorityCutoff = s.worstConnectionPriority(deviceCfg.DeviceID)
  490. // Reduce the priority cutoff by the upgrade threshold, so that
  491. // we don't attempt dialers that aren't considered a worthy upgrade.
  492. priorityCutoff -= cfg.Options.ConnectionPriorityUpgradeThreshold
  493. if bestDialerPriority >= priorityCutoff && currentConns >= s.desiredConnectionsToDevice(deviceCfg.DeviceID) {
  494. // Our best dialer is not any better than what we already
  495. // have, and we already have the desired number of
  496. // connections to this device,so nothing to do here.
  497. l.Debugf("Skipping dial to %s because we already have %d connections and our best dialer is not better than %d", deviceCfg.DeviceID.Short(), currentConns, priorityCutoff)
  498. continue
  499. }
  500. }
  501. dialTargets := s.resolveDialTargets(ctx, now, cfg, deviceCfg, nextDialAt, initial, priorityCutoff)
  502. if len(dialTargets) > 0 {
  503. queue = append(queue, dialQueueEntry{
  504. id: deviceCfg.DeviceID,
  505. lastSeen: stats[deviceCfg.DeviceID].LastSeen,
  506. shortLived: stats[deviceCfg.DeviceID].LastConnectionDurationS < shortLivedConnectionThreshold.Seconds(),
  507. targets: dialTargets,
  508. })
  509. }
  510. }
  511. // Sort the queue in an order we think will be useful (most recent
  512. // first, deprioritising unstable devices, randomizing those we haven't
  513. // seen in a long while). If we don't do connection limiting the sorting
  514. // doesn't have much effect, but it may result in getting up and running
  515. // quicker if only a subset of configured devices are actually reachable
  516. // (by prioritizing those that were reachable recently).
  517. queue.Sort()
  518. // Perform dials according to the queue, stopping when we've reached the
  519. // allowed additional number of connections (if limited).
  520. numConns := 0
  521. var numConnsMut sync.Mutex
  522. dialSemaphore := semaphore.New(dialMaxParallel)
  523. dialWG := new(sync.WaitGroup)
  524. dialCtx, dialCancel := context.WithCancel(ctx)
  525. defer func() {
  526. dialWG.Wait()
  527. dialCancel()
  528. }()
  529. for i := range queue {
  530. select {
  531. case <-dialCtx.Done():
  532. return
  533. default:
  534. }
  535. dialWG.Add(1)
  536. go func(entry dialQueueEntry) {
  537. defer dialWG.Done()
  538. conn, ok := s.dialParallel(dialCtx, entry.id, entry.targets, dialSemaphore)
  539. if !ok {
  540. return
  541. }
  542. numConnsMut.Lock()
  543. if allowAdditional == 0 || numConns < allowAdditional {
  544. select {
  545. case s.conns <- conn:
  546. numConns++
  547. if allowAdditional > 0 && numConns >= allowAdditional {
  548. dialCancel()
  549. }
  550. case <-dialCtx.Done():
  551. }
  552. }
  553. numConnsMut.Unlock()
  554. }(queue[i])
  555. }
  556. }
  557. func (s *service) resolveDialTargets(ctx context.Context, now time.Time, cfg config.Configuration, deviceCfg config.DeviceConfiguration, nextDialAt nextDialRegistry, initial bool, priorityCutoff int) []dialTarget {
  558. deviceID := deviceCfg.DeviceID
  559. addrs := s.resolveDeviceAddrs(ctx, deviceCfg)
  560. l.Debugln("Resolved device", deviceID.Short(), "addresses:", addrs)
  561. dialTargets := make([]dialTarget, 0, len(addrs))
  562. for _, addr := range addrs {
  563. // Use both device and address, as you might have two devices connected
  564. // to the same relay
  565. if !initial && nextDialAt.get(deviceID, addr).After(now) {
  566. l.Debugf("Not dialing %s via %v as it's not time yet", deviceID.Short(), addr)
  567. continue
  568. }
  569. // If we fail at any step before actually getting the dialer
  570. // retry in a minute
  571. nextDialAt.set(deviceID, addr, now.Add(time.Minute))
  572. uri, err := url.Parse(addr)
  573. if err != nil {
  574. s.setConnectionStatus(addr, err)
  575. slog.WarnContext(ctx, "Failed to parse dialer address", slogutil.Address(addr), slogutil.Error(err))
  576. continue
  577. }
  578. if len(deviceCfg.AllowedNetworks) > 0 {
  579. if !IsAllowedNetwork(uri.Host, deviceCfg.AllowedNetworks) {
  580. s.setConnectionStatus(addr, errors.New("network disallowed"))
  581. l.Debugln("Network for", uri, "is disallowed")
  582. continue
  583. }
  584. }
  585. dialerFactory, err := getDialerFactory(cfg, uri)
  586. if err != nil {
  587. s.setConnectionStatus(addr, err)
  588. }
  589. if errors.Is(err, errUnsupported) {
  590. l.Debugf("Dialer for %v: %v", uri, err)
  591. continue
  592. } else if err != nil {
  593. slog.WarnContext(ctx, "Failed to get dialer", slogutil.URI(uri), slogutil.Error(err))
  594. continue
  595. }
  596. dialer := dialerFactory.New(s.cfg.Options(), s.tlsCfg, s.registry, s.lanChecker)
  597. priority := dialer.Priority(uri.Host)
  598. currentConns := s.numConnectionsForDevice(deviceCfg.DeviceID)
  599. if priority > priorityCutoff {
  600. l.Debugf("Not dialing %s at %s using %s as priority is worse than current connection (%d > %d)", deviceID.Short(), addr, dialerFactory, priority, priorityCutoff)
  601. continue
  602. }
  603. if currentConns > 0 && !dialer.AllowsMultiConns() {
  604. l.Debugf("Not dialing %s at %s using %s as it does not allow multiple connections and we already have a connection", deviceID.Short(), addr, dialerFactory)
  605. continue
  606. }
  607. if currentConns >= s.desiredConnectionsToDevice(deviceCfg.DeviceID) && priority == priorityCutoff {
  608. l.Debugf("Not dialing %s at %s using %s as priority is equal and we already have %d/%d connections", deviceID.Short(), addr, dialerFactory, currentConns, deviceCfg.NumConnections())
  609. continue
  610. }
  611. nextDialAt.set(deviceID, addr, now.Add(dialer.RedialFrequency()))
  612. dialTargets = append(dialTargets, dialTarget{
  613. addr: addr,
  614. dialer: dialer,
  615. priority: priority,
  616. deviceID: deviceID,
  617. uri: uri,
  618. })
  619. }
  620. return dialTargets
  621. }
  622. func (s *service) resolveDeviceAddrs(ctx context.Context, cfg config.DeviceConfiguration) []string {
  623. var addrs []string
  624. for _, addr := range cfg.Addresses {
  625. if addr == "dynamic" {
  626. if s.discoverer != nil {
  627. if t, err := s.discoverer.Lookup(ctx, cfg.DeviceID); err == nil {
  628. addrs = append(addrs, t...)
  629. }
  630. }
  631. } else {
  632. addrs = append(addrs, addr)
  633. }
  634. }
  635. return stringutil.UniqueTrimmedStrings(addrs)
  636. }
  // lanChecker decides whether an address should be treated as being on the
  // local network, taking the options of the wrapped configuration into
  // account.
  type lanChecker struct {
  	cfg config.Wrapper // isLAN reads Options().AlwaysLocalNets from this
  }
  640. func (s *lanChecker) isLANHost(host string) bool {
  641. // Probably we are called with an ip:port combo which we can resolve as
  642. // a TCP address.
  643. if addr, err := net.ResolveTCPAddr("tcp", host); err == nil {
  644. return s.isLAN(addr)
  645. }
  646. // ... but this function looks general enough that someone might try
  647. // with just an IP as well in the future so lets allow that.
  648. if addr, err := net.ResolveIPAddr("ip", host); err == nil {
  649. return s.isLAN(addr)
  650. }
  651. return false
  652. }
  653. func (s *lanChecker) isLAN(addr net.Addr) bool {
  654. var ip net.IP
  655. switch addr := addr.(type) {
  656. case *net.IPAddr:
  657. ip = addr.IP
  658. case *net.TCPAddr:
  659. ip = addr.IP
  660. case *net.UDPAddr:
  661. ip = addr.IP
  662. default:
  663. // From the standard library, just Unix sockets.
  664. // If you invent your own, handle it.
  665. return false
  666. }
  667. if ip.IsLoopback() {
  668. return true
  669. }
  670. if ip.IsLinkLocalUnicast() {
  671. return true
  672. }
  673. for _, lan := range s.cfg.Options().AlwaysLocalNets {
  674. _, ipnet, err := net.ParseCIDR(lan)
  675. if err != nil {
  676. l.Debugln("Network", lan, "is malformed:", err)
  677. continue
  678. }
  679. if ipnet.Contains(ip) {
  680. return true
  681. }
  682. }
  683. lans, err := osutil.GetInterfaceAddrs(false)
  684. if err != nil {
  685. l.Debugln("Failed to retrieve interface IPs:", err)
  686. priv := ip.IsPrivate()
  687. l.Debugf("Assuming isLAN=%v for IP %v", priv, ip)
  688. return priv
  689. }
  690. for _, lan := range lans {
  691. if lan.Contains(ip) {
  692. return true
  693. }
  694. }
  695. return false
  696. }
  697. func (s *service) createListener(factory listenerFactory, uri *url.URL) bool {
  698. // must be called with listenerMut held
  699. slog.Debug("Starting listener", "uri", uri)
  700. listener := factory.New(uri, s.cfg, s.tlsCfg, s.conns, s.natService, s.registry, s.lanChecker)
  701. listener.OnAddressesChanged(s.logListenAddressesChangedEvent)
  702. // Retrying a listener many times in rapid succession is unlikely to help,
  703. // thus back off quickly. A listener may soon be functional again, e.g. due
  704. // to a network interface coming back online - retry every minute.
  705. spec := svcutil.SpecWithInfoLogger()
  706. spec.FailureThreshold = 2
  707. spec.FailureBackoff = time.Minute
  708. sup := suture.New(fmt.Sprintf("listenerSupervisor@%v", listener), spec)
  709. sup.Add(listener)
  710. s.listeners[uri.String()] = listener
  711. s.listenerTokens[uri.String()] = s.Add(sup)
  712. return true
  713. }
  714. func (s *service) logListenAddressesChangedEvent(l ListenerAddresses) {
  715. s.evLogger.Log(events.ListenAddressesChanged, map[string]interface{}{
  716. "address": l.URI,
  717. "lan": l.LANAddresses,
  718. "wan": l.WANAddresses,
  719. })
  720. }
  721. func (s *service) CommitConfiguration(from, to config.Configuration) bool {
  722. newDevices := make(map[protocol.DeviceID]bool, len(to.Devices))
  723. for _, dev := range to.Devices {
  724. newDevices[dev.DeviceID] = true
  725. registerDeviceMetrics(dev.DeviceID.String())
  726. }
  727. for _, dev := range from.Devices {
  728. if !newDevices[dev.DeviceID] {
  729. metricDeviceActiveConnections.DeleteLabelValues(dev.DeviceID.String())
  730. }
  731. }
  732. s.checkAndSignalConnectLoopOnUpdatedDevices(from, to)
  733. s.listenersMut.Lock()
  734. seen := make(map[string]struct{})
  735. for _, addr := range to.Options.ListenAddresses() {
  736. if addr == "" {
  737. // We can get an empty address if there is an empty listener
  738. // element in the config, indicating no listeners should be
  739. // used. This is not an error.
  740. continue
  741. }
  742. uri, err := url.Parse(addr)
  743. if err != nil {
  744. slog.Error("Skipping malformed listener URL", slogutil.URI(addr), slogutil.Error(err))
  745. continue
  746. }
  747. // Make sure we always have the canonical representation of the URL.
  748. // This is for consistency as we use it as a map key, but also to
  749. // avoid misunderstandings. We do not just use the canonicalized
  750. // version, because an URL that looks very similar to a human might
  751. // mean something entirely different to the computer (e.g.,
  752. // tcp:/127.0.0.1:22000 in fact being equivalent to tcp://:22000).
  753. if canonical := uri.String(); canonical != addr {
  754. slog.Error("Skipping malformed listener URL (not canonical)", slogutil.URI(addr))
  755. continue
  756. }
  757. if _, ok := s.listeners[addr]; ok {
  758. seen[addr] = struct{}{}
  759. continue
  760. }
  761. factory, err := getListenerFactory(to, uri)
  762. if errors.Is(err, errUnsupported) {
  763. l.Debugf("Listener for %v: %v", uri, err)
  764. continue
  765. } else if err != nil {
  766. slog.Warn("Failed to get listener", slogutil.URI(uri), slogutil.Error(err))
  767. continue
  768. }
  769. s.createListener(factory, uri)
  770. seen[addr] = struct{}{}
  771. }
  772. for addr, listener := range s.listeners {
  773. if _, ok := seen[addr]; !ok || listener.Factory().Valid(to) != nil {
  774. l.Debugln("Stopping listener", addr)
  775. s.Remove(s.listenerTokens[addr])
  776. delete(s.listenerTokens, addr)
  777. delete(s.listeners, addr)
  778. }
  779. }
  780. s.listenersMut.Unlock()
  781. return true
  782. }
  783. func (s *service) checkAndSignalConnectLoopOnUpdatedDevices(from, to config.Configuration) {
  784. oldDevices := from.DeviceMap()
  785. dial := false
  786. s.dialNowDevicesMut.Lock()
  787. for _, dev := range to.Devices {
  788. if dev.Paused {
  789. continue
  790. }
  791. if oldDev, ok := oldDevices[dev.DeviceID]; !ok || oldDev.Paused {
  792. s.dialNowDevices[dev.DeviceID] = struct{}{}
  793. dial = true
  794. } else if !slices.Equal(oldDev.Addresses, dev.Addresses) {
  795. dial = true
  796. }
  797. }
  798. if dial {
  799. s.scheduleDialNow()
  800. }
  801. s.dialNowDevicesMut.Unlock()
  802. }
  803. func (s *service) scheduleDialNow() {
  804. select {
  805. case s.dialNow <- struct{}{}:
  806. default:
  807. // channel is blocked - a config update is already pending for the connection loop.
  808. }
  809. }
  810. func (s *service) AllAddresses() []string {
  811. s.listenersMut.RLock()
  812. var addrs []string
  813. for _, listener := range s.listeners {
  814. for _, lanAddr := range listener.LANAddresses() {
  815. addrs = append(addrs, lanAddr.String())
  816. }
  817. for _, wanAddr := range listener.WANAddresses() {
  818. addrs = append(addrs, wanAddr.String())
  819. }
  820. }
  821. s.listenersMut.RUnlock()
  822. return stringutil.UniqueTrimmedStrings(addrs)
  823. }
  824. func (s *service) ExternalAddresses() []string {
  825. if s.cfg.Options().AnnounceLANAddresses {
  826. return s.AllAddresses()
  827. }
  828. s.listenersMut.RLock()
  829. var addrs []string
  830. for _, listener := range s.listeners {
  831. for _, wanAddr := range listener.WANAddresses() {
  832. addrs = append(addrs, wanAddr.String())
  833. }
  834. }
  835. s.listenersMut.RUnlock()
  836. return stringutil.UniqueTrimmedStrings(addrs)
  837. }
  838. func (s *service) ListenerStatus() map[string]ListenerStatusEntry {
  839. result := make(map[string]ListenerStatusEntry)
  840. s.listenersMut.RLock()
  841. for addr, listener := range s.listeners {
  842. var status ListenerStatusEntry
  843. if err := listener.Error(); err != nil {
  844. errStr := err.Error()
  845. status.Error = &errStr
  846. }
  847. status.LANAddresses = urlsToStrings(listener.LANAddresses())
  848. status.WANAddresses = urlsToStrings(listener.WANAddresses())
  849. result[addr] = status
  850. }
  851. s.listenersMut.RUnlock()
  852. return result
  853. }
  // connectionStatusHandler records the most recent status of connection
  // attempts per address. The map is guarded by connectionStatusMut.
  type connectionStatusHandler struct {
  	connectionStatusMut sync.RWMutex                     // protects connectionStatus
  	connectionStatus    map[string]ConnectionStatusEntry // address -> latest error/status
  }
  858. func newConnectionStatusHandler() connectionStatusHandler {
  859. return connectionStatusHandler{
  860. connectionStatus: make(map[string]ConnectionStatusEntry),
  861. }
  862. }
  863. func (s *connectionStatusHandler) ConnectionStatus() map[string]ConnectionStatusEntry {
  864. result := make(map[string]ConnectionStatusEntry)
  865. s.connectionStatusMut.RLock()
  866. for k, v := range s.connectionStatus {
  867. result[k] = v
  868. }
  869. s.connectionStatusMut.RUnlock()
  870. return result
  871. }
  872. func (s *connectionStatusHandler) setConnectionStatus(address string, err error) {
  873. if errors.Is(err, context.Canceled) {
  874. return
  875. }
  876. status := ConnectionStatusEntry{When: time.Now().UTC().Truncate(time.Second)}
  877. if err != nil {
  878. errStr := err.Error()
  879. status.Error = &errStr
  880. }
  881. s.connectionStatusMut.Lock()
  882. s.connectionStatus[address] = status
  883. s.connectionStatusMut.Unlock()
  884. }
  885. func (s *service) NATType() string {
  886. s.listenersMut.RLock()
  887. defer s.listenersMut.RUnlock()
  888. for _, listener := range s.listeners {
  889. natType := listener.NATType()
  890. if natType != "unknown" {
  891. return natType
  892. }
  893. }
  894. return "unknown"
  895. }
  896. func getDialerFactory(cfg config.Configuration, uri *url.URL) (dialerFactory, error) {
  897. dialerFactory, ok := dialers[uri.Scheme]
  898. if !ok {
  899. return nil, fmt.Errorf("unknown address scheme %q", uri.Scheme)
  900. }
  901. if err := dialerFactory.Valid(cfg); err != nil {
  902. return nil, err
  903. }
  904. return dialerFactory, nil
  905. }
  906. func getListenerFactory(cfg config.Configuration, uri *url.URL) (listenerFactory, error) {
  907. listenerFactory, ok := listeners[uri.Scheme]
  908. if !ok {
  909. return nil, fmt.Errorf("unknown address scheme %q", uri.Scheme)
  910. }
  911. if err := listenerFactory.Valid(cfg); err != nil {
  912. return nil, err
  913. }
  914. return listenerFactory, nil
  915. }
  // urlsToStrings returns the string form of each URL, in the same order.
  // The result is never nil, even for empty input.
  func urlsToStrings(urls []*url.URL) []string {
  	out := make([]string, 0, len(urls))
  	for _, u := range urls {
  		out = append(out, u.String())
  	}
  	return out
  }
  923. func tlsTimedHandshake(tc *tls.Conn) error {
  924. tc.SetDeadline(time.Now().Add(tlsHandshakeTimeout))
  925. defer tc.SetDeadline(time.Time{})
  926. return tc.Handshake()
  927. }
  // IsAllowedNetwork returns true if the given host (IP or resolvable
  // hostname, optionally with a port) is in the set of allowed networks
  // (CIDR format only). Entries are checked in order and the first network
  // that contains the address decides: a plain entry allows it, an entry
  // prefixed with "!" denies it. Entries that are not valid CIDR are
  // skipped. If the host cannot be resolved, or no entry matches, the
  // result is false.
  func IsAllowedNetwork(host string, allowed []string) bool {
  	if bare, _, err := net.SplitHostPort(host); err == nil {
  		host = bare
  	}

  	resolved, err := net.ResolveIPAddr("ip", host)
  	if err != nil {
  		return false
  	}

  	for _, entry := range allowed {
  		cidr, negated := strings.CutPrefix(entry, "!")
  		_, network, err := net.ParseCIDR(cidr)
  		if err != nil {
  			continue
  		}
  		if network.Contains(resolved.IP) {
  			return !negated
  		}
  	}
  	return false
  }
  // dialParallel dials the given targets for deviceID, one priority bucket at
  // a time, starting with the numerically lowest priority value. Within a
  // bucket the targets are dialed concurrently, each dial holding one slot
  // of a semaphore sized dialMaxParallelPerDevice combined with parentSema.
  //
  // It returns the first connection of a bucket that was established and
  // passed validateIdentity, together with true. Any further successful
  // connections from that bucket are closed by a background goroutine. If
  // no bucket yields a connection, the zero internalConn and false are
  // returned. The outcome of every dial is recorded via setConnectionStatus.
  func (s *service) dialParallel(ctx context.Context, deviceID protocol.DeviceID, dialTargets []dialTarget, parentSema *semaphore.Semaphore) (internalConn, bool) {
  	// Group targets into buckets by priority
  	dialTargetBuckets := make(map[int][]dialTarget, len(dialTargets))
  	for _, tgt := range dialTargets {
  		dialTargetBuckets[tgt.priority] = append(dialTargetBuckets[tgt.priority], tgt)
  	}

  	// Get all available priorities
  	priorities := make([]int, 0, len(dialTargetBuckets))
  	for prio := range dialTargetBuckets {
  		priorities = append(priorities, prio)
  	}

  	// Sort the priorities so that we dial lowest first (which means highest...)
  	slices.Sort(priorities)

  	sema := semaphore.MultiSemaphore{semaphore.New(dialMaxParallelPerDevice), parentSema}
  	for _, prio := range priorities {
  		tgts := dialTargetBuckets[prio]
  		// Buffered to the bucket size, so that no dialing goroutine ever
  		// blocks on sending its result.
  		res := make(chan internalConn, len(tgts))
  		wg := sync.WaitGroup{}
  		for _, tgt := range tgts {
  			// Taken here, before the goroutine is started; given back by
  			// the goroutine when its dial attempt is done.
  			sema.Take(1)
  			wg.Add(1)
  			go func(tgt dialTarget) {
  				defer func() {
  					wg.Done()
  					sema.Give(1)
  				}()
  				conn, err := tgt.Dial(ctx)
  				if err == nil {
  					// Closes the connection on error
  					err = s.validateIdentity(conn, deviceID)
  				}
  				s.setConnectionStatus(tgt.addr, err)
  				if err != nil {
  					l.Debugln("dialing", deviceID, tgt.uri, "error:", err)
  				} else {
  					l.Debugln("dialing", deviceID, tgt.uri, "success:", conn)
  					res <- conn
  				}
  			}(tgt)
  		}

  		// Spawn a routine which will unblock main routine in case we fail
  		// to connect to anyone.
  		go func() {
  			wg.Wait()
  			close(res)
  		}()

  		// Wait for the first connection, or for channel closure.
  		if conn, ok := <-res; ok {
  			// Got a connection, means more might come back, hence spawn a
  			// routine that will do the discarding.
  			l.Debugln("connected to", deviceID, prio, "using", conn, conn.priority)
  			go func(deviceID protocol.DeviceID, prio int) {
  				wg.Wait()
  				l.Debugln("discarding", len(res), "connections while connecting to", deviceID, prio)
  				// The range ends once the goroutine above has closed res.
  				for conn := range res {
  					conn.Close()
  				}
  			}(deviceID, prio)
  			return conn, ok
  		}

  		// Failed to connect, report that fact.
  		l.Debugln("failed to connect to", deviceID, prio)
  	}
  	return internalConn{}, false
  }
  1019. func (s *service) validateIdentity(c internalConn, expectedID protocol.DeviceID) error {
  1020. cs := c.ConnectionState()
  1021. // We should have received exactly one certificate from the other
  1022. // side. If we didn't, they don't have a device ID and we drop the
  1023. // connection.
  1024. certs := cs.PeerCertificates
  1025. if cl := len(certs); cl != 1 {
  1026. slog.Warn("Got peer certificate list of incorrect length", slog.Int("length", cl), slogutil.Address(c.RemoteAddr()))
  1027. c.Close()
  1028. return fmt.Errorf("expected 1 certificate, got %d", cl)
  1029. }
  1030. remoteCert := certs[0]
  1031. remoteID := protocol.NewDeviceID(remoteCert.Raw)
  1032. // The device ID should not be that of ourselves. It can happen
  1033. // though, especially in the presence of NAT hairpinning, multiple
  1034. // clients between the same NAT gateway, and global discovery.
  1035. if remoteID == s.myID {
  1036. l.Debugf("Connected to myself (%s) at %s", remoteID, c)
  1037. c.Close()
  1038. return errors.New("connected to self")
  1039. }
  1040. // We should see the expected device ID
  1041. if !remoteID.Equals(expectedID) {
  1042. c.Close()
  1043. return fmt.Errorf("unexpected device id, expected %s got %s", expectedID, remoteID)
  1044. }
  1045. return nil
  1046. }
  // nextDialRegistry tracks, per device, when each address may next be
  // dialed, along with the state used to limit forced redials.
  type nextDialRegistry map[protocol.DeviceID]nextDialDevice

  // nextDialDevice is the per-device entry of a nextDialRegistry.
  type nextDialDevice struct {
  	nextDial              map[string]time.Time // address -> time of the next dial, as stored by set
  	coolDownIntervalStart time.Time            // set by redialDevice when a new series of forced redials starts
  	attempts              int                  // forced redials counted by redialDevice since coolDownIntervalStart
  }
  1053. func (r nextDialRegistry) get(device protocol.DeviceID, addr string) time.Time {
  1054. return r[device].nextDial[addr]
  1055. }
  // Limits applied to forced redials by nextDialRegistry.
  const (
  	dialCoolDownInterval    = 2 * time.Minute // window, from the first forced redial, in which attempts are counted
  	dialCoolDownDelay       = 5 * time.Minute // measured from the window start; forced redials are refused until then once the limit is hit
  	dialCoolDownMaxAttempts = 3               // forced redials allowed within one window
  )
  1061. // redialDevice marks the device for immediate redial, unless the remote keeps
  1062. // dropping established connections. Thus we keep track of when the first forced
  1063. // re-dial happened, and how many attempts happen in the dialCoolDownInterval
  1064. // after that. If it's more than dialCoolDownMaxAttempts, don't force-redial
  1065. // that device for dialCoolDownDelay (regular dials still happen).
  1066. func (r nextDialRegistry) redialDevice(device protocol.DeviceID, now time.Time) {
  1067. dev, ok := r[device]
  1068. if !ok {
  1069. r[device] = nextDialDevice{
  1070. nextDial: make(map[string]time.Time),
  1071. coolDownIntervalStart: now,
  1072. attempts: 1,
  1073. }
  1074. return
  1075. }
  1076. if dev.attempts == 0 || now.Before(dev.coolDownIntervalStart.Add(dialCoolDownInterval)) {
  1077. if dev.attempts >= dialCoolDownMaxAttempts {
  1078. // Device has been force redialed too often - let it cool down.
  1079. return
  1080. }
  1081. if dev.attempts == 0 {
  1082. dev.coolDownIntervalStart = now
  1083. }
  1084. dev.attempts++
  1085. dev.nextDial = make(map[string]time.Time)
  1086. r[device] = dev
  1087. return
  1088. }
  1089. if dev.attempts >= dialCoolDownMaxAttempts && now.Before(dev.coolDownIntervalStart.Add(dialCoolDownDelay)) {
  1090. return // Still cooling down
  1091. }
  1092. delete(r, device)
  1093. }
  1094. func (r nextDialRegistry) set(device protocol.DeviceID, addr string, next time.Time) {
  1095. if _, ok := r[device]; !ok {
  1096. r[device] = nextDialDevice{nextDial: make(map[string]time.Time)}
  1097. }
  1098. r[device].nextDial[addr] = next
  1099. }
  1100. func (r nextDialRegistry) sleepDurationAndCleanup(now time.Time) time.Duration {
  1101. sleep := stdConnectionLoopSleep
  1102. for id, dev := range r {
  1103. for address, next := range dev.nextDial {
  1104. if next.Before(now) {
  1105. // Expired entry, address was not seen in last pass(es)
  1106. delete(dev.nextDial, address)
  1107. continue
  1108. }
  1109. if cur := next.Sub(now); cur < sleep {
  1110. sleep = cur
  1111. }
  1112. }
  1113. if dev.attempts > 0 {
  1114. interval := dialCoolDownInterval
  1115. if dev.attempts >= dialCoolDownMaxAttempts {
  1116. interval = dialCoolDownDelay
  1117. }
  1118. if now.After(dev.coolDownIntervalStart.Add(interval)) {
  1119. dev.attempts = 0
  1120. }
  1121. }
  1122. if len(dev.nextDial) == 0 && dev.attempts == 0 {
  1123. delete(r, id)
  1124. }
  1125. }
  1126. return sleep
  1127. }
  1128. func (s *service) desiredConnectionsToDevice(deviceID protocol.DeviceID) int {
  1129. cfg, ok := s.cfg.Device(deviceID)
  1130. if !ok {
  1131. // We want no connections to an unknown device.
  1132. return 0
  1133. }
  1134. otherSide := s.wantConnectionsForDevice(deviceID)
  1135. thisSide := cfg.NumConnections()
  1136. switch {
  1137. case otherSide <= 0:
  1138. // The other side doesn't support multiple connections, or we
  1139. // haven't yet connected to them so we don't know what they support
  1140. // or not. Use a single connection until we know better.
  1141. return 1
  1142. case otherSide == 1:
  1143. // The other side supports multiple connections, but only wants
  1144. // one. We should honour that.
  1145. return 1
  1146. case thisSide == 1:
  1147. // We want only one connection, so we should honour that.
  1148. return 1
  1149. // Finally, we allow negotiation and use the higher of the two values,
  1150. // while keeping at or below the max allowed value.
  1151. default:
  1152. return min(max(thisSide, otherSide), maxNumConnections)
  1153. }
  1154. }
  // The deviceConnectionTracker keeps track of how many devices we are
  // connected to and how many connections we have to each device. It also
  // tracks how many connections they are willing to use. Both maps are
  // created lazily by accountAddedConnection and are guarded by
  // connectionsMut.
  type deviceConnectionTracker struct {
  	connectionsMut  sync.Mutex                                  // protects the maps below
  	connections     map[protocol.DeviceID][]protocol.Connection // current connections
  	wantConnections map[protocol.DeviceID]int                   // number of connections they want
  }
  1163. func (c *deviceConnectionTracker) accountAddedConnection(conn protocol.Connection, h protocol.Hello, upgradeThreshold int) {
  1164. c.connectionsMut.Lock()
  1165. defer c.connectionsMut.Unlock()
  1166. // Lazily initialize the maps
  1167. if c.connections == nil {
  1168. c.connections = make(map[protocol.DeviceID][]protocol.Connection)
  1169. c.wantConnections = make(map[protocol.DeviceID]int)
  1170. }
  1171. // Add the connection to the list of current connections and remember
  1172. // how many total connections they want
  1173. d := conn.DeviceID()
  1174. c.connections[d] = append(c.connections[d], conn)
  1175. c.wantConnections[d] = int(h.NumConnections)
  1176. l.Debugf("Added connection for %s (now %d), they want %d connections", d.Short(), len(c.connections[d]), h.NumConnections)
  1177. // Update active connections metric
  1178. metricDeviceActiveConnections.WithLabelValues(d.String()).Inc()
  1179. // Close any connections we no longer want to retain.
  1180. c.closeWorsePriorityConnectionsLocked(d, conn.Priority()-upgradeThreshold)
  1181. }
  1182. func (c *deviceConnectionTracker) accountRemovedConnection(conn protocol.Connection) {
  1183. c.connectionsMut.Lock()
  1184. defer c.connectionsMut.Unlock()
  1185. d := conn.DeviceID()
  1186. cid := conn.ConnectionID()
  1187. // Remove the connection from the list of current connections
  1188. for i, conn := range c.connections[d] {
  1189. if conn.ConnectionID() == cid {
  1190. c.connections[d] = sliceutil.RemoveAndZero(c.connections[d], i)
  1191. break
  1192. }
  1193. }
  1194. // Clean up if required
  1195. if len(c.connections[d]) == 0 {
  1196. delete(c.connections, d)
  1197. delete(c.wantConnections, d)
  1198. }
  1199. // Update active connections metric
  1200. metricDeviceActiveConnections.WithLabelValues(d.String()).Dec()
  1201. l.Debugf("Removed connection for %s (now %d)", d.Short(), c.connections[d])
  1202. }
  1203. func (c *deviceConnectionTracker) numConnectionsForDevice(d protocol.DeviceID) int {
  1204. c.connectionsMut.Lock()
  1205. defer c.connectionsMut.Unlock()
  1206. return len(c.connections[d])
  1207. }
  1208. func (c *deviceConnectionTracker) wantConnectionsForDevice(d protocol.DeviceID) int {
  1209. c.connectionsMut.Lock()
  1210. defer c.connectionsMut.Unlock()
  1211. return c.wantConnections[d]
  1212. }
  1213. func (c *deviceConnectionTracker) numConnectedDevices() int {
  1214. c.connectionsMut.Lock()
  1215. defer c.connectionsMut.Unlock()
  1216. return len(c.connections)
  1217. }
  1218. func (c *deviceConnectionTracker) worstConnectionPriority(d protocol.DeviceID) int {
  1219. c.connectionsMut.Lock()
  1220. defer c.connectionsMut.Unlock()
  1221. if len(c.connections[d]) == 0 {
  1222. return math.MaxInt // worst possible priority
  1223. }
  1224. worstPriority := c.connections[d][0].Priority()
  1225. for _, conn := range c.connections[d][1:] {
  1226. if p := conn.Priority(); p > worstPriority {
  1227. worstPriority = p
  1228. }
  1229. }
  1230. return worstPriority
  1231. }
  1232. // closeWorsePriorityConnectionsLocked closes all connections to the given
  1233. // device that are worse than the cutoff priority. Must be called with the
  1234. // lock held.
  1235. func (c *deviceConnectionTracker) closeWorsePriorityConnectionsLocked(d protocol.DeviceID, cutoff int) {
  1236. for _, conn := range c.connections[d] {
  1237. if p := conn.Priority(); p > cutoff {
  1238. l.Debugf("Closing connection %s to %s with priority %d (cutoff %d)", conn, d.Short(), p, cutoff)
  1239. go conn.Close(errReplacingConnection)
  1240. }
  1241. }
  1242. }
  // newConnectionID generates a connection ID. The connection ID is designed
  // to be unique for each connection and chronologically sortable. It is
  // based on the sum of two timestamps: when we think the connection was
  // started, and when the other side thinks the connection was started. We
  // then add some random data for good measure. This way, even if the other
  // side does some funny business with the timestamp, we will get no worse
  // than random connection IDs.
  func newConnectionID(t0, t1 int64) string {
  	const timestampLen = 8 // the remaining 8 bytes are random

  	var raw [2 * timestampLen]byte
  	binary.BigEndian.PutUint64(raw[:timestampLen], uint64(t0+t1))
  	_, _ = io.ReadFull(rand.Reader, raw[timestampLen:])

  	// The two halves are encoded separately and then concatenated. The
  	// timestamp (64 bits) doesn't align to the base32 encoding (5 bits per
  	// character), so encoding everything in one go would give a character
  	// in the middle that mixes timestamp and random bits. We want the
  	// timestamp part deterministic.
  	enc := base32.HexEncoding.WithPadding(base32.NoPadding)
  	timestampPart := enc.EncodeToString(raw[:timestampLen])
  	randomPart := enc.EncodeToString(raw[timestampLen:])
  	return timestampPart + randomPart
  }