service.go 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481
  1. // Copyright (C) 2015 The Syncthing Authors.
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this file,
  5. // You can obtain one at https://mozilla.org/MPL/2.0/.
  6. //go:generate -command counterfeiter go run github.com/maxbrunsfeld/counterfeiter/v6
  7. //go:generate counterfeiter -o mocks/service.go --fake-name Service . Service
  8. package connections
  9. import (
  10. "context"
  11. "crypto/rand"
  12. "crypto/tls"
  13. "crypto/x509"
  14. "encoding/base32"
  15. "encoding/binary"
  16. "errors"
  17. "fmt"
  18. "io"
  19. "math"
  20. "net"
  21. "net/url"
  22. "slices"
  23. "strings"
  24. stdsync "sync"
  25. "time"
  26. "github.com/thejerf/suture/v4"
  27. "golang.org/x/time/rate"
  28. "github.com/syncthing/syncthing/lib/build"
  29. "github.com/syncthing/syncthing/lib/config"
  30. "github.com/syncthing/syncthing/lib/connections/registry"
  31. "github.com/syncthing/syncthing/lib/discover"
  32. "github.com/syncthing/syncthing/lib/events"
  33. "github.com/syncthing/syncthing/lib/nat"
  34. "github.com/syncthing/syncthing/lib/osutil"
  35. "github.com/syncthing/syncthing/lib/protocol"
  36. "github.com/syncthing/syncthing/lib/semaphore"
  37. "github.com/syncthing/syncthing/lib/sliceutil"
  38. "github.com/syncthing/syncthing/lib/stringutil"
  39. "github.com/syncthing/syncthing/lib/svcutil"
  40. "github.com/syncthing/syncthing/lib/sync"
  41. // Registers NAT service providers
  42. _ "github.com/syncthing/syncthing/lib/pmp"
  43. _ "github.com/syncthing/syncthing/lib/upnp"
  44. )
var (
	// dialers holds the registered dialer factories by string key.
	// NOTE(review): the key is presumably the address URL scheme; confirm
	// against the code that registers the factories.
	dialers = make(map[string]dialerFactory)
	// listeners holds the registered listener factories by string key
	// (presumably keyed the same way as dialers; confirm).
	listeners = make(map[string]listenerFactory)
)
// Sentinel errors used by the connection service. Compare against them with
// errors.Is, as errDisabled and errDeprecated wrap errUnsupported.
var (
	// Dialers and listeners return errUnsupported (or a wrapped variant)
	// when they are intentionally out of service due to configuration,
	// build, etc. This is not logged loudly.
	errUnsupported = errors.New("unsupported protocol")
	// These are specific explanations for errUnsupported.
	errDisabled   = fmt.Errorf("%w: disabled by configuration", errUnsupported)
	errDeprecated = fmt.Errorf("%w: deprecated", errUnsupported)
	// Various reasons to reject a connection
	errNetworkNotAllowed      = errors.New("network not allowed")
	errDeviceAlreadyConnected = errors.New("already connected to this device")
	errDeviceIgnored          = errors.New("device is ignored")
	errConnLimitReached       = errors.New("connection limit reached")
	errDevicePaused           = errors.New("device is paused")
	// A connection is being closed to make space for better ones
	errReplacingConnection = errors.New("replacing connection")
)
// Tunables for the connection service.
const (
	// NOTE(review): presumably the minimum interval between repeated
	// per-device warnings; the user is outside this part of the file.
	perDeviceWarningIntv = 15 * time.Minute
	tlsHandshakeTimeout  = 10 * time.Second
	// Lower bound for the sleep between two rounds of the connect loop.
	minConnectionLoopSleep = 5 * time.Second
	// The connect loop is in "initial rampup" for as long as its
	// exponentially growing rampup delay is below this value.
	stdConnectionLoopSleep = time.Minute
	// The numerically largest, i.e. least preferred, dialer priority. Used
	// as the starting point when searching for the best priority and as
	// the cutoff when there is no existing connection to compare against.
	worstDialerPriority = math.MaxInt32
	recentlySeenCutoff  = 7 * 24 * time.Hour
	// A device whose last connection lasted less than this is marked as
	// short lived in the dial queue.
	shortLivedConnectionThreshold = 5 * time.Second
	// Capacity of the semaphore shared by all dials in one dial round.
	dialMaxParallel          = 64
	dialMaxParallelPerDevice = 8
	maxNumConnections        = 128 // the maximum number of connections we maintain to any given device
)
// tlsCipherSuiteNames maps TLS cipher suite identifiers to their
// human-readable names.
//
// From go/src/crypto/tls/cipher_suites.go
var tlsCipherSuiteNames = map[uint16]string{
	// TLS 1.2
	0x0005: "TLS_RSA_WITH_RC4_128_SHA",
	0x000a: "TLS_RSA_WITH_3DES_EDE_CBC_SHA",
	0x002f: "TLS_RSA_WITH_AES_128_CBC_SHA",
	0x0035: "TLS_RSA_WITH_AES_256_CBC_SHA",
	0x003c: "TLS_RSA_WITH_AES_128_CBC_SHA256",
	0x009c: "TLS_RSA_WITH_AES_128_GCM_SHA256",
	0x009d: "TLS_RSA_WITH_AES_256_GCM_SHA384",
	0xc007: "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA",
	0xc009: "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA",
	0xc00a: "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA",
	0xc011: "TLS_ECDHE_RSA_WITH_RC4_128_SHA",
	0xc012: "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA",
	0xc013: "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA",
	0xc014: "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA",
	0xc023: "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256",
	0xc027: "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256",
	0xc02f: "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",
	0xc02b: "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",
	0xc030: "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",
	0xc02c: "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",
	0xcca8: "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305",
	0xcca9: "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305",
	// TLS 1.3
	0x1301: "TLS_AES_128_GCM_SHA256",
	0x1302: "TLS_AES_256_GCM_SHA384",
	0x1303: "TLS_CHACHA20_POLY1305_SHA256",
}
// tlsVersionNames maps TLS protocol version identifiers to short display
// names. Only TLS 1.2 and TLS 1.3 have entries.
var tlsVersionNames = map[uint16]string{
	tls.VersionTLS12: "TLS1.2",
	tls.VersionTLS13: "TLS1.3",
}
// Service listens and dials all configured unconnected devices, via supported
// dialers. Successful connections are handed to the model.
type Service interface {
	// Service is itself a supervised service.
	suture.Service
	// The service can list addresses for use by discovery.
	discover.AddressLister
	// ListenerStatus returns a status entry per listener.
	// NOTE(review): the map key is presumably the listener address; confirm.
	ListenerStatus() map[string]ListenerStatusEntry
	// ConnectionStatus returns a status entry per connection attempt.
	// NOTE(review): the map key is presumably the dialed address; confirm.
	ConnectionStatus() map[string]ConnectionStatusEntry
	// NATType returns a textual description of the detected NAT type.
	NATType() string
}
// ListenerStatusEntry describes the state of one listener, as reported by
// Service.ListenerStatus. It is serialized to JSON with the given field names.
type ListenerStatusEntry struct {
	// Error is a pointer so that it serializes as JSON null when unset.
	Error        *string  `json:"error"`
	LANAddresses []string `json:"lanAddresses"`
	WANAddresses []string `json:"wanAddresses"`
}
// ConnectionStatusEntry describes the outcome of a connection attempt, as
// reported by Service.ConnectionStatus. It is serialized to JSON with the
// given field names.
type ConnectionStatusEntry struct {
	When time.Time `json:"when"`
	// Error is a pointer so that it serializes as JSON null when unset.
	Error *string `json:"error"`
}
// connWithHello carries a connection together with the result of the Hello
// exchange from handleConns to handleHellos over the hellos channel.
type connWithHello struct {
	c          internalConn
	hello      protocol.Hello // the Hello received from the peer
	err        error          // error from the Hello exchange, if any
	remoteID   protocol.DeviceID
	remoteCert *x509.Certificate
}
// service is the implementation behind the Service returned by NewService.
type service struct {
	*suture.Supervisor
	connectionStatusHandler
	deviceConnectionTracker

	cfg        config.Wrapper
	myID       protocol.DeviceID
	model      Model
	tlsCfg     *tls.Config
	discoverer discover.Finder
	// conns carries freshly established connections to handleConns.
	conns chan internalConn
	// hellos carries connections that have been through the Hello exchange
	// from handleConns to handleHellos.
	hellos chan *connWithHello
	// bepProtocolName is the protocol name expected to have been
	// negotiated during the TLS handshake.
	bepProtocolName string
	// tlsDefaultCommonName is the certificate name expected from devices
	// that have no explicit certificate name configured.
	tlsDefaultCommonName string
	limiter              *limiter
	natService           *nat.Service
	evLogger             events.Logger
	registry             *registry.Registry
	keyGen               *protocol.KeyGenerator
	lanChecker           *lanChecker

	// dialNow wakes up the connect loop ahead of its timer.
	dialNow chan struct{}
	// dialNowDevices is the set of devices to redial on the next wake-up;
	// guarded by dialNowDevicesMut.
	dialNowDevices    map[protocol.DeviceID]struct{}
	dialNowDevicesMut sync.Mutex

	// NOTE(review): listenersMut presumably guards listeners and
	// listenerTokens; their users are outside this part of the file.
	listenersMut   sync.RWMutex
	listeners      map[string]genericListener
	listenerTokens map[string]suture.ServiceToken
}
  163. func NewService(cfg config.Wrapper, myID protocol.DeviceID, mdl Model, tlsCfg *tls.Config, discoverer discover.Finder, bepProtocolName string, tlsDefaultCommonName string, evLogger events.Logger, registry *registry.Registry, keyGen *protocol.KeyGenerator) Service {
  164. spec := svcutil.SpecWithInfoLogger(l)
  165. service := &service{
  166. Supervisor: suture.New("connections.Service", spec),
  167. connectionStatusHandler: newConnectionStatusHandler(),
  168. cfg: cfg,
  169. myID: myID,
  170. model: mdl,
  171. tlsCfg: tlsCfg,
  172. discoverer: discoverer,
  173. conns: make(chan internalConn),
  174. hellos: make(chan *connWithHello),
  175. bepProtocolName: bepProtocolName,
  176. tlsDefaultCommonName: tlsDefaultCommonName,
  177. limiter: newLimiter(myID, cfg),
  178. natService: nat.NewService(myID, cfg),
  179. evLogger: evLogger,
  180. registry: registry,
  181. keyGen: keyGen,
  182. lanChecker: &lanChecker{cfg},
  183. dialNowDevicesMut: sync.NewMutex(),
  184. dialNow: make(chan struct{}, 1),
  185. dialNowDevices: make(map[protocol.DeviceID]struct{}),
  186. listenersMut: sync.NewRWMutex(),
  187. listeners: make(map[string]genericListener),
  188. listenerTokens: make(map[string]suture.ServiceToken),
  189. }
  190. cfg.Subscribe(service)
  191. raw := cfg.RawCopy()
  192. // Actually starts the listeners and NAT service
  193. // Need to start this before service.connect so that any dials that
  194. // try punch through already have a listener to cling on.
  195. service.CommitConfiguration(raw, raw)
  196. // There are several moving parts here; one routine per listening address
  197. // (handled in configuration changing) to handle incoming connections,
  198. // one routine to periodically attempt outgoing connections, one routine to
  199. // the common handling regardless of whether the connection was
  200. // incoming or outgoing.
  201. service.Add(svcutil.AsService(service.connect, fmt.Sprintf("%s/connect", service)))
  202. service.Add(svcutil.AsService(service.handleConns, fmt.Sprintf("%s/handleConns", service)))
  203. service.Add(svcutil.AsService(service.handleHellos, fmt.Sprintf("%s/handleHellos", service)))
  204. service.Add(service.natService)
  205. svcutil.OnSupervisorDone(service.Supervisor, func() {
  206. service.cfg.Unsubscribe(service.limiter)
  207. service.cfg.Unsubscribe(service)
  208. })
  209. return service
  210. }
  211. func (s *service) handleConns(ctx context.Context) error {
  212. for {
  213. var c internalConn
  214. select {
  215. case <-ctx.Done():
  216. return ctx.Err()
  217. case c = <-s.conns:
  218. }
  219. cs := c.ConnectionState()
  220. // We should have negotiated the next level protocol "bep/1.0" as part
  221. // of the TLS handshake. Unfortunately this can't be a hard error,
  222. // because there are implementations out there that don't support
  223. // protocol negotiation (iOS for one...).
  224. if cs.NegotiatedProtocol != s.bepProtocolName {
  225. l.Infof("Peer at %s did not negotiate bep/1.0", c)
  226. }
  227. // We should have received exactly one certificate from the other
  228. // side. If we didn't, they don't have a device ID and we drop the
  229. // connection.
  230. certs := cs.PeerCertificates
  231. if cl := len(certs); cl != 1 {
  232. l.Infof("Got peer certificate list of length %d != 1 from peer at %s; protocol error", cl, c)
  233. c.Close()
  234. continue
  235. }
  236. remoteCert := certs[0]
  237. remoteID := protocol.NewDeviceID(remoteCert.Raw)
  238. // The device ID should not be that of ourselves. It can happen
  239. // though, especially in the presence of NAT hairpinning, multiple
  240. // clients between the same NAT gateway, and global discovery.
  241. if remoteID == s.myID {
  242. l.Debugf("Connected to myself (%s) at %s", remoteID, c)
  243. c.Close()
  244. continue
  245. }
  246. if err := s.connectionCheckEarly(remoteID, c); err != nil {
  247. if errors.Is(err, errDeviceAlreadyConnected) {
  248. l.Debugf("Connection from %s at %s (%s) rejected: %v", remoteID, c.RemoteAddr(), c.Type(), err)
  249. } else {
  250. l.Infof("Connection from %s at %s (%s) rejected: %v", remoteID, c.RemoteAddr(), c.Type(), err)
  251. }
  252. c.Close()
  253. continue
  254. }
  255. _ = c.SetDeadline(time.Now().Add(20 * time.Second))
  256. go func() {
  257. // Exchange Hello messages with the peer.
  258. outgoing := s.helloForDevice(remoteID)
  259. incoming, err := protocol.ExchangeHello(c, outgoing)
  260. // The timestamps are used to create the connection ID.
  261. c.connectionID = newConnectionID(outgoing.Timestamp, incoming.Timestamp)
  262. select {
  263. case s.hellos <- &connWithHello{c, incoming, err, remoteID, remoteCert}:
  264. case <-ctx.Done():
  265. }
  266. }()
  267. }
  268. }
  269. func (s *service) helloForDevice(remoteID protocol.DeviceID) protocol.Hello {
  270. hello := protocol.Hello{
  271. ClientName: "syncthing",
  272. ClientVersion: build.Version,
  273. Timestamp: time.Now().UnixNano(),
  274. }
  275. if cfg, ok := s.cfg.Device(remoteID); ok {
  276. hello.NumConnections = cfg.NumConnections()
  277. // Set our name (from the config of our device ID) only if we
  278. // already know about the other side device ID.
  279. if myCfg, ok := s.cfg.Device(s.myID); ok {
  280. hello.DeviceName = myCfg.Name
  281. }
  282. }
  283. return hello
  284. }
  285. func (s *service) connectionCheckEarly(remoteID protocol.DeviceID, c internalConn) error {
  286. if s.cfg.IgnoredDevice(remoteID) {
  287. return errDeviceIgnored
  288. }
  289. if max := s.cfg.Options().ConnectionLimitMax; max > 0 && s.numConnectedDevices() >= max {
  290. // We're not allowed to accept any more connections.
  291. return errConnLimitReached
  292. }
  293. cfg, ok := s.cfg.Device(remoteID)
  294. if !ok {
  295. // We do go ahead exchanging hello messages to get information about the device.
  296. return nil
  297. }
  298. if cfg.Paused {
  299. return errDevicePaused
  300. }
  301. if len(cfg.AllowedNetworks) > 0 && !IsAllowedNetwork(c.RemoteAddr().String(), cfg.AllowedNetworks) {
  302. // The connection is not from an allowed network.
  303. return errNetworkNotAllowed
  304. }
  305. currentConns := s.numConnectionsForDevice(cfg.DeviceID)
  306. desiredConns := s.desiredConnectionsToDevice(cfg.DeviceID)
  307. worstPrio := s.worstConnectionPriority(remoteID)
  308. ourUpgradeThreshold := c.priority + s.cfg.Options().ConnectionPriorityUpgradeThreshold
  309. if currentConns >= desiredConns && ourUpgradeThreshold >= worstPrio {
  310. l.Debugf("Not accepting connection to %s at %s: already have %d connections, desire %d", remoteID, c, currentConns, desiredConns)
  311. return errDeviceAlreadyConnected
  312. }
  313. return nil
  314. }
// handleHellos is the second stage of connection handling. It receives
// connections that have been through the Hello exchange from s.hellos, lets
// the model accept or reject the device, verifies the certificate name,
// wraps the connection in rate limiters and finally hands the resulting
// protocol connection to the model. Rejected connections are closed.
//
// It runs until ctx is cancelled and then returns ctx.Err().
func (s *service) handleHellos(ctx context.Context) error {
	for {
		var c internalConn
		var hello protocol.Hello
		var err error
		var remoteID protocol.DeviceID
		var remoteCert *x509.Certificate

		select {
		case <-ctx.Done():
			return ctx.Err()
		case withHello := <-s.hellos:
			c = withHello.c
			hello = withHello.hello
			err = withHello.err
			remoteID = withHello.remoteID
			remoteCert = withHello.remoteCert
		}

		// err is the outcome of the Hello exchange done in handleConns.
		if err != nil {
			if protocol.IsVersionMismatch(err) {
				// The error will be a relatively user friendly description
				// of what's wrong with the version compatibility. By
				// default identify the other side by device ID and IP.
				remote := fmt.Sprintf("%v (%v)", remoteID, c.RemoteAddr())
				if hello.DeviceName != "" {
					// If the name was set in the hello return, use that to
					// give the user more info about which device is the
					// affected one. It probably says more than the remote
					// IP.
					remote = fmt.Sprintf("%q (%s %s, %v)", hello.DeviceName, hello.ClientName, hello.ClientVersion, remoteID)
				}
				msg := fmt.Sprintf("Connecting to %s: %s", remote, err)
				warningFor(remoteID, msg)
			} else {
				// It's something else - connection reset or whatever
				l.Infof("Failed to exchange Hello messages with %s at %s: %s", remoteID, c, err)
			}
			c.Close()
			continue
		}
		// Clear the deadline that was set for the Hello exchange.
		_ = c.SetDeadline(time.Time{})

		// The Model will return an error for devices that we don't want to
		// have a connection with for whatever reason, for example unknown devices.
		if err := s.model.OnHello(remoteID, c.RemoteAddr(), hello); err != nil {
			l.Infof("Connection from %s at %s (%s) rejected: %v", remoteID, c.RemoteAddr(), c.Type(), err)
			c.Close()
			continue
		}

		// The configuration may have changed since the early checks.
		deviceCfg, ok := s.cfg.Device(remoteID)
		if !ok {
			l.Infof("Device %s removed from config during connection attempt at %s", remoteID, c)
			c.Close()
			continue
		}

		// Verify the name on the certificate. By default we set it to
		// "syncthing" when generating, but the user may have replaced
		// the certificate and used another name.
		certName := deviceCfg.CertName
		if certName == "" {
			certName = s.tlsDefaultCommonName
		}
		if remoteCert.Subject.CommonName == certName {
			// All good. We do this check because our old style certificates
			// have "syncthing" in the CommonName field and no SANs, which
			// is not accepted by VerifyHostname() any more as of Go 1.15.
		} else if err := remoteCert.VerifyHostname(certName); err != nil {
			// Incorrect certificate name is something the user most
			// likely wants to know about, since it's an advanced
			// config. Warn instead of Info.
			l.Warnf("Bad certificate from %s at %s: %v", remoteID, c, err)
			c.Close()
			continue
		}

		// Wrap the connection in rate limiters. The limiter itself will
		// keep up with config changes to the rate and whether or not LAN
		// connections are limited.
		rd, wr := s.limiter.getLimiters(remoteID, c, c.IsLocal())

		protoConn := protocol.NewConnection(remoteID, rd, wr, c, s.model, c, deviceCfg.Compression.ToProtocol(), s.cfg.FolderPasswords(remoteID), s.keyGen)
		s.accountAddedConnection(protoConn, hello, s.cfg.Options().ConnectionPriorityUpgradeThreshold)
		go func() {
			// Once the connection closes, stop accounting for it and ask
			// the connect loop to redial the device right away.
			<-protoConn.Closed()
			s.accountRemovedConnection(protoConn)
			s.dialNowDevicesMut.Lock()
			s.dialNowDevices[remoteID] = struct{}{}
			s.scheduleDialNow()
			s.dialNowDevicesMut.Unlock()
		}()

		l.Infof("Established secure connection to %s at %s", remoteID.Short(), c)

		s.model.AddConnection(protoConn, hello)
		continue
	}
}
  406. func (s *service) connect(ctx context.Context) error {
  407. // Map of when to earliest dial each given device + address again
  408. nextDialAt := make(nextDialRegistry)
  409. // Used as delay for the first few connection attempts (adjusted up to
  410. // minConnectionLoopSleep), increased exponentially until it reaches
  411. // stdConnectionLoopSleep, at which time the normal sleep mechanism
  412. // kicks in.
  413. initialRampup := time.Second
  414. for {
  415. cfg := s.cfg.RawCopy()
  416. bestDialerPriority := s.bestDialerPriority(cfg)
  417. isInitialRampup := initialRampup < stdConnectionLoopSleep
  418. l.Debugln("Connection loop")
  419. if isInitialRampup {
  420. l.Debugln("Connection loop in initial rampup")
  421. }
  422. // Used for consistency throughout this loop run, as time passes
  423. // while we try connections etc.
  424. now := time.Now()
  425. // Attempt to dial all devices that are unconnected or can be connection-upgraded
  426. s.dialDevices(ctx, now, cfg, bestDialerPriority, nextDialAt, isInitialRampup)
  427. var sleep time.Duration
  428. if isInitialRampup {
  429. // We are in the initial rampup time, so we slowly, statically
  430. // increase the sleep time.
  431. sleep = initialRampup
  432. initialRampup *= 2
  433. } else {
  434. // The sleep time is until the next dial scheduled in nextDialAt,
  435. // clamped by stdConnectionLoopSleep as we don't want to sleep too
  436. // long (config changes might happen).
  437. sleep = nextDialAt.sleepDurationAndCleanup(now)
  438. }
  439. // ... while making sure not to loop too quickly either.
  440. if sleep < minConnectionLoopSleep {
  441. sleep = minConnectionLoopSleep
  442. }
  443. l.Debugln("Next connection loop in", sleep)
  444. timeout := time.NewTimer(sleep)
  445. select {
  446. case <-s.dialNow:
  447. // Remove affected devices from nextDialAt to dial immediately,
  448. // regardless of when we last dialed it (there's cool down in the
  449. // registry for too many repeat dials).
  450. s.dialNowDevicesMut.Lock()
  451. for device := range s.dialNowDevices {
  452. nextDialAt.redialDevice(device, now)
  453. }
  454. s.dialNowDevices = make(map[protocol.DeviceID]struct{})
  455. s.dialNowDevicesMut.Unlock()
  456. timeout.Stop()
  457. case <-timeout.C:
  458. case <-ctx.Done():
  459. return ctx.Err()
  460. }
  461. }
  462. }
  463. func (s *service) bestDialerPriority(cfg config.Configuration) int {
  464. bestDialerPriority := worstDialerPriority
  465. for _, df := range dialers {
  466. if df.Valid(cfg) != nil {
  467. continue
  468. }
  469. prio := df.New(cfg.Options, s.tlsCfg, s.registry, s.lanChecker).Priority("127.0.0.1")
  470. if prio < bestDialerPriority {
  471. bestDialerPriority = prio
  472. }
  473. }
  474. return bestDialerPriority
  475. }
// dialDevices performs one round of outgoing dials. It builds a queue of
// devices with dial targets, sorts it, and dials the entries in parallel,
// sending successful connections to s.conns. When a connection limit is
// configured it stops handing over connections once the allowed number of
// additional connections is reached. It returns after all dial goroutines
// of the round have finished.
//
// now is the timestamp of the current connect loop round, cfg the
// configuration snapshot for the round, bestDialerPriority the best priority
// of any usable dialer, nextDialAt the redial schedule, and initial tells
// whether the connect loop is still in its initial rampup.
func (s *service) dialDevices(ctx context.Context, now time.Time, cfg config.Configuration, bestDialerPriority int, nextDialAt nextDialRegistry, initial bool) {
	// Figure out current connection limits up front to see if there's any
	// point in resolving devices and such at all.
	allowAdditional := 0 // no limit
	connectionLimit := cfg.Options.LowestConnectionLimit()
	if connectionLimit > 0 {
		current := s.numConnectedDevices()
		allowAdditional = connectionLimit - current
		if allowAdditional <= 0 {
			l.Debugf("Skipping dial because we've reached the connection limit, current %d >= limit %d", current, connectionLimit)
			return
		}
	}

	// Get device statistics for the last seen time of each device. This
	// isn't critical, so ignore the potential error.
	stats, _ := s.model.DeviceStatistics()

	queue := make(dialQueue, 0, len(cfg.Devices))
	for _, deviceCfg := range cfg.Devices {
		// Don't attempt to connect to ourselves...
		if deviceCfg.DeviceID == s.myID {
			continue
		}

		// Don't attempt to connect to paused devices...
		if deviceCfg.Paused {
			continue
		}

		// See if we are already connected and, if so, what our cutoff is
		// for dialer priority.
		priorityCutoff := worstDialerPriority
		if currentConns := s.numConnectionsForDevice(deviceCfg.DeviceID); currentConns > 0 {
			// Set the priority cutoff to the current connection's priority,
			// so that we don't attempt any dialers with worse priority.
			priorityCutoff = s.worstConnectionPriority(deviceCfg.DeviceID)

			// Reduce the priority cutoff by the upgrade threshold, so that
			// we don't attempt dialers that aren't considered a worthy upgrade.
			priorityCutoff -= cfg.Options.ConnectionPriorityUpgradeThreshold

			if bestDialerPriority >= priorityCutoff && currentConns >= s.desiredConnectionsToDevice(deviceCfg.DeviceID) {
				// Our best dialer is not any better than what we already
				// have, and we already have the desired number of
				// connections to this device,so nothing to do here.
				l.Debugf("Skipping dial to %s because we already have %d connections and our best dialer is not better than %d", deviceCfg.DeviceID.Short(), currentConns, priorityCutoff)
				continue
			}
		}

		dialTargets := s.resolveDialTargets(ctx, now, cfg, deviceCfg, nextDialAt, initial, priorityCutoff)
		if len(dialTargets) > 0 {
			// If the statistics lookup failed, stats has no entries and
			// the zero value statistics are used here.
			queue = append(queue, dialQueueEntry{
				id:         deviceCfg.DeviceID,
				lastSeen:   stats[deviceCfg.DeviceID].LastSeen,
				shortLived: stats[deviceCfg.DeviceID].LastConnectionDurationS < shortLivedConnectionThreshold.Seconds(),
				targets:    dialTargets,
			})
		}
	}

	// Sort the queue in an order we think will be useful (most recent
	// first, deprioritising unstable devices, randomizing those we haven't
	// seen in a long while). If we don't do connection limiting the sorting
	// doesn't have much effect, but it may result in getting up and running
	// quicker if only a subset of configured devices are actually reachable
	// (by prioritizing those that were reachable recently).
	queue.Sort()

	// Perform dials according to the queue, stopping when we've reached the
	// allowed additional number of connections (if limited).
	numConns := 0
	var numConnsMut stdsync.Mutex
	dialSemaphore := semaphore.New(dialMaxParallel)
	dialWG := new(stdsync.WaitGroup)
	dialCtx, dialCancel := context.WithCancel(ctx)
	// Wait for all dial goroutines before releasing the dial context, so
	// that none of them are still running when this function returns.
	defer func() {
		dialWG.Wait()
		dialCancel()
	}()
	for i := range queue {
		// Stop starting new dials once the dial context is cancelled,
		// either from the outside or because the limit was reached.
		select {
		case <-dialCtx.Done():
			return
		default:
		}
		dialWG.Add(1)
		go func(entry dialQueueEntry) {
			defer dialWG.Done()
			conn, ok := s.dialParallel(dialCtx, entry.id, entry.targets, dialSemaphore)
			if !ok {
				return
			}
			// The mutex is held across the send so that the count and the
			// decision to cancel remaining dials stay consistent.
			numConnsMut.Lock()
			if allowAdditional == 0 || numConns < allowAdditional {
				select {
				case s.conns <- conn:
					numConns++
					if allowAdditional > 0 && numConns >= allowAdditional {
						dialCancel()
					}
				case <-dialCtx.Done():
				}
			}
			numConnsMut.Unlock()
		}(queue[i])
	}
}
  576. func (s *service) resolveDialTargets(ctx context.Context, now time.Time, cfg config.Configuration, deviceCfg config.DeviceConfiguration, nextDialAt nextDialRegistry, initial bool, priorityCutoff int) []dialTarget {
  577. deviceID := deviceCfg.DeviceID
  578. addrs := s.resolveDeviceAddrs(ctx, deviceCfg)
  579. l.Debugln("Resolved device", deviceID.Short(), "addresses:", addrs)
  580. dialTargets := make([]dialTarget, 0, len(addrs))
  581. for _, addr := range addrs {
  582. // Use both device and address, as you might have two devices connected
  583. // to the same relay
  584. if !initial && nextDialAt.get(deviceID, addr).After(now) {
  585. l.Debugf("Not dialing %s via %v as it's not time yet", deviceID.Short(), addr)
  586. continue
  587. }
  588. // If we fail at any step before actually getting the dialer
  589. // retry in a minute
  590. nextDialAt.set(deviceID, addr, now.Add(time.Minute))
  591. uri, err := url.Parse(addr)
  592. if err != nil {
  593. s.setConnectionStatus(addr, err)
  594. l.Infof("Parsing dialer address %s: %v", addr, err)
  595. continue
  596. }
  597. if len(deviceCfg.AllowedNetworks) > 0 {
  598. if !IsAllowedNetwork(uri.Host, deviceCfg.AllowedNetworks) {
  599. s.setConnectionStatus(addr, errors.New("network disallowed"))
  600. l.Debugln("Network for", uri, "is disallowed")
  601. continue
  602. }
  603. }
  604. dialerFactory, err := getDialerFactory(cfg, uri)
  605. if err != nil {
  606. s.setConnectionStatus(addr, err)
  607. }
  608. if errors.Is(err, errUnsupported) {
  609. l.Debugf("Dialer for %v: %v", uri, err)
  610. continue
  611. } else if err != nil {
  612. l.Infof("Dialer for %v: %v", uri, err)
  613. continue
  614. }
  615. dialer := dialerFactory.New(s.cfg.Options(), s.tlsCfg, s.registry, s.lanChecker)
  616. priority := dialer.Priority(uri.Host)
  617. currentConns := s.numConnectionsForDevice(deviceCfg.DeviceID)
  618. if priority > priorityCutoff {
  619. l.Debugf("Not dialing %s at %s using %s as priority is worse than current connection (%d > %d)", deviceID.Short(), addr, dialerFactory, priority, priorityCutoff)
  620. continue
  621. }
  622. if currentConns > 0 && !dialer.AllowsMultiConns() {
  623. l.Debugf("Not dialing %s at %s using %s as it does not allow multiple connections and we already have a connection", deviceID.Short(), addr, dialerFactory)
  624. continue
  625. }
  626. if currentConns >= s.desiredConnectionsToDevice(deviceCfg.DeviceID) && priority == priorityCutoff {
  627. l.Debugf("Not dialing %s at %s using %s as priority is equal and we already have %d/%d connections", deviceID.Short(), addr, dialerFactory, currentConns, deviceCfg.NumConnections)
  628. continue
  629. }
  630. nextDialAt.set(deviceID, addr, now.Add(dialer.RedialFrequency()))
  631. dialTargets = append(dialTargets, dialTarget{
  632. addr: addr,
  633. dialer: dialer,
  634. priority: priority,
  635. deviceID: deviceID,
  636. uri: uri,
  637. })
  638. }
  639. return dialTargets
  640. }
  641. func (s *service) resolveDeviceAddrs(ctx context.Context, cfg config.DeviceConfiguration) []string {
  642. var addrs []string
  643. for _, addr := range cfg.Addresses {
  644. if addr == "dynamic" {
  645. if s.discoverer != nil {
  646. if t, err := s.discoverer.Lookup(ctx, cfg.DeviceID); err == nil {
  647. addrs = append(addrs, t...)
  648. }
  649. }
  650. } else {
  651. addrs = append(addrs, addr)
  652. }
  653. }
  654. return stringutil.UniqueTrimmedStrings(addrs)
  655. }
// lanChecker decides whether a host or network address should be treated as
// local (LAN), using the wrapped configuration together with the addresses
// of the local network interfaces.
type lanChecker struct {
	cfg config.Wrapper // read for Options().AlwaysLocalNets
}
  659. func (s *lanChecker) isLANHost(host string) bool {
  660. // Probably we are called with an ip:port combo which we can resolve as
  661. // a TCP address.
  662. if addr, err := net.ResolveTCPAddr("tcp", host); err == nil {
  663. return s.isLAN(addr)
  664. }
  665. // ... but this function looks general enough that someone might try
  666. // with just an IP as well in the future so lets allow that.
  667. if addr, err := net.ResolveIPAddr("ip", host); err == nil {
  668. return s.isLAN(addr)
  669. }
  670. return false
  671. }
  672. func (s *lanChecker) isLAN(addr net.Addr) bool {
  673. var ip net.IP
  674. switch addr := addr.(type) {
  675. case *net.IPAddr:
  676. ip = addr.IP
  677. case *net.TCPAddr:
  678. ip = addr.IP
  679. case *net.UDPAddr:
  680. ip = addr.IP
  681. default:
  682. // From the standard library, just Unix sockets.
  683. // If you invent your own, handle it.
  684. return false
  685. }
  686. if ip.IsLoopback() {
  687. return true
  688. }
  689. if ip.IsLinkLocalUnicast() {
  690. return true
  691. }
  692. for _, lan := range s.cfg.Options().AlwaysLocalNets {
  693. _, ipnet, err := net.ParseCIDR(lan)
  694. if err != nil {
  695. l.Debugln("Network", lan, "is malformed:", err)
  696. continue
  697. }
  698. if ipnet.Contains(ip) {
  699. return true
  700. }
  701. }
  702. lans, err := osutil.GetInterfaceAddrs(false)
  703. if err != nil {
  704. l.Debugln("Failed to retrieve interface IPs:", err)
  705. priv := ip.IsPrivate()
  706. l.Debugf("Assuming isLAN=%v for IP %v", priv, ip)
  707. return priv
  708. }
  709. for _, lan := range lans {
  710. if lan.Contains(ip) {
  711. return true
  712. }
  713. }
  714. return false
  715. }
  716. func (s *service) createListener(factory listenerFactory, uri *url.URL) bool {
  717. // must be called with listenerMut held
  718. l.Debugln("Starting listener", uri)
  719. listener := factory.New(uri, s.cfg, s.tlsCfg, s.conns, s.natService, s.registry, s.lanChecker)
  720. listener.OnAddressesChanged(s.logListenAddressesChangedEvent)
  721. // Retrying a listener many times in rapid succession is unlikely to help,
  722. // thus back off quickly. A listener may soon be functional again, e.g. due
  723. // to a network interface coming back online - retry every minute.
  724. spec := svcutil.SpecWithInfoLogger(l)
  725. spec.FailureThreshold = 2
  726. spec.FailureBackoff = time.Minute
  727. sup := suture.New(fmt.Sprintf("listenerSupervisor@%v", listener), spec)
  728. sup.Add(listener)
  729. s.listeners[uri.String()] = listener
  730. s.listenerTokens[uri.String()] = s.Add(sup)
  731. return true
  732. }
  733. func (s *service) logListenAddressesChangedEvent(l ListenerAddresses) {
  734. s.evLogger.Log(events.ListenAddressesChanged, map[string]interface{}{
  735. "address": l.URI,
  736. "lan": l.LANAddresses,
  737. "wan": l.WANAddresses,
  738. })
  739. }
  740. func (s *service) CommitConfiguration(from, to config.Configuration) bool {
  741. newDevices := make(map[protocol.DeviceID]bool, len(to.Devices))
  742. for _, dev := range to.Devices {
  743. newDevices[dev.DeviceID] = true
  744. registerDeviceMetrics(dev.DeviceID.String())
  745. }
  746. for _, dev := range from.Devices {
  747. if !newDevices[dev.DeviceID] {
  748. warningLimitersMut.Lock()
  749. delete(warningLimiters, dev.DeviceID)
  750. warningLimitersMut.Unlock()
  751. metricDeviceActiveConnections.DeleteLabelValues(dev.DeviceID.String())
  752. }
  753. }
  754. s.checkAndSignalConnectLoopOnUpdatedDevices(from, to)
  755. s.listenersMut.Lock()
  756. seen := make(map[string]struct{})
  757. for _, addr := range to.Options.ListenAddresses() {
  758. if addr == "" {
  759. // We can get an empty address if there is an empty listener
  760. // element in the config, indicating no listeners should be
  761. // used. This is not an error.
  762. continue
  763. }
  764. uri, err := url.Parse(addr)
  765. if err != nil {
  766. l.Warnf("Skipping malformed listener URL %q: %v", addr, err)
  767. continue
  768. }
  769. // Make sure we always have the canonical representation of the URL.
  770. // This is for consistency as we use it as a map key, but also to
  771. // avoid misunderstandings. We do not just use the canonicalized
  772. // version, because an URL that looks very similar to a human might
  773. // mean something entirely different to the computer (e.g.,
  774. // tcp:/127.0.0.1:22000 in fact being equivalent to tcp://:22000).
  775. if canonical := uri.String(); canonical != addr {
  776. l.Warnf("Skipping malformed listener URL %q (not canonical)", addr)
  777. continue
  778. }
  779. if _, ok := s.listeners[addr]; ok {
  780. seen[addr] = struct{}{}
  781. continue
  782. }
  783. factory, err := getListenerFactory(to, uri)
  784. if errors.Is(err, errUnsupported) {
  785. l.Debugf("Listener for %v: %v", uri, err)
  786. continue
  787. } else if err != nil {
  788. l.Infof("Listener for %v: %v", uri, err)
  789. continue
  790. }
  791. s.createListener(factory, uri)
  792. seen[addr] = struct{}{}
  793. }
  794. for addr, listener := range s.listeners {
  795. if _, ok := seen[addr]; !ok || listener.Factory().Valid(to) != nil {
  796. l.Debugln("Stopping listener", addr)
  797. s.Remove(s.listenerTokens[addr])
  798. delete(s.listenerTokens, addr)
  799. delete(s.listeners, addr)
  800. }
  801. }
  802. s.listenersMut.Unlock()
  803. return true
  804. }
  805. func (s *service) checkAndSignalConnectLoopOnUpdatedDevices(from, to config.Configuration) {
  806. oldDevices := from.DeviceMap()
  807. dial := false
  808. s.dialNowDevicesMut.Lock()
  809. for _, dev := range to.Devices {
  810. if dev.Paused {
  811. continue
  812. }
  813. if oldDev, ok := oldDevices[dev.DeviceID]; !ok || oldDev.Paused {
  814. s.dialNowDevices[dev.DeviceID] = struct{}{}
  815. dial = true
  816. } else if !slices.Equal(oldDev.Addresses, dev.Addresses) {
  817. dial = true
  818. }
  819. }
  820. if dial {
  821. s.scheduleDialNow()
  822. }
  823. s.dialNowDevicesMut.Unlock()
  824. }
  825. func (s *service) scheduleDialNow() {
  826. select {
  827. case s.dialNow <- struct{}{}:
  828. default:
  829. // channel is blocked - a config update is already pending for the connection loop.
  830. }
  831. }
  832. func (s *service) AllAddresses() []string {
  833. s.listenersMut.RLock()
  834. var addrs []string
  835. for _, listener := range s.listeners {
  836. for _, lanAddr := range listener.LANAddresses() {
  837. addrs = append(addrs, lanAddr.String())
  838. }
  839. for _, wanAddr := range listener.WANAddresses() {
  840. addrs = append(addrs, wanAddr.String())
  841. }
  842. }
  843. s.listenersMut.RUnlock()
  844. return stringutil.UniqueTrimmedStrings(addrs)
  845. }
  846. func (s *service) ExternalAddresses() []string {
  847. if s.cfg.Options().AnnounceLANAddresses {
  848. return s.AllAddresses()
  849. }
  850. s.listenersMut.RLock()
  851. var addrs []string
  852. for _, listener := range s.listeners {
  853. for _, wanAddr := range listener.WANAddresses() {
  854. addrs = append(addrs, wanAddr.String())
  855. }
  856. }
  857. s.listenersMut.RUnlock()
  858. return stringutil.UniqueTrimmedStrings(addrs)
  859. }
  860. func (s *service) ListenerStatus() map[string]ListenerStatusEntry {
  861. result := make(map[string]ListenerStatusEntry)
  862. s.listenersMut.RLock()
  863. for addr, listener := range s.listeners {
  864. var status ListenerStatusEntry
  865. if err := listener.Error(); err != nil {
  866. errStr := err.Error()
  867. status.Error = &errStr
  868. }
  869. status.LANAddresses = urlsToStrings(listener.LANAddresses())
  870. status.WANAddresses = urlsToStrings(listener.WANAddresses())
  871. result[addr] = status
  872. }
  873. s.listenersMut.RUnlock()
  874. return result
  875. }
// connectionStatusHandler records the most recent connection status per
// address. Access to the map goes through connectionStatusMut.
type connectionStatusHandler struct {
	connectionStatusMut sync.RWMutex                     // guards connectionStatus
	connectionStatus    map[string]ConnectionStatusEntry // address -> latest error/status
}
  880. func newConnectionStatusHandler() connectionStatusHandler {
  881. return connectionStatusHandler{
  882. connectionStatusMut: sync.NewRWMutex(),
  883. connectionStatus: make(map[string]ConnectionStatusEntry),
  884. }
  885. }
  886. func (s *connectionStatusHandler) ConnectionStatus() map[string]ConnectionStatusEntry {
  887. result := make(map[string]ConnectionStatusEntry)
  888. s.connectionStatusMut.RLock()
  889. for k, v := range s.connectionStatus {
  890. result[k] = v
  891. }
  892. s.connectionStatusMut.RUnlock()
  893. return result
  894. }
  895. func (s *connectionStatusHandler) setConnectionStatus(address string, err error) {
  896. if errors.Is(err, context.Canceled) {
  897. return
  898. }
  899. status := ConnectionStatusEntry{When: time.Now().UTC().Truncate(time.Second)}
  900. if err != nil {
  901. errStr := err.Error()
  902. status.Error = &errStr
  903. }
  904. s.connectionStatusMut.Lock()
  905. s.connectionStatus[address] = status
  906. s.connectionStatusMut.Unlock()
  907. }
  908. func (s *service) NATType() string {
  909. s.listenersMut.RLock()
  910. defer s.listenersMut.RUnlock()
  911. for _, listener := range s.listeners {
  912. natType := listener.NATType()
  913. if natType != "unknown" {
  914. return natType
  915. }
  916. }
  917. return "unknown"
  918. }
  919. func getDialerFactory(cfg config.Configuration, uri *url.URL) (dialerFactory, error) {
  920. dialerFactory, ok := dialers[uri.Scheme]
  921. if !ok {
  922. return nil, fmt.Errorf("unknown address scheme %q", uri.Scheme)
  923. }
  924. if err := dialerFactory.Valid(cfg); err != nil {
  925. return nil, err
  926. }
  927. return dialerFactory, nil
  928. }
  929. func getListenerFactory(cfg config.Configuration, uri *url.URL) (listenerFactory, error) {
  930. listenerFactory, ok := listeners[uri.Scheme]
  931. if !ok {
  932. return nil, fmt.Errorf("unknown address scheme %q", uri.Scheme)
  933. }
  934. if err := listenerFactory.Valid(cfg); err != nil {
  935. return nil, err
  936. }
  937. return listenerFactory, nil
  938. }
  939. func urlsToStrings(urls []*url.URL) []string {
  940. strings := make([]string, len(urls))
  941. for i, url := range urls {
  942. strings[i] = url.String()
  943. }
  944. return strings
  945. }
// Per-device rate limiters used by warningFor, and the mutex guarding the
// map that holds them.
var (
	warningLimiters    = make(map[protocol.DeviceID]*rate.Limiter) // device -> limiter, created on first warning
	warningLimitersMut = sync.NewMutex()                           // guards warningLimiters
)
  950. func warningFor(dev protocol.DeviceID, msg string) {
  951. warningLimitersMut.Lock()
  952. defer warningLimitersMut.Unlock()
  953. lim, ok := warningLimiters[dev]
  954. if !ok {
  955. lim = rate.NewLimiter(rate.Every(perDeviceWarningIntv), 1)
  956. warningLimiters[dev] = lim
  957. }
  958. if lim.Allow() {
  959. l.Warnln(msg)
  960. }
  961. }
  962. func tlsTimedHandshake(tc *tls.Conn) error {
  963. tc.SetDeadline(time.Now().Add(tlsHandshakeTimeout))
  964. defer tc.SetDeadline(time.Time{})
  965. return tc.Handshake()
  966. }
  967. // IsAllowedNetwork returns true if the given host (IP or resolvable
  968. // hostname) is in the set of allowed networks (CIDR format only).
  969. func IsAllowedNetwork(host string, allowed []string) bool {
  970. if hostNoPort, _, err := net.SplitHostPort(host); err == nil {
  971. host = hostNoPort
  972. }
  973. addr, err := net.ResolveIPAddr("ip", host)
  974. if err != nil {
  975. return false
  976. }
  977. for _, n := range allowed {
  978. result := true
  979. if strings.HasPrefix(n, "!") {
  980. result = false
  981. n = n[1:]
  982. }
  983. _, cidr, err := net.ParseCIDR(n)
  984. if err != nil {
  985. continue
  986. }
  987. if cidr.Contains(addr.IP) {
  988. return result
  989. }
  990. }
  991. return false
  992. }
// dialParallel dials the given targets for deviceID, one priority bucket at
// a time, starting with the numerically lowest priority value. All targets
// in a bucket are dialed concurrently, with each dial first taking one unit
// from a MultiSemaphore made of a fresh dialMaxParallelPerDevice semaphore
// and parentSema. The first connection of a bucket that is established and
// passes validateIdentity is returned together with true; further
// connections arriving from the same bucket are closed in the background.
// If no target in any bucket succeeds, a zero internalConn and false are
// returned. The outcome of every dial is recorded via setConnectionStatus.
func (s *service) dialParallel(ctx context.Context, deviceID protocol.DeviceID, dialTargets []dialTarget, parentSema *semaphore.Semaphore) (internalConn, bool) {
	// Group targets into buckets by priority
	dialTargetBuckets := make(map[int][]dialTarget, len(dialTargets))
	for _, tgt := range dialTargets {
		dialTargetBuckets[tgt.priority] = append(dialTargetBuckets[tgt.priority], tgt)
	}

	// Get all available priorities
	priorities := make([]int, 0, len(dialTargetBuckets))
	for prio := range dialTargetBuckets {
		priorities = append(priorities, prio)
	}

	// Sort the priorities so that we dial lowest first (which means highest...)
	slices.Sort(priorities)

	sema := semaphore.MultiSemaphore{semaphore.New(dialMaxParallelPerDevice), parentSema}
	for _, prio := range priorities {
		tgts := dialTargetBuckets[prio]
		// Buffered for every target in the bucket, so a successful dialer
		// never blocks on the send even if nobody is receiving any more.
		res := make(chan internalConn, len(tgts))
		wg := stdsync.WaitGroup{}
		for _, tgt := range tgts {
			// Taken before the goroutine is started, so starting the next
			// dial waits here until a slot is available.
			sema.Take(1)
			wg.Add(1)
			go func(tgt dialTarget) {
				defer func() {
					wg.Done()
					sema.Give(1)
				}()
				conn, err := tgt.Dial(ctx)
				if err == nil {
					// Closes the connection on error
					err = s.validateIdentity(conn, deviceID)
				}
				s.setConnectionStatus(tgt.addr, err)
				if err != nil {
					l.Debugln("dialing", deviceID, tgt.uri, "error:", err)
				} else {
					l.Debugln("dialing", deviceID, tgt.uri, "success:", conn)
					res <- conn
				}
			}(tgt)
		}

		// Spawn a routine which will unblock main routine in case we fail
		// to connect to anyone.
		go func() {
			wg.Wait()
			close(res)
		}()

		// Wait for the first connection, or for channel closure.
		if conn, ok := <-res; ok {
			// Got a connection, means more might come back, hence spawn a
			// routine that will do the discarding.
			l.Debugln("connected to", deviceID, prio, "using", conn, conn.priority)
			go func(deviceID protocol.DeviceID, prio int) {
				// Once all dialers are done, everything still sitting in
				// the channel is surplus. The range ends when the routine
				// above closes the channel.
				wg.Wait()
				l.Debugln("discarding", len(res), "connections while connecting to", deviceID, prio)
				for conn := range res {
					conn.Close()
				}
			}(deviceID, prio)
			return conn, ok
		}
		// Failed to connect, report that fact.
		l.Debugln("failed to connect to", deviceID, prio)
	}
	return internalConn{}, false
}
  1058. func (s *service) validateIdentity(c internalConn, expectedID protocol.DeviceID) error {
  1059. cs := c.ConnectionState()
  1060. // We should have received exactly one certificate from the other
  1061. // side. If we didn't, they don't have a device ID and we drop the
  1062. // connection.
  1063. certs := cs.PeerCertificates
  1064. if cl := len(certs); cl != 1 {
  1065. l.Infof("Got peer certificate list of length %d != 1 from peer at %s; protocol error", cl, c)
  1066. c.Close()
  1067. return fmt.Errorf("expected 1 certificate, got %d", cl)
  1068. }
  1069. remoteCert := certs[0]
  1070. remoteID := protocol.NewDeviceID(remoteCert.Raw)
  1071. // The device ID should not be that of ourselves. It can happen
  1072. // though, especially in the presence of NAT hairpinning, multiple
  1073. // clients between the same NAT gateway, and global discovery.
  1074. if remoteID == s.myID {
  1075. l.Debugf("Connected to myself (%s) at %s", remoteID, c)
  1076. c.Close()
  1077. return errors.New("connected to self")
  1078. }
  1079. // We should see the expected device ID
  1080. if !remoteID.Equals(expectedID) {
  1081. c.Close()
  1082. return fmt.Errorf("unexpected device id, expected %s got %s", expectedID, remoteID)
  1083. }
  1084. return nil
  1085. }
// nextDialRegistry tracks, per device, when each of its addresses may next
// be dialed and how often the device has been force-redialed recently.
type nextDialRegistry map[protocol.DeviceID]nextDialDevice
// nextDialDevice is the per-device state kept in a nextDialRegistry.
type nextDialDevice struct {
	nextDial              map[string]time.Time // address -> time of the next dial, as stored by set
	coolDownIntervalStart time.Time            // when the current run of forced redials began
	attempts              int                  // forced redials counted since coolDownIntervalStart
}
  1092. func (r nextDialRegistry) get(device protocol.DeviceID, addr string) time.Time {
  1093. return r[device].nextDial[addr]
  1094. }
// Limits applied to forced redials by nextDialRegistry.redialDevice.
const (
	dialCoolDownInterval    = 2 * time.Minute // window, from the first forced redial, in which attempts are counted
	dialCoolDownDelay       = 5 * time.Minute // time, from the window start, during which a device that hit the limit is not force-redialed
	dialCoolDownMaxAttempts = 3               // forced redials allowed within one window
)
  1100. // redialDevice marks the device for immediate redial, unless the remote keeps
  1101. // dropping established connections. Thus we keep track of when the first forced
  1102. // re-dial happened, and how many attempts happen in the dialCoolDownInterval
  1103. // after that. If it's more than dialCoolDownMaxAttempts, don't force-redial
  1104. // that device for dialCoolDownDelay (regular dials still happen).
  1105. func (r nextDialRegistry) redialDevice(device protocol.DeviceID, now time.Time) {
  1106. dev, ok := r[device]
  1107. if !ok {
  1108. r[device] = nextDialDevice{
  1109. nextDial: make(map[string]time.Time),
  1110. coolDownIntervalStart: now,
  1111. attempts: 1,
  1112. }
  1113. return
  1114. }
  1115. if dev.attempts == 0 || now.Before(dev.coolDownIntervalStart.Add(dialCoolDownInterval)) {
  1116. if dev.attempts >= dialCoolDownMaxAttempts {
  1117. // Device has been force redialed too often - let it cool down.
  1118. return
  1119. }
  1120. if dev.attempts == 0 {
  1121. dev.coolDownIntervalStart = now
  1122. }
  1123. dev.attempts++
  1124. dev.nextDial = make(map[string]time.Time)
  1125. r[device] = dev
  1126. return
  1127. }
  1128. if dev.attempts >= dialCoolDownMaxAttempts && now.Before(dev.coolDownIntervalStart.Add(dialCoolDownDelay)) {
  1129. return // Still cooling down
  1130. }
  1131. delete(r, device)
  1132. }
  1133. func (r nextDialRegistry) set(device protocol.DeviceID, addr string, next time.Time) {
  1134. if _, ok := r[device]; !ok {
  1135. r[device] = nextDialDevice{nextDial: make(map[string]time.Time)}
  1136. }
  1137. r[device].nextDial[addr] = next
  1138. }
  1139. func (r nextDialRegistry) sleepDurationAndCleanup(now time.Time) time.Duration {
  1140. sleep := stdConnectionLoopSleep
  1141. for id, dev := range r {
  1142. for address, next := range dev.nextDial {
  1143. if next.Before(now) {
  1144. // Expired entry, address was not seen in last pass(es)
  1145. delete(dev.nextDial, address)
  1146. continue
  1147. }
  1148. if cur := next.Sub(now); cur < sleep {
  1149. sleep = cur
  1150. }
  1151. }
  1152. if dev.attempts > 0 {
  1153. interval := dialCoolDownInterval
  1154. if dev.attempts >= dialCoolDownMaxAttempts {
  1155. interval = dialCoolDownDelay
  1156. }
  1157. if now.After(dev.coolDownIntervalStart.Add(interval)) {
  1158. dev.attempts = 0
  1159. }
  1160. }
  1161. if len(dev.nextDial) == 0 && dev.attempts == 0 {
  1162. delete(r, id)
  1163. }
  1164. }
  1165. return sleep
  1166. }
  1167. func (s *service) desiredConnectionsToDevice(deviceID protocol.DeviceID) int {
  1168. cfg, ok := s.cfg.Device(deviceID)
  1169. if !ok {
  1170. // We want no connections to an unknown device.
  1171. return 0
  1172. }
  1173. otherSide := s.wantConnectionsForDevice(deviceID)
  1174. thisSide := cfg.NumConnections()
  1175. switch {
  1176. case otherSide <= 0:
  1177. // The other side doesn't support multiple connections, or we
  1178. // haven't yet connected to them so we don't know what they support
  1179. // or not. Use a single connection until we know better.
  1180. return 1
  1181. case otherSide == 1:
  1182. // The other side supports multiple connections, but only wants
  1183. // one. We should honour that.
  1184. return 1
  1185. case thisSide == 1:
  1186. // We want only one connection, so we should honour that.
  1187. return 1
  1188. // Finally, we allow negotiation and use the higher of the two values,
  1189. // while keeping at or below the max allowed value.
  1190. default:
  1191. return min(max(thisSide, otherSide), maxNumConnections)
  1192. }
  1193. }
// The deviceConnectionTracker keeps track of how many devices we are
// connected to and how many connections we have to each device. It also
// tracks how many connections they are willing to use. Both maps are
// created lazily by accountAddedConnection and are accessed with
// connectionsMut held.
type deviceConnectionTracker struct {
	connectionsMut  stdsync.Mutex                               // guards the maps below
	connections     map[protocol.DeviceID][]protocol.Connection // current connections
	wantConnections map[protocol.DeviceID]int                   // number of connections they want
}
  1202. func (c *deviceConnectionTracker) accountAddedConnection(conn protocol.Connection, h protocol.Hello, upgradeThreshold int) {
  1203. c.connectionsMut.Lock()
  1204. defer c.connectionsMut.Unlock()
  1205. // Lazily initialize the maps
  1206. if c.connections == nil {
  1207. c.connections = make(map[protocol.DeviceID][]protocol.Connection)
  1208. c.wantConnections = make(map[protocol.DeviceID]int)
  1209. }
  1210. // Add the connection to the list of current connections and remember
  1211. // how many total connections they want
  1212. d := conn.DeviceID()
  1213. c.connections[d] = append(c.connections[d], conn)
  1214. c.wantConnections[d] = int(h.NumConnections)
  1215. l.Debugf("Added connection for %s (now %d), they want %d connections", d.Short(), len(c.connections[d]), h.NumConnections)
  1216. // Update active connections metric
  1217. metricDeviceActiveConnections.WithLabelValues(d.String()).Inc()
  1218. // Close any connections we no longer want to retain.
  1219. c.closeWorsePriorityConnectionsLocked(d, conn.Priority()-upgradeThreshold)
  1220. }
  1221. func (c *deviceConnectionTracker) accountRemovedConnection(conn protocol.Connection) {
  1222. c.connectionsMut.Lock()
  1223. defer c.connectionsMut.Unlock()
  1224. d := conn.DeviceID()
  1225. cid := conn.ConnectionID()
  1226. // Remove the connection from the list of current connections
  1227. for i, conn := range c.connections[d] {
  1228. if conn.ConnectionID() == cid {
  1229. c.connections[d] = sliceutil.RemoveAndZero(c.connections[d], i)
  1230. break
  1231. }
  1232. }
  1233. // Clean up if required
  1234. if len(c.connections[d]) == 0 {
  1235. delete(c.connections, d)
  1236. delete(c.wantConnections, d)
  1237. }
  1238. // Update active connections metric
  1239. metricDeviceActiveConnections.WithLabelValues(d.String()).Dec()
  1240. l.Debugf("Removed connection for %s (now %d)", d.Short(), c.connections[d])
  1241. }
  1242. func (c *deviceConnectionTracker) numConnectionsForDevice(d protocol.DeviceID) int {
  1243. c.connectionsMut.Lock()
  1244. defer c.connectionsMut.Unlock()
  1245. return len(c.connections[d])
  1246. }
  1247. func (c *deviceConnectionTracker) wantConnectionsForDevice(d protocol.DeviceID) int {
  1248. c.connectionsMut.Lock()
  1249. defer c.connectionsMut.Unlock()
  1250. return c.wantConnections[d]
  1251. }
  1252. func (c *deviceConnectionTracker) numConnectedDevices() int {
  1253. c.connectionsMut.Lock()
  1254. defer c.connectionsMut.Unlock()
  1255. return len(c.connections)
  1256. }
  1257. func (c *deviceConnectionTracker) worstConnectionPriority(d protocol.DeviceID) int {
  1258. c.connectionsMut.Lock()
  1259. defer c.connectionsMut.Unlock()
  1260. if len(c.connections[d]) == 0 {
  1261. return math.MaxInt // worst possible priority
  1262. }
  1263. worstPriority := c.connections[d][0].Priority()
  1264. for _, conn := range c.connections[d][1:] {
  1265. if p := conn.Priority(); p > worstPriority {
  1266. worstPriority = p
  1267. }
  1268. }
  1269. return worstPriority
  1270. }
  1271. // closeWorsePriorityConnectionsLocked closes all connections to the given
  1272. // device that are worse than the cutoff priority. Must be called with the
  1273. // lock held.
  1274. func (c *deviceConnectionTracker) closeWorsePriorityConnectionsLocked(d protocol.DeviceID, cutoff int) {
  1275. for _, conn := range c.connections[d] {
  1276. if p := conn.Priority(); p > cutoff {
  1277. l.Debugf("Closing connection %s to %s with priority %d (cutoff %d)", conn, d.Short(), p, cutoff)
  1278. go conn.Close(errReplacingConnection)
  1279. }
  1280. }
  1281. }
  1282. // newConnectionID generates a connection ID. The connection ID is designed
  1283. // to be unique for each connection and chronologically sortable. It is
  1284. // based on the sum of two timestamps: when we think the connection was
  1285. // started, and when the other side thinks the connection was started. We
  1286. // then add some random data for good measure. This way, even if the other
  1287. // side does some funny business with the timestamp, we will get no worse
  1288. // than random connection IDs.
  1289. func newConnectionID(t0, t1 int64) string {
  1290. var buf [16]byte // 8 bytes timestamp, 8 bytes random
  1291. binary.BigEndian.PutUint64(buf[:], uint64(t0+t1))
  1292. _, _ = io.ReadFull(rand.Reader, buf[8:])
  1293. enc := base32.HexEncoding.WithPadding(base32.NoPadding)
  1294. // We encode the two parts separately and concatenate the results. The
  1295. // reason for this is that the timestamp (64 bits) doesn't precisely
  1296. // align to the base32 encoding (5 bits per character), so we'd get a
  1297. // character in the middle that is a mix of bits from the timestamp and
  1298. // from the random. We want the timestamp part deterministic.
  1299. return enc.EncodeToString(buf[:8]) + enc.EncodeToString(buf[8:])
  1300. }