service.go 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189
  1. // Copyright (C) 2015 The Syncthing Authors.
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this file,
  5. // You can obtain one at https://mozilla.org/MPL/2.0/.
  6. //go:generate -command counterfeiter go run github.com/maxbrunsfeld/counterfeiter/v6
  7. //go:generate counterfeiter -o mocks/service.go --fake-name Service . Service
  8. package connections
  9. import (
  10. "context"
  11. "crypto/tls"
  12. "fmt"
  13. "math"
  14. "net"
  15. "net/url"
  16. "sort"
  17. "strings"
  18. stdsync "sync"
  19. "time"
  20. "github.com/syncthing/syncthing/lib/config"
  21. "github.com/syncthing/syncthing/lib/discover"
  22. "github.com/syncthing/syncthing/lib/events"
  23. "github.com/syncthing/syncthing/lib/nat"
  24. "github.com/syncthing/syncthing/lib/osutil"
  25. "github.com/syncthing/syncthing/lib/protocol"
  26. "github.com/syncthing/syncthing/lib/svcutil"
  27. "github.com/syncthing/syncthing/lib/sync"
  28. "github.com/syncthing/syncthing/lib/util"
  29. // Registers NAT service providers
  30. _ "github.com/syncthing/syncthing/lib/pmp"
  31. _ "github.com/syncthing/syncthing/lib/upnp"
  32. "github.com/pkg/errors"
  33. "github.com/thejerf/suture/v4"
  34. "golang.org/x/time/rate"
  35. )
  // Package-level registries of the known dialer and listener factories.
  // NOTE(review): presumably keyed by URL scheme and filled in by the
  // individual transport implementations elsewhere in the package — confirm.
  var (
  	dialers   = make(map[string]dialerFactory)
  	listeners = make(map[string]listenerFactory)
  )
  var (
  	// Dialers and listeners return errUnsupported (or a wrapped variant)
  	// when they are intentionally out of service due to configuration,
  	// build, etc. This is not logged loudly: callers in this file check
  	// for it with errors.Is and log at debug level only.
  	errUnsupported = errors.New("unsupported protocol")

  	// These are specific explanations for errUnsupported. Each wraps
  	// errUnsupported via %w, so errors.Is(err, errUnsupported) holds for
  	// all of them.
  	errDisabled   = fmt.Errorf("%w: disabled by configuration", errUnsupported)
  	errDeprecated = fmt.Errorf("%w: deprecated", errUnsupported)
  	errNotInBuild = fmt.Errorf("%w: disabled at build time", errUnsupported)
  )
  // Timing and sizing parameters for the connection service.
  const (
  	// NOTE(review): not used in the visible part of the file; presumably
  	// the minimum interval between repeated warnings about one device.
  	perDeviceWarningIntv = 15 * time.Minute
  	// NOTE(review): not used in the visible part of the file; presumably
  	// the deadline applied to TLS handshakes.
  	tlsHandshakeTimeout = 10 * time.Second
  	// An existing connection older than this may be replaced by a newly
  	// established one even if the new one has no better priority.
  	minConnectionReplaceAge = 10 * time.Second
  	// Lower bound on the pause between two runs of the connect loop.
  	minConnectionLoopSleep = 5 * time.Second
  	// The connect loop leaves its initial ramp-up phase once the ramp-up
  	// delay reaches this value.
  	stdConnectionLoopSleep = time.Minute
  	// Numerically highest, i.e. least preferred, dialer priority; used as
  	// the starting point when searching for a better one.
  	worstDialerPriority = math.MaxInt32
  	// NOTE(review): not used in the visible part of the file; presumably
  	// consulted when ordering the dial queue.
  	recentlySeenCutoff = 7 * 24 * time.Hour
  	// Devices whose last connection lasted less than this are flagged as
  	// short-lived in the dial queue.
  	shortLivedConnectionThreshold = 5 * time.Second
  	// Capacity of the semaphore shared by all dials in one dial round.
  	dialMaxParallel = 64
  	// NOTE(review): not used in the visible part of the file; presumably
  	// the per-device bound on concurrent dials.
  	dialMaxParallelPerDevice = 8
  )
  // tlsCipherSuiteNames maps TLS cipher suite identifiers to their
  // human-readable names.
  //
  // From go/src/crypto/tls/cipher_suites.go
  var tlsCipherSuiteNames = map[uint16]string{
  	// TLS 1.2
  	0x0005: "TLS_RSA_WITH_RC4_128_SHA",
  	0x000a: "TLS_RSA_WITH_3DES_EDE_CBC_SHA",
  	0x002f: "TLS_RSA_WITH_AES_128_CBC_SHA",
  	0x0035: "TLS_RSA_WITH_AES_256_CBC_SHA",
  	0x003c: "TLS_RSA_WITH_AES_128_CBC_SHA256",
  	0x009c: "TLS_RSA_WITH_AES_128_GCM_SHA256",
  	0x009d: "TLS_RSA_WITH_AES_256_GCM_SHA384",
  	0xc007: "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA",
  	0xc009: "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA",
  	0xc00a: "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA",
  	0xc011: "TLS_ECDHE_RSA_WITH_RC4_128_SHA",
  	0xc012: "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA",
  	0xc013: "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA",
  	0xc014: "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA",
  	0xc023: "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256",
  	0xc027: "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256",
  	0xc02f: "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",
  	0xc02b: "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",
  	0xc030: "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",
  	0xc02c: "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",
  	0xcca8: "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305",
  	0xcca9: "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305",

  	// TLS 1.3
  	0x1301: "TLS_AES_128_GCM_SHA256",
  	0x1302: "TLS_AES_256_GCM_SHA384",
  	0x1303: "TLS_CHACHA20_POLY1305_SHA256",
  }
  // tlsVersionNames maps TLS protocol version identifiers to short display
  // names. Only TLS 1.2 and TLS 1.3 have entries.
  var tlsVersionNames = map[uint16]string{
  	tls.VersionTLS12: "TLS1.2",
  	tls.VersionTLS13: "TLS1.3",
  }
  // Service listens and dials all configured unconnected devices, via supported
  // dialers. Successful connections are handed to the model.
  //
  // It is a suture.Service, so it can be run under a supervisor, and a
  // discover.AddressLister, so it can report the addresses it listens on.
  type Service interface {
  	suture.Service
  	discover.AddressLister
  	// ListenerStatus returns the status of each listener, keyed by the
  	// listener address.
  	ListenerStatus() map[string]ListenerStatusEntry
  	// ConnectionStatus returns per-address connection status entries.
  	// NOTE(review): implementation is outside the visible part of the
  	// file; presumably keyed by dial address — confirm.
  	ConnectionStatus() map[string]ConnectionStatusEntry
  	// NATType returns a description of the detected NAT type.
  	// NOTE(review): implementation not visible here — confirm semantics.
  	NATType() string
  }
  // ListenerStatusEntry describes the state of a single listener as reported
  // by Service.ListenerStatus.
  type ListenerStatusEntry struct {
  	// Error is the listener's current error message, or nil when the
  	// listener reports no error.
  	Error *string `json:"error"`
  	// LANAddresses are the listener's LAN addresses, as strings.
  	LANAddresses []string `json:"lanAddresses"`
  	// WANAddresses are the listener's WAN addresses, as strings.
  	WANAddresses []string `json:"wanAddresses"`
  }
  // ConnectionStatusEntry is one entry of the map returned by
  // Service.ConnectionStatus.
  // NOTE(review): the code that fills these in is outside the visible part of
  // the file; presumably When is the time the status was recorded and Error is
  // nil on success — confirm.
  type ConnectionStatusEntry struct {
  	When  time.Time `json:"when"`
  	Error *string   `json:"error"`
  }
  // service is the implementation of Service. It supervises the listeners,
  // the NAT service, the outgoing connect loop and the common connection
  // handling loop.
  type service struct {
  	*suture.Supervisor
  	connectionStatusHandler

  	cfg        config.Wrapper
  	myID       protocol.DeviceID
  	model      Model
  	tlsCfg     *tls.Config
  	discoverer discover.Finder
  	// conns carries freshly established connections, both dialed and
  	// accepted, to the handle loop.
  	conns                chan internalConn
  	bepProtocolName      string
  	tlsDefaultCommonName string
  	limiter              *limiter
  	natService           *nat.Service
  	evLogger             events.Logger

  	// dialNow wakes the connect loop ahead of its timer; dialNowDevices
  	// is the set of devices to redial immediately when that happens and
  	// is guarded by dialNowDevicesMut.
  	dialNow           chan struct{}
  	dialNowDevices    map[protocol.DeviceID]struct{}
  	dialNowDevicesMut sync.Mutex

  	// listeners and listenerTokens are both keyed by the listener URL
  	// string and guarded by listenersMut.
  	listenersMut   sync.RWMutex
  	listeners      map[string]genericListener
  	listenerTokens map[string]suture.ServiceToken
  }
  135. func NewService(cfg config.Wrapper, myID protocol.DeviceID, mdl Model, tlsCfg *tls.Config, discoverer discover.Finder, bepProtocolName string, tlsDefaultCommonName string, evLogger events.Logger) Service {
  136. spec := svcutil.SpecWithInfoLogger(l)
  137. service := &service{
  138. Supervisor: suture.New("connections.Service", spec),
  139. connectionStatusHandler: newConnectionStatusHandler(),
  140. cfg: cfg,
  141. myID: myID,
  142. model: mdl,
  143. tlsCfg: tlsCfg,
  144. discoverer: discoverer,
  145. conns: make(chan internalConn),
  146. bepProtocolName: bepProtocolName,
  147. tlsDefaultCommonName: tlsDefaultCommonName,
  148. limiter: newLimiter(myID, cfg),
  149. natService: nat.NewService(myID, cfg),
  150. evLogger: evLogger,
  151. dialNowDevicesMut: sync.NewMutex(),
  152. dialNow: make(chan struct{}, 1),
  153. dialNowDevices: make(map[protocol.DeviceID]struct{}),
  154. listenersMut: sync.NewRWMutex(),
  155. listeners: make(map[string]genericListener),
  156. listenerTokens: make(map[string]suture.ServiceToken),
  157. }
  158. cfg.Subscribe(service)
  159. raw := cfg.RawCopy()
  160. // Actually starts the listeners and NAT service
  161. // Need to start this before service.connect so that any dials that
  162. // try punch through already have a listener to cling on.
  163. service.CommitConfiguration(raw, raw)
  164. // There are several moving parts here; one routine per listening address
  165. // (handled in configuration changing) to handle incoming connections,
  166. // one routine to periodically attempt outgoing connections, one routine to
  167. // the common handling regardless of whether the connection was
  168. // incoming or outgoing.
  169. service.Add(svcutil.AsService(service.connect, fmt.Sprintf("%s/connect", service)))
  170. service.Add(svcutil.AsService(service.handle, fmt.Sprintf("%s/handle", service)))
  171. service.Add(service.natService)
  172. svcutil.OnSupervisorDone(service.Supervisor, func() {
  173. service.cfg.Unsubscribe(service.limiter)
  174. service.cfg.Unsubscribe(service)
  175. })
  176. return service
  177. }
  // handle is the common processing loop for every new connection received on
  // s.conns, regardless of whether it was dialed or accepted. For each
  // connection it checks the peer certificate, exchanges hello messages,
  // asks the model whether the device is acceptable, decides whether the new
  // connection should replace an existing one, verifies the certificate name,
  // wraps the connection in rate limiters and finally hands it to the model.
  // Connections failing any step are closed and dropped. The loop runs until
  // ctx is cancelled, at which point ctx.Err() is returned.
  func (s *service) handle(ctx context.Context) error {
  	var c internalConn
  	for {
  		select {
  		case <-ctx.Done():
  			return ctx.Err()
  		case c = <-s.conns:
  		}

  		cs := c.ConnectionState()

  		// We should have negotiated the next level protocol "bep/1.0" as part
  		// of the TLS handshake. Unfortunately this can't be a hard error,
  		// because there are implementations out there that don't support
  		// protocol negotiation (iOS for one...).
  		if !cs.NegotiatedProtocolIsMutual || cs.NegotiatedProtocol != s.bepProtocolName {
  			l.Infof("Peer at %s did not negotiate bep/1.0", c)
  		}

  		// We should have received exactly one certificate from the other
  		// side. If we didn't, they don't have a device ID and we drop the
  		// connection.
  		certs := cs.PeerCertificates
  		if cl := len(certs); cl != 1 {
  			l.Infof("Got peer certificate list of length %d != 1 from peer at %s; protocol error", cl, c)
  			c.Close()
  			continue
  		}
  		remoteCert := certs[0]
  		// The device ID is derived from the raw bytes of the certificate.
  		remoteID := protocol.NewDeviceID(remoteCert.Raw)

  		// The device ID should not be that of ourselves. It can happen
  		// though, especially in the presence of NAT hairpinning, multiple
  		// clients behind the same NAT gateway, and global discovery.
  		if remoteID == s.myID {
  			l.Debugf("Connected to myself (%s) at %s", remoteID, c)
  			c.Close()
  			continue
  		}

  		// Bound the hello exchange in time; the deadline is cleared again
  		// once the exchange has succeeded.
  		_ = c.SetDeadline(time.Now().Add(20 * time.Second))
  		hello, err := protocol.ExchangeHello(c, s.model.GetHello(remoteID))
  		if err != nil {
  			if protocol.IsVersionMismatch(err) {
  				// The error will be a relatively user friendly description
  				// of what's wrong with the version compatibility. By
  				// default identify the other side by device ID and IP.
  				remote := fmt.Sprintf("%v (%v)", remoteID, c.RemoteAddr())
  				if hello.DeviceName != "" {
  					// If the name was set in the hello return, use that to
  					// give the user more info about which device is the
  					// affected one. It probably says more than the remote
  					// IP.
  					remote = fmt.Sprintf("%q (%s %s, %v)", hello.DeviceName, hello.ClientName, hello.ClientVersion, remoteID)
  				}
  				msg := fmt.Sprintf("Connecting to %s: %s", remote, err)
  				warningFor(remoteID, msg)
  			} else {
  				// It's something else - connection reset or whatever
  				l.Infof("Failed to exchange Hello messages with %s at %s: %s", remoteID, c, err)
  			}
  			c.Close()
  			continue
  		}
  		_ = c.SetDeadline(time.Time{})

  		// The Model will return an error for devices that we don't want to
  		// have a connection with for whatever reason, for example unknown devices.
  		if err := s.model.OnHello(remoteID, c.RemoteAddr(), hello); err != nil {
  			l.Infof("Connection from %s at %s (%s) rejected: %v", remoteID, c.RemoteAddr(), c.Type(), err)
  			c.Close()
  			continue
  		}

  		// If we have a relay connection, and the new incoming connection is
  		// not a relay connection, we should drop that, and prefer this one.
  		ct, connected := s.model.Connection(remoteID)

  		// Lower priority is better, just like nice etc. We also switch when
  		// the existing connection is older than minConnectionReplaceAge.
  		if connected && (ct.Priority() > c.priority || time.Since(ct.Statistics().StartedAt) > minConnectionReplaceAge) {
  			l.Debugf("Switching connections %s (existing: %s new: %s)", remoteID, ct, c)
  		} else if connected {
  			// We should not already be connected to the other party. TODO: This
  			// could use some better handling. If the old connection is dead but
  			// hasn't timed out yet we may want to drop *that* connection and keep
  			// this one. But in case we are two devices connecting to each other
  			// in parallel we don't want to do that or we end up with no
  			// connections still established...
  			l.Infof("Connected to already connected device %s (existing: %s new: %s)", remoteID, ct, c)
  			c.Close()
  			continue
  		}

  		deviceCfg, ok := s.cfg.Device(remoteID)
  		if !ok {
  			l.Infof("Device %s removed from config during connection attempt at %s", remoteID, c)
  			c.Close()
  			continue
  		}

  		// Verify the name on the certificate. By default we set it to
  		// "syncthing" when generating, but the user may have replaced
  		// the certificate and used another name.
  		certName := deviceCfg.CertName
  		if certName == "" {
  			certName = s.tlsDefaultCommonName
  		}
  		if remoteCert.Subject.CommonName == certName {
  			// All good. We do this check because our old style certificates
  			// have "syncthing" in the CommonName field and no SANs, which
  			// is not accepted by VerifyHostname() any more as of Go 1.15.
  		} else if err := remoteCert.VerifyHostname(certName); err != nil {
  			// Incorrect certificate name is something the user most
  			// likely wants to know about, since it's an advanced
  			// config. Warn instead of Info.
  			l.Warnf("Bad certificate from %s at %s: %v", remoteID, c, err)
  			c.Close()
  			continue
  		}

  		// Wrap the connection in rate limiters. The limiter itself will
  		// keep up with config changes to the rate and whether or not LAN
  		// connections are limited.
  		isLAN := s.isLAN(c.RemoteAddr())
  		rd, wr := s.limiter.getLimiters(remoteID, c, isLAN)

  		protoConn := protocol.NewConnection(remoteID, rd, wr, c, s.model, c, deviceCfg.Compression, s.cfg.FolderPasswords(remoteID))
  		// Once this connection closes, mark the device for an immediate
  		// redial and wake up the connect loop.
  		go func() {
  			<-protoConn.Closed()
  			s.dialNowDevicesMut.Lock()
  			s.dialNowDevices[remoteID] = struct{}{}
  			s.scheduleDialNow()
  			s.dialNowDevicesMut.Unlock()
  		}()

  		l.Infof("Established secure connection to %s at %s", remoteID, c)

  		s.model.AddConnection(protoConn, hello)
  		continue
  	}
  }
  305. func (s *service) connect(ctx context.Context) error {
  306. // Map of when to earliest dial each given device + address again
  307. nextDialAt := make(nextDialRegistry)
  308. // Used as delay for the first few connection attempts (adjusted up to
  309. // minConnectionLoopSleep), increased exponentially until it reaches
  310. // stdConnectionLoopSleep, at which time the normal sleep mechanism
  311. // kicks in.
  312. initialRampup := time.Second
  313. for {
  314. cfg := s.cfg.RawCopy()
  315. bestDialerPriority := s.bestDialerPriority(cfg)
  316. isInitialRampup := initialRampup < stdConnectionLoopSleep
  317. l.Debugln("Connection loop")
  318. if isInitialRampup {
  319. l.Debugln("Connection loop in initial rampup")
  320. }
  321. // Used for consistency throughout this loop run, as time passes
  322. // while we try connections etc.
  323. now := time.Now()
  324. // Attempt to dial all devices that are unconnected or can be connection-upgraded
  325. s.dialDevices(ctx, now, cfg, bestDialerPriority, nextDialAt, isInitialRampup)
  326. var sleep time.Duration
  327. if isInitialRampup {
  328. // We are in the initial rampup time, so we slowly, statically
  329. // increase the sleep time.
  330. sleep = initialRampup
  331. initialRampup *= 2
  332. } else {
  333. // The sleep time is until the next dial scheduled in nextDialAt,
  334. // clamped by stdConnectionLoopSleep as we don't want to sleep too
  335. // long (config changes might happen).
  336. sleep = nextDialAt.sleepDurationAndCleanup(now)
  337. }
  338. // ... while making sure not to loop too quickly either.
  339. if sleep < minConnectionLoopSleep {
  340. sleep = minConnectionLoopSleep
  341. }
  342. l.Debugln("Next connection loop in", sleep)
  343. timeout := time.NewTimer(sleep)
  344. select {
  345. case <-s.dialNow:
  346. // Remove affected devices from nextDialAt to dial immediately,
  347. // regardless of when we last dialed it (there's cool down in the
  348. // registry for too many repeat dials).
  349. s.dialNowDevicesMut.Lock()
  350. for device := range s.dialNowDevices {
  351. nextDialAt.redialDevice(device, now)
  352. }
  353. s.dialNowDevices = make(map[protocol.DeviceID]struct{})
  354. s.dialNowDevicesMut.Unlock()
  355. timeout.Stop()
  356. case <-timeout.C:
  357. case <-ctx.Done():
  358. return ctx.Err()
  359. }
  360. }
  361. }
  362. func (s *service) bestDialerPriority(cfg config.Configuration) int {
  363. bestDialerPriority := worstDialerPriority
  364. for _, df := range dialers {
  365. if df.Valid(cfg) != nil {
  366. continue
  367. }
  368. if prio := df.Priority(); prio < bestDialerPriority {
  369. bestDialerPriority = prio
  370. }
  371. }
  372. return bestDialerPriority
  373. }
  374. func (s *service) dialDevices(ctx context.Context, now time.Time, cfg config.Configuration, bestDialerPriority int, nextDialAt nextDialRegistry, initial bool) {
  375. // Figure out current connection limits up front to see if there's any
  376. // point in resolving devices and such at all.
  377. allowAdditional := 0 // no limit
  378. connectionLimit := cfg.Options.LowestConnectionLimit()
  379. if connectionLimit > 0 {
  380. current := s.model.NumConnections()
  381. allowAdditional = connectionLimit - current
  382. if allowAdditional <= 0 {
  383. l.Debugf("Skipping dial because we've reached the connection limit, current %d >= limit %d", current, connectionLimit)
  384. return
  385. }
  386. }
  387. // Get device statistics for the last seen time of each device. This
  388. // isn't critical, so ignore the potential error.
  389. stats, _ := s.model.DeviceStatistics()
  390. queue := make(dialQueue, 0, len(cfg.Devices))
  391. for _, deviceCfg := range cfg.Devices {
  392. // Don't attempt to connect to ourselves...
  393. if deviceCfg.DeviceID == s.myID {
  394. continue
  395. }
  396. // Don't attempt to connect to paused devices...
  397. if deviceCfg.Paused {
  398. continue
  399. }
  400. // See if we are already connected and, if so, what our cutoff is
  401. // for dialer priority.
  402. priorityCutoff := worstDialerPriority
  403. connection, connected := s.model.Connection(deviceCfg.DeviceID)
  404. if connected {
  405. priorityCutoff = connection.Priority()
  406. if bestDialerPriority >= priorityCutoff {
  407. // Our best dialer is not any better than what we already
  408. // have, so nothing to do here.
  409. continue
  410. }
  411. }
  412. dialTargets := s.resolveDialTargets(ctx, now, cfg, deviceCfg, nextDialAt, initial, priorityCutoff)
  413. if len(dialTargets) > 0 {
  414. queue = append(queue, dialQueueEntry{
  415. id: deviceCfg.DeviceID,
  416. lastSeen: stats[deviceCfg.DeviceID].LastSeen,
  417. shortLived: stats[deviceCfg.DeviceID].LastConnectionDurationS < shortLivedConnectionThreshold.Seconds(),
  418. targets: dialTargets,
  419. })
  420. }
  421. }
  422. // Sort the queue in an order we think will be useful (most recent
  423. // first, deprioriting unstable devices, randomizing those we haven't
  424. // seen in a long while). If we don't do connection limiting the sorting
  425. // doesn't have much effect, but it may result in getting up and running
  426. // quicker if only a subset of configured devices are actually reachable
  427. // (by prioritizing those that were reachable recently).
  428. queue.Sort()
  429. // Perform dials according to the queue, stopping when we've reached the
  430. // allowed additional number of connections (if limited).
  431. numConns := 0
  432. var numConnsMut stdsync.Mutex
  433. dialSemaphore := util.NewSemaphore(dialMaxParallel)
  434. dialWG := new(stdsync.WaitGroup)
  435. dialCtx, dialCancel := context.WithCancel(ctx)
  436. defer func() {
  437. dialWG.Wait()
  438. dialCancel()
  439. }()
  440. for i := range queue {
  441. select {
  442. case <-dialCtx.Done():
  443. return
  444. default:
  445. }
  446. dialWG.Add(1)
  447. go func(entry dialQueueEntry) {
  448. defer dialWG.Done()
  449. conn, ok := s.dialParallel(dialCtx, entry.id, entry.targets, dialSemaphore)
  450. if !ok {
  451. return
  452. }
  453. numConnsMut.Lock()
  454. if allowAdditional == 0 || numConns < allowAdditional {
  455. select {
  456. case s.conns <- conn:
  457. numConns++
  458. if allowAdditional > 0 && numConns >= allowAdditional {
  459. dialCancel()
  460. }
  461. case <-dialCtx.Done():
  462. }
  463. }
  464. numConnsMut.Unlock()
  465. }(queue[i])
  466. }
  467. }
  468. func (s *service) resolveDialTargets(ctx context.Context, now time.Time, cfg config.Configuration, deviceCfg config.DeviceConfiguration, nextDialAt nextDialRegistry, initial bool, priorityCutoff int) []dialTarget {
  469. deviceID := deviceCfg.DeviceID
  470. addrs := s.resolveDeviceAddrs(ctx, deviceCfg)
  471. l.Debugln("Resolved device", deviceID, "addresses:", addrs)
  472. dialTargets := make([]dialTarget, 0, len(addrs))
  473. for _, addr := range addrs {
  474. // Use both device and address, as you might have two devices connected
  475. // to the same relay
  476. if !initial && nextDialAt.get(deviceID, addr).After(now) {
  477. l.Debugf("Not dialing %s via %v as it's not time yet", deviceID, addr)
  478. continue
  479. }
  480. // If we fail at any step before actually getting the dialer
  481. // retry in a minute
  482. nextDialAt.set(deviceID, addr, now.Add(time.Minute))
  483. uri, err := url.Parse(addr)
  484. if err != nil {
  485. s.setConnectionStatus(addr, err)
  486. l.Infof("Parsing dialer address %s: %v", addr, err)
  487. continue
  488. }
  489. if len(deviceCfg.AllowedNetworks) > 0 {
  490. if !IsAllowedNetwork(uri.Host, deviceCfg.AllowedNetworks) {
  491. s.setConnectionStatus(addr, errors.New("network disallowed"))
  492. l.Debugln("Network for", uri, "is disallowed")
  493. continue
  494. }
  495. }
  496. dialerFactory, err := getDialerFactory(cfg, uri)
  497. if err != nil {
  498. s.setConnectionStatus(addr, err)
  499. }
  500. if errors.Is(err, errUnsupported) {
  501. l.Debugf("Dialer for %v: %v", uri, err)
  502. continue
  503. } else if err != nil {
  504. l.Infof("Dialer for %v: %v", uri, err)
  505. continue
  506. }
  507. priority := dialerFactory.Priority()
  508. if priority >= priorityCutoff {
  509. l.Debugf("Not dialing using %s as priority is not better than current connection (%d >= %d)", dialerFactory, dialerFactory.Priority(), priorityCutoff)
  510. continue
  511. }
  512. dialer := dialerFactory.New(s.cfg.Options(), s.tlsCfg)
  513. nextDialAt.set(deviceID, addr, now.Add(dialer.RedialFrequency()))
  514. // For LAN addresses, increase the priority so that we
  515. // try these first.
  516. switch {
  517. case dialerFactory.AlwaysWAN():
  518. // Do nothing.
  519. case s.isLANHost(uri.Host):
  520. priority--
  521. }
  522. dialTargets = append(dialTargets, dialTarget{
  523. addr: addr,
  524. dialer: dialer,
  525. priority: priority,
  526. deviceID: deviceID,
  527. uri: uri,
  528. })
  529. }
  530. return dialTargets
  531. }
  532. func (s *service) resolveDeviceAddrs(ctx context.Context, cfg config.DeviceConfiguration) []string {
  533. var addrs []string
  534. for _, addr := range cfg.Addresses {
  535. if addr == "dynamic" {
  536. if s.discoverer != nil {
  537. if t, err := s.discoverer.Lookup(ctx, cfg.DeviceID); err == nil {
  538. addrs = append(addrs, t...)
  539. }
  540. }
  541. } else {
  542. addrs = append(addrs, addr)
  543. }
  544. }
  545. return util.UniqueTrimmedStrings(addrs)
  546. }
  547. func (s *service) isLANHost(host string) bool {
  548. // Probably we are called with an ip:port combo which we can resolve as
  549. // a TCP address.
  550. if addr, err := net.ResolveTCPAddr("tcp", host); err == nil {
  551. return s.isLAN(addr)
  552. }
  553. // ... but this function looks general enough that someone might try
  554. // with just an IP as well in the future so lets allow that.
  555. if addr, err := net.ResolveIPAddr("ip", host); err == nil {
  556. return s.isLAN(addr)
  557. }
  558. return false
  559. }
  560. func (s *service) isLAN(addr net.Addr) bool {
  561. var ip net.IP
  562. switch addr := addr.(type) {
  563. case *net.IPAddr:
  564. ip = addr.IP
  565. case *net.TCPAddr:
  566. ip = addr.IP
  567. case *net.UDPAddr:
  568. ip = addr.IP
  569. default:
  570. // From the standard library, just Unix sockets.
  571. // If you invent your own, handle it.
  572. return false
  573. }
  574. if ip.IsLoopback() {
  575. return true
  576. }
  577. for _, lan := range s.cfg.Options().AlwaysLocalNets {
  578. _, ipnet, err := net.ParseCIDR(lan)
  579. if err != nil {
  580. l.Debugln("Network", lan, "is malformed:", err)
  581. continue
  582. }
  583. if ipnet.Contains(ip) {
  584. return true
  585. }
  586. }
  587. lans, _ := osutil.GetLans()
  588. for _, lan := range lans {
  589. if lan.Contains(ip) {
  590. return true
  591. }
  592. }
  593. return false
  594. }
  595. func (s *service) createListener(factory listenerFactory, uri *url.URL) bool {
  596. // must be called with listenerMut held
  597. l.Debugln("Starting listener", uri)
  598. listener := factory.New(uri, s.cfg, s.tlsCfg, s.conns, s.natService)
  599. listener.OnAddressesChanged(s.logListenAddressesChangedEvent)
  600. // Retrying a listener many times in rapid succession is unlikely to help,
  601. // thus back off quickly. A listener may soon be functional again, e.g. due
  602. // to a network interface coming back online - retry every minute.
  603. spec := svcutil.SpecWithInfoLogger(l)
  604. spec.FailureThreshold = 2
  605. spec.FailureBackoff = time.Minute
  606. sup := suture.New(fmt.Sprintf("listenerSupervisor@%v", listener), spec)
  607. sup.Add(listener)
  608. s.listeners[uri.String()] = listener
  609. s.listenerTokens[uri.String()] = s.Add(sup)
  610. return true
  611. }
  612. func (s *service) logListenAddressesChangedEvent(l ListenerAddresses) {
  613. s.evLogger.Log(events.ListenAddressesChanged, map[string]interface{}{
  614. "address": l.URI,
  615. "lan": l.LANAddresses,
  616. "wan": l.WANAddresses,
  617. })
  618. }
  619. func (s *service) VerifyConfiguration(from, to config.Configuration) error {
  620. return nil
  621. }
  622. func (s *service) CommitConfiguration(from, to config.Configuration) bool {
  623. newDevices := make(map[protocol.DeviceID]bool, len(to.Devices))
  624. for _, dev := range to.Devices {
  625. newDevices[dev.DeviceID] = true
  626. }
  627. for _, dev := range from.Devices {
  628. if !newDevices[dev.DeviceID] {
  629. warningLimitersMut.Lock()
  630. delete(warningLimiters, dev.DeviceID)
  631. warningLimitersMut.Unlock()
  632. }
  633. }
  634. s.checkAndSignalConnectLoopOnUpdatedDevices(from, to)
  635. s.listenersMut.Lock()
  636. seen := make(map[string]struct{})
  637. for _, addr := range to.Options.ListenAddresses() {
  638. if addr == "" {
  639. // We can get an empty address if there is an empty listener
  640. // element in the config, indicating no listeners should be
  641. // used. This is not an error.
  642. continue
  643. }
  644. uri, err := url.Parse(addr)
  645. if err != nil {
  646. l.Warnf("Skipping malformed listener URL %q: %v", addr, err)
  647. continue
  648. }
  649. // Make sure we always have the canonical representation of the URL.
  650. // This is for consistency as we use it as a map key, but also to
  651. // avoid misunderstandings. We do not just use the canonicalized
  652. // version, because an URL that looks very similar to a human might
  653. // mean something entirely different to the computer (e.g.,
  654. // tcp:/127.0.0.1:22000 in fact being equivalent to tcp://:22000).
  655. if canonical := uri.String(); canonical != addr {
  656. l.Warnf("Skipping malformed listener URL %q (not canonical)", addr)
  657. continue
  658. }
  659. if _, ok := s.listeners[addr]; ok {
  660. seen[addr] = struct{}{}
  661. continue
  662. }
  663. factory, err := getListenerFactory(to, uri)
  664. if errors.Is(err, errUnsupported) {
  665. l.Debugf("Listener for %v: %v", uri, err)
  666. continue
  667. } else if err != nil {
  668. l.Infof("Listener for %v: %v", uri, err)
  669. continue
  670. }
  671. s.createListener(factory, uri)
  672. seen[addr] = struct{}{}
  673. }
  674. for addr, listener := range s.listeners {
  675. if _, ok := seen[addr]; !ok || listener.Factory().Valid(to) != nil {
  676. l.Debugln("Stopping listener", addr)
  677. s.Remove(s.listenerTokens[addr])
  678. delete(s.listenerTokens, addr)
  679. delete(s.listeners, addr)
  680. }
  681. }
  682. s.listenersMut.Unlock()
  683. return true
  684. }
  685. func (s *service) checkAndSignalConnectLoopOnUpdatedDevices(from, to config.Configuration) {
  686. oldDevices := from.DeviceMap()
  687. dial := false
  688. s.dialNowDevicesMut.Lock()
  689. for _, dev := range to.Devices {
  690. if dev.Paused {
  691. continue
  692. }
  693. if oldDev, ok := oldDevices[dev.DeviceID]; !ok || oldDev.Paused {
  694. s.dialNowDevices[dev.DeviceID] = struct{}{}
  695. dial = true
  696. } else if !util.EqualStrings(oldDev.Addresses, dev.Addresses) {
  697. dial = true
  698. }
  699. }
  700. if dial {
  701. s.scheduleDialNow()
  702. }
  703. s.dialNowDevicesMut.Unlock()
  704. }
  705. func (s *service) scheduleDialNow() {
  706. select {
  707. case s.dialNow <- struct{}{}:
  708. default:
  709. // channel is blocked - a config update is already pending for the connection loop.
  710. }
  711. }
  712. func (s *service) AllAddresses() []string {
  713. s.listenersMut.RLock()
  714. var addrs []string
  715. for _, listener := range s.listeners {
  716. for _, lanAddr := range listener.LANAddresses() {
  717. addrs = append(addrs, lanAddr.String())
  718. }
  719. for _, wanAddr := range listener.WANAddresses() {
  720. addrs = append(addrs, wanAddr.String())
  721. }
  722. }
  723. s.listenersMut.RUnlock()
  724. return util.UniqueTrimmedStrings(addrs)
  725. }
  726. func (s *service) ExternalAddresses() []string {
  727. if s.cfg.Options().AnnounceLANAddresses {
  728. return s.AllAddresses()
  729. }
  730. s.listenersMut.RLock()
  731. var addrs []string
  732. for _, listener := range s.listeners {
  733. for _, wanAddr := range listener.WANAddresses() {
  734. addrs = append(addrs, wanAddr.String())
  735. }
  736. }
  737. s.listenersMut.RUnlock()
  738. return util.UniqueTrimmedStrings(addrs)
  739. }
  740. func (s *service) ListenerStatus() map[string]ListenerStatusEntry {
  741. result := make(map[string]ListenerStatusEntry)
  742. s.listenersMut.RLock()
  743. for addr, listener := range s.listeners {
  744. var status ListenerStatusEntry
  745. if err := listener.Error(); err != nil {
  746. errStr := err.Error()
  747. status.Error = &errStr
  748. }
  749. status.LANAddresses = urlsToStrings(listener.LANAddresses())
  750. status.WANAddresses = urlsToStrings(listener.WANAddresses())
  751. result[addr] = status
  752. }
  753. s.listenersMut.RUnlock()
  754. return result
  755. }
  756. type connectionStatusHandler struct {
  757. connectionStatusMut sync.RWMutex
  758. connectionStatus map[string]ConnectionStatusEntry // address -> latest error/status
  759. }
  760. func newConnectionStatusHandler() connectionStatusHandler {
  761. return connectionStatusHandler{
  762. connectionStatusMut: sync.NewRWMutex(),
  763. connectionStatus: make(map[string]ConnectionStatusEntry),
  764. }
  765. }
  766. func (s *connectionStatusHandler) ConnectionStatus() map[string]ConnectionStatusEntry {
  767. result := make(map[string]ConnectionStatusEntry)
  768. s.connectionStatusMut.RLock()
  769. for k, v := range s.connectionStatus {
  770. result[k] = v
  771. }
  772. s.connectionStatusMut.RUnlock()
  773. return result
  774. }
  775. func (s *connectionStatusHandler) setConnectionStatus(address string, err error) {
  776. if errors.Cause(err) == context.Canceled {
  777. return
  778. }
  779. status := ConnectionStatusEntry{When: time.Now().UTC().Truncate(time.Second)}
  780. if err != nil {
  781. errStr := err.Error()
  782. status.Error = &errStr
  783. }
  784. s.connectionStatusMut.Lock()
  785. s.connectionStatus[address] = status
  786. s.connectionStatusMut.Unlock()
  787. }
  788. func (s *service) NATType() string {
  789. s.listenersMut.RLock()
  790. defer s.listenersMut.RUnlock()
  791. for _, listener := range s.listeners {
  792. natType := listener.NATType()
  793. if natType != "unknown" {
  794. return natType
  795. }
  796. }
  797. return "unknown"
  798. }
  799. func getDialerFactory(cfg config.Configuration, uri *url.URL) (dialerFactory, error) {
  800. dialerFactory, ok := dialers[uri.Scheme]
  801. if !ok {
  802. return nil, fmt.Errorf("unknown address scheme %q", uri.Scheme)
  803. }
  804. if err := dialerFactory.Valid(cfg); err != nil {
  805. return nil, err
  806. }
  807. return dialerFactory, nil
  808. }
  809. func getListenerFactory(cfg config.Configuration, uri *url.URL) (listenerFactory, error) {
  810. listenerFactory, ok := listeners[uri.Scheme]
  811. if !ok {
  812. return nil, fmt.Errorf("unknown address scheme %q", uri.Scheme)
  813. }
  814. if err := listenerFactory.Valid(cfg); err != nil {
  815. return nil, err
  816. }
  817. return listenerFactory, nil
  818. }
  819. func urlsToStrings(urls []*url.URL) []string {
  820. strings := make([]string, len(urls))
  821. for i, url := range urls {
  822. strings[i] = url.String()
  823. }
  824. return strings
  825. }
  826. var warningLimiters = make(map[protocol.DeviceID]*rate.Limiter)
  827. var warningLimitersMut = sync.NewMutex()
  828. func warningFor(dev protocol.DeviceID, msg string) {
  829. warningLimitersMut.Lock()
  830. defer warningLimitersMut.Unlock()
  831. lim, ok := warningLimiters[dev]
  832. if !ok {
  833. lim = rate.NewLimiter(rate.Every(perDeviceWarningIntv), 1)
  834. warningLimiters[dev] = lim
  835. }
  836. if lim.Allow() {
  837. l.Warnln(msg)
  838. }
  839. }
  840. func tlsTimedHandshake(tc *tls.Conn) error {
  841. tc.SetDeadline(time.Now().Add(tlsHandshakeTimeout))
  842. defer tc.SetDeadline(time.Time{})
  843. return tc.Handshake()
  844. }
  845. // IsAllowedNetwork returns true if the given host (IP or resolvable
  846. // hostname) is in the set of allowed networks (CIDR format only).
  847. func IsAllowedNetwork(host string, allowed []string) bool {
  848. if hostNoPort, _, err := net.SplitHostPort(host); err == nil {
  849. host = hostNoPort
  850. }
  851. addr, err := net.ResolveIPAddr("ip", host)
  852. if err != nil {
  853. return false
  854. }
  855. for _, n := range allowed {
  856. result := true
  857. if strings.HasPrefix(n, "!") {
  858. result = false
  859. n = n[1:]
  860. }
  861. _, cidr, err := net.ParseCIDR(n)
  862. if err != nil {
  863. continue
  864. }
  865. if cidr.Contains(addr.IP) {
  866. return result
  867. }
  868. }
  869. return false
  870. }
  871. func (s *service) dialParallel(ctx context.Context, deviceID protocol.DeviceID, dialTargets []dialTarget, parentSema *util.Semaphore) (internalConn, bool) {
  872. // Group targets into buckets by priority
  873. dialTargetBuckets := make(map[int][]dialTarget, len(dialTargets))
  874. for _, tgt := range dialTargets {
  875. dialTargetBuckets[tgt.priority] = append(dialTargetBuckets[tgt.priority], tgt)
  876. }
  877. // Get all available priorities
  878. priorities := make([]int, 0, len(dialTargetBuckets))
  879. for prio := range dialTargetBuckets {
  880. priorities = append(priorities, prio)
  881. }
  882. // Sort the priorities so that we dial lowest first (which means highest...)
  883. sort.Ints(priorities)
  884. sema := util.MultiSemaphore{util.NewSemaphore(dialMaxParallelPerDevice), parentSema}
  885. for _, prio := range priorities {
  886. tgts := dialTargetBuckets[prio]
  887. res := make(chan internalConn, len(tgts))
  888. wg := stdsync.WaitGroup{}
  889. for _, tgt := range tgts {
  890. sema.Take(1)
  891. wg.Add(1)
  892. go func(tgt dialTarget) {
  893. defer func() {
  894. wg.Done()
  895. sema.Give(1)
  896. }()
  897. conn, err := tgt.Dial(ctx)
  898. if err == nil {
  899. // Closes the connection on error
  900. err = s.validateIdentity(conn, deviceID)
  901. }
  902. s.setConnectionStatus(tgt.addr, err)
  903. if err != nil {
  904. l.Debugln("dialing", deviceID, tgt.uri, "error:", err)
  905. } else {
  906. l.Debugln("dialing", deviceID, tgt.uri, "success:", conn)
  907. res <- conn
  908. }
  909. }(tgt)
  910. }
  911. // Spawn a routine which will unblock main routine in case we fail
  912. // to connect to anyone.
  913. go func() {
  914. wg.Wait()
  915. close(res)
  916. }()
  917. // Wait for the first connection, or for channel closure.
  918. if conn, ok := <-res; ok {
  919. // Got a connection, means more might come back, hence spawn a
  920. // routine that will do the discarding.
  921. l.Debugln("connected to", deviceID, prio, "using", conn, conn.priority)
  922. go func(deviceID protocol.DeviceID, prio int) {
  923. wg.Wait()
  924. l.Debugln("discarding", len(res), "connections while connecting to", deviceID, prio)
  925. for conn := range res {
  926. conn.Close()
  927. }
  928. }(deviceID, prio)
  929. return conn, ok
  930. }
  931. // Failed to connect, report that fact.
  932. l.Debugln("failed to connect to", deviceID, prio)
  933. }
  934. return internalConn{}, false
  935. }
  936. func (s *service) validateIdentity(c internalConn, expectedID protocol.DeviceID) error {
  937. cs := c.ConnectionState()
  938. // We should have received exactly one certificate from the other
  939. // side. If we didn't, they don't have a device ID and we drop the
  940. // connection.
  941. certs := cs.PeerCertificates
  942. if cl := len(certs); cl != 1 {
  943. l.Infof("Got peer certificate list of length %d != 1 from peer at %s; protocol error", cl, c)
  944. c.Close()
  945. return fmt.Errorf("expected 1 certificate, got %d", cl)
  946. }
  947. remoteCert := certs[0]
  948. remoteID := protocol.NewDeviceID(remoteCert.Raw)
  949. // The device ID should not be that of ourselves. It can happen
  950. // though, especially in the presence of NAT hairpinning, multiple
  951. // clients between the same NAT gateway, and global discovery.
  952. if remoteID == s.myID {
  953. l.Debugf("Connected to myself (%s) at %s", remoteID, c)
  954. c.Close()
  955. return errors.New("connected to self")
  956. }
  957. // We should see the expected device ID
  958. if !remoteID.Equals(expectedID) {
  959. c.Close()
  960. return fmt.Errorf("unexpected device id, expected %s got %s", expectedID, remoteID)
  961. }
  962. return nil
  963. }
  964. type nextDialRegistry map[protocol.DeviceID]nextDialDevice
  965. type nextDialDevice struct {
  966. nextDial map[string]time.Time
  967. coolDownIntervalStart time.Time
  968. attempts int
  969. }
  970. func (r nextDialRegistry) get(device protocol.DeviceID, addr string) time.Time {
  971. return r[device].nextDial[addr]
  972. }
  973. const (
  974. dialCoolDownInterval = 2 * time.Minute
  975. dialCoolDownDelay = 5 * time.Minute
  976. dialCoolDownMaxAttemps = 3
  977. )
  978. // redialDevice marks the device for immediate redial, unless the remote keeps
  979. // dropping established connections. Thus we keep track of when the first forced
  980. // re-dial happened, and how many attempts happen in the dialCoolDownInterval
  981. // after that. If it's more than dialCoolDownMaxAttempts, don't force-redial
  982. // that device for dialCoolDownDelay (regular dials still happen).
  983. func (r nextDialRegistry) redialDevice(device protocol.DeviceID, now time.Time) {
  984. dev, ok := r[device]
  985. if !ok {
  986. r[device] = nextDialDevice{
  987. nextDial: make(map[string]time.Time),
  988. coolDownIntervalStart: now,
  989. attempts: 1,
  990. }
  991. return
  992. }
  993. if dev.attempts == 0 || now.Before(dev.coolDownIntervalStart.Add(dialCoolDownInterval)) {
  994. if dev.attempts >= dialCoolDownMaxAttemps {
  995. // Device has been force redialed too often - let it cool down.
  996. return
  997. }
  998. if dev.attempts == 0 {
  999. dev.coolDownIntervalStart = now
  1000. }
  1001. dev.attempts++
  1002. dev.nextDial = make(map[string]time.Time)
  1003. return
  1004. }
  1005. if dev.attempts >= dialCoolDownMaxAttemps && now.Before(dev.coolDownIntervalStart.Add(dialCoolDownDelay)) {
  1006. return // Still cooling down
  1007. }
  1008. delete(r, device)
  1009. }
  1010. func (r nextDialRegistry) set(device protocol.DeviceID, addr string, next time.Time) {
  1011. if _, ok := r[device]; !ok {
  1012. r[device] = nextDialDevice{nextDial: make(map[string]time.Time)}
  1013. }
  1014. r[device].nextDial[addr] = next
  1015. }
  1016. func (r nextDialRegistry) sleepDurationAndCleanup(now time.Time) time.Duration {
  1017. sleep := stdConnectionLoopSleep
  1018. for id, dev := range r {
  1019. for address, next := range dev.nextDial {
  1020. if next.Before(now) {
  1021. // Expired entry, address was not seen in last pass(es)
  1022. delete(dev.nextDial, address)
  1023. continue
  1024. }
  1025. if cur := next.Sub(now); cur < sleep {
  1026. sleep = cur
  1027. }
  1028. }
  1029. if dev.attempts > 0 {
  1030. interval := dialCoolDownInterval
  1031. if dev.attempts >= dialCoolDownMaxAttemps {
  1032. interval = dialCoolDownDelay
  1033. }
  1034. if now.After(dev.coolDownIntervalStart.Add(interval)) {
  1035. dev.attempts = 0
  1036. }
  1037. }
  1038. if len(dev.nextDial) == 0 && dev.attempts == 0 {
  1039. delete(r, id)
  1040. }
  1041. }
  1042. return sleep
  1043. }