service.go 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145
  1. // Copyright (C) 2015 The Syncthing Authors.
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this file,
  5. // You can obtain one at https://mozilla.org/MPL/2.0/.
  6. //go:generate -command counterfeiter go run github.com/maxbrunsfeld/counterfeiter/v6
  7. //go:generate counterfeiter -o mocks/service.go --fake-name Service . Service
  8. package connections
  9. import (
  10. "context"
  11. "crypto/tls"
  12. "fmt"
  13. "math"
  14. "net"
  15. "net/url"
  16. "sort"
  17. "strings"
  18. stdsync "sync"
  19. "time"
  20. "github.com/syncthing/syncthing/lib/config"
  21. "github.com/syncthing/syncthing/lib/discover"
  22. "github.com/syncthing/syncthing/lib/events"
  23. "github.com/syncthing/syncthing/lib/nat"
  24. "github.com/syncthing/syncthing/lib/osutil"
  25. "github.com/syncthing/syncthing/lib/protocol"
  26. "github.com/syncthing/syncthing/lib/svcutil"
  27. "github.com/syncthing/syncthing/lib/sync"
  28. "github.com/syncthing/syncthing/lib/util"
  29. // Registers NAT service providers
  30. _ "github.com/syncthing/syncthing/lib/pmp"
  31. _ "github.com/syncthing/syncthing/lib/upnp"
  32. "github.com/pkg/errors"
  33. "github.com/thejerf/suture/v4"
  34. "golang.org/x/time/rate"
  35. )
var (
	// dialers holds the known dialer factories under a string key
	// (presumably the URL scheme; the registration sites are not in this
	// part of the file - confirm). bestDialerPriority iterates over all of
	// them.
	dialers = make(map[string]dialerFactory)
	// listeners holds the known listener factories under a string key
	// (presumably the URL scheme as well - confirm).
	listeners = make(map[string]listenerFactory)
)
var (
	// Dialers and listeners return errUnsupported (or a wrapped variant)
	// when they are intentionally out of service due to configuration,
	// build, etc. This is not logged loudly: callers in this file check
	// errors.Is(err, errUnsupported) and log such errors at debug level.
	errUnsupported = errors.New("unsupported protocol")
	// These are specific explanations for errUnsupported. Each one wraps
	// errUnsupported with %w so that errors.Is matches it.
	errDisabled   = fmt.Errorf("%w: disabled by configuration", errUnsupported)
	errDeprecated = fmt.Errorf("%w: deprecated", errUnsupported)
	errNotInBuild = fmt.Errorf("%w: disabled at build time", errUnsupported)
)
const (
	// perDeviceWarningIntv is not used in this part of the file; presumably
	// the minimum interval between repeated warnings for one device - confirm.
	perDeviceWarningIntv = 15 * time.Minute
	// tlsHandshakeTimeout is not used in this part of the file; presumably
	// the time allowed for a TLS handshake - confirm.
	tlsHandshakeTimeout = 10 * time.Second
	// minConnectionReplaceAge is the age after which an existing connection
	// may be replaced by a new one, regardless of priority (see handle).
	minConnectionReplaceAge = 10 * time.Second
	// minConnectionLoopSleep is the shortest pause between two runs of the
	// connection loop.
	minConnectionLoopSleep = 5 * time.Second
	// stdConnectionLoopSleep is the pause at which the initial rampup of
	// the connection loop ends.
	stdConnectionLoopSleep = time.Minute
	// worstDialerPriority is the starting point when looking for the best
	// (numerically lowest) priority, and the cutoff for unconnected devices.
	worstDialerPriority = math.MaxInt32
	// recentlySeenCutoff is not used in this part of the file; presumably
	// it is used by the dial queue sorting - confirm.
	recentlySeenCutoff = 7 * 24 * time.Hour
	// shortLivedConnectionThreshold is the duration below which a device's
	// last connection counts as short lived when building the dial queue.
	shortLivedConnectionThreshold = 5 * time.Second
)
// tlsCipherSuiteNames maps TLS cipher suite identifiers to their names.
//
// From go/src/crypto/tls/cipher_suites.go
var tlsCipherSuiteNames = map[uint16]string{
	// TLS 1.2
	0x0005: "TLS_RSA_WITH_RC4_128_SHA",
	0x000a: "TLS_RSA_WITH_3DES_EDE_CBC_SHA",
	0x002f: "TLS_RSA_WITH_AES_128_CBC_SHA",
	0x0035: "TLS_RSA_WITH_AES_256_CBC_SHA",
	0x003c: "TLS_RSA_WITH_AES_128_CBC_SHA256",
	0x009c: "TLS_RSA_WITH_AES_128_GCM_SHA256",
	0x009d: "TLS_RSA_WITH_AES_256_GCM_SHA384",
	0xc007: "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA",
	0xc009: "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA",
	0xc00a: "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA",
	0xc011: "TLS_ECDHE_RSA_WITH_RC4_128_SHA",
	0xc012: "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA",
	0xc013: "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA",
	0xc014: "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA",
	0xc023: "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256",
	0xc027: "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256",
	0xc02f: "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",
	0xc02b: "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",
	0xc030: "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",
	0xc02c: "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",
	0xcca8: "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305",
	0xcca9: "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305",
	// TLS 1.3
	0x1301: "TLS_AES_128_GCM_SHA256",
	0x1302: "TLS_AES_256_GCM_SHA384",
	0x1303: "TLS_CHACHA20_POLY1305_SHA256",
}
// tlsVersionNames maps TLS protocol version identifiers to short display
// names. Only TLS 1.2 and TLS 1.3 have entries.
var tlsVersionNames = map[uint16]string{
	tls.VersionTLS12: "TLS1.2",
	tls.VersionTLS13: "TLS1.3",
}
// Service listens and dials all configured unconnected devices, via supported
// dialers. Successful connections are handed to the model.
type Service interface {
	suture.Service
	discover.AddressLister
	// ListenerStatus returns the current status of each listener, keyed by
	// listen address.
	ListenerStatus() map[string]ListenerStatusEntry
	// ConnectionStatus returns the latest recorded status per dialed
	// address.
	ConnectionStatus() map[string]ConnectionStatusEntry
	// NATType returns a description of the NAT type; the implementation is
	// not in this part of the file.
	NATType() string
}
// ListenerStatusEntry describes the state of a single listener as reported
// by Service.ListenerStatus.
type ListenerStatusEntry struct {
	// Error is the listener's current error message, or nil when the
	// listener reports no error.
	Error *string `json:"error"`
	// LANAddresses are the listener's LAN addresses in string form.
	LANAddresses []string `json:"lanAddresses"`
	// WANAddresses are the listener's WAN addresses in string form.
	WANAddresses []string `json:"wanAddresses"`
}
// ConnectionStatusEntry is the latest recorded status for one dialed
// address, as reported by Service.ConnectionStatus.
type ConnectionStatusEntry struct {
	// When is the time the status was recorded.
	When time.Time `json:"when"`
	// Error is the recorded error message, or nil when there was no error.
	Error *string `json:"error"`
}
// service is the implementation of Service returned by NewService.
type service struct {
	// The embedded supervisor runs the connect and handle loops, the NAT
	// service and one sub-supervisor per listener.
	*suture.Supervisor
	// The embedded handler records the latest dial status per address.
	connectionStatusHandler

	cfg    config.Wrapper    // configuration source; the service subscribes to changes
	myID   protocol.DeviceID // our own device ID, used to detect connections to ourselves
	model  Model             // receives accepted connections and answers connection state queries
	tlsCfg *tls.Config       // handed to dialer and listener factories
	// discoverer resolves "dynamic" device addresses; may be nil, in which
	// case dynamic addresses are skipped.
	discoverer discover.Finder
	// conns carries established connections from listeners and from the
	// dial loop to handle. It is unbuffered.
	conns                chan internalConn
	bepProtocolName      string // protocol name expected from TLS protocol negotiation
	tlsDefaultCommonName string // certificate name expected when the device config sets none
	limiter              *limiter
	natService           *nat.Service
	evLogger             events.Logger

	// dialNow wakes the connect loop early. It has a buffer of one so that
	// signalling never blocks.
	dialNow chan struct{}
	// dialNowDevices is the set of devices to redial at the next wake-up,
	// guarded by dialNowDevicesMut.
	dialNowDevices    map[protocol.DeviceID]struct{}
	dialNowDevicesMut sync.Mutex

	// listenersMut guards listeners and listenerTokens.
	listenersMut sync.RWMutex
	// listeners holds the running listeners, keyed by listen address URL.
	listeners map[string]genericListener
	// listenerTokens holds the supervisor token of each listener, under the
	// same key, so that it can be removed again.
	listenerTokens map[string]suture.ServiceToken
}
  133. func NewService(cfg config.Wrapper, myID protocol.DeviceID, mdl Model, tlsCfg *tls.Config, discoverer discover.Finder, bepProtocolName string, tlsDefaultCommonName string, evLogger events.Logger) Service {
  134. spec := svcutil.SpecWithInfoLogger(l)
  135. service := &service{
  136. Supervisor: suture.New("connections.Service", spec),
  137. connectionStatusHandler: newConnectionStatusHandler(),
  138. cfg: cfg,
  139. myID: myID,
  140. model: mdl,
  141. tlsCfg: tlsCfg,
  142. discoverer: discoverer,
  143. conns: make(chan internalConn),
  144. bepProtocolName: bepProtocolName,
  145. tlsDefaultCommonName: tlsDefaultCommonName,
  146. limiter: newLimiter(myID, cfg),
  147. natService: nat.NewService(myID, cfg),
  148. evLogger: evLogger,
  149. dialNowDevicesMut: sync.NewMutex(),
  150. dialNow: make(chan struct{}, 1),
  151. dialNowDevices: make(map[protocol.DeviceID]struct{}),
  152. listenersMut: sync.NewRWMutex(),
  153. listeners: make(map[string]genericListener),
  154. listenerTokens: make(map[string]suture.ServiceToken),
  155. }
  156. cfg.Subscribe(service)
  157. raw := cfg.RawCopy()
  158. // Actually starts the listeners and NAT service
  159. // Need to start this before service.connect so that any dials that
  160. // try punch through already have a listener to cling on.
  161. service.CommitConfiguration(raw, raw)
  162. // There are several moving parts here; one routine per listening address
  163. // (handled in configuration changing) to handle incoming connections,
  164. // one routine to periodically attempt outgoing connections, one routine to
  165. // the common handling regardless of whether the connection was
  166. // incoming or outgoing.
  167. service.Add(svcutil.AsService(service.connect, fmt.Sprintf("%s/connect", service)))
  168. service.Add(svcutil.AsService(service.handle, fmt.Sprintf("%s/handle", service)))
  169. service.Add(service.natService)
  170. svcutil.OnSupervisorDone(service.Supervisor, func() {
  171. service.cfg.Unsubscribe(service.limiter)
  172. service.cfg.Unsubscribe(service)
  173. })
  174. return service
  175. }
// handle receives established connections from s.conns, one at a time, and
// performs the checks common to incoming and outgoing connections: peer
// certificate and device ID, hello exchange, acceptance by the model,
// replacement of an existing connection and certificate name verification.
// Connections that pass are wrapped in rate limiters and added to the model;
// all others are closed. It returns only when ctx is done, with ctx.Err().
func (s *service) handle(ctx context.Context) error {
	var c internalConn

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case c = <-s.conns:
		}

		cs := c.ConnectionState()

		// We should have negotiated the next level protocol "bep/1.0" as part
		// of the TLS handshake. Unfortunately this can't be a hard error,
		// because there are implementations out there that don't support
		// protocol negotiation (iOS for one...).
		if !cs.NegotiatedProtocolIsMutual || cs.NegotiatedProtocol != s.bepProtocolName {
			l.Infof("Peer at %s did not negotiate bep/1.0", c)
		}

		// We should have received exactly one certificate from the other
		// side. If we didn't, they don't have a device ID and we drop the
		// connection.
		certs := cs.PeerCertificates
		if cl := len(certs); cl != 1 {
			l.Infof("Got peer certificate list of length %d != 1 from peer at %s; protocol error", cl, c)
			c.Close()
			continue
		}
		remoteCert := certs[0]
		remoteID := protocol.NewDeviceID(remoteCert.Raw)

		// The device ID should not be that of ourselves. It can happen
		// though, especially in the presence of NAT hairpinning, multiple
		// clients between the same NAT gateway, and global discovery.
		if remoteID == s.myID {
			l.Debugf("Connected to myself (%s) at %s", remoteID, c)
			c.Close()
			continue
		}

		// Bound the hello exchange in time; the deadline is cleared again
		// once the exchange has succeeded.
		_ = c.SetDeadline(time.Now().Add(20 * time.Second))
		hello, err := protocol.ExchangeHello(c, s.model.GetHello(remoteID))
		if err != nil {
			if protocol.IsVersionMismatch(err) {
				// The error will be a relatively user friendly description
				// of what's wrong with the version compatibility. By
				// default identify the other side by device ID and IP.
				remote := fmt.Sprintf("%v (%v)", remoteID, c.RemoteAddr())
				if hello.DeviceName != "" {
					// If the name was set in the hello return, use that to
					// give the user more info about which device is the
					// affected one. It probably says more than the remote
					// IP.
					remote = fmt.Sprintf("%q (%s %s, %v)", hello.DeviceName, hello.ClientName, hello.ClientVersion, remoteID)
				}
				msg := fmt.Sprintf("Connecting to %s: %s", remote, err)
				warningFor(remoteID, msg)
			} else {
				// It's something else - connection reset or whatever
				l.Infof("Failed to exchange Hello messages with %s at %s: %s", remoteID, c, err)
			}
			c.Close()
			continue
		}
		_ = c.SetDeadline(time.Time{})

		// The Model will return an error for devices that we don't want to
		// have a connection with for whatever reason, for example unknown devices.
		if err := s.model.OnHello(remoteID, c.RemoteAddr(), hello); err != nil {
			l.Infof("Connection from %s at %s (%s) rejected: %v", remoteID, c.RemoteAddr(), c.Type(), err)
			c.Close()
			continue
		}

		// If we have a relay connection, and the new incoming connection is
		// not a relay connection, we should drop that, and prefer this one.
		ct, connected := s.model.Connection(remoteID)

		// Lower priority is better, just like nice etc. An existing
		// connection is also replaced when it is older than
		// minConnectionReplaceAge.
		if connected && (ct.Priority() > c.priority || time.Since(ct.Statistics().StartedAt) > minConnectionReplaceAge) {
			l.Debugf("Switching connections %s (existing: %s new: %s)", remoteID, ct, c)
		} else if connected {
			// We should not already be connected to the other party. TODO: This
			// could use some better handling. If the old connection is dead but
			// hasn't timed out yet we may want to drop *that* connection and keep
			// this one. But in case we are two devices connecting to each other
			// in parallel we don't want to do that or we end up with no
			// connections still established...
			l.Infof("Connected to already connected device %s (existing: %s new: %s)", remoteID, ct, c)
			c.Close()
			continue
		}

		deviceCfg, ok := s.cfg.Device(remoteID)
		if !ok {
			l.Infof("Device %s removed from config during connection attempt at %s", remoteID, c)
			c.Close()
			continue
		}

		// Verify the name on the certificate. By default we set it to
		// "syncthing" when generating, but the user may have replaced
		// the certificate and used another name.
		certName := deviceCfg.CertName
		if certName == "" {
			certName = s.tlsDefaultCommonName
		}
		if remoteCert.Subject.CommonName == certName {
			// All good. We do this check because our old style certificates
			// have "syncthing" in the CommonName field and no SANs, which
			// is not accepted by VerifyHostname() any more as of Go 1.15.
		} else if err := remoteCert.VerifyHostname(certName); err != nil {
			// Incorrect certificate name is something the user most
			// likely wants to know about, since it's an advanced
			// config. Warn instead of Info.
			l.Warnf("Bad certificate from %s at %s: %v", remoteID, c, err)
			c.Close()
			continue
		}

		// Wrap the connection in rate limiters. The limiter itself will
		// keep up with config changes to the rate and whether or not LAN
		// connections are limited.
		isLAN := s.isLAN(c.RemoteAddr())
		rd, wr := s.limiter.getLimiters(remoteID, c, isLAN)

		protoConn := protocol.NewConnection(remoteID, rd, wr, c, s.model, c, deviceCfg.Compression, s.cfg.FolderPasswords(remoteID))
		// Once this connection closes, mark the device for an immediate
		// redial and wake the connect loop.
		go func() {
			<-protoConn.Closed()
			s.dialNowDevicesMut.Lock()
			s.dialNowDevices[remoteID] = struct{}{}
			s.scheduleDialNow()
			s.dialNowDevicesMut.Unlock()
		}()

		l.Infof("Established secure connection to %s at %s", remoteID, c)

		s.model.AddConnection(protoConn, hello)
		continue
	}
}
  303. func (s *service) connect(ctx context.Context) error {
  304. // Map of when to earliest dial each given device + address again
  305. nextDialAt := make(nextDialRegistry)
  306. // Used as delay for the first few connection attempts (adjusted up to
  307. // minConnectionLoopSleep), increased exponentially until it reaches
  308. // stdConnectionLoopSleep, at which time the normal sleep mechanism
  309. // kicks in.
  310. initialRampup := time.Second
  311. for {
  312. cfg := s.cfg.RawCopy()
  313. bestDialerPriority := s.bestDialerPriority(cfg)
  314. isInitialRampup := initialRampup < stdConnectionLoopSleep
  315. l.Debugln("Connection loop")
  316. if isInitialRampup {
  317. l.Debugln("Connection loop in initial rampup")
  318. }
  319. // Used for consistency throughout this loop run, as time passes
  320. // while we try connections etc.
  321. now := time.Now()
  322. // Attempt to dial all devices that are unconnected or can be connection-upgraded
  323. s.dialDevices(ctx, now, cfg, bestDialerPriority, nextDialAt, isInitialRampup)
  324. var sleep time.Duration
  325. if isInitialRampup {
  326. // We are in the initial rampup time, so we slowly, statically
  327. // increase the sleep time.
  328. sleep = initialRampup
  329. initialRampup *= 2
  330. } else {
  331. // The sleep time is until the next dial scheduled in nextDialAt,
  332. // clamped by stdConnectionLoopSleep as we don't want to sleep too
  333. // long (config changes might happen).
  334. sleep = nextDialAt.sleepDurationAndCleanup(now)
  335. }
  336. // ... while making sure not to loop too quickly either.
  337. if sleep < minConnectionLoopSleep {
  338. sleep = minConnectionLoopSleep
  339. }
  340. l.Debugln("Next connection loop in", sleep)
  341. timeout := time.NewTimer(sleep)
  342. select {
  343. case <-s.dialNow:
  344. // Remove affected devices from nextDialAt to dial immediately,
  345. // regardless of when we last dialed it (there's cool down in the
  346. // registry for too many repeat dials).
  347. s.dialNowDevicesMut.Lock()
  348. for device := range s.dialNowDevices {
  349. nextDialAt.redialDevice(device, now)
  350. }
  351. s.dialNowDevices = make(map[protocol.DeviceID]struct{})
  352. s.dialNowDevicesMut.Unlock()
  353. timeout.Stop()
  354. case <-timeout.C:
  355. case <-ctx.Done():
  356. return ctx.Err()
  357. }
  358. }
  359. }
  360. func (s *service) bestDialerPriority(cfg config.Configuration) int {
  361. bestDialerPriority := worstDialerPriority
  362. for _, df := range dialers {
  363. if df.Valid(cfg) != nil {
  364. continue
  365. }
  366. if prio := df.Priority(); prio < bestDialerPriority {
  367. bestDialerPriority = prio
  368. }
  369. }
  370. return bestDialerPriority
  371. }
  372. func (s *service) dialDevices(ctx context.Context, now time.Time, cfg config.Configuration, bestDialerPriority int, nextDialAt nextDialRegistry, initial bool) {
  373. // Figure out current connection limits up front to see if there's any
  374. // point in resolving devices and such at all.
  375. allowAdditional := 0 // no limit
  376. connectionLimit := cfg.Options.LowestConnectionLimit()
  377. if connectionLimit > 0 {
  378. current := s.model.NumConnections()
  379. allowAdditional = connectionLimit - current
  380. if allowAdditional <= 0 {
  381. l.Debugf("Skipping dial because we've reached the connection limit, current %d >= limit %d", current, connectionLimit)
  382. return
  383. }
  384. }
  385. // Get device statistics for the last seen time of each device. This
  386. // isn't critical, so ignore the potential error.
  387. stats, _ := s.model.DeviceStatistics()
  388. queue := make(dialQueue, 0, len(cfg.Devices))
  389. for _, deviceCfg := range cfg.Devices {
  390. // Don't attempt to connect to ourselves...
  391. if deviceCfg.DeviceID == s.myID {
  392. continue
  393. }
  394. // Don't attempt to connect to paused devices...
  395. if deviceCfg.Paused {
  396. continue
  397. }
  398. // See if we are already connected and, if so, what our cutoff is
  399. // for dialer priority.
  400. priorityCutoff := worstDialerPriority
  401. connection, connected := s.model.Connection(deviceCfg.DeviceID)
  402. if connected {
  403. priorityCutoff = connection.Priority()
  404. if bestDialerPriority >= priorityCutoff {
  405. // Our best dialer is not any better than what we already
  406. // have, so nothing to do here.
  407. continue
  408. }
  409. }
  410. dialTargets := s.resolveDialTargets(ctx, now, cfg, deviceCfg, nextDialAt, initial, priorityCutoff)
  411. if len(dialTargets) > 0 {
  412. queue = append(queue, dialQueueEntry{
  413. id: deviceCfg.DeviceID,
  414. lastSeen: stats[deviceCfg.DeviceID].LastSeen,
  415. shortLived: stats[deviceCfg.DeviceID].LastConnectionDurationS < shortLivedConnectionThreshold.Seconds(),
  416. targets: dialTargets,
  417. })
  418. }
  419. }
  420. // Sort the queue in an order we think will be useful (most recent
  421. // first, deprioriting unstable devices, randomizing those we haven't
  422. // seen in a long while). If we don't do connection limiting the sorting
  423. // doesn't have much effect, but it may result in getting up and running
  424. // quicker if only a subset of configured devices are actually reachable
  425. // (by prioritizing those that were reachable recently).
  426. queue.Sort()
  427. // Perform dials according to the queue, stopping when we've reached the
  428. // allowed additional number of connections (if limited).
  429. numConns := 0
  430. for _, entry := range queue {
  431. if conn, ok := s.dialParallel(ctx, entry.id, entry.targets); ok {
  432. s.conns <- conn
  433. numConns++
  434. if allowAdditional > 0 && numConns >= allowAdditional {
  435. break
  436. }
  437. }
  438. }
  439. }
  440. func (s *service) resolveDialTargets(ctx context.Context, now time.Time, cfg config.Configuration, deviceCfg config.DeviceConfiguration, nextDialAt nextDialRegistry, initial bool, priorityCutoff int) []dialTarget {
  441. deviceID := deviceCfg.DeviceID
  442. addrs := s.resolveDeviceAddrs(ctx, deviceCfg)
  443. l.Debugln("Resolved device", deviceID, "addresses:", addrs)
  444. dialTargets := make([]dialTarget, 0, len(addrs))
  445. for _, addr := range addrs {
  446. // Use both device and address, as you might have two devices connected
  447. // to the same relay
  448. if !initial && nextDialAt.get(deviceID, addr).After(now) {
  449. l.Debugf("Not dialing %s via %v as it's not time yet", deviceID, addr)
  450. continue
  451. }
  452. // If we fail at any step before actually getting the dialer
  453. // retry in a minute
  454. nextDialAt.set(deviceID, addr, now.Add(time.Minute))
  455. uri, err := url.Parse(addr)
  456. if err != nil {
  457. s.setConnectionStatus(addr, err)
  458. l.Infof("Parsing dialer address %s: %v", addr, err)
  459. continue
  460. }
  461. if len(deviceCfg.AllowedNetworks) > 0 {
  462. if !IsAllowedNetwork(uri.Host, deviceCfg.AllowedNetworks) {
  463. s.setConnectionStatus(addr, errors.New("network disallowed"))
  464. l.Debugln("Network for", uri, "is disallowed")
  465. continue
  466. }
  467. }
  468. dialerFactory, err := getDialerFactory(cfg, uri)
  469. if err != nil {
  470. s.setConnectionStatus(addr, err)
  471. }
  472. if errors.Is(err, errUnsupported) {
  473. l.Debugf("Dialer for %v: %v", uri, err)
  474. continue
  475. } else if err != nil {
  476. l.Infof("Dialer for %v: %v", uri, err)
  477. continue
  478. }
  479. priority := dialerFactory.Priority()
  480. if priority >= priorityCutoff {
  481. l.Debugf("Not dialing using %s as priority is not better than current connection (%d >= %d)", dialerFactory, dialerFactory.Priority(), priorityCutoff)
  482. continue
  483. }
  484. dialer := dialerFactory.New(s.cfg.Options(), s.tlsCfg)
  485. nextDialAt.set(deviceID, addr, now.Add(dialer.RedialFrequency()))
  486. // For LAN addresses, increase the priority so that we
  487. // try these first.
  488. switch {
  489. case dialerFactory.AlwaysWAN():
  490. // Do nothing.
  491. case s.isLANHost(uri.Host):
  492. priority--
  493. }
  494. dialTargets = append(dialTargets, dialTarget{
  495. addr: addr,
  496. dialer: dialer,
  497. priority: priority,
  498. deviceID: deviceID,
  499. uri: uri,
  500. })
  501. }
  502. return dialTargets
  503. }
  504. func (s *service) resolveDeviceAddrs(ctx context.Context, cfg config.DeviceConfiguration) []string {
  505. var addrs []string
  506. for _, addr := range cfg.Addresses {
  507. if addr == "dynamic" {
  508. if s.discoverer != nil {
  509. if t, err := s.discoverer.Lookup(ctx, cfg.DeviceID); err == nil {
  510. addrs = append(addrs, t...)
  511. }
  512. }
  513. } else {
  514. addrs = append(addrs, addr)
  515. }
  516. }
  517. return util.UniqueTrimmedStrings(addrs)
  518. }
  519. func (s *service) isLANHost(host string) bool {
  520. // Probably we are called with an ip:port combo which we can resolve as
  521. // a TCP address.
  522. if addr, err := net.ResolveTCPAddr("tcp", host); err == nil {
  523. return s.isLAN(addr)
  524. }
  525. // ... but this function looks general enough that someone might try
  526. // with just an IP as well in the future so lets allow that.
  527. if addr, err := net.ResolveIPAddr("ip", host); err == nil {
  528. return s.isLAN(addr)
  529. }
  530. return false
  531. }
  532. func (s *service) isLAN(addr net.Addr) bool {
  533. var ip net.IP
  534. switch addr := addr.(type) {
  535. case *net.IPAddr:
  536. ip = addr.IP
  537. case *net.TCPAddr:
  538. ip = addr.IP
  539. case *net.UDPAddr:
  540. ip = addr.IP
  541. default:
  542. // From the standard library, just Unix sockets.
  543. // If you invent your own, handle it.
  544. return false
  545. }
  546. if ip.IsLoopback() {
  547. return true
  548. }
  549. for _, lan := range s.cfg.Options().AlwaysLocalNets {
  550. _, ipnet, err := net.ParseCIDR(lan)
  551. if err != nil {
  552. l.Debugln("Network", lan, "is malformed:", err)
  553. continue
  554. }
  555. if ipnet.Contains(ip) {
  556. return true
  557. }
  558. }
  559. lans, _ := osutil.GetLans()
  560. for _, lan := range lans {
  561. if lan.Contains(ip) {
  562. return true
  563. }
  564. }
  565. return false
  566. }
  567. func (s *service) createListener(factory listenerFactory, uri *url.URL) bool {
  568. // must be called with listenerMut held
  569. l.Debugln("Starting listener", uri)
  570. listener := factory.New(uri, s.cfg, s.tlsCfg, s.conns, s.natService)
  571. listener.OnAddressesChanged(s.logListenAddressesChangedEvent)
  572. // Retrying a listener many times in rapid succession is unlikely to help,
  573. // thus back off quickly. A listener may soon be functional again, e.g. due
  574. // to a network interface coming back online - retry every minute.
  575. spec := svcutil.SpecWithInfoLogger(l)
  576. spec.FailureThreshold = 2
  577. spec.FailureBackoff = time.Minute
  578. sup := suture.New(fmt.Sprintf("listenerSupervisor@%v", listener), spec)
  579. sup.Add(listener)
  580. s.listeners[uri.String()] = listener
  581. s.listenerTokens[uri.String()] = s.Add(sup)
  582. return true
  583. }
  584. func (s *service) logListenAddressesChangedEvent(l ListenerAddresses) {
  585. s.evLogger.Log(events.ListenAddressesChanged, map[string]interface{}{
  586. "address": l.URI,
  587. "lan": l.LANAddresses,
  588. "wan": l.WANAddresses,
  589. })
  590. }
// VerifyConfiguration accepts every configuration change; it always returns
// nil.
func (s *service) VerifyConfiguration(from, to config.Configuration) error {
	return nil
}
  594. func (s *service) CommitConfiguration(from, to config.Configuration) bool {
  595. newDevices := make(map[protocol.DeviceID]bool, len(to.Devices))
  596. for _, dev := range to.Devices {
  597. newDevices[dev.DeviceID] = true
  598. }
  599. for _, dev := range from.Devices {
  600. if !newDevices[dev.DeviceID] {
  601. warningLimitersMut.Lock()
  602. delete(warningLimiters, dev.DeviceID)
  603. warningLimitersMut.Unlock()
  604. }
  605. }
  606. s.checkAndSignalConnectLoopOnUpdatedDevices(from, to)
  607. s.listenersMut.Lock()
  608. seen := make(map[string]struct{})
  609. for _, addr := range to.Options.ListenAddresses() {
  610. if addr == "" {
  611. // We can get an empty address if there is an empty listener
  612. // element in the config, indicating no listeners should be
  613. // used. This is not an error.
  614. continue
  615. }
  616. uri, err := url.Parse(addr)
  617. if err != nil {
  618. l.Warnf("Skipping malformed listener URL %q: %v", addr, err)
  619. continue
  620. }
  621. // Make sure we always have the canonical representation of the URL.
  622. // This is for consistency as we use it as a map key, but also to
  623. // avoid misunderstandings. We do not just use the canonicalized
  624. // version, because an URL that looks very similar to a human might
  625. // mean something entirely different to the computer (e.g.,
  626. // tcp:/127.0.0.1:22000 in fact being equivalent to tcp://:22000).
  627. if canonical := uri.String(); canonical != addr {
  628. l.Warnf("Skipping malformed listener URL %q (not canonical)", addr)
  629. continue
  630. }
  631. if _, ok := s.listeners[addr]; ok {
  632. seen[addr] = struct{}{}
  633. continue
  634. }
  635. factory, err := getListenerFactory(to, uri)
  636. if errors.Is(err, errUnsupported) {
  637. l.Debugf("Listener for %v: %v", uri, err)
  638. continue
  639. } else if err != nil {
  640. l.Infof("Listener for %v: %v", uri, err)
  641. continue
  642. }
  643. s.createListener(factory, uri)
  644. seen[addr] = struct{}{}
  645. }
  646. for addr, listener := range s.listeners {
  647. if _, ok := seen[addr]; !ok || listener.Factory().Valid(to) != nil {
  648. l.Debugln("Stopping listener", addr)
  649. s.Remove(s.listenerTokens[addr])
  650. delete(s.listenerTokens, addr)
  651. delete(s.listeners, addr)
  652. }
  653. }
  654. s.listenersMut.Unlock()
  655. return true
  656. }
  657. func (s *service) checkAndSignalConnectLoopOnUpdatedDevices(from, to config.Configuration) {
  658. oldDevices := from.DeviceMap()
  659. for _, dev := range to.Devices {
  660. oldDev, ok := oldDevices[dev.DeviceID]
  661. if !ok || !util.EqualStrings(oldDev.Addresses, dev.Addresses) {
  662. s.scheduleDialNow()
  663. break
  664. }
  665. }
  666. }
// scheduleDialNow wakes the connect loop without ever blocking the caller.
// It is called when devices were added or changed in the configuration and
// when an established connection has closed.
func (s *service) scheduleDialNow() {
	select {
	case s.dialNow <- struct{}{}:
	default:
		// The channel is full - a wake-up is already pending for the
		// connection loop, so there is nothing more to do.
	}
}
  674. func (s *service) AllAddresses() []string {
  675. s.listenersMut.RLock()
  676. var addrs []string
  677. for _, listener := range s.listeners {
  678. for _, lanAddr := range listener.LANAddresses() {
  679. addrs = append(addrs, lanAddr.String())
  680. }
  681. for _, wanAddr := range listener.WANAddresses() {
  682. addrs = append(addrs, wanAddr.String())
  683. }
  684. }
  685. s.listenersMut.RUnlock()
  686. return util.UniqueTrimmedStrings(addrs)
  687. }
  688. func (s *service) ExternalAddresses() []string {
  689. if s.cfg.Options().AnnounceLANAddresses {
  690. return s.AllAddresses()
  691. }
  692. s.listenersMut.RLock()
  693. var addrs []string
  694. for _, listener := range s.listeners {
  695. for _, wanAddr := range listener.WANAddresses() {
  696. addrs = append(addrs, wanAddr.String())
  697. }
  698. }
  699. s.listenersMut.RUnlock()
  700. return util.UniqueTrimmedStrings(addrs)
  701. }
  702. func (s *service) ListenerStatus() map[string]ListenerStatusEntry {
  703. result := make(map[string]ListenerStatusEntry)
  704. s.listenersMut.RLock()
  705. for addr, listener := range s.listeners {
  706. var status ListenerStatusEntry
  707. if err := listener.Error(); err != nil {
  708. errStr := err.Error()
  709. status.Error = &errStr
  710. }
  711. status.LANAddresses = urlsToStrings(listener.LANAddresses())
  712. status.WANAddresses = urlsToStrings(listener.WANAddresses())
  713. result[addr] = status
  714. }
  715. s.listenersMut.RUnlock()
  716. return result
  717. }
// connectionStatusHandler keeps the most recently recorded status per
// address. The map is read and written under connectionStatusMut.
type connectionStatusHandler struct {
	// connectionStatusMut guards connectionStatus.
	connectionStatusMut sync.RWMutex
	connectionStatus map[string]ConnectionStatusEntry // address -> latest error/status
}
  722. func newConnectionStatusHandler() connectionStatusHandler {
  723. return connectionStatusHandler{
  724. connectionStatusMut: sync.NewRWMutex(),
  725. connectionStatus: make(map[string]ConnectionStatusEntry),
  726. }
  727. }
  728. func (s *connectionStatusHandler) ConnectionStatus() map[string]ConnectionStatusEntry {
  729. result := make(map[string]ConnectionStatusEntry)
  730. s.connectionStatusMut.RLock()
  731. for k, v := range s.connectionStatus {
  732. result[k] = v
  733. }
  734. s.connectionStatusMut.RUnlock()
  735. return result
  736. }
  737. func (s *connectionStatusHandler) setConnectionStatus(address string, err error) {
  738. if errors.Cause(err) == context.Canceled {
  739. return
  740. }
  741. status := ConnectionStatusEntry{When: time.Now().UTC().Truncate(time.Second)}
  742. if err != nil {
  743. errStr := err.Error()
  744. status.Error = &errStr
  745. }
  746. s.connectionStatusMut.Lock()
  747. s.connectionStatus[address] = status
  748. s.connectionStatusMut.Unlock()
  749. }
  750. func (s *service) NATType() string {
  751. s.listenersMut.RLock()
  752. defer s.listenersMut.RUnlock()
  753. for _, listener := range s.listeners {
  754. natType := listener.NATType()
  755. if natType != "unknown" {
  756. return natType
  757. }
  758. }
  759. return "unknown"
  760. }
  761. func getDialerFactory(cfg config.Configuration, uri *url.URL) (dialerFactory, error) {
  762. dialerFactory, ok := dialers[uri.Scheme]
  763. if !ok {
  764. return nil, fmt.Errorf("unknown address scheme %q", uri.Scheme)
  765. }
  766. if err := dialerFactory.Valid(cfg); err != nil {
  767. return nil, err
  768. }
  769. return dialerFactory, nil
  770. }
  771. func getListenerFactory(cfg config.Configuration, uri *url.URL) (listenerFactory, error) {
  772. listenerFactory, ok := listeners[uri.Scheme]
  773. if !ok {
  774. return nil, fmt.Errorf("unknown address scheme %q", uri.Scheme)
  775. }
  776. if err := listenerFactory.Valid(cfg); err != nil {
  777. return nil, err
  778. }
  779. return listenerFactory, nil
  780. }
  781. func urlsToStrings(urls []*url.URL) []string {
  782. strings := make([]string, len(urls))
  783. for i, url := range urls {
  784. strings[i] = url.String()
  785. }
  786. return strings
  787. }
  788. var warningLimiters = make(map[protocol.DeviceID]*rate.Limiter)
  789. var warningLimitersMut = sync.NewMutex()
  790. func warningFor(dev protocol.DeviceID, msg string) {
  791. warningLimitersMut.Lock()
  792. defer warningLimitersMut.Unlock()
  793. lim, ok := warningLimiters[dev]
  794. if !ok {
  795. lim = rate.NewLimiter(rate.Every(perDeviceWarningIntv), 1)
  796. warningLimiters[dev] = lim
  797. }
  798. if lim.Allow() {
  799. l.Warnln(msg)
  800. }
  801. }
  802. func tlsTimedHandshake(tc *tls.Conn) error {
  803. tc.SetDeadline(time.Now().Add(tlsHandshakeTimeout))
  804. defer tc.SetDeadline(time.Time{})
  805. return tc.Handshake()
  806. }
  807. // IsAllowedNetwork returns true if the given host (IP or resolvable
  808. // hostname) is in the set of allowed networks (CIDR format only).
  809. func IsAllowedNetwork(host string, allowed []string) bool {
  810. if hostNoPort, _, err := net.SplitHostPort(host); err == nil {
  811. host = hostNoPort
  812. }
  813. addr, err := net.ResolveIPAddr("ip", host)
  814. if err != nil {
  815. return false
  816. }
  817. for _, n := range allowed {
  818. result := true
  819. if strings.HasPrefix(n, "!") {
  820. result = false
  821. n = n[1:]
  822. }
  823. _, cidr, err := net.ParseCIDR(n)
  824. if err != nil {
  825. continue
  826. }
  827. if cidr.Contains(addr.IP) {
  828. return result
  829. }
  830. }
  831. return false
  832. }
  833. func (s *service) dialParallel(ctx context.Context, deviceID protocol.DeviceID, dialTargets []dialTarget) (internalConn, bool) {
  834. // Group targets into buckets by priority
  835. dialTargetBuckets := make(map[int][]dialTarget, len(dialTargets))
  836. for _, tgt := range dialTargets {
  837. dialTargetBuckets[tgt.priority] = append(dialTargetBuckets[tgt.priority], tgt)
  838. }
  839. // Get all available priorities
  840. priorities := make([]int, 0, len(dialTargetBuckets))
  841. for prio := range dialTargetBuckets {
  842. priorities = append(priorities, prio)
  843. }
  844. // Sort the priorities so that we dial lowest first (which means highest...)
  845. sort.Ints(priorities)
  846. for _, prio := range priorities {
  847. tgts := dialTargetBuckets[prio]
  848. res := make(chan internalConn, len(tgts))
  849. wg := stdsync.WaitGroup{}
  850. for _, tgt := range tgts {
  851. wg.Add(1)
  852. go func(tgt dialTarget) {
  853. conn, err := tgt.Dial(ctx)
  854. if err == nil {
  855. // Closes the connection on error
  856. err = s.validateIdentity(conn, deviceID)
  857. }
  858. s.setConnectionStatus(tgt.addr, err)
  859. if err != nil {
  860. l.Debugln("dialing", deviceID, tgt.uri, "error:", err)
  861. } else {
  862. l.Debugln("dialing", deviceID, tgt.uri, "success:", conn)
  863. res <- conn
  864. }
  865. wg.Done()
  866. }(tgt)
  867. }
  868. // Spawn a routine which will unblock main routine in case we fail
  869. // to connect to anyone.
  870. go func() {
  871. wg.Wait()
  872. close(res)
  873. }()
  874. // Wait for the first connection, or for channel closure.
  875. if conn, ok := <-res; ok {
  876. // Got a connection, means more might come back, hence spawn a
  877. // routine that will do the discarding.
  878. l.Debugln("connected to", deviceID, prio, "using", conn, conn.priority)
  879. go func(deviceID protocol.DeviceID, prio int) {
  880. wg.Wait()
  881. l.Debugln("discarding", len(res), "connections while connecting to", deviceID, prio)
  882. for conn := range res {
  883. conn.Close()
  884. }
  885. }(deviceID, prio)
  886. return conn, ok
  887. }
  888. // Failed to connect, report that fact.
  889. l.Debugln("failed to connect to", deviceID, prio)
  890. }
  891. return internalConn{}, false
  892. }
  893. func (s *service) validateIdentity(c internalConn, expectedID protocol.DeviceID) error {
  894. cs := c.ConnectionState()
  895. // We should have received exactly one certificate from the other
  896. // side. If we didn't, they don't have a device ID and we drop the
  897. // connection.
  898. certs := cs.PeerCertificates
  899. if cl := len(certs); cl != 1 {
  900. l.Infof("Got peer certificate list of length %d != 1 from peer at %s; protocol error", cl, c)
  901. c.Close()
  902. return fmt.Errorf("expected 1 certificate, got %d", cl)
  903. }
  904. remoteCert := certs[0]
  905. remoteID := protocol.NewDeviceID(remoteCert.Raw)
  906. // The device ID should not be that of ourselves. It can happen
  907. // though, especially in the presence of NAT hairpinning, multiple
  908. // clients between the same NAT gateway, and global discovery.
  909. if remoteID == s.myID {
  910. l.Debugf("Connected to myself (%s) at %s", remoteID, c)
  911. c.Close()
  912. return errors.New("connected to self")
  913. }
  914. // We should see the expected device ID
  915. if !remoteID.Equals(expectedID) {
  916. c.Close()
  917. return fmt.Errorf("unexpected device id, expected %s got %s", expectedID, remoteID)
  918. }
  919. return nil
  920. }
// nextDialRegistry holds the dial scheduling state per device. The values
// are stored by value, so a changed nextDialDevice must be assigned back to
// the map for the change to persist.
type nextDialRegistry map[protocol.DeviceID]nextDialDevice
// nextDialDevice is the dial scheduling state of a single device.
type nextDialDevice struct {
	// nextDial maps an address to the time stored for it by set.
	nextDial map[string]time.Time
	// coolDownIntervalStart is when the current run of forced redials
	// started (see redialDevice).
	coolDownIntervalStart time.Time
	// attempts counts the forced redials since coolDownIntervalStart.
	attempts int
}
  927. func (r nextDialRegistry) get(device protocol.DeviceID, addr string) time.Time {
  928. return r[device].nextDial[addr]
  929. }
// Limits for forced redials, see redialDevice.
const (
	// dialCoolDownInterval is the window, counted from the first forced
	// redial, within which forced redials are counted.
	dialCoolDownInterval = 2 * time.Minute
	// dialCoolDownDelay is how long, counted from the same starting point,
	// forced redials stay suppressed once the maximum was reached.
	dialCoolDownDelay = 5 * time.Minute
	// dialCoolDownMaxAttemps is the number of forced redials after which
	// the device is left to cool down.
	// NOTE(review): the identifier is missing a "t" ("Attemps"); renaming
	// it needs every reference changed in the same commit.
	dialCoolDownMaxAttemps = 3
)
  935. // redialDevice marks the device for immediate redial, unless the remote keeps
  936. // dropping established connections. Thus we keep track of when the first forced
  937. // re-dial happened, and how many attempts happen in the dialCoolDownInterval
  938. // after that. If it's more than dialCoolDownMaxAttempts, don't force-redial
  939. // that device for dialCoolDownDelay (regular dials still happen).
  940. func (r nextDialRegistry) redialDevice(device protocol.DeviceID, now time.Time) {
  941. dev, ok := r[device]
  942. if !ok {
  943. r[device] = nextDialDevice{
  944. coolDownIntervalStart: now,
  945. attempts: 1,
  946. }
  947. return
  948. }
  949. if dev.attempts == 0 || now.Before(dev.coolDownIntervalStart.Add(dialCoolDownInterval)) {
  950. if dev.attempts >= dialCoolDownMaxAttemps {
  951. // Device has been force redialed too often - let it cool down.
  952. return
  953. }
  954. if dev.attempts == 0 {
  955. dev.coolDownIntervalStart = now
  956. }
  957. dev.attempts++
  958. dev.nextDial = make(map[string]time.Time)
  959. return
  960. }
  961. if dev.attempts >= dialCoolDownMaxAttemps && now.Before(dev.coolDownIntervalStart.Add(dialCoolDownDelay)) {
  962. return // Still cooling down
  963. }
  964. delete(r, device)
  965. }
  966. func (r nextDialRegistry) set(device protocol.DeviceID, addr string, next time.Time) {
  967. if _, ok := r[device]; !ok {
  968. r[device] = nextDialDevice{nextDial: make(map[string]time.Time)}
  969. }
  970. r[device].nextDial[addr] = next
  971. }
  972. func (r nextDialRegistry) sleepDurationAndCleanup(now time.Time) time.Duration {
  973. sleep := stdConnectionLoopSleep
  974. for id, dev := range r {
  975. for address, next := range dev.nextDial {
  976. if next.Before(now) {
  977. // Expired entry, address was not seen in last pass(es)
  978. delete(dev.nextDial, address)
  979. continue
  980. }
  981. if cur := next.Sub(now); cur < sleep {
  982. sleep = cur
  983. }
  984. }
  985. if dev.attempts > 0 {
  986. interval := dialCoolDownInterval
  987. if dev.attempts >= dialCoolDownMaxAttemps {
  988. interval = dialCoolDownDelay
  989. }
  990. if now.After(dev.coolDownIntervalStart.Add(interval)) {
  991. dev.attempts = 0
  992. }
  993. }
  994. if len(dev.nextDial) == 0 && dev.attempts == 0 {
  995. delete(r, id)
  996. }
  997. }
  998. return sleep
  999. }