hostmap.go 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905
  1. package nebula
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "net"
  7. "sync"
  8. "sync/atomic"
  9. "time"
  10. "github.com/rcrowley/go-metrics"
  11. "github.com/sirupsen/logrus"
  12. "github.com/slackhq/nebula/cert"
  13. "github.com/slackhq/nebula/cidr"
  14. "github.com/slackhq/nebula/header"
  15. "github.com/slackhq/nebula/iputil"
  16. "github.com/slackhq/nebula/udp"
  17. )
  18. // const ProbeLen = 100
  19. const PromoteEvery = 1000
  20. const ReQueryEvery = 5000
  21. const MaxRemotes = 10
  22. // MaxHostInfosPerVpnIp is the max number of hostinfos we will track for a given vpn ip
  23. // 5 allows for an initial handshake and each host pair re-handshaking twice
  24. const MaxHostInfosPerVpnIp = 5
  25. // How long we should prevent roaming back to the previous IP.
  26. // This helps prevent flapping due to packets already in flight
  27. const RoamingSuppressSeconds = 2
  28. const (
  29. Requested = iota
  30. Established
  31. )
  32. const (
  33. Unknowntype = iota
  34. ForwardingType
  35. TerminalType
  36. )
  37. type Relay struct {
  38. Type int
  39. State int
  40. LocalIndex uint32
  41. RemoteIndex uint32
  42. PeerIp iputil.VpnIp
  43. }
  44. type HostMap struct {
  45. sync.RWMutex //Because we concurrently read and write to our maps
  46. name string
  47. Indexes map[uint32]*HostInfo
  48. Relays map[uint32]*HostInfo // Maps a Relay IDX to a Relay HostInfo object
  49. RemoteIndexes map[uint32]*HostInfo
  50. Hosts map[iputil.VpnIp]*HostInfo
  51. preferredRanges []*net.IPNet
  52. vpnCIDR *net.IPNet
  53. metricsEnabled bool
  54. l *logrus.Logger
  55. }
  56. type RelayState struct {
  57. sync.RWMutex
  58. relays map[iputil.VpnIp]struct{} // Set of VpnIp's of Hosts to use as relays to access this peer
  59. relayForByIp map[iputil.VpnIp]*Relay // Maps VpnIps of peers for which this HostInfo is a relay to some Relay info
  60. relayForByIdx map[uint32]*Relay // Maps a local index to some Relay info
  61. }
  62. func (rs *RelayState) DeleteRelay(ip iputil.VpnIp) {
  63. rs.Lock()
  64. defer rs.Unlock()
  65. delete(rs.relays, ip)
  66. }
  67. func (rs *RelayState) GetRelayForByIp(ip iputil.VpnIp) (*Relay, bool) {
  68. rs.RLock()
  69. defer rs.RUnlock()
  70. r, ok := rs.relayForByIp[ip]
  71. return r, ok
  72. }
  73. func (rs *RelayState) InsertRelayTo(ip iputil.VpnIp) {
  74. rs.Lock()
  75. defer rs.Unlock()
  76. rs.relays[ip] = struct{}{}
  77. }
  78. func (rs *RelayState) CopyRelayIps() []iputil.VpnIp {
  79. rs.RLock()
  80. defer rs.RUnlock()
  81. ret := make([]iputil.VpnIp, 0, len(rs.relays))
  82. for ip := range rs.relays {
  83. ret = append(ret, ip)
  84. }
  85. return ret
  86. }
  87. func (rs *RelayState) CopyRelayForIps() []iputil.VpnIp {
  88. rs.RLock()
  89. defer rs.RUnlock()
  90. currentRelays := make([]iputil.VpnIp, 0, len(rs.relayForByIp))
  91. for relayIp := range rs.relayForByIp {
  92. currentRelays = append(currentRelays, relayIp)
  93. }
  94. return currentRelays
  95. }
  96. func (rs *RelayState) CopyRelayForIdxs() []uint32 {
  97. rs.RLock()
  98. defer rs.RUnlock()
  99. ret := make([]uint32, 0, len(rs.relayForByIdx))
  100. for i := range rs.relayForByIdx {
  101. ret = append(ret, i)
  102. }
  103. return ret
  104. }
  105. func (rs *RelayState) RemoveRelay(localIdx uint32) (iputil.VpnIp, bool) {
  106. rs.Lock()
  107. defer rs.Unlock()
  108. relay, ok := rs.relayForByIdx[localIdx]
  109. if !ok {
  110. return iputil.VpnIp(0), false
  111. }
  112. delete(rs.relayForByIdx, localIdx)
  113. delete(rs.relayForByIp, relay.PeerIp)
  114. return relay.PeerIp, true
  115. }
  116. func (rs *RelayState) QueryRelayForByIp(vpnIp iputil.VpnIp) (*Relay, bool) {
  117. rs.RLock()
  118. defer rs.RUnlock()
  119. r, ok := rs.relayForByIp[vpnIp]
  120. return r, ok
  121. }
  122. func (rs *RelayState) QueryRelayForByIdx(idx uint32) (*Relay, bool) {
  123. rs.RLock()
  124. defer rs.RUnlock()
  125. r, ok := rs.relayForByIdx[idx]
  126. return r, ok
  127. }
  128. func (rs *RelayState) InsertRelay(ip iputil.VpnIp, idx uint32, r *Relay) {
  129. rs.Lock()
  130. defer rs.Unlock()
  131. rs.relayForByIp[ip] = r
  132. rs.relayForByIdx[idx] = r
  133. }
  134. type HostInfo struct {
  135. sync.RWMutex
  136. remote *udp.Addr
  137. remotes *RemoteList
  138. promoteCounter atomic.Uint32
  139. ConnectionState *ConnectionState
  140. handshakeStart time.Time //todo: this an entry in the handshake manager
  141. HandshakeReady bool //todo: being in the manager means you are ready
  142. HandshakeCounter int //todo: another handshake manager entry
  143. HandshakeLastRemotes []*udp.Addr //todo: another handshake manager entry, which remotes we sent to last time
  144. HandshakeComplete bool //todo: this should go away in favor of ConnectionState.ready
  145. HandshakePacket map[uint8][]byte //todo: this is other handshake manager entry
  146. packetStore []*cachedPacket //todo: this is other handshake manager entry
  147. remoteIndexId uint32
  148. localIndexId uint32
  149. vpnIp iputil.VpnIp
  150. recvError int
  151. remoteCidr *cidr.Tree4
  152. relayState RelayState
  153. // lastRebindCount is the other side of Interface.rebindCount, if these values don't match then we need to ask LH
  154. // for a punch from the remote end of this tunnel. The goal being to prime their conntrack for our traffic just like
  155. // with a handshake
  156. lastRebindCount int8
  157. // lastHandshakeTime records the time the remote side told us about at the stage when the handshake was completed locally
  158. // Stage 1 packet will contain it if I am a responder, stage 2 packet if I am an initiator
  159. // This is used to avoid an attack where a handshake packet is replayed after some time
  160. lastHandshakeTime uint64
  161. lastRoam time.Time
  162. lastRoamRemote *udp.Addr
  163. // Used to track other hostinfos for this vpn ip since only 1 can be primary
  164. // Synchronised via hostmap lock and not the hostinfo lock.
  165. next, prev *HostInfo
  166. }
  167. type ViaSender struct {
  168. relayHI *HostInfo // relayHI is the host info object of the relay
  169. remoteIdx uint32 // remoteIdx is the index included in the header of the received packet
  170. relay *Relay // relay contains the rest of the relay information, including the PeerIP of the host trying to communicate with us.
  171. }
  172. type cachedPacket struct {
  173. messageType header.MessageType
  174. messageSubType header.MessageSubType
  175. callback packetCallback
  176. packet []byte
  177. }
  178. type packetCallback func(t header.MessageType, st header.MessageSubType, h *HostInfo, p, nb, out []byte)
  179. type cachedPacketMetrics struct {
  180. sent metrics.Counter
  181. dropped metrics.Counter
  182. }
  183. func NewHostMap(l *logrus.Logger, name string, vpnCIDR *net.IPNet, preferredRanges []*net.IPNet) *HostMap {
  184. h := map[iputil.VpnIp]*HostInfo{}
  185. i := map[uint32]*HostInfo{}
  186. r := map[uint32]*HostInfo{}
  187. relays := map[uint32]*HostInfo{}
  188. m := HostMap{
  189. name: name,
  190. Indexes: i,
  191. Relays: relays,
  192. RemoteIndexes: r,
  193. Hosts: h,
  194. preferredRanges: preferredRanges,
  195. vpnCIDR: vpnCIDR,
  196. l: l,
  197. }
  198. return &m
  199. }
  200. // UpdateStats takes a name and reports host and index counts to the stats collection system
  201. func (hm *HostMap) EmitStats(name string) {
  202. hm.RLock()
  203. hostLen := len(hm.Hosts)
  204. indexLen := len(hm.Indexes)
  205. remoteIndexLen := len(hm.RemoteIndexes)
  206. relaysLen := len(hm.Relays)
  207. hm.RUnlock()
  208. metrics.GetOrRegisterGauge("hostmap."+name+".hosts", nil).Update(int64(hostLen))
  209. metrics.GetOrRegisterGauge("hostmap."+name+".indexes", nil).Update(int64(indexLen))
  210. metrics.GetOrRegisterGauge("hostmap."+name+".remoteIndexes", nil).Update(int64(remoteIndexLen))
  211. metrics.GetOrRegisterGauge("hostmap."+name+".relayIndexes", nil).Update(int64(relaysLen))
  212. }
  213. func (hm *HostMap) RemoveRelay(localIdx uint32) {
  214. hm.Lock()
  215. hiRelay, ok := hm.Relays[localIdx]
  216. if !ok {
  217. hm.Unlock()
  218. return
  219. }
  220. delete(hm.Relays, localIdx)
  221. hm.Unlock()
  222. ip, ok := hiRelay.relayState.RemoveRelay(localIdx)
  223. if !ok {
  224. return
  225. }
  226. hiPeer, err := hm.QueryVpnIp(ip)
  227. if err != nil {
  228. return
  229. }
  230. var otherPeerIdx uint32
  231. hiPeer.relayState.DeleteRelay(hiRelay.vpnIp)
  232. relay, ok := hiPeer.relayState.GetRelayForByIp(hiRelay.vpnIp)
  233. if ok {
  234. otherPeerIdx = relay.LocalIndex
  235. }
  236. // I am a relaying host. I need to remove the other relay, too.
  237. hm.RemoveRelay(otherPeerIdx)
  238. }
  239. func (hm *HostMap) GetIndexByVpnIp(vpnIp iputil.VpnIp) (uint32, error) {
  240. hm.RLock()
  241. if i, ok := hm.Hosts[vpnIp]; ok {
  242. index := i.localIndexId
  243. hm.RUnlock()
  244. return index, nil
  245. }
  246. hm.RUnlock()
  247. return 0, errors.New("vpn IP not found")
  248. }
  249. func (hm *HostMap) Add(ip iputil.VpnIp, hostinfo *HostInfo) {
  250. hm.Lock()
  251. hm.Hosts[ip] = hostinfo
  252. hm.Unlock()
  253. }
  254. func (hm *HostMap) AddVpnIp(vpnIp iputil.VpnIp, init func(hostinfo *HostInfo)) (hostinfo *HostInfo, created bool) {
  255. hm.RLock()
  256. if h, ok := hm.Hosts[vpnIp]; !ok {
  257. hm.RUnlock()
  258. h = &HostInfo{
  259. vpnIp: vpnIp,
  260. HandshakePacket: make(map[uint8][]byte, 0),
  261. relayState: RelayState{
  262. relays: map[iputil.VpnIp]struct{}{},
  263. relayForByIp: map[iputil.VpnIp]*Relay{},
  264. relayForByIdx: map[uint32]*Relay{},
  265. },
  266. }
  267. if init != nil {
  268. init(h)
  269. }
  270. hm.Lock()
  271. hm.Hosts[vpnIp] = h
  272. hm.Unlock()
  273. return h, true
  274. } else {
  275. hm.RUnlock()
  276. return h, false
  277. }
  278. }
  279. func (hm *HostMap) DeleteVpnIp(vpnIp iputil.VpnIp) {
  280. hm.Lock()
  281. delete(hm.Hosts, vpnIp)
  282. if len(hm.Hosts) == 0 {
  283. hm.Hosts = map[iputil.VpnIp]*HostInfo{}
  284. }
  285. hm.Unlock()
  286. if hm.l.Level >= logrus.DebugLevel {
  287. hm.l.WithField("hostMap", m{"mapName": hm.name, "vpnIp": vpnIp, "mapTotalSize": len(hm.Hosts)}).
  288. Debug("Hostmap vpnIp deleted")
  289. }
  290. }
  291. // Only used by pendingHostMap when the remote index is not initially known
  292. func (hm *HostMap) addRemoteIndexHostInfo(index uint32, h *HostInfo) {
  293. hm.Lock()
  294. h.remoteIndexId = index
  295. hm.RemoteIndexes[index] = h
  296. hm.Unlock()
  297. if hm.l.Level > logrus.DebugLevel {
  298. hm.l.WithField("hostMap", m{"mapName": hm.name, "indexNumber": index, "mapTotalSize": len(hm.Indexes),
  299. "hostinfo": m{"existing": true, "localIndexId": h.localIndexId, "hostId": h.vpnIp}}).
  300. Debug("Hostmap remoteIndex added")
  301. }
  302. }
  303. func (hm *HostMap) AddVpnIpHostInfo(vpnIp iputil.VpnIp, h *HostInfo) {
  304. hm.Lock()
  305. h.vpnIp = vpnIp
  306. hm.Hosts[vpnIp] = h
  307. hm.Indexes[h.localIndexId] = h
  308. hm.RemoteIndexes[h.remoteIndexId] = h
  309. hm.Unlock()
  310. if hm.l.Level > logrus.DebugLevel {
  311. hm.l.WithField("hostMap", m{"mapName": hm.name, "vpnIp": vpnIp, "mapTotalSize": len(hm.Hosts),
  312. "hostinfo": m{"existing": true, "localIndexId": h.localIndexId, "vpnIp": h.vpnIp}}).
  313. Debug("Hostmap vpnIp added")
  314. }
  315. }
  316. // This is only called in pendingHostmap, to cleanup an inbound handshake
  317. func (hm *HostMap) DeleteIndex(index uint32) {
  318. hm.Lock()
  319. hostinfo, ok := hm.Indexes[index]
  320. if ok {
  321. delete(hm.Indexes, index)
  322. delete(hm.RemoteIndexes, hostinfo.remoteIndexId)
  323. // Check if we have an entry under hostId that matches the same hostinfo
  324. // instance. Clean it up as well if we do.
  325. hostinfo2, ok := hm.Hosts[hostinfo.vpnIp]
  326. if ok && hostinfo2 == hostinfo {
  327. delete(hm.Hosts, hostinfo.vpnIp)
  328. }
  329. }
  330. hm.Unlock()
  331. if hm.l.Level >= logrus.DebugLevel {
  332. hm.l.WithField("hostMap", m{"mapName": hm.name, "indexNumber": index, "mapTotalSize": len(hm.Indexes)}).
  333. Debug("Hostmap index deleted")
  334. }
  335. }
  336. // This is used to cleanup on recv_error
  337. func (hm *HostMap) DeleteReverseIndex(index uint32) {
  338. hm.Lock()
  339. hostinfo, ok := hm.RemoteIndexes[index]
  340. if ok {
  341. delete(hm.Indexes, hostinfo.localIndexId)
  342. delete(hm.RemoteIndexes, index)
  343. // Check if we have an entry under hostId that matches the same hostinfo
  344. // instance. Clean it up as well if we do (they might not match in pendingHostmap)
  345. var hostinfo2 *HostInfo
  346. hostinfo2, ok = hm.Hosts[hostinfo.vpnIp]
  347. if ok && hostinfo2 == hostinfo {
  348. delete(hm.Hosts, hostinfo.vpnIp)
  349. }
  350. }
  351. hm.Unlock()
  352. if hm.l.Level >= logrus.DebugLevel {
  353. hm.l.WithField("hostMap", m{"mapName": hm.name, "indexNumber": index, "mapTotalSize": len(hm.Indexes)}).
  354. Debug("Hostmap remote index deleted")
  355. }
  356. }
  357. // DeleteHostInfo will fully unlink the hostinfo and return true if it was the final hostinfo for this vpn ip
  358. func (hm *HostMap) DeleteHostInfo(hostinfo *HostInfo) bool {
  359. // Delete the host itself, ensuring it's not modified anymore
  360. hm.Lock()
  361. // If we have a previous or next hostinfo then we are not the last one for this vpn ip
  362. final := (hostinfo.next == nil && hostinfo.prev == nil)
  363. hm.unlockedDeleteHostInfo(hostinfo)
  364. hm.Unlock()
  365. // And tear down all the relays going through this host
  366. for _, localIdx := range hostinfo.relayState.CopyRelayForIdxs() {
  367. hm.RemoveRelay(localIdx)
  368. }
  369. // And tear down the relays this deleted hostInfo was using to be reached
  370. teardownRelayIdx := []uint32{}
  371. for _, relayIp := range hostinfo.relayState.CopyRelayIps() {
  372. relayHostInfo, err := hm.QueryVpnIp(relayIp)
  373. if err != nil {
  374. hm.l.WithError(err).WithField("relay", relayIp).Info("Missing relay host in hostmap")
  375. } else {
  376. if r, ok := relayHostInfo.relayState.QueryRelayForByIp(hostinfo.vpnIp); ok {
  377. teardownRelayIdx = append(teardownRelayIdx, r.LocalIndex)
  378. }
  379. }
  380. }
  381. for _, localIdx := range teardownRelayIdx {
  382. hm.RemoveRelay(localIdx)
  383. }
  384. return final
  385. }
  386. func (hm *HostMap) DeleteRelayIdx(localIdx uint32) {
  387. hm.Lock()
  388. defer hm.Unlock()
  389. delete(hm.RemoteIndexes, localIdx)
  390. }
  391. func (hm *HostMap) MakePrimary(hostinfo *HostInfo) {
  392. hm.Lock()
  393. defer hm.Unlock()
  394. hm.unlockedMakePrimary(hostinfo)
  395. }
  396. func (hm *HostMap) unlockedMakePrimary(hostinfo *HostInfo) {
  397. oldHostinfo := hm.Hosts[hostinfo.vpnIp]
  398. if oldHostinfo == hostinfo {
  399. return
  400. }
  401. if hostinfo.prev != nil {
  402. hostinfo.prev.next = hostinfo.next
  403. }
  404. if hostinfo.next != nil {
  405. hostinfo.next.prev = hostinfo.prev
  406. }
  407. hm.Hosts[hostinfo.vpnIp] = hostinfo
  408. if oldHostinfo == nil {
  409. return
  410. }
  411. hostinfo.next = oldHostinfo
  412. oldHostinfo.prev = hostinfo
  413. hostinfo.prev = nil
  414. }
  415. func (hm *HostMap) unlockedDeleteHostInfo(hostinfo *HostInfo) {
  416. primary, ok := hm.Hosts[hostinfo.vpnIp]
  417. if ok && primary == hostinfo {
  418. // The vpnIp pointer points to the same hostinfo as the local index id, we can remove it
  419. delete(hm.Hosts, hostinfo.vpnIp)
  420. if len(hm.Hosts) == 0 {
  421. hm.Hosts = map[iputil.VpnIp]*HostInfo{}
  422. }
  423. if hostinfo.next != nil {
  424. // We had more than 1 hostinfo at this vpnip, promote the next in the list to primary
  425. hm.Hosts[hostinfo.vpnIp] = hostinfo.next
  426. // It is primary, there is no previous hostinfo now
  427. hostinfo.next.prev = nil
  428. }
  429. } else {
  430. // Relink if we were in the middle of multiple hostinfos for this vpn ip
  431. if hostinfo.prev != nil {
  432. hostinfo.prev.next = hostinfo.next
  433. }
  434. if hostinfo.next != nil {
  435. hostinfo.next.prev = hostinfo.prev
  436. }
  437. }
  438. hostinfo.next = nil
  439. hostinfo.prev = nil
  440. // The remote index uses index ids outside our control so lets make sure we are only removing
  441. // the remote index pointer here if it points to the hostinfo we are deleting
  442. hostinfo2, ok := hm.RemoteIndexes[hostinfo.remoteIndexId]
  443. if ok && hostinfo2 == hostinfo {
  444. delete(hm.RemoteIndexes, hostinfo.remoteIndexId)
  445. if len(hm.RemoteIndexes) == 0 {
  446. hm.RemoteIndexes = map[uint32]*HostInfo{}
  447. }
  448. }
  449. delete(hm.Indexes, hostinfo.localIndexId)
  450. if len(hm.Indexes) == 0 {
  451. hm.Indexes = map[uint32]*HostInfo{}
  452. }
  453. if hm.l.Level >= logrus.DebugLevel {
  454. hm.l.WithField("hostMap", m{"mapName": hm.name, "mapTotalSize": len(hm.Hosts),
  455. "vpnIp": hostinfo.vpnIp, "indexNumber": hostinfo.localIndexId, "remoteIndexNumber": hostinfo.remoteIndexId}).
  456. Debug("Hostmap hostInfo deleted")
  457. }
  458. }
  459. func (hm *HostMap) QueryIndex(index uint32) (*HostInfo, error) {
  460. //TODO: we probably just want to return bool instead of error, or at least a static error
  461. hm.RLock()
  462. if h, ok := hm.Indexes[index]; ok {
  463. hm.RUnlock()
  464. return h, nil
  465. } else {
  466. hm.RUnlock()
  467. return nil, errors.New("unable to find index")
  468. }
  469. }
  470. func (hm *HostMap) QueryRelayIndex(index uint32) (*HostInfo, error) {
  471. //TODO: we probably just want to return bool instead of error, or at least a static error
  472. hm.RLock()
  473. if h, ok := hm.Relays[index]; ok {
  474. hm.RUnlock()
  475. return h, nil
  476. } else {
  477. hm.RUnlock()
  478. return nil, errors.New("unable to find index")
  479. }
  480. }
  481. func (hm *HostMap) QueryReverseIndex(index uint32) (*HostInfo, error) {
  482. hm.RLock()
  483. if h, ok := hm.RemoteIndexes[index]; ok {
  484. hm.RUnlock()
  485. return h, nil
  486. } else {
  487. hm.RUnlock()
  488. return nil, fmt.Errorf("unable to find reverse index or connectionstate nil in %s hostmap", hm.name)
  489. }
  490. }
  491. func (hm *HostMap) QueryVpnIp(vpnIp iputil.VpnIp) (*HostInfo, error) {
  492. return hm.queryVpnIp(vpnIp, nil)
  493. }
  494. // PromoteBestQueryVpnIp will attempt to lazily switch to the best remote every
  495. // `PromoteEvery` calls to this function for a given host.
  496. func (hm *HostMap) PromoteBestQueryVpnIp(vpnIp iputil.VpnIp, ifce *Interface) (*HostInfo, error) {
  497. return hm.queryVpnIp(vpnIp, ifce)
  498. }
  499. func (hm *HostMap) queryVpnIp(vpnIp iputil.VpnIp, promoteIfce *Interface) (*HostInfo, error) {
  500. hm.RLock()
  501. if h, ok := hm.Hosts[vpnIp]; ok {
  502. hm.RUnlock()
  503. // Do not attempt promotion if you are a lighthouse
  504. if promoteIfce != nil && !promoteIfce.lightHouse.amLighthouse {
  505. h.TryPromoteBest(hm.preferredRanges, promoteIfce)
  506. }
  507. return h, nil
  508. }
  509. hm.RUnlock()
  510. return nil, errors.New("unable to find host")
  511. }
  512. // unlockedAddHostInfo assumes you have a write-lock and will add a hostinfo object to the hostmap Indexes and RemoteIndexes maps.
  513. // If an entry exists for the Hosts table (vpnIp -> hostinfo) then the provided hostinfo will be made primary
  514. func (hm *HostMap) unlockedAddHostInfo(hostinfo *HostInfo, f *Interface) {
  515. if f.serveDns {
  516. remoteCert := hostinfo.ConnectionState.peerCert
  517. dnsR.Add(remoteCert.Details.Name+".", remoteCert.Details.Ips[0].IP.String())
  518. }
  519. existing := hm.Hosts[hostinfo.vpnIp]
  520. hm.Hosts[hostinfo.vpnIp] = hostinfo
  521. if existing != nil {
  522. hostinfo.next = existing
  523. existing.prev = hostinfo
  524. }
  525. hm.Indexes[hostinfo.localIndexId] = hostinfo
  526. hm.RemoteIndexes[hostinfo.remoteIndexId] = hostinfo
  527. if hm.l.Level >= logrus.DebugLevel {
  528. hm.l.WithField("hostMap", m{"mapName": hm.name, "vpnIp": hostinfo.vpnIp, "mapTotalSize": len(hm.Hosts),
  529. "hostinfo": m{"existing": true, "localIndexId": hostinfo.localIndexId, "hostId": hostinfo.vpnIp}}).
  530. Debug("Hostmap vpnIp added")
  531. }
  532. i := 1
  533. check := hostinfo
  534. for check != nil {
  535. if i > MaxHostInfosPerVpnIp {
  536. hm.unlockedDeleteHostInfo(check)
  537. }
  538. check = check.next
  539. i++
  540. }
  541. }
  542. // punchList assembles a list of all non nil RemoteList pointer entries in this hostmap
  543. // The caller can then do the its work outside of the read lock
  544. func (hm *HostMap) punchList(rl []*RemoteList) []*RemoteList {
  545. hm.RLock()
  546. defer hm.RUnlock()
  547. for _, v := range hm.Hosts {
  548. if v.remotes != nil {
  549. rl = append(rl, v.remotes)
  550. }
  551. }
  552. return rl
  553. }
  554. // Punchy iterates through the result of punchList() to assemble all known addresses and sends a hole punch packet to them
  555. func (hm *HostMap) Punchy(ctx context.Context, conn *udp.Conn) {
  556. var metricsTxPunchy metrics.Counter
  557. if hm.metricsEnabled {
  558. metricsTxPunchy = metrics.GetOrRegisterCounter("messages.tx.punchy", nil)
  559. } else {
  560. metricsTxPunchy = metrics.NilCounter{}
  561. }
  562. var remotes []*RemoteList
  563. b := []byte{1}
  564. clockSource := time.NewTicker(time.Second * 10)
  565. defer clockSource.Stop()
  566. for {
  567. remotes = hm.punchList(remotes[:0])
  568. for _, rl := range remotes {
  569. //TODO: CopyAddrs generates garbage but ForEach locks for the work here, figure out which way is better
  570. for _, addr := range rl.CopyAddrs(hm.preferredRanges) {
  571. metricsTxPunchy.Inc(1)
  572. conn.WriteTo(b, addr)
  573. }
  574. }
  575. select {
  576. case <-ctx.Done():
  577. return
  578. case <-clockSource.C:
  579. continue
  580. }
  581. }
  582. }
  583. // TryPromoteBest handles re-querying lighthouses and probing for better paths
  584. // NOTE: It is an error to call this if you are a lighthouse since they should not roam clients!
  585. func (i *HostInfo) TryPromoteBest(preferredRanges []*net.IPNet, ifce *Interface) {
  586. c := i.promoteCounter.Add(1)
  587. if c%PromoteEvery == 0 {
  588. // The lock here is currently protecting i.remote access
  589. i.RLock()
  590. remote := i.remote
  591. i.RUnlock()
  592. // return early if we are already on a preferred remote
  593. if remote != nil {
  594. rIP := remote.IP
  595. for _, l := range preferredRanges {
  596. if l.Contains(rIP) {
  597. return
  598. }
  599. }
  600. }
  601. i.remotes.ForEach(preferredRanges, func(addr *udp.Addr, preferred bool) {
  602. if remote != nil && (addr == nil || !preferred) {
  603. return
  604. }
  605. // Try to send a test packet to that host, this should
  606. // cause it to detect a roaming event and switch remotes
  607. ifce.sendTo(header.Test, header.TestRequest, i.ConnectionState, i, addr, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
  608. })
  609. }
  610. // Re query our lighthouses for new remotes occasionally
  611. if c%ReQueryEvery == 0 && ifce.lightHouse != nil {
  612. ifce.lightHouse.QueryServer(i.vpnIp, ifce)
  613. }
  614. }
  615. func (i *HostInfo) cachePacket(l *logrus.Logger, t header.MessageType, st header.MessageSubType, packet []byte, f packetCallback, m *cachedPacketMetrics) {
  616. //TODO: return the error so we can log with more context
  617. if len(i.packetStore) < 100 {
  618. tempPacket := make([]byte, len(packet))
  619. copy(tempPacket, packet)
  620. //l.WithField("trace", string(debug.Stack())).Error("Caching packet", tempPacket)
  621. i.packetStore = append(i.packetStore, &cachedPacket{t, st, f, tempPacket})
  622. if l.Level >= logrus.DebugLevel {
  623. i.logger(l).
  624. WithField("length", len(i.packetStore)).
  625. WithField("stored", true).
  626. Debugf("Packet store")
  627. }
  628. } else if l.Level >= logrus.DebugLevel {
  629. m.dropped.Inc(1)
  630. i.logger(l).
  631. WithField("length", len(i.packetStore)).
  632. WithField("stored", false).
  633. Debugf("Packet store")
  634. }
  635. }
  636. // handshakeComplete will set the connection as ready to communicate, as well as flush any stored packets
  637. func (i *HostInfo) handshakeComplete(l *logrus.Logger, m *cachedPacketMetrics) {
  638. //TODO: I'm not certain the distinction between handshake complete and ConnectionState being ready matters because:
  639. //TODO: HandshakeComplete means send stored packets and ConnectionState.ready means we are ready to send
  640. //TODO: if the transition from HandhsakeComplete to ConnectionState.ready happens all within this function they are identical
  641. i.ConnectionState.queueLock.Lock()
  642. i.HandshakeComplete = true
  643. //TODO: this should be managed by the handshake state machine to set it based on how many handshake were seen.
  644. // Clamping it to 2 gets us out of the woods for now
  645. i.ConnectionState.messageCounter.Store(2)
  646. if l.Level >= logrus.DebugLevel {
  647. i.logger(l).Debugf("Sending %d stored packets", len(i.packetStore))
  648. }
  649. if len(i.packetStore) > 0 {
  650. nb := make([]byte, 12, 12)
  651. out := make([]byte, mtu)
  652. for _, cp := range i.packetStore {
  653. cp.callback(cp.messageType, cp.messageSubType, i, cp.packet, nb, out)
  654. }
  655. m.sent.Inc(int64(len(i.packetStore)))
  656. }
  657. i.remotes.ResetBlockedRemotes()
  658. i.packetStore = make([]*cachedPacket, 0)
  659. i.ConnectionState.ready = true
  660. i.ConnectionState.queueLock.Unlock()
  661. i.ConnectionState.certState = nil
  662. }
  663. func (i *HostInfo) GetCert() *cert.NebulaCertificate {
  664. if i.ConnectionState != nil {
  665. return i.ConnectionState.peerCert
  666. }
  667. return nil
  668. }
  669. func (i *HostInfo) SetRemote(remote *udp.Addr) {
  670. // We copy here because we likely got this remote from a source that reuses the object
  671. if !i.remote.Equals(remote) {
  672. i.remote = remote.Copy()
  673. i.remotes.LearnRemote(i.vpnIp, remote.Copy())
  674. }
  675. }
  676. // SetRemoteIfPreferred returns true if the remote was changed. The lastRoam
  677. // time on the HostInfo will also be updated.
  678. func (i *HostInfo) SetRemoteIfPreferred(hm *HostMap, newRemote *udp.Addr) bool {
  679. if newRemote == nil {
  680. // relays have nil udp Addrs
  681. return false
  682. }
  683. currentRemote := i.remote
  684. if currentRemote == nil {
  685. i.SetRemote(newRemote)
  686. return true
  687. }
  688. // NOTE: We do this loop here instead of calling `isPreferred` in
  689. // remote_list.go so that we only have to loop over preferredRanges once.
  690. newIsPreferred := false
  691. for _, l := range hm.preferredRanges {
  692. // return early if we are already on a preferred remote
  693. if l.Contains(currentRemote.IP) {
  694. return false
  695. }
  696. if l.Contains(newRemote.IP) {
  697. newIsPreferred = true
  698. }
  699. }
  700. if newIsPreferred {
  701. // Consider this a roaming event
  702. i.lastRoam = time.Now()
  703. i.lastRoamRemote = currentRemote.Copy()
  704. i.SetRemote(newRemote)
  705. return true
  706. }
  707. return false
  708. }
  709. func (i *HostInfo) RecvErrorExceeded() bool {
  710. if i.recvError < 3 {
  711. i.recvError += 1
  712. return false
  713. }
  714. return true
  715. }
  716. func (i *HostInfo) CreateRemoteCIDR(c *cert.NebulaCertificate) {
  717. if len(c.Details.Ips) == 1 && len(c.Details.Subnets) == 0 {
  718. // Simple case, no CIDRTree needed
  719. return
  720. }
  721. remoteCidr := cidr.NewTree4()
  722. for _, ip := range c.Details.Ips {
  723. remoteCidr.AddCIDR(&net.IPNet{IP: ip.IP, Mask: net.IPMask{255, 255, 255, 255}}, struct{}{})
  724. }
  725. for _, n := range c.Details.Subnets {
  726. remoteCidr.AddCIDR(n, struct{}{})
  727. }
  728. i.remoteCidr = remoteCidr
  729. }
  730. func (i *HostInfo) logger(l *logrus.Logger) *logrus.Entry {
  731. if i == nil {
  732. return logrus.NewEntry(l)
  733. }
  734. li := l.WithField("vpnIp", i.vpnIp).
  735. WithField("localIndex", i.localIndexId).
  736. WithField("remoteIndex", i.remoteIndexId)
  737. if connState := i.ConnectionState; connState != nil {
  738. if peerCert := connState.peerCert; peerCert != nil {
  739. li = li.WithField("certName", peerCert.Details.Name)
  740. }
  741. }
  742. return li
  743. }
  744. // Utility functions
  745. func localIps(l *logrus.Logger, allowList *LocalAllowList) *[]net.IP {
  746. //FIXME: This function is pretty garbage
  747. var ips []net.IP
  748. ifaces, _ := net.Interfaces()
  749. for _, i := range ifaces {
  750. allow := allowList.AllowName(i.Name)
  751. if l.Level >= logrus.TraceLevel {
  752. l.WithField("interfaceName", i.Name).WithField("allow", allow).Trace("localAllowList.AllowName")
  753. }
  754. if !allow {
  755. continue
  756. }
  757. addrs, _ := i.Addrs()
  758. for _, addr := range addrs {
  759. var ip net.IP
  760. switch v := addr.(type) {
  761. case *net.IPNet:
  762. //continue
  763. ip = v.IP
  764. case *net.IPAddr:
  765. ip = v.IP
  766. }
  767. //TODO: Filtering out link local for now, this is probably the most correct thing
  768. //TODO: Would be nice to filter out SLAAC MAC based ips as well
  769. if ip.IsLoopback() == false && !ip.IsLinkLocalUnicast() {
  770. allow := allowList.Allow(ip)
  771. if l.Level >= logrus.TraceLevel {
  772. l.WithField("localIp", ip).WithField("allow", allow).Trace("localAllowList.Allow")
  773. }
  774. if !allow {
  775. continue
  776. }
  777. ips = append(ips, ip)
  778. }
  779. }
  780. }
  781. return &ips
  782. }