lighthouse.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. package nebula
  2. import (
  3. "errors"
  4. "fmt"
  5. "net"
  6. "sync"
  7. "time"
  8. "github.com/golang/protobuf/proto"
  9. "github.com/rcrowley/go-metrics"
  10. "github.com/slackhq/nebula/cert"
  11. )
// ErrHostNotKnown is returned by LightHouse.Query when the local cache holds
// no addresses for the requested vpn IP.
var ErrHostNotKnown = errors.New("host not known")
// LightHouse holds the lighthouse related state of this node: the cache of
// known remote addresses per vpn IP, the set of lighthouse vpn IPs, the
// allow lists applied to addresses, and the hole punching settings.
//
// The embedded RWMutex guards the maps and the allow list pointers.
type LightHouse struct {
	sync.RWMutex //Because we concurrently read and write to our maps

	// amLighthouse is true when this node answers host queries itself
	// instead of sending them to other lighthouses.
	amLighthouse bool

	// myIp is this node's own vpn IP; it is excluded from advertised
	// addresses and used as the VpnIp of update notifications.
	myIp uint32

	// punchConn is the connection hole punch packets are written to.
	punchConn *udpConn

	// Local cache of answers from light houses
	addrMap map[uint32][]udpAddr

	// filters remote addresses allowed for each host
	// - When we are a lighthouse, this filters what addresses we store and
	// respond with.
	// - When we are not a lighthouse, this filters which addresses we accept
	// from lighthouses.
	remoteAllowList *AllowList

	// filters local addresses that we advertise to lighthouses
	localAllowList *AllowList

	// used to trigger the HandshakeManager when we receive HostQueryReply
	handshakeTrigger chan<- uint32

	// staticList exists to avoid having a bool in each addrMap entry
	// since static should be rare
	staticList map[uint32]struct{}

	// lighthouses is the set of vpn IPs that are lighthouses.
	lighthouses map[uint32]struct{}

	// interval is the number of seconds LhUpdateWorker sleeps between
	// update notifications; 0 disables the worker.
	interval int

	// nebulaPort is the port advertised together with each local IP.
	nebulaPort int

	// punchBack enables sending a delayed test packet to a host after a
	// punch notification about it has been received.
	punchBack bool

	// punchDelay is how long to wait before writing each hole punch packet.
	punchDelay time.Duration

	// metrics counts lighthouse messages received and sent; it is only set
	// by NewLightHouse when metrics are enabled.
	metrics *MessageMetrics

	// metricHolepunchTx counts hole punch packets written to punchConn.
	metricHolepunchTx metrics.Counter
}
// EncWriter is the message sending interface the lighthouse code uses to
// transmit its queries, replies and notifications.
type EncWriter interface {
	// SendMessageToVpnIp sends payload p of type t/st to vpnIp. In this file
	// nb is always a 12 byte scratch slice and out an output buffer.
	SendMessageToVpnIp(t NebulaMessageType, st NebulaMessageSubType, vpnIp uint32, p, nb, out []byte)
	// SendMessageToAll is not called in this file.
	// NOTE(review): presumably sends to every known address of vpnIp -
	// confirm against the implementation.
	SendMessageToAll(t NebulaMessageType, st NebulaMessageSubType, vpnIp uint32, p, nb, out []byte)
}
  45. func NewLightHouse(amLighthouse bool, myIp uint32, ips []uint32, interval int, nebulaPort int, pc *udpConn, punchBack bool, punchDelay time.Duration, metricsEnabled bool) *LightHouse {
  46. h := LightHouse{
  47. amLighthouse: amLighthouse,
  48. myIp: myIp,
  49. addrMap: make(map[uint32][]udpAddr),
  50. nebulaPort: nebulaPort,
  51. lighthouses: make(map[uint32]struct{}),
  52. staticList: make(map[uint32]struct{}),
  53. interval: interval,
  54. punchConn: pc,
  55. punchBack: punchBack,
  56. punchDelay: punchDelay,
  57. }
  58. if metricsEnabled {
  59. h.metrics = newLighthouseMetrics()
  60. h.metricHolepunchTx = metrics.GetOrRegisterCounter("messages.tx.holepunch", nil)
  61. } else {
  62. h.metricHolepunchTx = metrics.NilCounter{}
  63. }
  64. for _, ip := range ips {
  65. h.lighthouses[ip] = struct{}{}
  66. }
  67. return &h
  68. }
  69. func (lh *LightHouse) SetRemoteAllowList(allowList *AllowList) {
  70. lh.Lock()
  71. defer lh.Unlock()
  72. lh.remoteAllowList = allowList
  73. }
  74. func (lh *LightHouse) SetLocalAllowList(allowList *AllowList) {
  75. lh.Lock()
  76. defer lh.Unlock()
  77. lh.localAllowList = allowList
  78. }
  79. func (lh *LightHouse) ValidateLHStaticEntries() error {
  80. for lhIP, _ := range lh.lighthouses {
  81. if _, ok := lh.staticList[lhIP]; !ok {
  82. return fmt.Errorf("Lighthouse %s does not have a static_host_map entry", IntIp(lhIP))
  83. }
  84. }
  85. return nil
  86. }
  87. func (lh *LightHouse) Query(ip uint32, f EncWriter) ([]udpAddr, error) {
  88. if !lh.IsLighthouseIP(ip) {
  89. lh.QueryServer(ip, f)
  90. }
  91. lh.RLock()
  92. if v, ok := lh.addrMap[ip]; ok {
  93. lh.RUnlock()
  94. return v, nil
  95. }
  96. lh.RUnlock()
  97. return nil, ErrHostNotKnown
  98. }
  99. // This is asynchronous so no reply should be expected
  100. func (lh *LightHouse) QueryServer(ip uint32, f EncWriter) {
  101. if !lh.amLighthouse {
  102. // Send a query to the lighthouses and hope for the best next time
  103. query, err := proto.Marshal(NewLhQueryByInt(ip))
  104. if err != nil {
  105. l.WithError(err).WithField("vpnIp", IntIp(ip)).Error("Failed to marshal lighthouse query payload")
  106. return
  107. }
  108. lh.metricTx(NebulaMeta_HostQuery, int64(len(lh.lighthouses)))
  109. nb := make([]byte, 12, 12)
  110. out := make([]byte, mtu)
  111. for n := range lh.lighthouses {
  112. f.SendMessageToVpnIp(lightHouse, 0, n, query, nb, out)
  113. }
  114. }
  115. }
  116. // Query our local lighthouse cached results
  117. func (lh *LightHouse) QueryCache(ip uint32) []udpAddr {
  118. lh.RLock()
  119. if v, ok := lh.addrMap[ip]; ok {
  120. lh.RUnlock()
  121. return v
  122. }
  123. lh.RUnlock()
  124. return nil
  125. }
  126. func (lh *LightHouse) DeleteVpnIP(vpnIP uint32) {
  127. // First we check the static mapping
  128. // and do nothing if it is there
  129. if _, ok := lh.staticList[vpnIP]; ok {
  130. return
  131. }
  132. lh.Lock()
  133. //l.Debugln(lh.addrMap)
  134. delete(lh.addrMap, vpnIP)
  135. l.Debugf("deleting %s from lighthouse.", IntIp(vpnIP))
  136. lh.Unlock()
  137. }
  138. func (lh *LightHouse) AddRemote(vpnIP uint32, toIp *udpAddr, static bool) {
  139. // First we check if the sender thinks this is a static entry
  140. // and do nothing if it is not, but should be considered static
  141. if static == false {
  142. if _, ok := lh.staticList[vpnIP]; ok {
  143. return
  144. }
  145. }
  146. lh.Lock()
  147. for _, v := range lh.addrMap[vpnIP] {
  148. if v.Equals(toIp) {
  149. lh.Unlock()
  150. return
  151. }
  152. }
  153. allow := lh.remoteAllowList.Allow(udp2ipInt(toIp))
  154. l.WithField("remoteIp", toIp).WithField("allow", allow).Debug("remoteAllowList.Allow")
  155. if !allow {
  156. return
  157. }
  158. //l.Debugf("Adding reply of %s as %s\n", IntIp(vpnIP), toIp)
  159. if static {
  160. lh.staticList[vpnIP] = struct{}{}
  161. }
  162. lh.addrMap[vpnIP] = append(lh.addrMap[vpnIP], *toIp)
  163. lh.Unlock()
  164. }
  165. func (lh *LightHouse) AddRemoteAndReset(vpnIP uint32, toIp *udpAddr) {
  166. if lh.amLighthouse {
  167. lh.DeleteVpnIP(vpnIP)
  168. lh.AddRemote(vpnIP, toIp, false)
  169. }
  170. }
  171. func (lh *LightHouse) IsLighthouseIP(vpnIP uint32) bool {
  172. if _, ok := lh.lighthouses[vpnIP]; ok {
  173. return true
  174. }
  175. return false
  176. }
  177. // Quick generators for protobuf
  178. func NewLhQueryByIpString(VpnIp string) *NebulaMeta {
  179. return NewLhQueryByInt(ip2int(net.ParseIP(VpnIp)))
  180. }
  181. func NewLhQueryByInt(VpnIp uint32) *NebulaMeta {
  182. return &NebulaMeta{
  183. Type: NebulaMeta_HostQuery,
  184. Details: &NebulaMetaDetails{
  185. VpnIp: VpnIp,
  186. },
  187. }
  188. }
  189. func NewLhWhoami() *NebulaMeta {
  190. return &NebulaMeta{
  191. Type: NebulaMeta_HostWhoami,
  192. Details: &NebulaMetaDetails{},
  193. }
  194. }
  195. // End Quick generators for protobuf
  196. func NewIpAndPortFromUDPAddr(addr udpAddr) IpAndPort {
  197. return IpAndPort{Ip: udp2ipInt(&addr), Port: uint32(addr.Port)}
  198. }
  199. func (lh *LightHouse) LhUpdateWorker(f EncWriter) {
  200. if lh.amLighthouse || lh.interval == 0 {
  201. return
  202. }
  203. for {
  204. ipp := []*IpAndPort{}
  205. for _, e := range *localIps(lh.localAllowList) {
  206. // Only add IPs that aren't my VPN/tun IP
  207. if ip2int(e) != lh.myIp {
  208. ipp = append(ipp, &IpAndPort{Ip: ip2int(e), Port: uint32(lh.nebulaPort)})
  209. //fmt.Println(e)
  210. }
  211. }
  212. m := &NebulaMeta{
  213. Type: NebulaMeta_HostUpdateNotification,
  214. Details: &NebulaMetaDetails{
  215. VpnIp: lh.myIp,
  216. IpAndPorts: ipp,
  217. },
  218. }
  219. lh.metricTx(NebulaMeta_HostUpdateNotification, int64(len(lh.lighthouses)))
  220. nb := make([]byte, 12, 12)
  221. out := make([]byte, mtu)
  222. for vpnIp := range lh.lighthouses {
  223. mm, err := proto.Marshal(m)
  224. if err != nil {
  225. l.Debugf("Invalid marshal to update")
  226. }
  227. //l.Error("LIGHTHOUSE PACKET SEND", mm)
  228. f.SendMessageToVpnIp(lightHouse, 0, vpnIp, mm, nb, out)
  229. }
  230. time.Sleep(time.Second * time.Duration(lh.interval))
  231. }
  232. }
// LightHouseHandler processes incoming lighthouse messages for a LightHouse.
// It keeps buffers and message structs between calls so that handling a
// request does not have to re-allocate them.
type LightHouseHandler struct {
	// lh is the LightHouse whose state the handler reads and updates.
	lh *LightHouse
	// nb and out are scratch buffers passed to SendMessageToVpnIp when
	// replying.
	nb  []byte
	out []byte
	// meta is the message struct reused for decoding requests and building
	// replies; see resetMeta.
	meta *NebulaMeta
	// iap is the backing storage for reply addresses and iapp holds a
	// pointer to each element of iap, in the same order.
	iap  []IpAndPort
	iapp []*IpAndPort
}
  241. func (lh *LightHouse) NewRequestHandler() *LightHouseHandler {
  242. lhh := &LightHouseHandler{
  243. lh: lh,
  244. nb: make([]byte, 12, 12),
  245. out: make([]byte, mtu),
  246. meta: &NebulaMeta{
  247. Details: &NebulaMetaDetails{},
  248. },
  249. }
  250. lhh.resizeIpAndPorts(10)
  251. return lhh
  252. }
  253. // This method is similar to Reset(), but it re-uses the pointer structs
  254. // so that we don't have to re-allocate them
  255. func (lhh *LightHouseHandler) resetMeta() *NebulaMeta {
  256. details := lhh.meta.Details
  257. details.Reset()
  258. lhh.meta.Reset()
  259. lhh.meta.Details = details
  260. return lhh.meta
  261. }
  262. func (lhh *LightHouseHandler) resizeIpAndPorts(n int) {
  263. if cap(lhh.iap) < n {
  264. lhh.iap = make([]IpAndPort, n)
  265. lhh.iapp = make([]*IpAndPort, n)
  266. for i := range lhh.iap {
  267. lhh.iapp[i] = &lhh.iap[i]
  268. }
  269. }
  270. lhh.iap = lhh.iap[:n]
  271. lhh.iapp = lhh.iapp[:n]
  272. }
  273. func (lhh *LightHouseHandler) setIpAndPortsFromNetIps(ips []udpAddr) []*IpAndPort {
  274. lhh.resizeIpAndPorts(len(ips))
  275. for i, e := range ips {
  276. lhh.iap[i] = NewIpAndPortFromUDPAddr(e)
  277. }
  278. return lhh.iapp
  279. }
  280. func (lhh *LightHouseHandler) HandleRequest(rAddr *udpAddr, vpnIp uint32, p []byte, c *cert.NebulaCertificate, f EncWriter) {
  281. lh := lhh.lh
  282. n := lhh.resetMeta()
  283. err := proto.UnmarshalMerge(p, n)
  284. if err != nil {
  285. l.WithError(err).WithField("vpnIp", IntIp(vpnIp)).WithField("udpAddr", rAddr).
  286. Error("Failed to unmarshal lighthouse packet")
  287. //TODO: send recv_error?
  288. return
  289. }
  290. if n.Details == nil {
  291. l.WithField("vpnIp", IntIp(vpnIp)).WithField("udpAddr", rAddr).
  292. Error("Invalid lighthouse update")
  293. //TODO: send recv_error?
  294. return
  295. }
  296. lh.metricRx(n.Type, 1)
  297. switch n.Type {
  298. case NebulaMeta_HostQuery:
  299. // Exit if we don't answer queries
  300. if !lh.amLighthouse {
  301. l.Debugln("I don't answer queries, but received from: ", rAddr)
  302. return
  303. }
  304. //l.Debugln("Got Query")
  305. ips, err := lh.Query(n.Details.VpnIp, f)
  306. if err != nil {
  307. //l.Debugf("Can't answer query %s from %s because error: %s", IntIp(n.Details.VpnIp), rAddr, err)
  308. return
  309. } else {
  310. reqVpnIP := n.Details.VpnIp
  311. n = lhh.resetMeta()
  312. n.Type = NebulaMeta_HostQueryReply
  313. n.Details.VpnIp = reqVpnIP
  314. n.Details.IpAndPorts = lhh.setIpAndPortsFromNetIps(ips)
  315. reply, err := proto.Marshal(n)
  316. if err != nil {
  317. l.WithError(err).WithField("vpnIp", IntIp(vpnIp)).Error("Failed to marshal lighthouse host query reply")
  318. return
  319. }
  320. lh.metricTx(NebulaMeta_HostQueryReply, 1)
  321. f.SendMessageToVpnIp(lightHouse, 0, vpnIp, reply, lhh.nb, lhh.out[:0])
  322. // This signals the other side to punch some zero byte udp packets
  323. ips, err = lh.Query(vpnIp, f)
  324. if err != nil {
  325. l.WithField("vpnIp", IntIp(vpnIp)).Debugln("Can't notify host to punch")
  326. return
  327. } else {
  328. //l.Debugln("Notify host to punch", iap)
  329. n = lhh.resetMeta()
  330. n.Type = NebulaMeta_HostPunchNotification
  331. n.Details.VpnIp = vpnIp
  332. n.Details.IpAndPorts = lhh.setIpAndPortsFromNetIps(ips)
  333. reply, _ := proto.Marshal(n)
  334. lh.metricTx(NebulaMeta_HostPunchNotification, 1)
  335. f.SendMessageToVpnIp(lightHouse, 0, reqVpnIP, reply, lhh.nb, lhh.out[:0])
  336. }
  337. //fmt.Println(reply, remoteaddr)
  338. }
  339. case NebulaMeta_HostQueryReply:
  340. if !lh.IsLighthouseIP(vpnIp) {
  341. return
  342. }
  343. for _, a := range n.Details.IpAndPorts {
  344. //first := n.Details.IpAndPorts[0]
  345. ans := NewUDPAddr(a.Ip, uint16(a.Port))
  346. lh.AddRemote(n.Details.VpnIp, ans, false)
  347. }
  348. // Non-blocking attempt to trigger, skip if it would block
  349. select {
  350. case lh.handshakeTrigger <- n.Details.VpnIp:
  351. default:
  352. }
  353. case NebulaMeta_HostUpdateNotification:
  354. //Simple check that the host sent this not someone else
  355. if n.Details.VpnIp != vpnIp {
  356. l.WithField("vpnIp", IntIp(vpnIp)).WithField("answer", IntIp(n.Details.VpnIp)).Debugln("Host sent invalid update")
  357. return
  358. }
  359. for _, a := range n.Details.IpAndPorts {
  360. ans := NewUDPAddr(a.Ip, uint16(a.Port))
  361. lh.AddRemote(n.Details.VpnIp, ans, false)
  362. }
  363. case NebulaMeta_HostMovedNotification:
  364. case NebulaMeta_HostPunchNotification:
  365. if !lh.IsLighthouseIP(vpnIp) {
  366. return
  367. }
  368. empty := []byte{0}
  369. for _, a := range n.Details.IpAndPorts {
  370. vpnPeer := NewUDPAddr(a.Ip, uint16(a.Port))
  371. go func() {
  372. time.Sleep(lh.punchDelay)
  373. lh.metricHolepunchTx.Inc(1)
  374. lh.punchConn.WriteTo(empty, vpnPeer)
  375. }()
  376. l.Debugf("Punching %s on %d for %s", IntIp(a.Ip), a.Port, IntIp(n.Details.VpnIp))
  377. }
  378. // This sends a nebula test packet to the host trying to contact us. In the case
  379. // of a double nat or other difficult scenario, this may help establish
  380. // a tunnel.
  381. if lh.punchBack {
  382. go func() {
  383. time.Sleep(time.Second * 5)
  384. l.Debugf("Sending a nebula test packet to vpn ip %s", IntIp(n.Details.VpnIp))
  385. // TODO we have to allocate a new output buffer here since we are spawning a new goroutine
  386. // for each punchBack packet. We should move this into a timerwheel or a single goroutine
  387. // managed by a channel.
  388. f.SendMessageToVpnIp(test, testRequest, n.Details.VpnIp, []byte(""), make([]byte, 12, 12), make([]byte, mtu))
  389. }()
  390. }
  391. }
  392. }
  393. func (lh *LightHouse) metricRx(t NebulaMeta_MessageType, i int64) {
  394. lh.metrics.Rx(NebulaMessageType(t), 0, i)
  395. }
  396. func (lh *LightHouse) metricTx(t NebulaMeta_MessageType, i int64) {
  397. lh.metrics.Tx(NebulaMessageType(t), 0, i)
  398. }
  399. /*
  400. func (f *Interface) sendPathCheck(ci *ConnectionState, endpoint *net.UDPAddr, counter int) {
  401. c := ci.messageCounter
  402. b := HeaderEncode(nil, Version, uint8(path_check), 0, ci.remoteIndex, c)
  403. ci.messageCounter++
  404. if ci.eKey != nil {
  405. msg := ci.eKey.EncryptDanger(b, nil, []byte(strconv.Itoa(counter)), c)
  406. //msg := ci.eKey.EncryptDanger(b, nil, []byte(fmt.Sprintf("%d", counter)), c)
  407. f.outside.WriteTo(msg, endpoint)
  408. l.Debugf("path_check sent, remote index: %d, pathCounter %d", ci.remoteIndex, counter)
  409. }
  410. }
  411. func (f *Interface) sendPathCheckReply(ci *ConnectionState, endpoint *net.UDPAddr, counter []byte) {
  412. c := ci.messageCounter
  413. b := HeaderEncode(nil, Version, uint8(path_check_reply), 0, ci.remoteIndex, c)
  414. ci.messageCounter++
  415. if ci.eKey != nil {
  416. msg := ci.eKey.EncryptDanger(b, nil, counter, c)
  417. f.outside.WriteTo(msg, endpoint)
  418. l.Debugln("path_check sent, remote index: ", ci.remoteIndex)
  419. }
  420. }
  421. */