2
0

filter.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. // Package filter is a stateful packet filter.
  4. package filter
  5. import (
  6. "fmt"
  7. "net/netip"
  8. "slices"
  9. "sync"
  10. "time"
  11. "go4.org/netipx"
  12. "tailscale.com/envknob"
  13. "tailscale.com/net/flowtrack"
  14. "tailscale.com/net/netaddr"
  15. "tailscale.com/net/packet"
  16. "tailscale.com/tailcfg"
  17. "tailscale.com/tstime/rate"
  18. "tailscale.com/types/ipproto"
  19. "tailscale.com/types/logger"
  20. "tailscale.com/util/mak"
  21. )
  22. // Filter is a stateful packet filter.
  23. type Filter struct {
  24. logf logger.Logf
  25. // local is the set of IPs prefixes that we know to be "local" to
  26. // this node. All packets coming in over tailscale must have a
  27. // destination within local, regardless of the policy filter
  28. // below.
  29. local *netipx.IPSet
  30. // logIPs is the set of IPs that are allowed to appear in flow
  31. // logs. If a packet is to or from an IP not in logIPs, it will
  32. // never be logged.
  33. logIPs *netipx.IPSet
  34. // matches4 and matches6 are lists of match->action rules
  35. // applied to all packets arriving over tailscale
  36. // tunnels. Matches are checked in order, and processing stops
  37. // at the first matching rule. The default policy if no rules
  38. // match is to drop the packet.
  39. matches4 matches
  40. matches6 matches
  41. // cap4 and cap6 are the subsets of the matches that are about
  42. // capability grants, partitioned by source IP address family.
  43. cap4, cap6 matches
  44. // state is the connection tracking state attached to this
  45. // filter. It is used to allow incoming traffic that is a response
  46. // to an outbound connection that this node made, even if those
  47. // incoming packets don't get accepted by matches above.
  48. state *filterState
  49. shieldsUp bool
  50. }
  51. // filterState is a state cache of past seen packets.
  52. type filterState struct {
  53. mu sync.Mutex
  54. lru *flowtrack.Cache[struct{}] // from flowtrack.Tuple -> struct{}
  55. }
  56. // lruMax is the size of the LRU cache in filterState.
  57. const lruMax = 512
  // Response is a verdict from the packet filter.
  type Response int

  const (
  	Drop         Response = iota // stop processing the packet
  	DropSilently                 // stop processing the packet, and don't log it either
  	Accept                       // carry on processing the packet
  	noVerdict                    // undecided so far; keep running the filter
  )

  // String returns the name of the verdict, or "???" for a value that
  // is not one of the declared constants.
  func (r Response) String() string {
  	names := [...]string{
  		Drop:         "Drop",
  		DropSilently: "DropSilently",
  		Accept:       "Accept",
  		noVerdict:    "noVerdict",
  	}
  	if r < 0 || int(r) >= len(names) {
  		return "???"
  	}
  	return names[r]
  }

  // IsDrop reports whether r is one of the two dropping verdicts,
  // Drop or DropSilently.
  func (r Response) IsDrop() bool {
  	switch r {
  	case Drop, DropSilently:
  		return true
  	}
  	return false
  }
  // RunFlags controls the filter's debug log verbosity at runtime.
  // Each flag occupies its own bit.
  type RunFlags int

  const (
  	LogDrops       RunFlags = 1 << 0 // write dropped packet info to logf
  	LogAccepts     RunFlags = 1 << 1 // write accepted packet info to logf
  	HexdumpDrops   RunFlags = 1 << 2 // print packet hexdump when logging drops
  	HexdumpAccepts RunFlags = 1 << 3 // print packet hexdump when logging accepts
  )
  91. // NewAllowAllForTest returns a packet filter that accepts
  92. // everything. Use in tests only, as it permits some kinds of spoofing
  93. // attacks to reach the OS network stack.
  94. func NewAllowAllForTest(logf logger.Logf) *Filter {
  95. any4 := netip.PrefixFrom(netaddr.IPv4(0, 0, 0, 0), 0)
  96. any6 := netip.PrefixFrom(netip.AddrFrom16([16]byte{}), 0)
  97. ms := []Match{
  98. {
  99. IPProto: []ipproto.Proto{ipproto.TCP, ipproto.UDP, ipproto.ICMPv4},
  100. Srcs: []netip.Prefix{any4},
  101. Dsts: []NetPortRange{
  102. {
  103. Net: any4,
  104. Ports: PortRange{
  105. First: 0,
  106. Last: 65535,
  107. },
  108. },
  109. },
  110. },
  111. {
  112. IPProto: []ipproto.Proto{ipproto.TCP, ipproto.UDP, ipproto.ICMPv6},
  113. Srcs: []netip.Prefix{any6},
  114. Dsts: []NetPortRange{
  115. {
  116. Net: any6,
  117. Ports: PortRange{
  118. First: 0,
  119. Last: 65535,
  120. },
  121. },
  122. },
  123. },
  124. }
  125. var sb netipx.IPSetBuilder
  126. sb.AddPrefix(any4)
  127. sb.AddPrefix(any6)
  128. ipSet, _ := sb.IPSet()
  129. return New(ms, ipSet, ipSet, nil, logf)
  130. }
  131. // NewAllowNone returns a packet filter that rejects everything.
  132. func NewAllowNone(logf logger.Logf, logIPs *netipx.IPSet) *Filter {
  133. return New(nil, &netipx.IPSet{}, logIPs, nil, logf)
  134. }
  135. // NewShieldsUpFilter returns a packet filter that rejects incoming connections.
  136. //
  137. // If shareStateWith is non-nil, the returned filter shares state with the previous one,
  138. // as long as the previous one was also a shields up filter.
  139. func NewShieldsUpFilter(localNets *netipx.IPSet, logIPs *netipx.IPSet, shareStateWith *Filter, logf logger.Logf) *Filter {
  140. // Don't permit sharing state with a prior filter that wasn't a shields-up filter.
  141. if shareStateWith != nil && !shareStateWith.shieldsUp {
  142. shareStateWith = nil
  143. }
  144. f := New(nil, localNets, logIPs, shareStateWith, logf)
  145. f.shieldsUp = true
  146. return f
  147. }
  148. // New creates a new packet filter. The filter enforces that incoming
  149. // packets must be destined to an IP in localNets, and must be allowed
  150. // by matches. If shareStateWith is non-nil, the returned filter
  151. // shares state with the previous one, to enable changing rules at
  152. // runtime without breaking existing stateful flows.
  153. func New(matches []Match, localNets *netipx.IPSet, logIPs *netipx.IPSet, shareStateWith *Filter, logf logger.Logf) *Filter {
  154. var state *filterState
  155. if shareStateWith != nil {
  156. state = shareStateWith.state
  157. } else {
  158. state = &filterState{
  159. lru: &flowtrack.Cache[struct{}]{MaxEntries: lruMax},
  160. }
  161. }
  162. f := &Filter{
  163. logf: logf,
  164. matches4: matchesFamily(matches, netip.Addr.Is4),
  165. matches6: matchesFamily(matches, netip.Addr.Is6),
  166. cap4: capMatchesFunc(matches, netip.Addr.Is4),
  167. cap6: capMatchesFunc(matches, netip.Addr.Is6),
  168. local: localNets,
  169. logIPs: logIPs,
  170. state: state,
  171. }
  172. return f
  173. }
  174. // matchesFamily returns the subset of ms for which keep(srcNet.IP)
  175. // and keep(dstNet.IP) are both true.
  176. func matchesFamily(ms matches, keep func(netip.Addr) bool) matches {
  177. var ret matches
  178. for _, m := range ms {
  179. var retm Match
  180. retm.IPProto = m.IPProto
  181. for _, src := range m.Srcs {
  182. if keep(src.Addr()) {
  183. retm.Srcs = append(retm.Srcs, src)
  184. }
  185. }
  186. for _, dst := range m.Dsts {
  187. if keep(dst.Net.Addr()) {
  188. retm.Dsts = append(retm.Dsts, dst)
  189. }
  190. }
  191. if len(retm.Srcs) > 0 && len(retm.Dsts) > 0 {
  192. ret = append(ret, retm)
  193. }
  194. }
  195. return ret
  196. }
  197. // capMatchesFunc returns a copy of the subset of ms for which keep(srcNet.IP)
  198. // and the match is a capability grant.
  199. func capMatchesFunc(ms matches, keep func(netip.Addr) bool) matches {
  200. var ret matches
  201. for _, m := range ms {
  202. if len(m.Caps) == 0 {
  203. continue
  204. }
  205. retm := Match{Caps: m.Caps}
  206. for _, src := range m.Srcs {
  207. if keep(src.Addr()) {
  208. retm.Srcs = append(retm.Srcs, src)
  209. }
  210. }
  211. if len(retm.Srcs) > 0 {
  212. ret = append(ret, retm)
  213. }
  214. }
  215. return ret
  216. }
  217. func maybeHexdump(flag RunFlags, b []byte) string {
  218. if flag == 0 {
  219. return ""
  220. }
  221. return packet.Hexdump(b) + "\n"
  222. }
  223. // TODO(apenwarr): use a bigger bucket for specifically TCP SYN accept logging?
  224. // Logging is a quick way to record every newly opened TCP connection, but
  225. // we have to be cautious about flooding the logs vs letting people use
  226. // flood protection to hide their traffic. We could use a rate limiter in
  227. // the actual *filter* for SYN accepts, perhaps.
  228. var acceptBucket = rate.NewLimiter(rate.Every(10*time.Second), 3)
  229. var dropBucket = rate.NewLimiter(rate.Every(5*time.Second), 10)
  230. // NOTE(Xe): This func init is used to detect
  231. // TS_DEBUG_FILTER_RATE_LIMIT_LOGS=all, and if it matches, to
  232. // effectively disable the limits on the log rate by setting the limit
  233. // to 1 millisecond. This should capture everything.
  234. func init() {
  235. if envknob.String("TS_DEBUG_FILTER_RATE_LIMIT_LOGS") != "all" {
  236. return
  237. }
  238. acceptBucket = rate.NewLimiter(rate.Every(time.Millisecond), 10)
  239. dropBucket = rate.NewLimiter(rate.Every(time.Millisecond), 10)
  240. }
  241. func (f *Filter) logRateLimit(runflags RunFlags, q *packet.Parsed, dir direction, r Response, why string) {
  242. if !f.loggingAllowed(q) {
  243. return
  244. }
  245. if r == Drop && omitDropLogging(q, dir) {
  246. return
  247. }
  248. var verdict string
  249. if r == Drop && (runflags&LogDrops) != 0 && dropBucket.Allow() {
  250. verdict = "Drop"
  251. runflags &= HexdumpDrops
  252. } else if r == Accept && (runflags&LogAccepts) != 0 && acceptBucket.Allow() {
  253. verdict = "Accept"
  254. runflags &= HexdumpAccepts
  255. }
  256. // Note: it is crucial that q.String() be called only if {accept,drop}Bucket.Allow() passes,
  257. // since it causes an allocation.
  258. if verdict != "" {
  259. b := q.Buffer()
  260. f.logf("%s: %s %d %s\n%s", verdict, q.String(), len(b), why, maybeHexdump(runflags, b))
  261. }
  262. }
  // dummyPacket is 20 bytes of 0xFF filler. Check decodes it into its
  // synthesized packet so that the packet is long enough to get past
  // the length pre-check in the filter.
  var dummyPacket = []byte{
  	0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  	0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  	0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  	0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  }
  269. // Check determines whether traffic from srcIP to dstIP:dstPort is allowed
  270. // using protocol proto.
  271. func (f *Filter) Check(srcIP, dstIP netip.Addr, dstPort uint16, proto ipproto.Proto) Response {
  272. pkt := &packet.Parsed{}
  273. pkt.Decode(dummyPacket) // initialize private fields
  274. switch {
  275. case (srcIP.Is4() && dstIP.Is6()) || (srcIP.Is6() && srcIP.Is4()):
  276. // Mismatched address families, no filters will
  277. // match.
  278. return Drop
  279. case srcIP.Is4():
  280. pkt.IPVersion = 4
  281. case srcIP.Is6():
  282. pkt.IPVersion = 6
  283. default:
  284. panic("unreachable")
  285. }
  286. pkt.Src = netip.AddrPortFrom(srcIP, 0)
  287. pkt.Dst = netip.AddrPortFrom(dstIP, dstPort)
  288. pkt.IPProto = proto
  289. if proto == ipproto.TCP {
  290. pkt.TCPFlags = packet.TCPSyn
  291. }
  292. return f.RunIn(pkt, 0)
  293. }
  294. // CheckTCP determines whether TCP traffic from srcIP to dstIP:dstPort
  295. // is allowed.
  296. func (f *Filter) CheckTCP(srcIP, dstIP netip.Addr, dstPort uint16) Response {
  297. return f.Check(srcIP, dstIP, dstPort, ipproto.TCP)
  298. }
  299. // CapsWithValues appends to base the capabilities that srcIP has talking
  300. // to dstIP.
  301. func (f *Filter) CapsWithValues(srcIP, dstIP netip.Addr) tailcfg.PeerCapMap {
  302. var mm matches
  303. switch {
  304. case srcIP.Is4():
  305. mm = f.cap4
  306. case srcIP.Is6():
  307. mm = f.cap6
  308. }
  309. var out tailcfg.PeerCapMap
  310. for _, m := range mm {
  311. if !ipInList(srcIP, m.Srcs) {
  312. continue
  313. }
  314. for _, cm := range m.Caps {
  315. if cm.Cap != "" && cm.Dst.Contains(dstIP) {
  316. prev, ok := out[cm.Cap]
  317. if !ok {
  318. mak.Set(&out, cm.Cap, slices.Clone(cm.Values))
  319. continue
  320. }
  321. out[cm.Cap] = append(prev, cm.Values...)
  322. }
  323. }
  324. }
  325. return out
  326. }
  327. // ShieldsUp reports whether this is a "shields up" (block everything
  328. // incoming) filter.
  329. func (f *Filter) ShieldsUp() bool { return f.shieldsUp }
  330. // RunIn determines whether this node is allowed to receive q from a
  331. // Tailscale peer.
  332. func (f *Filter) RunIn(q *packet.Parsed, rf RunFlags) Response {
  333. dir := in
  334. r := f.pre(q, rf, dir)
  335. if r == Accept || r == Drop {
  336. // already logged
  337. return r
  338. }
  339. var why string
  340. switch q.IPVersion {
  341. case 4:
  342. r, why = f.runIn4(q)
  343. case 6:
  344. r, why = f.runIn6(q)
  345. default:
  346. r, why = Drop, "not-ip"
  347. }
  348. f.logRateLimit(rf, q, dir, r, why)
  349. return r
  350. }
  351. // RunOut determines whether this node is allowed to send q to a
  352. // Tailscale peer.
  353. func (f *Filter) RunOut(q *packet.Parsed, rf RunFlags) Response {
  354. dir := out
  355. r := f.pre(q, rf, dir)
  356. if r == Accept || r == Drop {
  357. // already logged
  358. return r
  359. }
  360. r, why := f.runOut(q)
  361. f.logRateLimit(rf, q, dir, r, why)
  362. return r
  363. }
  364. var unknownProtoStringCache sync.Map // ipproto.Proto -> string
  365. func unknownProtoString(proto ipproto.Proto) string {
  366. if v, ok := unknownProtoStringCache.Load(proto); ok {
  367. return v.(string)
  368. }
  369. s := fmt.Sprintf("unknown-protocol-%d", proto)
  370. unknownProtoStringCache.Store(proto, s)
  371. return s
  372. }
  373. func (f *Filter) runIn4(q *packet.Parsed) (r Response, why string) {
  374. // A compromised peer could try to send us packets for
  375. // destinations we didn't explicitly advertise. This check is to
  376. // prevent that.
  377. if !f.local.Contains(q.Dst.Addr()) {
  378. return Drop, "destination not allowed"
  379. }
  380. switch q.IPProto {
  381. case ipproto.ICMPv4:
  382. if q.IsEchoResponse() || q.IsError() {
  383. // ICMP responses are allowed.
  384. // TODO(apenwarr): consider using conntrack state.
  385. // We could choose to reject all packets that aren't
  386. // related to an existing ICMP-Echo, TCP, or UDP
  387. // session.
  388. return Accept, "icmp response ok"
  389. } else if f.matches4.matchIPsOnly(q) {
  390. // If any port is open to an IP, allow ICMP to it.
  391. return Accept, "icmp ok"
  392. }
  393. case ipproto.TCP:
  394. // For TCP, we want to allow *outgoing* connections,
  395. // which means we want to allow return packets on those
  396. // connections. To make this restriction work, we need to
  397. // allow non-SYN packets (continuation of an existing session)
  398. // to arrive. This should be okay since a new incoming session
  399. // can't be initiated without first sending a SYN.
  400. // It happens to also be much faster.
  401. // TODO(apenwarr): Skip the rest of decoding in this path?
  402. if !q.IsTCPSyn() {
  403. return Accept, "tcp non-syn"
  404. }
  405. if f.matches4.match(q) {
  406. return Accept, "tcp ok"
  407. }
  408. case ipproto.UDP, ipproto.SCTP:
  409. t := flowtrack.Tuple{Proto: q.IPProto, Src: q.Src, Dst: q.Dst}
  410. f.state.mu.Lock()
  411. _, ok := f.state.lru.Get(t)
  412. f.state.mu.Unlock()
  413. if ok {
  414. return Accept, "cached"
  415. }
  416. if f.matches4.match(q) {
  417. return Accept, "ok"
  418. }
  419. case ipproto.TSMP:
  420. return Accept, "tsmp ok"
  421. default:
  422. if f.matches4.matchProtoAndIPsOnlyIfAllPorts(q) {
  423. return Accept, "other-portless ok"
  424. }
  425. return Drop, unknownProtoString(q.IPProto)
  426. }
  427. return Drop, "no rules matched"
  428. }
  429. func (f *Filter) runIn6(q *packet.Parsed) (r Response, why string) {
  430. // A compromised peer could try to send us packets for
  431. // destinations we didn't explicitly advertise. This check is to
  432. // prevent that.
  433. if !f.local.Contains(q.Dst.Addr()) {
  434. return Drop, "destination not allowed"
  435. }
  436. switch q.IPProto {
  437. case ipproto.ICMPv6:
  438. if q.IsEchoResponse() || q.IsError() {
  439. // ICMP responses are allowed.
  440. // TODO(apenwarr): consider using conntrack state.
  441. // We could choose to reject all packets that aren't
  442. // related to an existing ICMP-Echo, TCP, or UDP
  443. // session.
  444. return Accept, "icmp response ok"
  445. } else if f.matches6.matchIPsOnly(q) {
  446. // If any port is open to an IP, allow ICMP to it.
  447. return Accept, "icmp ok"
  448. }
  449. case ipproto.TCP:
  450. // For TCP, we want to allow *outgoing* connections,
  451. // which means we want to allow return packets on those
  452. // connections. To make this restriction work, we need to
  453. // allow non-SYN packets (continuation of an existing session)
  454. // to arrive. This should be okay since a new incoming session
  455. // can't be initiated without first sending a SYN.
  456. // It happens to also be much faster.
  457. // TODO(apenwarr): Skip the rest of decoding in this path?
  458. if q.IPProto == ipproto.TCP && !q.IsTCPSyn() {
  459. return Accept, "tcp non-syn"
  460. }
  461. if f.matches6.match(q) {
  462. return Accept, "tcp ok"
  463. }
  464. case ipproto.UDP, ipproto.SCTP:
  465. t := flowtrack.Tuple{Proto: q.IPProto, Src: q.Src, Dst: q.Dst}
  466. f.state.mu.Lock()
  467. _, ok := f.state.lru.Get(t)
  468. f.state.mu.Unlock()
  469. if ok {
  470. return Accept, "cached"
  471. }
  472. if f.matches6.match(q) {
  473. return Accept, "ok"
  474. }
  475. case ipproto.TSMP:
  476. return Accept, "tsmp ok"
  477. default:
  478. if f.matches6.matchProtoAndIPsOnlyIfAllPorts(q) {
  479. return Accept, "other-portless ok"
  480. }
  481. return Drop, unknownProtoString(q.IPProto)
  482. }
  483. return Drop, "no rules matched"
  484. }
  485. // runIn runs the output-specific part of the filter logic.
  486. func (f *Filter) runOut(q *packet.Parsed) (r Response, why string) {
  487. switch q.IPProto {
  488. case ipproto.UDP, ipproto.SCTP:
  489. tuple := flowtrack.Tuple{
  490. Proto: q.IPProto,
  491. Src: q.Dst, Dst: q.Src, // src/dst reversed
  492. }
  493. f.state.mu.Lock()
  494. f.state.lru.Add(tuple, struct{}{})
  495. f.state.mu.Unlock()
  496. }
  497. return Accept, "ok out"
  498. }
  // direction is whether a packet was flowing into this machine, or
  // flowing out.
  type direction int

  const (
  	in  direction = iota // from Tailscale peer to local machine
  	out                  // from local machine to Tailscale peer
  )

  // String returns "in" or "out", or a "[??dir=N]" placeholder for any
  // other value.
  func (d direction) String() string {
  	if d == in {
  		return "in"
  	}
  	if d == out {
  		return "out"
  	}
  	return fmt.Sprintf("[??dir=%d]", int(d))
  }
  516. var gcpDNSAddr = netaddr.IPv4(169, 254, 169, 254)
  517. // pre runs the direction-agnostic filter logic. dir is only used for
  518. // logging.
  519. func (f *Filter) pre(q *packet.Parsed, rf RunFlags, dir direction) Response {
  520. if len(q.Buffer()) == 0 {
  521. // wireguard keepalive packet, always permit.
  522. return Accept
  523. }
  524. if len(q.Buffer()) < 20 {
  525. f.logRateLimit(rf, q, dir, Drop, "too short")
  526. return Drop
  527. }
  528. if q.Dst.Addr().IsMulticast() {
  529. f.logRateLimit(rf, q, dir, Drop, "multicast")
  530. return Drop
  531. }
  532. if q.Dst.Addr().IsLinkLocalUnicast() && q.Dst.Addr() != gcpDNSAddr {
  533. f.logRateLimit(rf, q, dir, Drop, "link-local-unicast")
  534. return Drop
  535. }
  536. if q.IPProto == ipproto.Fragment {
  537. // Fragments after the first always need to be passed through.
  538. // Very small fragments are considered Junk by Parsed.
  539. f.logRateLimit(rf, q, dir, Accept, "fragment")
  540. return Accept
  541. }
  542. return noVerdict
  543. }
  544. // loggingAllowed reports whether p can appear in logs at all.
  545. func (f *Filter) loggingAllowed(p *packet.Parsed) bool {
  546. return f.logIPs.Contains(p.Src.Addr()) && f.logIPs.Contains(p.Dst.Addr())
  547. }
  548. // omitDropLogging reports whether packet p, which has already been
  549. // deemed a packet to Drop, should bypass the [rate-limited] logging.
  550. // We don't want to log scary & spammy reject warnings for packets
  551. // that are totally normal, like IPv6 route announcements.
  552. func omitDropLogging(p *packet.Parsed, dir direction) bool {
  553. if dir != out {
  554. return false
  555. }
  556. return p.Dst.Addr().IsMulticast() || (p.Dst.Addr().IsLinkLocalUnicast() && p.Dst.Addr() != gcpDNSAddr) || p.IPProto == ipproto.IGMP
  557. }