nftables_runner.go 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. //go:build linux
  4. package linuxfw
  5. import (
  6. "encoding/binary"
  7. "encoding/hex"
  8. "errors"
  9. "fmt"
  10. "net"
  11. "net/netip"
  12. "reflect"
  13. "strings"
  14. "github.com/google/nftables"
  15. "github.com/google/nftables/expr"
  16. "golang.org/x/sys/unix"
  17. "tailscale.com/net/tsaddr"
  18. "tailscale.com/types/logger"
  19. "tailscale.com/types/ptr"
  20. )
  21. const (
  22. chainNameForward = "ts-forward"
  23. chainNameInput = "ts-input"
  24. chainNamePostrouting = "ts-postrouting"
  25. )
  26. // chainTypeRegular is an nftables chain that does not apply to a hook.
  27. const chainTypeRegular = ""
// chainInfo bundles everything getOrCreateChain needs to look up a chain by
// table and name and, if it is missing, to create it.
type chainInfo struct {
	// table is the table the chain lives in.
	table *nftables.Table
	// name is the chain name looked up within table.
	name string
	// chainType is the nftables chain type used on creation (chainTypeRegular
	// for a chain that is not attached to a hook).
	chainType nftables.ChainType
	// chainHook is the hook the chain is attached to on creation.
	chainHook *nftables.ChainHook
	// chainPriority is the chain's priority on its hook.
	chainPriority *nftables.ChainPriority
	// chainPolicy is the policy the chain is created with.
	chainPolicy *nftables.ChainPolicy
}
// nftable contains nat and filter tables for the given IP family (Proto).
type nftable struct {
	// Proto is the table family the tables belong to.
	Proto nftables.TableFamily // IPv4 or IPv6
	// Filter is the filter table for Proto.
	Filter *nftables.Table
	// Nat is the nat table for Proto.
	Nat *nftables.Table
}
// nftablesRunner implements a netfilterRunner using the netlink based nftables
// library. As nftables allows for arbitrary tables and chains, there is a need
// to follow conventions in order to integrate well with a surrounding
// ecosystem. The rules installed by nftablesRunner have the following
// properties:
//   - Install rules that intend to take precedence over rules installed by
//     other software. Tailscale provides packet filtering for tailnet traffic
//     inside the daemon based on the tailnet ACL rules.
//   - As nftables "accept" is not final, rules from high priority tables (low
//     numbers) will fall through to lower priority tables (high numbers). In
//     order to effectively be 'final', we install "jump" rules into conventional
//     tables and chains that will reach an accept verdict inside those tables.
//   - The table and chain conventions followed here are those used by
//     `iptables-nft` and `ufw`, so that those tools co-exist and do not
//     negatively affect Tailscale function.
//   - Be mindful that:
//     1) all chains attached to a given hook (i.e. the forward hook) will be
//     processed in priority order until either a rule in one of the chains
//     issues a drop verdict or there are no more chains for that hook;
//     2) processing of individual rules within a chain will stop once one of
//     them issues a final verdict (accept, drop).
//     https://wiki.nftables.org/wiki-nftables/index.php/Configuring_chains
type nftablesRunner struct {
	// conn is the nftables connection all changes are queued on and flushed
	// through.
	conn *nftables.Conn
	nft4 *nftable // IPv4 tables, never nil
	nft6 *nftable // IPv6 tables or nil if the system does not support IPv6

	v6Available bool // whether the host supports IPv6
}
  68. func (n *nftablesRunner) ensurePreroutingChain(dst netip.Addr) (*nftables.Table, *nftables.Chain, error) {
  69. polAccept := nftables.ChainPolicyAccept
  70. table, err := n.getNFTByAddr(dst)
  71. if err != nil {
  72. return nil, nil, fmt.Errorf("error setting up nftables for IP family of %v: %w", dst, err)
  73. }
  74. nat, err := createTableIfNotExist(n.conn, table.Proto, "nat")
  75. if err != nil {
  76. return nil, nil, fmt.Errorf("error ensuring nat table: %w", err)
  77. }
  78. // ensure prerouting chain exists
  79. preroutingCh, err := getOrCreateChain(n.conn, chainInfo{
  80. table: nat,
  81. name: "PREROUTING",
  82. chainType: nftables.ChainTypeNAT,
  83. chainHook: nftables.ChainHookPrerouting,
  84. chainPriority: nftables.ChainPriorityNATDest,
  85. chainPolicy: &polAccept,
  86. })
  87. if err != nil {
  88. return nil, nil, fmt.Errorf("error ensuring prerouting chain: %w", err)
  89. }
  90. return nat, preroutingCh, nil
  91. }
  92. func (n *nftablesRunner) AddDNATRule(origDst netip.Addr, dst netip.Addr) error {
  93. nat, preroutingCh, err := n.ensurePreroutingChain(dst)
  94. if err != nil {
  95. return err
  96. }
  97. var daddrOffset, fam, dadderLen uint32
  98. if origDst.Is4() {
  99. daddrOffset = 16
  100. dadderLen = 4
  101. fam = unix.NFPROTO_IPV4
  102. } else {
  103. daddrOffset = 24
  104. dadderLen = 16
  105. fam = unix.NFPROTO_IPV6
  106. }
  107. dnatRule := &nftables.Rule{
  108. Table: nat,
  109. Chain: preroutingCh,
  110. Exprs: []expr.Any{
  111. &expr.Payload{
  112. DestRegister: 1,
  113. Base: expr.PayloadBaseNetworkHeader,
  114. Offset: daddrOffset,
  115. Len: dadderLen,
  116. },
  117. &expr.Cmp{
  118. Op: expr.CmpOpEq,
  119. Register: 1,
  120. Data: origDst.AsSlice(),
  121. },
  122. &expr.Immediate{
  123. Register: 1,
  124. Data: dst.AsSlice(),
  125. },
  126. &expr.NAT{
  127. Type: expr.NATTypeDestNAT,
  128. Family: fam,
  129. RegAddrMin: 1,
  130. },
  131. },
  132. }
  133. n.conn.InsertRule(dnatRule)
  134. return n.conn.Flush()
  135. }
  136. // DNATWithLoadBalancer currently just forwards all traffic destined for origDst
  137. // to the first IP address from the backend targets.
  138. // TODO (irbekrm): instead of doing this load balance traffic evenly to all
  139. // backend destinations.
  140. // https://github.com/tailscale/tailscale/commit/d37f2f508509c6c35ad724fd75a27685b90b575b#diff-a3bcbcd1ca198799f4f768dc56fea913e1945a6b3ec9dbec89325a84a19a85e7R148-R232
  141. func (n *nftablesRunner) DNATWithLoadBalancer(origDst netip.Addr, dsts []netip.Addr) error {
  142. return n.AddDNATRule(origDst, dsts[0])
  143. }
  144. func (n *nftablesRunner) DNATNonTailscaleTraffic(tunname string, dst netip.Addr) error {
  145. nat, preroutingCh, err := n.ensurePreroutingChain(dst)
  146. if err != nil {
  147. return err
  148. }
  149. var famConst uint32
  150. if dst.Is4() {
  151. famConst = unix.NFPROTO_IPV4
  152. } else {
  153. famConst = unix.NFPROTO_IPV6
  154. }
  155. dnatRule := &nftables.Rule{
  156. Table: nat,
  157. Chain: preroutingCh,
  158. Exprs: []expr.Any{
  159. &expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
  160. &expr.Cmp{
  161. Op: expr.CmpOpNeq,
  162. Register: 1,
  163. Data: []byte(tunname),
  164. },
  165. &expr.Immediate{
  166. Register: 1,
  167. Data: dst.AsSlice(),
  168. },
  169. &expr.NAT{
  170. Type: expr.NATTypeDestNAT,
  171. Family: famConst,
  172. RegAddrMin: 1,
  173. },
  174. },
  175. }
  176. n.conn.InsertRule(dnatRule)
  177. return n.conn.Flush()
  178. }
  179. func (n *nftablesRunner) EnsureSNATForDst(src, dst netip.Addr) error {
  180. polAccept := nftables.ChainPolicyAccept
  181. table, err := n.getNFTByAddr(dst)
  182. if err != nil {
  183. return fmt.Errorf("error setting up nftables for IP family of %v: %w", dst, err)
  184. }
  185. nat, err := createTableIfNotExist(n.conn, table.Proto, "nat")
  186. if err != nil {
  187. return fmt.Errorf("error ensuring nat table exists: %w", err)
  188. }
  189. // ensure postrouting chain exists
  190. postRoutingCh, err := getOrCreateChain(n.conn, chainInfo{
  191. table: nat,
  192. name: "POSTROUTING",
  193. chainType: nftables.ChainTypeNAT,
  194. chainHook: nftables.ChainHookPostrouting,
  195. chainPriority: nftables.ChainPriorityNATSource,
  196. chainPolicy: &polAccept,
  197. })
  198. if err != nil {
  199. return fmt.Errorf("error ensuring postrouting chain: %w", err)
  200. }
  201. rules, err := n.conn.GetRules(nat, postRoutingCh)
  202. if err != nil {
  203. return fmt.Errorf("error listing rules: %w", err)
  204. }
  205. snatRulePrefixMatch := fmt.Sprintf("dst:%s,src:", dst.String())
  206. snatRuleFullMatch := fmt.Sprintf("%s%s", snatRulePrefixMatch, src.String())
  207. for _, rule := range rules {
  208. current := string(rule.UserData)
  209. if strings.HasPrefix(string(rule.UserData), snatRulePrefixMatch) {
  210. if strings.EqualFold(current, snatRuleFullMatch) {
  211. return nil // already exists, do nothing
  212. }
  213. if err := n.conn.DelRule(rule); err != nil {
  214. return fmt.Errorf("error deleting SNAT rule: %w", err)
  215. }
  216. }
  217. }
  218. rule := snatRule(nat, postRoutingCh, src, dst, []byte(snatRuleFullMatch))
  219. n.conn.AddRule(rule)
  220. return n.conn.Flush()
  221. }
  222. // ClampMSSToPMTU ensures that all packets with TCP flags (SYN, ACK, RST) set
  223. // being forwarded via the given interface (tun) have MSS set to <MTU of the
  224. // interface> - 40 (IP and TCP headers). This can be useful if this tailscale
  225. // instance is expected to run as a forwarding proxy, forwarding packets from an
  226. // endpoint with higher MTU in an environment where path MTU discovery is
  227. // expected to not work (such as the proxies created by the Tailscale Kubernetes
  228. // operator). ClamMSSToPMTU creates a new base-chain ts-clamp in the filter
  229. // table with accept policy and priority -150. In practice, this means that for
  230. // SYN packets the clamp rule in this chain will likely run first and accept the
  231. // packet. This is fine because 1) nftables run ALL chains with the same hook
  232. // type unless a rule in one of them drops the packet and 2) this chain does not
  233. // have functionality to drop the packet- so in practice a matching clamp rule
  234. // will always be followed by the custom tailscale filtering rules in the other
  235. // chains attached to the filter hook (FORWARD, ts-forward).
  236. // We do not want to place the clamping rule into FORWARD/ts-forward chains
  237. // because wgengine populates those chains with rules that contain accept
  238. // verdicts that would cause no further procesing within that chain. This
  239. // functionality is currently invoked from outside wgengine (containerboot), so
  240. // we don't want to race with wgengine for rule ordering within chains.
  241. func (n *nftablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
  242. polAccept := nftables.ChainPolicyAccept
  243. table, err := n.getNFTByAddr(addr)
  244. if err != nil {
  245. return fmt.Errorf("error setting up nftables for IP family of %v: %w", addr, err)
  246. }
  247. filterTable, err := createTableIfNotExist(n.conn, table.Proto, "filter")
  248. if err != nil {
  249. return fmt.Errorf("error ensuring filter table: %w", err)
  250. }
  251. // ensure ts-clamp chain exists
  252. fwChain, err := getOrCreateChain(n.conn, chainInfo{
  253. table: filterTable,
  254. name: "ts-clamp",
  255. chainType: nftables.ChainTypeFilter,
  256. chainHook: nftables.ChainHookForward,
  257. chainPriority: nftables.ChainPriorityMangle,
  258. chainPolicy: &polAccept,
  259. })
  260. if err != nil {
  261. return fmt.Errorf("error ensuring forward chain: %w", err)
  262. }
  263. clampRule := &nftables.Rule{
  264. Table: filterTable,
  265. Chain: fwChain,
  266. Exprs: []expr.Any{
  267. &expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
  268. &expr.Cmp{
  269. Op: expr.CmpOpEq,
  270. Register: 1,
  271. Data: []byte(tun),
  272. },
  273. &expr.Meta{Key: expr.MetaKeyL4PROTO, Register: 1},
  274. &expr.Cmp{
  275. Op: expr.CmpOpEq,
  276. Register: 1,
  277. Data: []byte{unix.IPPROTO_TCP},
  278. },
  279. &expr.Payload{
  280. DestRegister: 1,
  281. Base: expr.PayloadBaseTransportHeader,
  282. Offset: 13,
  283. Len: 1,
  284. },
  285. &expr.Bitwise{
  286. DestRegister: 1,
  287. SourceRegister: 1,
  288. Len: 1,
  289. Mask: []byte{0x02},
  290. Xor: []byte{0x00},
  291. },
  292. &expr.Cmp{
  293. Op: expr.CmpOpNeq, // match any packet with a TCP flag set (SYN, ACK, RST)
  294. Register: 1,
  295. Data: []byte{0x00},
  296. },
  297. &expr.Rt{
  298. Register: 1,
  299. Key: expr.RtTCPMSS,
  300. },
  301. &expr.Byteorder{
  302. DestRegister: 1,
  303. SourceRegister: 1,
  304. Op: expr.ByteorderHton,
  305. Len: 2,
  306. Size: 2,
  307. },
  308. &expr.Exthdr{
  309. SourceRegister: 1,
  310. Type: 2,
  311. Offset: 2,
  312. Len: 2,
  313. Op: expr.ExthdrOpTcpopt,
  314. },
  315. },
  316. }
  317. n.conn.AddRule(clampRule)
  318. return n.conn.Flush()
  319. }
  320. // deleteTableIfExists deletes a nftables table via connection c if it exists
  321. // within the given family.
  322. func deleteTableIfExists(c *nftables.Conn, family nftables.TableFamily, name string) error {
  323. t, err := getTableIfExists(c, family, name)
  324. if err != nil {
  325. return fmt.Errorf("get table: %w", err)
  326. }
  327. if t == nil {
  328. // Table does not exist, so nothing to delete.
  329. return nil
  330. }
  331. c.DelTable(t)
  332. if err := c.Flush(); err != nil {
  333. if t, err = getTableIfExists(c, family, name); t == nil && err == nil {
  334. // Check if the table still exists. If it does not, then the error
  335. // is due to the table not existing, so we can ignore it. Maybe a
  336. // concurrent process deleted the table.
  337. return nil
  338. }
  339. return fmt.Errorf("del table: %w", err)
  340. }
  341. return nil
  342. }
  343. // getTableIfExists returns the table with the given name from the given family
  344. // if it exists. If none match, it returns (nil, nil).
  345. func getTableIfExists(c *nftables.Conn, family nftables.TableFamily, name string) (*nftables.Table, error) {
  346. tables, err := c.ListTables()
  347. if err != nil {
  348. return nil, fmt.Errorf("get tables: %w", err)
  349. }
  350. for _, table := range tables {
  351. if table.Name == name && table.Family == family {
  352. return table, nil
  353. }
  354. }
  355. return nil, nil
  356. }
  357. // createTableIfNotExist creates a nftables table via connection c if it does
  358. // not exist within the given family.
  359. func createTableIfNotExist(c *nftables.Conn, family nftables.TableFamily, name string) (*nftables.Table, error) {
  360. if t, err := getTableIfExists(c, family, name); err != nil {
  361. return nil, fmt.Errorf("get table: %w", err)
  362. } else if t != nil {
  363. return t, nil
  364. }
  365. t := c.AddTable(&nftables.Table{
  366. Family: family,
  367. Name: name,
  368. })
  369. if err := c.Flush(); err != nil {
  370. return nil, fmt.Errorf("add table: %w", err)
  371. }
  372. return t, nil
  373. }
  374. type errorChainNotFound struct {
  375. chainName string
  376. tableName string
  377. }
  378. func (e errorChainNotFound) Error() string {
  379. return fmt.Sprintf("chain %s not found in table %s", e.chainName, e.tableName)
  380. }
  381. // getChainFromTable returns the chain with the given name from the given table.
  382. // Note that a chain name is unique within a table.
  383. func getChainFromTable(c *nftables.Conn, table *nftables.Table, name string) (*nftables.Chain, error) {
  384. chains, err := c.ListChainsOfTableFamily(table.Family)
  385. if err != nil {
  386. return nil, fmt.Errorf("list chains: %w", err)
  387. }
  388. for _, chain := range chains {
  389. // Table family is already checked so table name is unique
  390. if chain.Table.Name == table.Name && chain.Name == name {
  391. return chain, nil
  392. }
  393. }
  394. return nil, errorChainNotFound{table.Name, name}
  395. }
  396. // isTSChain reports whether `name` begins with "ts-" (and is thus a
  397. // Tailscale-managed chain).
  398. func isTSChain(name string) bool {
  399. return strings.HasPrefix(name, "ts-")
  400. }
  401. // createChainIfNotExist creates a chain with the given name in the given table
  402. // if it does not exist.
  403. func createChainIfNotExist(c *nftables.Conn, cinfo chainInfo) error {
  404. _, err := getOrCreateChain(c, cinfo)
  405. return err
  406. }
  407. func getOrCreateChain(c *nftables.Conn, cinfo chainInfo) (*nftables.Chain, error) {
  408. chain, err := getChainFromTable(c, cinfo.table, cinfo.name)
  409. if err != nil && !errors.Is(err, errorChainNotFound{cinfo.table.Name, cinfo.name}) {
  410. return nil, fmt.Errorf("get chain: %w", err)
  411. } else if err == nil {
  412. // The chain already exists. If it is a TS chain, check the
  413. // type/hook/priority, but for "conventional chains" assume they're what
  414. // we expect (in case iptables-nft/ufw make minor behavior changes in
  415. // the future).
  416. if isTSChain(chain.Name) && (chain.Type != cinfo.chainType || *chain.Hooknum != *cinfo.chainHook || *chain.Priority != *cinfo.chainPriority) {
  417. return nil, fmt.Errorf("chain %s already exists with different type/hook/priority", cinfo.name)
  418. }
  419. return chain, nil
  420. }
  421. chain = c.AddChain(&nftables.Chain{
  422. Name: cinfo.name,
  423. Table: cinfo.table,
  424. Type: cinfo.chainType,
  425. Hooknum: cinfo.chainHook,
  426. Priority: cinfo.chainPriority,
  427. Policy: cinfo.chainPolicy,
  428. })
  429. if err := c.Flush(); err != nil {
  430. return nil, fmt.Errorf("add chain: %w", err)
  431. }
  432. return chain, nil
  433. }
// NetfilterRunner abstracts helpers to run netfilter commands. It is
// implemented by linuxfw.IPTablesRunner and linuxfw.NfTablesRunner.
type NetfilterRunner interface {
	// AddLoopbackRule adds a rule to permit loopback traffic to addr. This rule
	// is added only if it does not already exist.
	AddLoopbackRule(addr netip.Addr) error

	// DelLoopbackRule removes the rule added by AddLoopbackRule.
	DelLoopbackRule(addr netip.Addr) error

	// AddHooks adds rules to conventional chains like "FORWARD", "INPUT" and
	// "POSTROUTING" to jump from those chains to tailscale chains.
	AddHooks() error

	// DelHooks deletes rules added by AddHooks.
	DelHooks(logf logger.Logf) error

	// AddChains creates custom Tailscale chains.
	AddChains() error

	// DelChains removes chains added by AddChains.
	DelChains() error

	// AddBase adds rules reused by different other rules.
	AddBase(tunname string) error

	// DelBase removes rules added by AddBase.
	DelBase() error

	// AddSNATRule adds the netfilter rule to SNAT incoming traffic over
	// the Tailscale interface destined for local subnets. An error is
	// returned if the rule already exists.
	AddSNATRule() error

	// DelSNATRule removes the rule added by AddSNATRule.
	DelSNATRule() error

	// AddStatefulRule adds a netfilter rule for stateful packet filtering
	// using conntrack.
	AddStatefulRule(tunname string) error

	// DelStatefulRule removes a netfilter rule for stateful packet filtering
	// using conntrack.
	DelStatefulRule(tunname string) error

	// HasIPV6 reports true if the system supports IPv6.
	HasIPV6() bool

	// HasIPV6NAT reports true if the system supports IPv6 NAT.
	HasIPV6NAT() bool

	// HasIPV6Filter reports true if the system supports IPv6 filter tables.
	// This is only meaningful for the iptables implementation, where hosts
	// may have partial iptables support (i.e. a missing filter table). For
	// the nftables implementation, this will default to the value of
	// HasIPV6().
	HasIPV6Filter() bool

	// AddDNATRule adds a rule to the nat/PREROUTING chain to DNAT traffic
	// destined for the given original destination to the given new destination.
	// This is used to forward all traffic destined for the Tailscale interface
	// to the provided destination, as used in the Kubernetes ingress proxies.
	AddDNATRule(origDst, dst netip.Addr) error

	// DNATWithLoadBalancer adds a rule to the nat/PREROUTING chain to DNAT
	// traffic destined for the given original destination to the given new
	// destination(s) using round robin to load balance if more than one
	// destination is provided. This is used to forward all traffic destined
	// for the Tailscale interface to the provided destination(s), as used
	// in the Kubernetes ingress proxies.
	// Note that the nftables implementation in this file does not load
	// balance yet: it forwards everything to the first destination.
	DNATWithLoadBalancer(origDst netip.Addr, dsts []netip.Addr) error

	// EnsureSNATForDst sets up firewall to mask the source for traffic destined for dst to src:
	// - creates a SNAT rule if it doesn't already exist
	// - deletes any pre-existing rules matching the destination
	// This is used to forward traffic destined for the local machine over
	// the Tailscale interface, as used in the Kubernetes egress proxies.
	EnsureSNATForDst(src, dst netip.Addr) error

	// DNATNonTailscaleTraffic adds a rule to the nat/PREROUTING chain to DNAT
	// all traffic inbound from any interface except exemptInterface to dst.
	// This is used to forward traffic destined for the local machine over
	// the Tailscale interface, as used in the Kubernetes egress proxies.
	DNATNonTailscaleTraffic(exemptInterface string, dst netip.Addr) error

	// EnsurePortMapRuleForSvc, DeletePortMapRuleForSvc and DeleteSvc are not
	// documented here.
	// NOTE(review): judging by the names and parameters they presumably
	// manage per-service port-mapping rules towards targetIP(s) - confirm
	// against the implementations before relying on this.
	EnsurePortMapRuleForSvc(svc, tun string, targetIP netip.Addr, pm PortMap) error

	DeletePortMapRuleForSvc(svc, tun string, targetIP netip.Addr, pm PortMap) error

	DeleteSvc(svc, tun string, targetIPs []netip.Addr, pm []PortMap) error

	// ClampMSSToPMTU adds a rule to the mangle/FORWARD chain to clamp MSS for
	// traffic destined for the provided tun interface.
	// Note that the nftables implementation in this file places the rule in a
	// dedicated "ts-clamp" chain of the filter table instead.
	ClampMSSToPMTU(tun string, addr netip.Addr) error

	// AddMagicsockPortRule adds a rule to the ts-input chain to accept
	// incoming traffic on the specified port, to allow magicsock to
	// communicate.
	AddMagicsockPortRule(port uint16, network string) error

	// DelMagicsockPortRule removes the rule created by AddMagicsockPortRule,
	// if it exists.
	DelMagicsockPortRule(port uint16, network string) error
}
  513. // New creates a NetfilterRunner, auto-detecting whether to use
  514. // nftables or iptables.
  515. // As nftables is still experimental, iptables will be used unless
  516. // either the TS_DEBUG_FIREWALL_MODE environment variable, or the prefHint
  517. // parameter, is set to one of "nftables" or "auto".
  518. func New(logf logger.Logf, prefHint string) (NetfilterRunner, error) {
  519. mode := detectFirewallMode(logf, prefHint)
  520. switch mode {
  521. case FirewallModeIPTables:
  522. // Note that we don't simply return an newIPTablesRunner here because it
  523. // would return a `nil` iptablesRunner which is different from returning
  524. // a nil NetfilterRunner.
  525. ipr, err := newIPTablesRunner(logf)
  526. if err != nil {
  527. return nil, err
  528. }
  529. return ipr, nil
  530. case FirewallModeNfTables:
  531. // Note that we don't simply return an newNfTablesRunner here because it
  532. // would return a `nil` nftablesRunner which is different from returning
  533. // a nil NetfilterRunner.
  534. nfr, err := newNfTablesRunner(logf)
  535. if err != nil {
  536. return nil, err
  537. }
  538. return nfr, nil
  539. default:
  540. return nil, fmt.Errorf("unknown firewall mode %v", mode)
  541. }
  542. }
  543. // newNfTablesRunner creates a new nftablesRunner without guaranteeing
  544. // the existence of the tables and chains.
  545. func newNfTablesRunner(logf logger.Logf) (*nftablesRunner, error) {
  546. conn, err := nftables.New()
  547. if err != nil {
  548. return nil, fmt.Errorf("nftables connection: %w", err)
  549. }
  550. return newNfTablesRunnerWithConn(logf, conn), nil
  551. }
  552. func newNfTablesRunnerWithConn(logf logger.Logf, conn *nftables.Conn) *nftablesRunner {
  553. nft4 := &nftable{Proto: nftables.TableFamilyIPv4}
  554. v6err := CheckIPv6(logf)
  555. if v6err != nil {
  556. logf("disabling tunneled IPv6 due to system IPv6 config: %v", v6err)
  557. }
  558. supportsV6 := v6err == nil
  559. var nft6 *nftable
  560. if supportsV6 {
  561. nft6 = &nftable{Proto: nftables.TableFamilyIPv6}
  562. }
  563. logf("netfilter running in nftables mode, v6 = %v", supportsV6)
  564. // TODO(KevinLiang10): convert iptables rule to nftable rules if they exist in the iptables
  565. return &nftablesRunner{
  566. conn: conn,
  567. nft4: nft4,
  568. nft6: nft6,
  569. v6Available: supportsV6,
  570. }
  571. }
  572. // newLoadSaddrExpr creates a new nftables expression that loads the source
  573. // address of the packet into the given register.
  574. func newLoadSaddrExpr(proto nftables.TableFamily, destReg uint32) (expr.Any, error) {
  575. switch proto {
  576. case nftables.TableFamilyIPv4:
  577. return &expr.Payload{
  578. DestRegister: destReg,
  579. Base: expr.PayloadBaseNetworkHeader,
  580. Offset: 12,
  581. Len: 4,
  582. }, nil
  583. case nftables.TableFamilyIPv6:
  584. return &expr.Payload{
  585. DestRegister: destReg,
  586. Base: expr.PayloadBaseNetworkHeader,
  587. Offset: 8,
  588. Len: 16,
  589. }, nil
  590. default:
  591. return nil, fmt.Errorf("table family %v is neither IPv4 nor IPv6", proto)
  592. }
  593. }
  594. // newLoadDportExpr creates a new nftables express that loads the desination port
  595. // of a TCP/UDP packet into the given register.
  596. func newLoadDportExpr(destReg uint32) expr.Any {
  597. return &expr.Payload{
  598. DestRegister: destReg,
  599. Base: expr.PayloadBaseTransportHeader,
  600. Offset: 2,
  601. Len: 2,
  602. }
  603. }
  604. // HasIPV6 reports true if the system supports IPv6.
  605. func (n *nftablesRunner) HasIPV6() bool {
  606. return n.v6Available
  607. }
  608. // HasIPV6NAT returns true if the system supports IPv6.
  609. // Kernel support for nftables was added after support for IPv6
  610. // NAT, so no need for a separate IPv6 NAT support check like we do for iptables.
  611. // https://tldp.org/HOWTO/Linux+IPv6-HOWTO/ch18s04.html
  612. // https://wiki.nftables.org/wiki-nftables/index.php/Building_and_installing_nftables_from_sources
  613. func (n *nftablesRunner) HasIPV6NAT() bool {
  614. return n.v6Available
  615. }
  616. // HasIPV6Filter returns true if system supports IPv6. There are no known edge
  617. // cases where nftables running on a host that supports IPv6 would not support
  618. // filter table.
  619. func (n *nftablesRunner) HasIPV6Filter() bool {
  620. return n.v6Available
  621. }
  622. // findRule iterates through the rules to find the rule with matching expressions.
  623. func findRule(conn *nftables.Conn, rule *nftables.Rule) (*nftables.Rule, error) {
  624. rules, err := conn.GetRules(rule.Table, rule.Chain)
  625. if err != nil {
  626. return nil, fmt.Errorf("get nftables rules: %w", err)
  627. }
  628. if len(rules) == 0 {
  629. return nil, nil
  630. }
  631. ruleLoop:
  632. for _, r := range rules {
  633. if len(r.Exprs) != len(rule.Exprs) {
  634. continue
  635. }
  636. for i, e := range r.Exprs {
  637. // Skip counter expressions, as they will not match.
  638. if _, ok := e.(*expr.Counter); ok {
  639. continue
  640. }
  641. if !reflect.DeepEqual(e, rule.Exprs[i]) {
  642. continue ruleLoop
  643. }
  644. }
  645. return r, nil
  646. }
  647. return nil, nil
  648. }
  649. func createLoopbackRule(
  650. proto nftables.TableFamily,
  651. table *nftables.Table,
  652. chain *nftables.Chain,
  653. addr netip.Addr,
  654. ) (*nftables.Rule, error) {
  655. saddrExpr, err := newLoadSaddrExpr(proto, 1)
  656. if err != nil {
  657. return nil, fmt.Errorf("newLoadSaddrExpr: %w", err)
  658. }
  659. loopBackRule := &nftables.Rule{
  660. Table: table,
  661. Chain: chain,
  662. Exprs: []expr.Any{
  663. &expr.Meta{
  664. Key: expr.MetaKeyIIFNAME,
  665. Register: 1,
  666. },
  667. &expr.Cmp{
  668. Op: expr.CmpOpEq,
  669. Register: 1,
  670. Data: []byte("lo"),
  671. },
  672. saddrExpr,
  673. &expr.Cmp{
  674. Op: expr.CmpOpEq,
  675. Register: 1,
  676. Data: addr.AsSlice(),
  677. },
  678. &expr.Counter{},
  679. &expr.Verdict{
  680. Kind: expr.VerdictAccept,
  681. },
  682. },
  683. }
  684. return loopBackRule, nil
  685. }
  686. // insertLoopbackRule inserts the TS loop back rule into
  687. // the given chain as the first rule if it does not exist.
  688. func insertLoopbackRule(
  689. conn *nftables.Conn, proto nftables.TableFamily,
  690. table *nftables.Table, chain *nftables.Chain, addr netip.Addr) error {
  691. loopBackRule, err := createLoopbackRule(proto, table, chain, addr)
  692. if err != nil {
  693. return fmt.Errorf("create loopback rule: %w", err)
  694. }
  695. // If TestDial is set, we are running in test mode and we should not
  696. // find rule because header will mismatch.
  697. if conn.TestDial == nil {
  698. // Check if the rule already exists.
  699. rule, err := findRule(conn, loopBackRule)
  700. if err != nil {
  701. return fmt.Errorf("find rule: %w", err)
  702. }
  703. if rule != nil {
  704. // Rule already exists, no need to insert.
  705. return nil
  706. }
  707. }
  708. // This inserts the rule to the top of the chain
  709. _ = conn.InsertRule(loopBackRule)
  710. if err = conn.Flush(); err != nil {
  711. return fmt.Errorf("insert rule: %w", err)
  712. }
  713. return nil
  714. }
  715. // getNFTByAddr returns the nftables with correct IP family
  716. // that we will be using for the given address.
  717. func (n *nftablesRunner) getNFTByAddr(addr netip.Addr) (*nftable, error) {
  718. if addr.Is6() && !n.v6Available {
  719. return nil, fmt.Errorf("nftables for IPv6 are not available on this host")
  720. }
  721. if addr.Is6() {
  722. return n.nft6, nil
  723. }
  724. return n.nft4, nil
  725. }
  726. // AddLoopbackRule adds an nftables rule to permit loopback traffic to
  727. // a local Tailscale IP. This rule is added only if it does not already exist.
  728. func (n *nftablesRunner) AddLoopbackRule(addr netip.Addr) error {
  729. nf, err := n.getNFTByAddr(addr)
  730. if err != nil {
  731. return fmt.Errorf("error setting up nftables for IP family of %v: %w", addr, err)
  732. }
  733. inputChain, err := getChainFromTable(n.conn, nf.Filter, chainNameInput)
  734. if err != nil {
  735. return fmt.Errorf("get input chain: %w", err)
  736. }
  737. if err := insertLoopbackRule(n.conn, nf.Proto, nf.Filter, inputChain, addr); err != nil {
  738. return fmt.Errorf("add loopback rule: %w", err)
  739. }
  740. return nil
  741. }
  742. // DelLoopbackRule removes the nftables rule permitting loopback
  743. // traffic to a Tailscale IP.
  744. func (n *nftablesRunner) DelLoopbackRule(addr netip.Addr) error {
  745. nf, err := n.getNFTByAddr(addr)
  746. if err != nil {
  747. return fmt.Errorf("error setting up nftables for IP family of %v: %w", addr, err)
  748. }
  749. inputChain, err := getChainFromTable(n.conn, nf.Filter, chainNameInput)
  750. if err != nil {
  751. return fmt.Errorf("get input chain: %w", err)
  752. }
  753. loopBackRule, err := createLoopbackRule(nf.Proto, nf.Filter, inputChain, addr)
  754. if err != nil {
  755. return fmt.Errorf("create loopback rule: %w", err)
  756. }
  757. existingLoopBackRule, err := findRule(n.conn, loopBackRule)
  758. if err != nil {
  759. return fmt.Errorf("find loop back rule: %w", err)
  760. }
  761. if existingLoopBackRule == nil {
  762. // Rule does not exist, no need to delete.
  763. return nil
  764. }
  765. if err := n.conn.DelRule(existingLoopBackRule); err != nil {
  766. return fmt.Errorf("delete rule: %w", err)
  767. }
  768. return n.conn.Flush()
  769. }
  770. // getTables returns tables for IP families that this host was determined to
  771. // support (either IPv4 and IPv6 or just IPv4).
  772. func (n *nftablesRunner) getTables() []*nftable {
  773. if n.HasIPV6() {
  774. return []*nftable{n.nft4, n.nft6}
  775. }
  776. return []*nftable{n.nft4}
  777. }
  778. // AddChains creates custom Tailscale chains in netfilter via nftables
  779. // if the ts-chain doesn't already exist.
  780. func (n *nftablesRunner) AddChains() error {
  781. polAccept := nftables.ChainPolicyAccept
  782. for _, table := range n.getTables() {
  783. // Create the filter table if it doesn't exist, this table name is the same
  784. // as the name used by iptables-nft and ufw. We install rules into the
  785. // same conventional table so that `accept` verdicts from our jump
  786. // chains are conclusive.
  787. filter, err := createTableIfNotExist(n.conn, table.Proto, "filter")
  788. if err != nil {
  789. return fmt.Errorf("create table: %w", err)
  790. }
  791. table.Filter = filter
  792. // Adding the "conventional chains" that are used by iptables-nft and ufw.
  793. if err = createChainIfNotExist(n.conn, chainInfo{filter, "FORWARD", nftables.ChainTypeFilter, nftables.ChainHookForward, nftables.ChainPriorityFilter, &polAccept}); err != nil {
  794. return fmt.Errorf("create forward chain: %w", err)
  795. }
  796. if err = createChainIfNotExist(n.conn, chainInfo{filter, "INPUT", nftables.ChainTypeFilter, nftables.ChainHookInput, nftables.ChainPriorityFilter, &polAccept}); err != nil {
  797. return fmt.Errorf("create input chain: %w", err)
  798. }
  799. // Adding the tailscale chains that contain our rules.
  800. if err = createChainIfNotExist(n.conn, chainInfo{filter, chainNameForward, chainTypeRegular, nil, nil, nil}); err != nil {
  801. return fmt.Errorf("create forward chain: %w", err)
  802. }
  803. if err = createChainIfNotExist(n.conn, chainInfo{filter, chainNameInput, chainTypeRegular, nil, nil, nil}); err != nil {
  804. return fmt.Errorf("create input chain: %w", err)
  805. }
  806. // Create the nat table if it doesn't exist, this table name is the same
  807. // as the name used by iptables-nft and ufw. We install rules into the
  808. // same conventional table so that `accept` verdicts from our jump
  809. // chains are conclusive.
  810. nat, err := createTableIfNotExist(n.conn, table.Proto, "nat")
  811. if err != nil {
  812. return fmt.Errorf("create table: %w", err)
  813. }
  814. table.Nat = nat
  815. // Adding the "conventional chains" that are used by iptables-nft and ufw.
  816. if err = createChainIfNotExist(n.conn, chainInfo{nat, "POSTROUTING", nftables.ChainTypeNAT, nftables.ChainHookPostrouting, nftables.ChainPriorityNATSource, &polAccept}); err != nil {
  817. return fmt.Errorf("create postrouting chain: %w", err)
  818. }
  819. // Adding the tailscale chain that contains our rules.
  820. if err = createChainIfNotExist(n.conn, chainInfo{nat, chainNamePostrouting, chainTypeRegular, nil, nil, nil}); err != nil {
  821. return fmt.Errorf("create postrouting chain: %w", err)
  822. }
  823. }
  824. return n.conn.Flush()
  825. }
  // Names of the throwaway table and chain used to probe whether nftables is
  // usable. They are created and then deleted again; if both steps succeed
  // nftables can be used, otherwise the system is assumed not to support it.
  // See createDummyPostroutingChains.
  const (
  	// tsDummyTableName is the name of the probe NAT table.
  	tsDummyTableName = "ts-test-nat"
  	// tsDummyChainName is the name of the probe postrouting chain.
  	tsDummyChainName = "ts-test-postrouting"
  )
  835. // createDummyPostroutingChains creates dummy postrouting chains in netfilter
  836. // via netfilter via nftables, as a last resort measure to detect that nftables
  837. // can be used. It cleans up the dummy chains after creation.
  838. func (n *nftablesRunner) createDummyPostroutingChains() (retErr error) {
  839. polAccept := ptr.To(nftables.ChainPolicyAccept)
  840. for _, table := range n.getTables() {
  841. nat, err := createTableIfNotExist(n.conn, table.Proto, tsDummyTableName)
  842. if err != nil {
  843. return fmt.Errorf("create nat table: %w", err)
  844. }
  845. defer func(fm nftables.TableFamily) {
  846. if err := deleteTableIfExists(n.conn, fm, tsDummyTableName); err != nil && retErr == nil {
  847. retErr = fmt.Errorf("delete %q table: %w", tsDummyTableName, err)
  848. }
  849. }(table.Proto)
  850. table.Nat = nat
  851. if err = createChainIfNotExist(n.conn, chainInfo{nat, tsDummyChainName, nftables.ChainTypeNAT, nftables.ChainHookPostrouting, nftables.ChainPriorityNATSource, polAccept}); err != nil {
  852. return fmt.Errorf("create %q chain: %w", tsDummyChainName, err)
  853. }
  854. if err := deleteChainIfExists(n.conn, nat, tsDummyChainName); err != nil {
  855. return fmt.Errorf("delete %q chain: %w", tsDummyChainName, err)
  856. }
  857. }
  858. return nil
  859. }
  860. // deleteChainIfExists deletes a chain if it exists.
  861. func deleteChainIfExists(c *nftables.Conn, table *nftables.Table, name string) error {
  862. chain, err := getChainFromTable(c, table, name)
  863. if err != nil && !errors.Is(err, errorChainNotFound{table.Name, name}) {
  864. return fmt.Errorf("get chain: %w", err)
  865. } else if err != nil {
  866. // If the chain doesn't exist, we don't need to delete it.
  867. return nil
  868. }
  869. c.FlushChain(chain)
  870. c.DelChain(chain)
  871. if err := c.Flush(); err != nil {
  872. return fmt.Errorf("flush and delete chain: %w", err)
  873. }
  874. return nil
  875. }
  876. // DelChains removes the custom Tailscale chains from netfilter via nftables.
  877. func (n *nftablesRunner) DelChains() error {
  878. for _, table := range n.getTables() {
  879. if err := deleteChainIfExists(n.conn, table.Filter, chainNameForward); err != nil {
  880. return fmt.Errorf("delete chain: %w", err)
  881. }
  882. if err := deleteChainIfExists(n.conn, table.Filter, chainNameInput); err != nil {
  883. return fmt.Errorf("delete chain: %w", err)
  884. }
  885. }
  886. if err := deleteChainIfExists(n.conn, n.nft4.Nat, chainNamePostrouting); err != nil {
  887. return fmt.Errorf("delete chain: %w", err)
  888. }
  889. if n.HasIPV6NAT() {
  890. if err := deleteChainIfExists(n.conn, n.nft6.Nat, chainNamePostrouting); err != nil {
  891. return fmt.Errorf("delete chain: %w", err)
  892. }
  893. }
  894. if err := n.conn.Flush(); err != nil {
  895. return fmt.Errorf("flush: %w", err)
  896. }
  897. return nil
  898. }
  899. // createHookRule creates a rule to jump from a hooked chain to a regular chain.
  900. func createHookRule(table *nftables.Table, fromChain *nftables.Chain, toChainName string) *nftables.Rule {
  901. exprs := []expr.Any{
  902. &expr.Counter{},
  903. &expr.Verdict{
  904. Kind: expr.VerdictJump,
  905. Chain: toChainName,
  906. },
  907. }
  908. rule := &nftables.Rule{
  909. Table: table,
  910. Chain: fromChain,
  911. Exprs: exprs,
  912. }
  913. return rule
  914. }
  915. // addHookRule adds a rule to jump from a hooked chain to a regular chain at top of the hooked chain.
  916. func addHookRule(conn *nftables.Conn, table *nftables.Table, fromChain *nftables.Chain, toChainName string) error {
  917. rule := createHookRule(table, fromChain, toChainName)
  918. _ = conn.InsertRule(rule)
  919. if err := conn.Flush(); err != nil {
  920. return fmt.Errorf("flush add rule: %w", err)
  921. }
  922. return nil
  923. }
  924. // AddHooks is adding rules to conventional chains like "FORWARD", "INPUT" and "POSTROUTING"
  925. // in tables and jump from those chains to tailscale chains.
  926. func (n *nftablesRunner) AddHooks() error {
  927. conn := n.conn
  928. for _, table := range n.getTables() {
  929. inputChain, err := getChainFromTable(conn, table.Filter, "INPUT")
  930. if err != nil {
  931. return fmt.Errorf("get INPUT chain: %w", err)
  932. }
  933. err = addHookRule(conn, table.Filter, inputChain, chainNameInput)
  934. if err != nil {
  935. return fmt.Errorf("Addhook: %w", err)
  936. }
  937. forwardChain, err := getChainFromTable(conn, table.Filter, "FORWARD")
  938. if err != nil {
  939. return fmt.Errorf("get FORWARD chain: %w", err)
  940. }
  941. err = addHookRule(conn, table.Filter, forwardChain, chainNameForward)
  942. if err != nil {
  943. return fmt.Errorf("Addhook: %w", err)
  944. }
  945. postroutingChain, err := getChainFromTable(conn, table.Nat, "POSTROUTING")
  946. if err != nil {
  947. return fmt.Errorf("get INPUT chain: %w", err)
  948. }
  949. err = addHookRule(conn, table.Nat, postroutingChain, chainNamePostrouting)
  950. if err != nil {
  951. return fmt.Errorf("Addhook: %w", err)
  952. }
  953. }
  954. return nil
  955. }
  956. // delHookRule deletes a rule that jumps from a hooked chain to a regular chain.
  957. func delHookRule(conn *nftables.Conn, table *nftables.Table, fromChain *nftables.Chain, toChainName string) error {
  958. rule := createHookRule(table, fromChain, toChainName)
  959. existingRule, err := findRule(conn, rule)
  960. if err != nil {
  961. return fmt.Errorf("Failed to find hook rule: %w", err)
  962. }
  963. if existingRule == nil {
  964. return nil
  965. }
  966. _ = conn.DelRule(existingRule)
  967. if err := conn.Flush(); err != nil {
  968. return fmt.Errorf("flush del hook rule: %w", err)
  969. }
  970. return nil
  971. }
  972. // DelHooks is deleting the rules added to conventional chains to jump to tailscale chains.
  973. func (n *nftablesRunner) DelHooks(logf logger.Logf) error {
  974. conn := n.conn
  975. for _, table := range n.getTables() {
  976. inputChain, err := getChainFromTable(conn, table.Filter, "INPUT")
  977. if err != nil {
  978. return fmt.Errorf("get INPUT chain: %w", err)
  979. }
  980. err = delHookRule(conn, table.Filter, inputChain, chainNameInput)
  981. if err != nil {
  982. return fmt.Errorf("delhook: %w", err)
  983. }
  984. forwardChain, err := getChainFromTable(conn, table.Filter, "FORWARD")
  985. if err != nil {
  986. return fmt.Errorf("get FORWARD chain: %w", err)
  987. }
  988. err = delHookRule(conn, table.Filter, forwardChain, chainNameForward)
  989. if err != nil {
  990. return fmt.Errorf("delhook: %w", err)
  991. }
  992. postroutingChain, err := getChainFromTable(conn, table.Nat, "POSTROUTING")
  993. if err != nil {
  994. return fmt.Errorf("get INPUT chain: %w", err)
  995. }
  996. err = delHookRule(conn, table.Nat, postroutingChain, chainNamePostrouting)
  997. if err != nil {
  998. return fmt.Errorf("delhook: %w", err)
  999. }
  1000. }
  1001. return nil
  1002. }
  // maskof returns the 4-byte, big-endian network mask for pfx: the top
  // pfx.Bits() bits set, the rest clear. It is written for IPv4 prefixes.
  func maskof(pfx netip.Prefix) []byte {
  	var out [4]byte
  	hostBits := uint32(0xffff_ffff) >> pfx.Bits()
  	binary.BigEndian.PutUint32(out[:], ^hostBits)
  	return out[:]
  }
  1009. // createRangeRule creates a rule that matches packets with source IP from the give
  1010. // range (like CGNAT range or ChromeOSVM range) and the interface is not the tunname,
  1011. // and makes the given decision. Only IPv4 is supported.
  1012. func createRangeRule(
  1013. table *nftables.Table, chain *nftables.Chain,
  1014. tunname string, rng netip.Prefix, decision expr.VerdictKind,
  1015. ) (*nftables.Rule, error) {
  1016. if rng.Addr().Is6() {
  1017. return nil, errors.New("IPv6 is not supported")
  1018. }
  1019. saddrExpr, err := newLoadSaddrExpr(nftables.TableFamilyIPv4, 1)
  1020. if err != nil {
  1021. return nil, fmt.Errorf("newLoadSaddrExpr: %w", err)
  1022. }
  1023. netip := rng.Addr().AsSlice()
  1024. mask := maskof(rng)
  1025. rule := &nftables.Rule{
  1026. Table: table,
  1027. Chain: chain,
  1028. Exprs: []expr.Any{
  1029. &expr.Meta{Key: expr.MetaKeyIIFNAME, Register: 1},
  1030. &expr.Cmp{
  1031. Op: expr.CmpOpNeq,
  1032. Register: 1,
  1033. Data: []byte(tunname),
  1034. },
  1035. saddrExpr,
  1036. &expr.Bitwise{
  1037. SourceRegister: 1,
  1038. DestRegister: 1,
  1039. Len: 4,
  1040. Mask: mask,
  1041. Xor: []byte{0x00, 0x00, 0x00, 0x00},
  1042. },
  1043. &expr.Cmp{
  1044. Op: expr.CmpOpEq,
  1045. Register: 1,
  1046. Data: netip,
  1047. },
  1048. &expr.Counter{},
  1049. &expr.Verdict{
  1050. Kind: decision,
  1051. },
  1052. },
  1053. }
  1054. return rule, nil
  1055. }
  1056. // addReturnChromeOSVMRangeRule adds a rule to return if the source IP
  1057. // is in the ChromeOS VM range.
  1058. func addReturnChromeOSVMRangeRule(c *nftables.Conn, table *nftables.Table, chain *nftables.Chain, tunname string) error {
  1059. rule, err := createRangeRule(table, chain, tunname, tsaddr.ChromeOSVMRange(), expr.VerdictReturn)
  1060. if err != nil {
  1061. return fmt.Errorf("create rule: %w", err)
  1062. }
  1063. _ = c.AddRule(rule)
  1064. if err = c.Flush(); err != nil {
  1065. return fmt.Errorf("add rule: %w", err)
  1066. }
  1067. return nil
  1068. }
  1069. // addDropCGNATRangeRule adds a rule to drop if the source IP is in the
  1070. // CGNAT range.
  1071. func addDropCGNATRangeRule(c *nftables.Conn, table *nftables.Table, chain *nftables.Chain, tunname string) error {
  1072. rule, err := createRangeRule(table, chain, tunname, tsaddr.CGNATRange(), expr.VerdictDrop)
  1073. if err != nil {
  1074. return fmt.Errorf("create rule: %w", err)
  1075. }
  1076. _ = c.AddRule(rule)
  1077. if err = c.Flush(); err != nil {
  1078. return fmt.Errorf("add rule: %w", err)
  1079. }
  1080. return nil
  1081. }
  1082. // createSetSubnetRouteMarkRule creates a rule to set the subnet route
  1083. // mark if the packet is from the given interface.
  1084. func createSetSubnetRouteMarkRule(table *nftables.Table, chain *nftables.Chain, tunname string) (*nftables.Rule, error) {
  1085. hexTsFwmarkMaskNeg := getTailscaleFwmarkMaskNeg()
  1086. hexTSSubnetRouteMark := getTailscaleSubnetRouteMark()
  1087. rule := &nftables.Rule{
  1088. Table: table,
  1089. Chain: chain,
  1090. Exprs: []expr.Any{
  1091. &expr.Meta{Key: expr.MetaKeyIIFNAME, Register: 1},
  1092. &expr.Cmp{
  1093. Op: expr.CmpOpEq,
  1094. Register: 1,
  1095. Data: []byte(tunname),
  1096. },
  1097. &expr.Counter{},
  1098. &expr.Meta{Key: expr.MetaKeyMARK, Register: 1},
  1099. &expr.Bitwise{
  1100. SourceRegister: 1,
  1101. DestRegister: 1,
  1102. Len: 4,
  1103. Mask: hexTsFwmarkMaskNeg,
  1104. Xor: hexTSSubnetRouteMark,
  1105. },
  1106. &expr.Meta{
  1107. Key: expr.MetaKeyMARK,
  1108. SourceRegister: true,
  1109. Register: 1,
  1110. },
  1111. },
  1112. }
  1113. return rule, nil
  1114. }
  1115. // addSetSubnetRouteMarkRule adds a rule to set the subnet route mark
  1116. // if the packet is from the given interface.
  1117. func addSetSubnetRouteMarkRule(c *nftables.Conn, table *nftables.Table, chain *nftables.Chain, tunname string) error {
  1118. rule, err := createSetSubnetRouteMarkRule(table, chain, tunname)
  1119. if err != nil {
  1120. return fmt.Errorf("create rule: %w", err)
  1121. }
  1122. _ = c.AddRule(rule)
  1123. if err := c.Flush(); err != nil {
  1124. return fmt.Errorf("add rule: %w", err)
  1125. }
  1126. return nil
  1127. }
  1128. // createDropOutgoingPacketFromCGNATRangeRuleWithTunname creates a rule to drop
  1129. // outgoing packets from the CGNAT range.
  1130. func createDropOutgoingPacketFromCGNATRangeRuleWithTunname(table *nftables.Table, chain *nftables.Chain, tunname string) (*nftables.Rule, error) {
  1131. _, ipNet, err := net.ParseCIDR(tsaddr.CGNATRange().String())
  1132. if err != nil {
  1133. return nil, fmt.Errorf("parse cidr: %v", err)
  1134. }
  1135. mask, err := hex.DecodeString(ipNet.Mask.String())
  1136. if err != nil {
  1137. return nil, fmt.Errorf("decode mask: %v", err)
  1138. }
  1139. netip := ipNet.IP.Mask(ipNet.Mask).To4()
  1140. saddrExpr, err := newLoadSaddrExpr(nftables.TableFamilyIPv4, 1)
  1141. if err != nil {
  1142. return nil, fmt.Errorf("newLoadSaddrExpr: %v", err)
  1143. }
  1144. rule := &nftables.Rule{
  1145. Table: table,
  1146. Chain: chain,
  1147. Exprs: []expr.Any{
  1148. &expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
  1149. &expr.Cmp{
  1150. Op: expr.CmpOpEq,
  1151. Register: 1,
  1152. Data: []byte(tunname),
  1153. },
  1154. saddrExpr,
  1155. &expr.Bitwise{
  1156. SourceRegister: 1,
  1157. DestRegister: 1,
  1158. Len: 4,
  1159. Mask: mask,
  1160. Xor: []byte{0x00, 0x00, 0x00, 0x00},
  1161. },
  1162. &expr.Cmp{
  1163. Op: expr.CmpOpEq,
  1164. Register: 1,
  1165. Data: netip,
  1166. },
  1167. &expr.Counter{},
  1168. &expr.Verdict{
  1169. Kind: expr.VerdictDrop,
  1170. },
  1171. },
  1172. }
  1173. return rule, nil
  1174. }
  1175. // addDropOutgoingPacketFromCGNATRangeRuleWithTunname adds a rule to drop
  1176. // outgoing packets from the CGNAT range.
  1177. func addDropOutgoingPacketFromCGNATRangeRuleWithTunname(conn *nftables.Conn, table *nftables.Table, chain *nftables.Chain, tunname string) error {
  1178. rule, err := createDropOutgoingPacketFromCGNATRangeRuleWithTunname(table, chain, tunname)
  1179. if err != nil {
  1180. return fmt.Errorf("create rule: %w", err)
  1181. }
  1182. _ = conn.AddRule(rule)
  1183. if err := conn.Flush(); err != nil {
  1184. return fmt.Errorf("add rule: %w", err)
  1185. }
  1186. return nil
  1187. }
  1188. // createAcceptOutgoingPacketRule creates a rule to accept outgoing packets
  1189. // from the given interface.
  1190. func createAcceptOutgoingPacketRule(table *nftables.Table, chain *nftables.Chain, tunname string) *nftables.Rule {
  1191. return &nftables.Rule{
  1192. Table: table,
  1193. Chain: chain,
  1194. Exprs: []expr.Any{
  1195. &expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
  1196. &expr.Cmp{
  1197. Op: expr.CmpOpEq,
  1198. Register: 1,
  1199. Data: []byte(tunname),
  1200. },
  1201. &expr.Counter{},
  1202. &expr.Verdict{
  1203. Kind: expr.VerdictAccept,
  1204. },
  1205. },
  1206. }
  1207. }
  1208. // addAcceptOutgoingPacketRule adds a rule to accept outgoing packets
  1209. // from the given interface.
  1210. func addAcceptOutgoingPacketRule(conn *nftables.Conn, table *nftables.Table, chain *nftables.Chain, tunname string) error {
  1211. rule := createAcceptOutgoingPacketRule(table, chain, tunname)
  1212. _ = conn.AddRule(rule)
  1213. if err := conn.Flush(); err != nil {
  1214. return fmt.Errorf("flush add rule: %w", err)
  1215. }
  1216. return nil
  1217. }
  1218. // createAcceptOnPortRule creates a rule to accept incoming packets to
  1219. // a given destination UDP port.
  1220. func createAcceptOnPortRule(table *nftables.Table, chain *nftables.Chain, port uint16) *nftables.Rule {
  1221. portBytes := make([]byte, 2)
  1222. binary.BigEndian.PutUint16(portBytes, port)
  1223. return &nftables.Rule{
  1224. Table: table,
  1225. Chain: chain,
  1226. Exprs: []expr.Any{
  1227. &expr.Meta{
  1228. Key: expr.MetaKeyL4PROTO,
  1229. Register: 1,
  1230. },
  1231. &expr.Cmp{
  1232. Op: expr.CmpOpEq,
  1233. Register: 1,
  1234. Data: []byte{unix.IPPROTO_UDP},
  1235. },
  1236. newLoadDportExpr(1),
  1237. &expr.Cmp{
  1238. Op: expr.CmpOpEq,
  1239. Register: 1,
  1240. Data: portBytes,
  1241. },
  1242. &expr.Counter{},
  1243. &expr.Verdict{
  1244. Kind: expr.VerdictAccept,
  1245. },
  1246. },
  1247. }
  1248. }
  1249. // addAcceptOnPortRule adds a rule to accept incoming packets to
  1250. // a given destination UDP port.
  1251. func addAcceptOnPortRule(conn *nftables.Conn, table *nftables.Table, chain *nftables.Chain, port uint16) error {
  1252. rule := createAcceptOnPortRule(table, chain, port)
  1253. _ = conn.AddRule(rule)
  1254. if err := conn.Flush(); err != nil {
  1255. return fmt.Errorf("flush add rule: %w", err)
  1256. }
  1257. return nil
  1258. }
  1259. // addAcceptOnPortRule removes a rule to accept incoming packets to
  1260. // a given destination UDP port.
  1261. func removeAcceptOnPortRule(conn *nftables.Conn, table *nftables.Table, chain *nftables.Chain, port uint16) error {
  1262. rule := createAcceptOnPortRule(table, chain, port)
  1263. rule, err := findRule(conn, rule)
  1264. if err != nil {
  1265. return fmt.Errorf("find rule: %v", err)
  1266. }
  1267. _ = conn.DelRule(rule)
  1268. if err := conn.Flush(); err != nil {
  1269. return fmt.Errorf("flush del rule: %w", err)
  1270. }
  1271. return nil
  1272. }
  1273. // AddMagicsockPortRule adds a rule to nftables to allow incoming traffic on
  1274. // the specified UDP port, so magicsock can accept incoming connections.
  1275. // network must be either "udp4" or "udp6" - this determines whether the rule
  1276. // is added for IPv4 or IPv6.
  1277. func (n *nftablesRunner) AddMagicsockPortRule(port uint16, network string) error {
  1278. var filterTable *nftables.Table
  1279. switch network {
  1280. case "udp4":
  1281. filterTable = n.nft4.Filter
  1282. case "udp6":
  1283. filterTable = n.nft6.Filter
  1284. default:
  1285. return fmt.Errorf("unsupported network %s", network)
  1286. }
  1287. inputChain, err := getChainFromTable(n.conn, filterTable, chainNameInput)
  1288. if err != nil {
  1289. return fmt.Errorf("get input chain: %v", err)
  1290. }
  1291. err = addAcceptOnPortRule(n.conn, filterTable, inputChain, port)
  1292. if err != nil {
  1293. return fmt.Errorf("add accept on port rule: %v", err)
  1294. }
  1295. return nil
  1296. }
  1297. // DelMagicsockPortRule removes a rule added by AddMagicsockPortRule to accept
  1298. // incoming traffic on a particular UDP port.
  1299. // network must be either "udp4" or "udp6" - this determines whether the rule
  1300. // is removed for IPv4 or IPv6.
  1301. func (n *nftablesRunner) DelMagicsockPortRule(port uint16, network string) error {
  1302. var filterTable *nftables.Table
  1303. switch network {
  1304. case "udp4":
  1305. filterTable = n.nft4.Filter
  1306. case "udp6":
  1307. filterTable = n.nft6.Filter
  1308. default:
  1309. return fmt.Errorf("unsupported network %s", network)
  1310. }
  1311. inputChain, err := getChainFromTable(n.conn, filterTable, chainNameInput)
  1312. if err != nil {
  1313. return fmt.Errorf("get input chain: %v", err)
  1314. }
  1315. err = removeAcceptOnPortRule(n.conn, filterTable, inputChain, port)
  1316. if err != nil {
  1317. return fmt.Errorf("add accept on port rule: %v", err)
  1318. }
  1319. return nil
  1320. }
  1321. // createAcceptIncomingPacketRule creates a rule to accept incoming packets to
  1322. // the given interface.
  1323. func createAcceptIncomingPacketRule(table *nftables.Table, chain *nftables.Chain, tunname string) *nftables.Rule {
  1324. return &nftables.Rule{
  1325. Table: table,
  1326. Chain: chain,
  1327. Exprs: []expr.Any{
  1328. &expr.Meta{Key: expr.MetaKeyIIFNAME, Register: 1},
  1329. &expr.Cmp{
  1330. Op: expr.CmpOpEq,
  1331. Register: 1,
  1332. Data: []byte(tunname),
  1333. },
  1334. &expr.Counter{},
  1335. &expr.Verdict{
  1336. Kind: expr.VerdictAccept,
  1337. },
  1338. },
  1339. }
  1340. }
  1341. func addAcceptIncomingPacketRule(conn *nftables.Conn, table *nftables.Table, chain *nftables.Chain, tunname string) error {
  1342. rule := createAcceptIncomingPacketRule(table, chain, tunname)
  1343. _ = conn.AddRule(rule)
  1344. if err := conn.Flush(); err != nil {
  1345. return fmt.Errorf("flush add rule: %w", err)
  1346. }
  1347. return nil
  1348. }
  1349. // AddBase adds some basic processing rules.
  1350. func (n *nftablesRunner) AddBase(tunname string) error {
  1351. if err := n.addBase4(tunname); err != nil {
  1352. return fmt.Errorf("add base v4: %w", err)
  1353. }
  1354. if n.HasIPV6() {
  1355. if err := n.addBase6(tunname); err != nil {
  1356. return fmt.Errorf("add base v6: %w", err)
  1357. }
  1358. }
  1359. return nil
  1360. }
  1361. // addBase4 adds some basic IPv4 processing rules.
  1362. func (n *nftablesRunner) addBase4(tunname string) error {
  1363. conn := n.conn
  1364. inputChain, err := getChainFromTable(conn, n.nft4.Filter, chainNameInput)
  1365. if err != nil {
  1366. return fmt.Errorf("get input chain v4: %v", err)
  1367. }
  1368. if err = addReturnChromeOSVMRangeRule(conn, n.nft4.Filter, inputChain, tunname); err != nil {
  1369. return fmt.Errorf("add return chromeos vm range rule v4: %w", err)
  1370. }
  1371. if err = addDropCGNATRangeRule(conn, n.nft4.Filter, inputChain, tunname); err != nil {
  1372. return fmt.Errorf("add drop cgnat range rule v4: %w", err)
  1373. }
  1374. if err = addAcceptIncomingPacketRule(conn, n.nft4.Filter, inputChain, tunname); err != nil {
  1375. return fmt.Errorf("add accept incoming packet rule v4: %w", err)
  1376. }
  1377. forwardChain, err := getChainFromTable(conn, n.nft4.Filter, chainNameForward)
  1378. if err != nil {
  1379. return fmt.Errorf("get forward chain v4: %v", err)
  1380. }
  1381. if err = addSetSubnetRouteMarkRule(conn, n.nft4.Filter, forwardChain, tunname); err != nil {
  1382. return fmt.Errorf("add set subnet route mark rule v4: %w", err)
  1383. }
  1384. if err = addMatchSubnetRouteMarkRule(conn, n.nft4.Filter, forwardChain, Accept); err != nil {
  1385. return fmt.Errorf("add match subnet route mark rule v4: %w", err)
  1386. }
  1387. if err = addDropOutgoingPacketFromCGNATRangeRuleWithTunname(conn, n.nft4.Filter, forwardChain, tunname); err != nil {
  1388. return fmt.Errorf("add drop outgoing packet from cgnat range rule v4: %w", err)
  1389. }
  1390. if err = addAcceptOutgoingPacketRule(conn, n.nft4.Filter, forwardChain, tunname); err != nil {
  1391. return fmt.Errorf("add accept outgoing packet rule v4: %w", err)
  1392. }
  1393. if err = conn.Flush(); err != nil {
  1394. return fmt.Errorf("flush base v4: %w", err)
  1395. }
  1396. return nil
  1397. }
  1398. // addBase6 adds some basic IPv6 processing rules.
  1399. func (n *nftablesRunner) addBase6(tunname string) error {
  1400. conn := n.conn
  1401. inputChain, err := getChainFromTable(conn, n.nft6.Filter, chainNameInput)
  1402. if err != nil {
  1403. return fmt.Errorf("get input chain v4: %v", err)
  1404. }
  1405. if err = addAcceptIncomingPacketRule(conn, n.nft6.Filter, inputChain, tunname); err != nil {
  1406. return fmt.Errorf("add accept incoming packet rule v6: %w", err)
  1407. }
  1408. forwardChain, err := getChainFromTable(conn, n.nft6.Filter, chainNameForward)
  1409. if err != nil {
  1410. return fmt.Errorf("get forward chain v6: %w", err)
  1411. }
  1412. if err = addSetSubnetRouteMarkRule(conn, n.nft6.Filter, forwardChain, tunname); err != nil {
  1413. return fmt.Errorf("add set subnet route mark rule v6: %w", err)
  1414. }
  1415. if err = addMatchSubnetRouteMarkRule(conn, n.nft6.Filter, forwardChain, Accept); err != nil {
  1416. return fmt.Errorf("add match subnet route mark rule v6: %w", err)
  1417. }
  1418. if err = addAcceptOutgoingPacketRule(conn, n.nft6.Filter, forwardChain, tunname); err != nil {
  1419. return fmt.Errorf("add accept outgoing packet rule v6: %w", err)
  1420. }
  1421. if err = conn.Flush(); err != nil {
  1422. return fmt.Errorf("flush base v6: %w", err)
  1423. }
  1424. return nil
  1425. }
  1426. // DelBase empties, but does not remove, custom Tailscale chains from
  1427. // netfilter via iptables.
  1428. func (n *nftablesRunner) DelBase() error {
  1429. conn := n.conn
  1430. for _, table := range n.getTables() {
  1431. inputChain, err := getChainFromTable(conn, table.Filter, chainNameInput)
  1432. if err != nil {
  1433. return fmt.Errorf("get input chain: %v", err)
  1434. }
  1435. conn.FlushChain(inputChain)
  1436. forwardChain, err := getChainFromTable(conn, table.Filter, chainNameForward)
  1437. if err != nil {
  1438. return fmt.Errorf("get forward chain: %v", err)
  1439. }
  1440. conn.FlushChain(forwardChain)
  1441. postrouteChain, err := getChainFromTable(conn, table.Nat, chainNamePostrouting)
  1442. if err != nil {
  1443. return fmt.Errorf("get postrouting chain v4: %v", err)
  1444. }
  1445. conn.FlushChain(postrouteChain)
  1446. }
  1447. return conn.Flush()
  1448. }
  1449. // createMatchSubnetRouteMarkRule creates a rule that matches packets
  1450. // with the subnet route mark and takes the specified action.
  1451. func createMatchSubnetRouteMarkRule(table *nftables.Table, chain *nftables.Chain, action MatchDecision) (*nftables.Rule, error) {
  1452. hexTSFwmarkMask := getTailscaleFwmarkMask()
  1453. hexTSSubnetRouteMark := getTailscaleSubnetRouteMark()
  1454. var endAction expr.Any
  1455. endAction = &expr.Verdict{Kind: expr.VerdictAccept}
  1456. if action == Masq {
  1457. endAction = &expr.Masq{}
  1458. }
  1459. exprs := []expr.Any{
  1460. &expr.Meta{Key: expr.MetaKeyMARK, Register: 1},
  1461. &expr.Bitwise{
  1462. SourceRegister: 1,
  1463. DestRegister: 1,
  1464. Len: 4,
  1465. Mask: hexTSFwmarkMask,
  1466. Xor: []byte{0x00, 0x00, 0x00, 0x00},
  1467. },
  1468. &expr.Cmp{
  1469. Op: expr.CmpOpEq,
  1470. Register: 1,
  1471. Data: hexTSSubnetRouteMark,
  1472. },
  1473. &expr.Counter{},
  1474. endAction,
  1475. }
  1476. rule := &nftables.Rule{
  1477. Table: table,
  1478. Chain: chain,
  1479. Exprs: exprs,
  1480. }
  1481. return rule, nil
  1482. }
  1483. // addMatchSubnetRouteMarkRule adds a rule that matches packets with
  1484. // the subnet route mark and takes the specified action.
  1485. func addMatchSubnetRouteMarkRule(conn *nftables.Conn, table *nftables.Table, chain *nftables.Chain, action MatchDecision) error {
  1486. rule, err := createMatchSubnetRouteMarkRule(table, chain, action)
  1487. if err != nil {
  1488. return fmt.Errorf("create match subnet route mark rule: %w", err)
  1489. }
  1490. _ = conn.AddRule(rule)
  1491. if err := conn.Flush(); err != nil {
  1492. return fmt.Errorf("flush add rule: %w", err)
  1493. }
  1494. return nil
  1495. }
  1496. // AddSNATRule adds a netfilter rule to SNAT traffic destined for
  1497. // local subnets.
  1498. func (n *nftablesRunner) AddSNATRule() error {
  1499. conn := n.conn
  1500. for _, table := range n.getTables() {
  1501. chain, err := getChainFromTable(conn, table.Nat, chainNamePostrouting)
  1502. if err != nil {
  1503. return fmt.Errorf("get postrouting chain v4: %w", err)
  1504. }
  1505. if err = addMatchSubnetRouteMarkRule(conn, table.Nat, chain, Masq); err != nil {
  1506. return fmt.Errorf("add match subnet route mark rule v4: %w", err)
  1507. }
  1508. }
  1509. if err := conn.Flush(); err != nil {
  1510. return fmt.Errorf("flush add SNAT rule: %w", err)
  1511. }
  1512. return nil
  1513. }
  1514. // DelSNATRule removes the netfilter rule to SNAT traffic destined for
  1515. // local subnets. An error is returned if the rule does not exist.
  1516. func (n *nftablesRunner) DelSNATRule() error {
  1517. conn := n.conn
  1518. hexTSFwmarkMask := getTailscaleFwmarkMask()
  1519. hexTSSubnetRouteMark := getTailscaleSubnetRouteMark()
  1520. exprs := []expr.Any{
  1521. &expr.Meta{Key: expr.MetaKeyMARK, Register: 1},
  1522. &expr.Bitwise{
  1523. SourceRegister: 1,
  1524. DestRegister: 1,
  1525. Len: 4,
  1526. Mask: hexTSFwmarkMask,
  1527. },
  1528. &expr.Cmp{
  1529. Op: expr.CmpOpEq,
  1530. Register: 1,
  1531. Data: hexTSSubnetRouteMark,
  1532. },
  1533. &expr.Counter{},
  1534. &expr.Masq{},
  1535. }
  1536. for _, table := range n.getTables() {
  1537. chain, err := getChainFromTable(conn, table.Nat, chainNamePostrouting)
  1538. if err != nil {
  1539. return fmt.Errorf("get postrouting chain v4: %w", err)
  1540. }
  1541. rule := &nftables.Rule{
  1542. Table: table.Nat,
  1543. Chain: chain,
  1544. Exprs: exprs,
  1545. }
  1546. SNATRule, err := findRule(conn, rule)
  1547. if err != nil {
  1548. return fmt.Errorf("find SNAT rule v4: %w", err)
  1549. }
  1550. if SNATRule != nil {
  1551. _ = conn.DelRule(SNATRule)
  1552. }
  1553. }
  1554. if err := conn.Flush(); err != nil {
  1555. return fmt.Errorf("flush del SNAT rule: %w", err)
  1556. }
  1557. return nil
  1558. }
  1559. func nativeUint32(v uint32) []byte {
  1560. b := make([]byte, 4)
  1561. binary.NativeEndian.PutUint32(b, v)
  1562. return b
  1563. }
  1564. func makeStatefulRuleExprs(tunname string) []expr.Any {
  1565. return []expr.Any{
  1566. // Check if the output interface is the Tailscale interface by
  1567. // first loding the OIFNAME into register 1 and comparing it
  1568. // against our tunname.
  1569. //
  1570. // 'cmp' implicitly breaks from a rule if a comparison fails,
  1571. // so if we continue past this rule we know that the packet is
  1572. // going to our TUN.
  1573. &expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
  1574. &expr.Cmp{
  1575. Op: expr.CmpOpEq,
  1576. Register: 1,
  1577. Data: []byte(tunname),
  1578. },
  1579. // Store the conntrack state in register 1
  1580. &expr.Ct{
  1581. Register: 1,
  1582. Key: expr.CtKeySTATE,
  1583. },
  1584. // Mask the state in register 1 to "hide" the ESTABLISHED and
  1585. // RELATED bits (which are expected and fine); if there are any
  1586. // other bits, we want them to remain.
  1587. //
  1588. // This operation is, in the kernel:
  1589. // dst[i] = (src[i] & mask[i]) ^ xor[i]
  1590. //
  1591. // So, we can mask by setting the inverse of the bits we want
  1592. // to remove; i.e. ESTABLISHED = 0b00000010, RELATED =
  1593. // 0b00000100, so, if we assume an 8-bit state (in reality,
  1594. // it's 32-bit), we can mask with 0b11111001 to clear those
  1595. // bits and keep everything else (e.g. the INVALID bit which is
  1596. // 0b00000001).
  1597. //
  1598. // TODO(andrew-d): for now, let's also allow
  1599. // CtStateBitUNTRACKED, which is a state for packets that are not
  1600. // tracked (marked so explicitly with an iptables rule using
  1601. // --notrack); we should figure out if we want to allow this or not.
  1602. &expr.Bitwise{
  1603. SourceRegister: 1,
  1604. DestRegister: 1,
  1605. Len: 4,
  1606. Mask: nativeUint32(^(0 |
  1607. expr.CtStateBitESTABLISHED |
  1608. expr.CtStateBitRELATED |
  1609. expr.CtStateBitUNTRACKED)),
  1610. // Xor is unused but must be specified
  1611. Xor: nativeUint32(0),
  1612. },
  1613. // Compare against the expected state (0, i.e. no bits set
  1614. // other than maybe ESTABLISHED and RELATED). We want this
  1615. // comparison to fail if there are no bits set, so that this
  1616. // rule's evaluation stops and we don't fall through to the
  1617. // "Drop" verdict.
  1618. //
  1619. // For example, if the state is ESTABLISHED (and we want to
  1620. // break from this rule/accept this packet):
  1621. // state = ESTABLISHED
  1622. // register1 = 0b0 (since the bitwise operation cleared the ESTABLISHED bit)
  1623. //
  1624. // compare register1 (0b0) != 0: false
  1625. // -> comparison implicitly breaks
  1626. // -> continue to the next rule
  1627. //
  1628. // For example, if the state is NEW (and we want to continue to
  1629. // the next expression and thus drop this packet):
  1630. // state = NEW
  1631. // register1 = 0b1000
  1632. //
  1633. // compare register1 (0b1000) != 0: true
  1634. // -> comparison continues to next expr
  1635. &expr.Cmp{
  1636. Op: expr.CmpOpNeq,
  1637. Register: 1,
  1638. Data: []byte{0, 0, 0, 0},
  1639. },
  1640. // If we get here, we know that this packet is going to our TUN
  1641. // device, and has a conntrack state set other than ESTABLISHED
  1642. // or RELATED. We thus count and drop the packet.
  1643. &expr.Counter{},
  1644. &expr.Verdict{Kind: expr.VerdictDrop},
  1645. }
  1646. // TODO(andrew-d): iptables-nft writes a rule that dumps as:
  1647. //
  1648. // match name conntrack rev 3
  1649. //
  1650. // I think this is using expr.Match against the following struct
  1651. // (xt_conntrack_mtinfo3):
  1652. //
  1653. // https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/xt_conntrack.h#L64-L77
  1654. //
  1655. // We could probably do something similar here, but I'm not sure if
  1656. // there's any advantage. Below is an example Match statement if we
  1657. // decide to do that, based on dumping the rule that iptables-nft
  1658. // generates:
  1659. //
  1660. // _ = expr.Match{
  1661. // Name: "conntrack",
  1662. // Rev: 3,
  1663. // Info: &xt.ConntrackMtinfo3{
  1664. // ConntrackMtinfo2: xt.ConntrackMtinfo2{
  1665. // ConntrackMtinfoBase: xt.ConntrackMtinfoBase{
  1666. // MatchFlags: xt.ConntrackState,
  1667. // InvertFlags: xt.ConntrackState,
  1668. // },
  1669. // // Mask the state to remove ESTABLISHED and
  1670. // // RELATED before comparing.
  1671. // StateMask: expr.CtStateBitESTABLISHED | expr.CtStateBitRELATED,
  1672. // },
  1673. // },
  1674. // }
  1675. }
  1676. // AddStatefulRule adds a netfilter rule for stateful packet filtering using
  1677. // conntrack.
  1678. func (n *nftablesRunner) AddStatefulRule(tunname string) error {
  1679. conn := n.conn
  1680. exprs := makeStatefulRuleExprs(tunname)
  1681. for _, table := range n.getTables() {
  1682. chain, err := getChainFromTable(conn, table.Filter, chainNameForward)
  1683. if err != nil {
  1684. return fmt.Errorf("get forward chain: %w", err)
  1685. }
  1686. // First, find the 'accept' rule that we want to insert our rule before.
  1687. acceptRule := createAcceptOutgoingPacketRule(table.Filter, chain, tunname)
  1688. rule, err := findRule(conn, acceptRule)
  1689. if err != nil {
  1690. return fmt.Errorf("find accept rule: %w", err)
  1691. }
  1692. conn.InsertRule(&nftables.Rule{
  1693. Table: table.Filter,
  1694. Chain: chain,
  1695. Exprs: exprs,
  1696. // Specifying Position in an Insert operation means to
  1697. // insert this rule before the specified rule.
  1698. Position: rule.Handle,
  1699. })
  1700. }
  1701. if err := conn.Flush(); err != nil {
  1702. return fmt.Errorf("flush add stateful rule: %w", err)
  1703. }
  1704. return nil
  1705. }
  1706. // DelStatefulRule removes the netfilter rule for stateful packet filtering
  1707. // using conntrack.
  1708. func (n *nftablesRunner) DelStatefulRule(tunname string) error {
  1709. conn := n.conn
  1710. exprs := makeStatefulRuleExprs(tunname)
  1711. for _, table := range n.getTables() {
  1712. chain, err := getChainFromTable(conn, table.Filter, chainNameForward)
  1713. if err != nil {
  1714. return fmt.Errorf("get forward chain: %w", err)
  1715. }
  1716. rule, err := findRule(conn, &nftables.Rule{
  1717. Table: table.Filter,
  1718. Chain: chain,
  1719. Exprs: exprs,
  1720. })
  1721. if err != nil {
  1722. return fmt.Errorf("find stateful rule: %w", err)
  1723. }
  1724. if rule != nil {
  1725. conn.DelRule(rule)
  1726. }
  1727. }
  1728. if err := conn.Flush(); err != nil {
  1729. return fmt.Errorf("flush del stateful rule: %w", err)
  1730. }
  1731. return nil
  1732. }
  1733. // cleanupChain removes a jump rule from hookChainName to tsChainName, and then
  1734. // the entire chain tsChainName. Errors are logged, but attempts to remove both
  1735. // the jump rule and chain continue even if one errors.
  1736. func cleanupChain(logf logger.Logf, conn *nftables.Conn, table *nftables.Table, hookChainName, tsChainName string) {
  1737. // remove the jump first, before removing the jump destination.
  1738. defaultChain, err := getChainFromTable(conn, table, hookChainName)
  1739. if err != nil && !errors.Is(err, errorChainNotFound{table.Name, hookChainName}) {
  1740. logf("cleanup: did not find default chain: %s", err)
  1741. }
  1742. if !errors.Is(err, errorChainNotFound{table.Name, hookChainName}) {
  1743. // delete hook in convention chain
  1744. _ = delHookRule(conn, table, defaultChain, tsChainName)
  1745. }
  1746. tsChain, err := getChainFromTable(conn, table, tsChainName)
  1747. if err != nil && !errors.Is(err, errorChainNotFound{table.Name, tsChainName}) {
  1748. logf("cleanup: did not find ts-chain: %s", err)
  1749. }
  1750. if tsChain != nil {
  1751. // flush and delete ts-chain
  1752. conn.FlushChain(tsChain)
  1753. conn.DelChain(tsChain)
  1754. err = conn.Flush()
  1755. logf("cleanup: delete and flush chain %s: %s", tsChainName, err)
  1756. }
  1757. }
  1758. // NfTablesCleanUp removes all Tailscale added nftables rules.
  1759. // Any errors that occur are logged to the provided logf.
  1760. func NfTablesCleanUp(logf logger.Logf) {
  1761. conn, err := nftables.New()
  1762. if err != nil {
  1763. logf("cleanup: nftables connection: %s", err)
  1764. }
  1765. tables, err := conn.ListTables() // both v4 and v6
  1766. if err != nil {
  1767. logf("cleanup: list tables: %s", err)
  1768. }
  1769. for _, table := range tables {
  1770. // These table names were used briefly in 1.48.0.
  1771. if table.Name == "ts-filter" || table.Name == "ts-nat" {
  1772. conn.DelTable(table)
  1773. if err := conn.Flush(); err != nil {
  1774. logf("cleanup: flush delete table %s: %s", table.Name, err)
  1775. }
  1776. }
  1777. if table.Name == "filter" {
  1778. cleanupChain(logf, conn, table, "INPUT", chainNameInput)
  1779. cleanupChain(logf, conn, table, "FORWARD", chainNameForward)
  1780. }
  1781. if table.Name == "nat" {
  1782. cleanupChain(logf, conn, table, "POSTROUTING", chainNamePostrouting)
  1783. }
  1784. }
  1785. }
  1786. func snatRule(t *nftables.Table, ch *nftables.Chain, src, dst netip.Addr, meta []byte) *nftables.Rule {
  1787. var daddrOffset, fam, daddrLen uint32
  1788. if dst.Is4() {
  1789. daddrOffset = 16
  1790. daddrLen = 4
  1791. fam = unix.NFPROTO_IPV4
  1792. } else {
  1793. daddrOffset = 24
  1794. daddrLen = 16
  1795. fam = unix.NFPROTO_IPV6
  1796. }
  1797. return &nftables.Rule{
  1798. Table: t,
  1799. Chain: ch,
  1800. Exprs: []expr.Any{
  1801. &expr.Payload{
  1802. DestRegister: 1,
  1803. Base: expr.PayloadBaseNetworkHeader,
  1804. Offset: daddrOffset,
  1805. Len: daddrLen,
  1806. },
  1807. &expr.Cmp{
  1808. Op: expr.CmpOpEq,
  1809. Register: 1,
  1810. Data: dst.AsSlice(),
  1811. },
  1812. &expr.Immediate{
  1813. Register: 1,
  1814. Data: src.AsSlice(),
  1815. },
  1816. &expr.NAT{
  1817. Type: expr.NATTypeSourceNAT,
  1818. Family: fam,
  1819. RegAddrMin: 1,
  1820. RegAddrMax: 1,
  1821. },
  1822. },
  1823. UserData: meta,
  1824. }
  1825. }