tailssh.go 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. //go:build linux || (darwin && !ios) || freebsd || openbsd
  4. // Package tailssh is an SSH server integrated into Tailscale.
  5. package tailssh
  6. import (
  7. "bytes"
  8. "context"
  9. "crypto/rand"
  10. "encoding/json"
  11. "errors"
  12. "fmt"
  13. "io"
  14. "net"
  15. "net/http"
  16. "net/netip"
  17. "net/url"
  18. "os"
  19. "os/exec"
  20. "path/filepath"
  21. "runtime"
  22. "strconv"
  23. "strings"
  24. "sync"
  25. "sync/atomic"
  26. "syscall"
  27. "time"
  28. gossh "github.com/tailscale/golang-x-crypto/ssh"
  29. "tailscale.com/envknob"
  30. "tailscale.com/ipn/ipnlocal"
  31. "tailscale.com/logtail/backoff"
  32. "tailscale.com/net/tsaddr"
  33. "tailscale.com/net/tsdial"
  34. "tailscale.com/sessionrecording"
  35. "tailscale.com/tailcfg"
  36. "tailscale.com/tempfork/gliderlabs/ssh"
  37. "tailscale.com/types/key"
  38. "tailscale.com/types/logger"
  39. "tailscale.com/types/netmap"
  40. "tailscale.com/util/clientmetric"
  41. "tailscale.com/util/httpm"
  42. "tailscale.com/util/mak"
  43. )
  // Debug and feature knobs. Each one is a boolean backed by the environment
  // variable named in its RegisterBool call.
  var (
  	// sshVerboseLogging gates the extra logging emitted by the vlogf helpers.
  	sshVerboseLogging = envknob.RegisterBool("TS_DEBUG_SSH_VLOG")
  	// sshDisableSFTP makes "sftp" subsystem requests fail with "sftp disabled".
  	sshDisableSFTP = envknob.RegisterBool("TS_SSH_DISABLE_SFTP")
  	// sshDisableForwarding makes both port-forwarding permission callbacks
  	// report false.
  	sshDisableForwarding = envknob.RegisterBool("TS_SSH_DISABLE_FORWARDING")
  	// sshDisablePTY presumably disables PTY allocation; it is not referenced
  	// in this part of the file, so confirm against its users.
  	sshDisablePTY = envknob.RegisterBool("TS_SSH_DISABLE_PTY")
  )
  const (
  	// forcePasswordSuffix is the suffix at the end of a username that forces
  	// Tailscale SSH into password authentication mode to work around buggy SSH
  	// clients that get confused by successful replies to auth type "none".
  	//
  	// The suffix is stripped from the SSH user name before the name is
  	// recorded in the connection info.
  	forcePasswordSuffix = "+password"
  )
  // ipnLocalBackend is the subset of ipnlocal.LocalBackend that we use.
  // It is an interface so that tests can substitute their own backend.
  type ipnLocalBackend interface {
  	// GetSSH_HostKeys returns the signers installed as host keys on every
  	// new connection's SSH server.
  	GetSSH_HostKeys() ([]gossh.Signer, error)
  	// ShouldRunSSH gates policy lookup: when it reports false, no SSH policy
  	// is returned for the connection.
  	ShouldRunSSH() bool
  	// NetMap returns the current network map, or nil if there is none.
  	NetMap() *netmap.NetworkMap
  	// WhoIs maps a source address to the node and user profile behind it;
  	// ok is false when the address is not a known identity.
  	WhoIs(proto string, ipp netip.AddrPort) (n tailcfg.NodeView, u tailcfg.UserProfile, ok bool)
  	// DoNoiseRequest performs req; it is used to fetch delegated SSH actions.
  	DoNoiseRequest(req *http.Request) (*http.Response, error)
  	Dialer() *tsdial.Dialer
  	TailscaleVarRoot() string
  	NodeKey() key.NodePublic
  }
  // server is the Tailscale SSH server. It tracks the active connections and
  // the sessions that Shutdown has to wait for.
  type server struct {
  	lb             ipnLocalBackend
  	logf           logger.Logf
  	tailscaledPath string // path of the running executable, from os.Executable

  	timeNow func() time.Time // or nil for time.Now

  	// sessionWaitGroup counts attached sessions; Shutdown waits on it.
  	sessionWaitGroup sync.WaitGroup

  	// mu protects the following
  	mu             sync.Mutex
  	activeConns    map[*conn]bool // set; value is always true
  	shutdownCalled bool           // set by Shutdown; once true, new conns and sessions are refused
  }
  79. func (srv *server) now() time.Time {
  80. if srv != nil && srv.timeNow != nil {
  81. return srv.timeNow()
  82. }
  83. return time.Now()
  84. }
  85. func init() {
  86. ipnlocal.RegisterNewSSHServer(func(logf logger.Logf, lb *ipnlocal.LocalBackend) (ipnlocal.SSHServer, error) {
  87. tsd, err := os.Executable()
  88. if err != nil {
  89. return nil, err
  90. }
  91. srv := &server{
  92. lb: lb,
  93. logf: logf,
  94. tailscaledPath: tsd,
  95. timeNow: func() time.Time {
  96. return lb.ControlNow(time.Now())
  97. },
  98. }
  99. return srv, nil
  100. })
  101. }
  102. // attachSessionToConnIfNotShutdown ensures that srv is not shutdown before
  103. // attaching the session to the conn. This ensures that once Shutdown is called,
  104. // new sessions are not allowed and existing ones are cleaned up.
  105. // It reports whether ss was attached to the conn.
  106. func (srv *server) attachSessionToConnIfNotShutdown(ss *sshSession) bool {
  107. srv.mu.Lock()
  108. defer srv.mu.Unlock()
  109. if srv.shutdownCalled {
  110. // Do not start any new sessions.
  111. return false
  112. }
  113. ss.conn.attachSession(ss)
  114. return true
  115. }
  116. func (srv *server) trackActiveConn(c *conn, add bool) {
  117. srv.mu.Lock()
  118. defer srv.mu.Unlock()
  119. if add {
  120. mak.Set(&srv.activeConns, c, true)
  121. return
  122. }
  123. delete(srv.activeConns, c)
  124. }
  125. // NumActiveConns returns the number of active SSH connections.
  126. func (srv *server) NumActiveConns() int {
  127. srv.mu.Lock()
  128. defer srv.mu.Unlock()
  129. return len(srv.activeConns)
  130. }
  131. // HandleSSHConn handles a Tailscale SSH connection from c.
  132. // This is the entry point for all SSH connections.
  133. // When this returns, the connection is closed.
  134. func (srv *server) HandleSSHConn(nc net.Conn) error {
  135. metricIncomingConnections.Add(1)
  136. c, err := srv.newConn()
  137. if err != nil {
  138. return err
  139. }
  140. srv.trackActiveConn(c, true) // add
  141. defer srv.trackActiveConn(c, false) // remove
  142. c.HandleConn(nc)
  143. // Return nil to signal to netstack's interception that it doesn't need to
  144. // log. If ss.HandleConn had problems, it can log itself (ideally on an
  145. // sshSession.logf).
  146. return nil
  147. }
  148. // Shutdown terminates all active sessions.
  149. func (srv *server) Shutdown() {
  150. srv.mu.Lock()
  151. srv.shutdownCalled = true
  152. for c := range srv.activeConns {
  153. c.Close()
  154. }
  155. srv.mu.Unlock()
  156. srv.sessionWaitGroup.Wait()
  157. }
  158. // OnPolicyChange terminates any active sessions that no longer match
  159. // the SSH access policy.
  160. func (srv *server) OnPolicyChange() {
  161. srv.mu.Lock()
  162. defer srv.mu.Unlock()
  163. for c := range srv.activeConns {
  164. if c.info == nil {
  165. // c.info is nil when the connection hasn't been authenticated yet.
  166. // In that case, the connection will be terminated when it is.
  167. continue
  168. }
  169. go c.checkStillValid()
  170. }
  171. }
  // conn represents a single SSH connection and its associated
  // ssh.Server.
  //
  // During the lifecycle of a connection, the following are called in order:
  // Setup and discover server info
  //   - ServerConfigCallback
  //
  // Do the user auth
  //   - NoClientAuthHandler
  //
  // Once auth is done, the conn can be multiplexed with multiple sessions and
  // channels concurrently. At which point any of the following can be called
  // in any order.
  //   - c.handleSessionPostSSHAuth
  //   - c.mayForwardLocalPortTo followed by ssh.DirectTCPIPHandler
  type conn struct {
  	*ssh.Server
  	srv *server

  	insecureSkipTailscaleAuth bool // used by tests.

  	// idH is the RFC4253 sec8 hash H. It is used to identify the connection,
  	// and is shared among all sessions. It should not be shared outside
  	// process. It is confusingly referred to as SessionID by the gliderlabs/ssh
  	// library.
  	idH    string // set by setInfo
  	connID string // ID that's shared with control; set by newConn

  	// anyPasswordIsOkay is whether the client is authorized but has requested
  	// password-based auth to work around their buggy SSH client. When set, we
  	// accept any password in the PasswordHandler.
  	anyPasswordIsOkay bool // set by NoClientAuthCallback

  	action0        *tailcfg.SSHAction // set by doPolicyAuth; first matching action
  	currentAction  *tailcfg.SSHAction // set by doPolicyAuth, updated by resolveNextAction
  	finalAction    *tailcfg.SSHAction // set by doPolicyAuth or resolveNextAction
  	finalActionErr error              // set by resolveNextAction when resolving the chain fails

  	info         *sshConnInfo // set by setInfo
  	localUser    *userMeta    // set by doPolicyAuth
  	userGroupIDs []string     // set by doPolicyAuth
  	acceptEnv    []string     // set by doPolicyAuth from the policy evaluation

  	// mu protects the following fields.
  	//
  	// srv.mu should be acquired prior to mu.
  	// It is safe to just acquire mu, but unsafe to
  	// acquire mu and then srv.mu.
  	mu       sync.Mutex // protects the following
  	sessions []*sshSession
  }
  217. func (c *conn) logf(format string, args ...any) {
  218. format = fmt.Sprintf("%v: %v", c.connID, format)
  219. c.srv.logf(format, args...)
  220. }
  221. func (c *conn) vlogf(format string, args ...any) {
  222. if sshVerboseLogging() {
  223. c.logf(format, args...)
  224. }
  225. }
  226. // isAuthorized walks through the action chain and returns nil if the connection
  227. // is authorized. If the connection is not authorized, it returns
  228. // errDenied. If the action chain resolution fails, it returns the
  229. // resolution error.
  230. func (c *conn) isAuthorized(ctx ssh.Context) error {
  231. action := c.currentAction
  232. for {
  233. if action.Accept {
  234. return nil
  235. }
  236. if action.Reject || action.HoldAndDelegate == "" {
  237. return errDenied
  238. }
  239. var err error
  240. action, err = c.resolveNextAction(ctx)
  241. if err != nil {
  242. return err
  243. }
  244. if action.Message != "" {
  245. if err := ctx.SendAuthBanner(action.Message); err != nil {
  246. return err
  247. }
  248. }
  249. }
  250. }
  // errDenied is returned by auth callbacks when a connection is denied by the
  // policy. It is also returned when a new connection is refused because the
  // server is shutting down. Callers may receive it wrapped.
  var errDenied = errors.New("ssh: access denied")
  254. // NoClientAuthCallback implements gossh.NoClientAuthCallback and is called by
  255. // the ssh.Server when the client first connects with the "none"
  256. // authentication method.
  257. //
  258. // It is responsible for continuing policy evaluation from BannerCallback (or
  259. // starting it afresh). It returns an error if the policy evaluation fails, or
  260. // if the decision is "reject"
  261. //
  262. // It either returns nil (accept) or errDenied (reject). The errors may be wrapped.
  263. func (c *conn) NoClientAuthCallback(ctx ssh.Context) error {
  264. if c.insecureSkipTailscaleAuth {
  265. return nil
  266. }
  267. if err := c.doPolicyAuth(ctx); err != nil {
  268. return err
  269. }
  270. if err := c.isAuthorized(ctx); err != nil {
  271. return err
  272. }
  273. // Let users specify a username ending in +password to force password auth.
  274. // This exists for buggy SSH clients that get confused by success from
  275. // "none" auth.
  276. if strings.HasSuffix(ctx.User(), forcePasswordSuffix) {
  277. c.anyPasswordIsOkay = true
  278. return errors.New("any password please") // not shown to users
  279. }
  280. return nil
  281. }
  282. func (c *conn) nextAuthMethodCallback(cm gossh.ConnMetadata, prevErrors []error) (nextMethod []string) {
  283. switch {
  284. case c.anyPasswordIsOkay:
  285. nextMethod = append(nextMethod, "password")
  286. }
  287. // The fake "tailscale" method is always appended to next so OpenSSH renders
  288. // that in parens as the final failure. (It also shows up in "ssh -v", etc)
  289. nextMethod = append(nextMethod, "tailscale")
  290. return
  291. }
  // fakePasswordHandler is our implementation of the PasswordHandler hook that
  // checks whether the user's password is correct. But we don't actually use
  // passwords. This exists only for when the user's username ends in "+password"
  // to signal that their SSH client is buggy and gets confused by auth type
  // "none" succeeding and they want our SSH server to require a dummy password
  // prompt instead. We then accept any password since we've already authenticated
  // & authorized them.
  //
  // The password argument is never inspected; the result is solely
  // c.anyPasswordIsOkay.
  func (c *conn) fakePasswordHandler(ctx ssh.Context, password string) bool {
  	return c.anyPasswordIsOkay
  }
  302. // doPolicyAuth verifies that conn can proceed.
  303. // It returns nil if the matching policy action is Accept or
  304. // HoldAndDelegate. Otherwise, it returns errDenied.
  305. func (c *conn) doPolicyAuth(ctx ssh.Context) error {
  306. if err := c.setInfo(ctx); err != nil {
  307. c.logf("failed to get conninfo: %v", err)
  308. return errDenied
  309. }
  310. a, localUser, acceptEnv, err := c.evaluatePolicy()
  311. if err != nil {
  312. return fmt.Errorf("%w: %v", errDenied, err)
  313. }
  314. c.action0 = a
  315. c.currentAction = a
  316. c.acceptEnv = acceptEnv
  317. if a.Message != "" {
  318. if err := ctx.SendAuthBanner(a.Message); err != nil {
  319. return fmt.Errorf("SendBanner: %w", err)
  320. }
  321. }
  322. if a.Accept || a.HoldAndDelegate != "" {
  323. if a.Accept {
  324. c.finalAction = a
  325. }
  326. lu, err := userLookup(localUser)
  327. if err != nil {
  328. c.logf("failed to look up %v: %v", localUser, err)
  329. ctx.SendAuthBanner(fmt.Sprintf("failed to look up %v\r\n", localUser))
  330. return err
  331. }
  332. gids, err := lu.GroupIds()
  333. if err != nil {
  334. c.logf("failed to look up local user's group IDs: %v", err)
  335. return err
  336. }
  337. c.userGroupIDs = gids
  338. c.localUser = lu
  339. return nil
  340. }
  341. if a.Reject {
  342. c.finalAction = a
  343. return errDenied
  344. }
  345. // Shouldn't get here, but:
  346. return errDenied
  347. }
  348. // ServerConfig implements ssh.ServerConfigCallback.
  349. func (c *conn) ServerConfig(ctx ssh.Context) *gossh.ServerConfig {
  350. return &gossh.ServerConfig{
  351. NoClientAuth: true, // required for the NoClientAuthCallback to run
  352. NextAuthMethodCallback: c.nextAuthMethodCallback,
  353. }
  354. }
  355. func (srv *server) newConn() (*conn, error) {
  356. srv.mu.Lock()
  357. if srv.shutdownCalled {
  358. srv.mu.Unlock()
  359. // Stop accepting new connections.
  360. // Connections in the auth phase are handled in handleConnPostSSHAuth.
  361. // Existing sessions are terminated by Shutdown.
  362. return nil, errDenied
  363. }
  364. srv.mu.Unlock()
  365. c := &conn{srv: srv}
  366. now := srv.now()
  367. c.connID = fmt.Sprintf("ssh-conn-%s-%02x", now.UTC().Format("20060102T150405"), randBytes(5))
  368. fwdHandler := &ssh.ForwardedTCPHandler{}
  369. c.Server = &ssh.Server{
  370. Version: "Tailscale",
  371. ServerConfigCallback: c.ServerConfig,
  372. NoClientAuthHandler: c.NoClientAuthCallback,
  373. PasswordHandler: c.fakePasswordHandler,
  374. Handler: c.handleSessionPostSSHAuth,
  375. LocalPortForwardingCallback: c.mayForwardLocalPortTo,
  376. ReversePortForwardingCallback: c.mayReversePortForwardTo,
  377. SubsystemHandlers: map[string]ssh.SubsystemHandler{
  378. "sftp": c.handleSessionPostSSHAuth,
  379. },
  380. // Note: the direct-tcpip channel handler and LocalPortForwardingCallback
  381. // only adds support for forwarding ports from the local machine.
  382. // TODO(maisem/bradfitz): add remote port forwarding support.
  383. ChannelHandlers: map[string]ssh.ChannelHandler{
  384. "direct-tcpip": ssh.DirectTCPIPHandler,
  385. },
  386. RequestHandlers: map[string]ssh.RequestHandler{
  387. "tcpip-forward": fwdHandler.HandleSSHRequest,
  388. "cancel-tcpip-forward": fwdHandler.HandleSSHRequest,
  389. },
  390. }
  391. ss := c.Server
  392. for k, v := range ssh.DefaultRequestHandlers {
  393. ss.RequestHandlers[k] = v
  394. }
  395. for k, v := range ssh.DefaultChannelHandlers {
  396. ss.ChannelHandlers[k] = v
  397. }
  398. for k, v := range ssh.DefaultSubsystemHandlers {
  399. ss.SubsystemHandlers[k] = v
  400. }
  401. keys, err := srv.lb.GetSSH_HostKeys()
  402. if err != nil {
  403. return nil, err
  404. }
  405. for _, signer := range keys {
  406. ss.AddHostKey(signer)
  407. }
  408. return c, nil
  409. }
  410. // mayReversePortPortForwardTo reports whether the ctx should be allowed to port forward
  411. // to the specified host and port.
  412. // TODO(bradfitz/maisem): should we have more checks on host/port?
  413. func (c *conn) mayReversePortForwardTo(ctx ssh.Context, destinationHost string, destinationPort uint32) bool {
  414. if sshDisableForwarding() {
  415. return false
  416. }
  417. if c.finalAction != nil && c.finalAction.AllowRemotePortForwarding {
  418. metricRemotePortForward.Add(1)
  419. return true
  420. }
  421. return false
  422. }
  423. // mayForwardLocalPortTo reports whether the ctx should be allowed to port forward
  424. // to the specified host and port.
  425. // TODO(bradfitz/maisem): should we have more checks on host/port?
  426. func (c *conn) mayForwardLocalPortTo(ctx ssh.Context, destinationHost string, destinationPort uint32) bool {
  427. if sshDisableForwarding() {
  428. return false
  429. }
  430. if c.finalAction != nil && c.finalAction.AllowLocalPortForwarding {
  431. metricLocalPortForward.Add(1)
  432. return true
  433. }
  434. return false
  435. }
  436. // sshPolicy returns the SSHPolicy for current node.
  437. // If there is no SSHPolicy in the netmap, it returns a debugPolicy
  438. // if one is defined.
  439. func (c *conn) sshPolicy() (_ *tailcfg.SSHPolicy, ok bool) {
  440. lb := c.srv.lb
  441. if !lb.ShouldRunSSH() {
  442. return nil, false
  443. }
  444. nm := lb.NetMap()
  445. if nm == nil {
  446. return nil, false
  447. }
  448. if pol := nm.SSHPolicy; pol != nil && !envknob.SSHIgnoreTailnetPolicy() {
  449. return pol, true
  450. }
  451. debugPolicyFile := envknob.SSHPolicyFile()
  452. if debugPolicyFile != "" {
  453. c.logf("reading debug SSH policy file: %v", debugPolicyFile)
  454. f, err := os.ReadFile(debugPolicyFile)
  455. if err != nil {
  456. c.logf("error reading debug SSH policy file: %v", err)
  457. return nil, false
  458. }
  459. p := new(tailcfg.SSHPolicy)
  460. if err := json.Unmarshal(f, p); err != nil {
  461. c.logf("invalid JSON in %v: %v", debugPolicyFile, err)
  462. return nil, false
  463. }
  464. return p, true
  465. }
  466. return nil, false
  467. }
  // toIPPort converts a to a netip.AddrPort. It returns the zero AddrPort when
  // a is not a *net.TCPAddr or its IP is not a valid 4- or 16-byte address.
  // IPv4-mapped IPv6 addresses are unmapped to plain IPv4.
  func toIPPort(a net.Addr) (ipp netip.AddrPort) {
  	tcpAddr, isTCP := a.(*net.TCPAddr)
  	if isTCP {
  		if ip, valid := netip.AddrFromSlice(tcpAddr.IP); valid {
  			ipp = netip.AddrPortFrom(ip.Unmap(), uint16(tcpAddr.Port))
  		}
  	}
  	return ipp
  }
  479. // connInfo returns a populated sshConnInfo from the provided arguments,
  480. // validating only that they represent a known Tailscale identity.
  481. func (c *conn) setInfo(ctx ssh.Context) error {
  482. if c.info != nil {
  483. return nil
  484. }
  485. ci := &sshConnInfo{
  486. sshUser: strings.TrimSuffix(ctx.User(), forcePasswordSuffix),
  487. src: toIPPort(ctx.RemoteAddr()),
  488. dst: toIPPort(ctx.LocalAddr()),
  489. }
  490. if !tsaddr.IsTailscaleIP(ci.dst.Addr()) {
  491. return fmt.Errorf("tailssh: rejecting non-Tailscale local address %v", ci.dst)
  492. }
  493. if !tsaddr.IsTailscaleIP(ci.src.Addr()) {
  494. return fmt.Errorf("tailssh: rejecting non-Tailscale remote address %v", ci.src)
  495. }
  496. node, uprof, ok := c.srv.lb.WhoIs("tcp", ci.src)
  497. if !ok {
  498. return fmt.Errorf("unknown Tailscale identity from src %v", ci.src)
  499. }
  500. ci.node = node
  501. ci.uprof = uprof
  502. c.idH = ctx.SessionID()
  503. c.info = ci
  504. c.logf("handling conn: %v", ci.String())
  505. return nil
  506. }
  507. // evaluatePolicy returns the SSHAction and localUser after evaluating
  508. // the SSHPolicy for this conn.
  509. func (c *conn) evaluatePolicy() (_ *tailcfg.SSHAction, localUser string, acceptEnv []string, _ error) {
  510. pol, ok := c.sshPolicy()
  511. if !ok {
  512. return nil, "", nil, fmt.Errorf("tailssh: rejecting connection; no SSH policy")
  513. }
  514. a, localUser, acceptEnv, ok := c.evalSSHPolicy(pol)
  515. if !ok {
  516. return nil, "", nil, fmt.Errorf("tailssh: rejecting connection; no matching policy")
  517. }
  518. return a, localUser, acceptEnv, nil
  519. }
  520. // handleSessionPostSSHAuth runs an SSH session after the SSH-level authentication,
  521. // but not necessarily before all the Tailscale-level extra verification has
  522. // completed. It also handles SFTP requests.
  523. func (c *conn) handleSessionPostSSHAuth(s ssh.Session) {
  524. // Do this check after auth, but before starting the session.
  525. switch s.Subsystem() {
  526. case "sftp":
  527. if sshDisableSFTP() {
  528. fmt.Fprintf(s.Stderr(), "sftp disabled\r\n")
  529. s.Exit(1)
  530. return
  531. }
  532. metricSFTP.Add(1)
  533. case "":
  534. // Regular SSH session.
  535. default:
  536. fmt.Fprintf(s.Stderr(), "Unsupported subsystem %q\r\n", s.Subsystem())
  537. s.Exit(1)
  538. return
  539. }
  540. ss := c.newSSHSession(s)
  541. ss.logf("handling new SSH connection from %v (%v) to ssh-user %q", c.info.uprof.LoginName, c.info.src.Addr(), c.localUser.Username)
  542. ss.logf("access granted to %v as ssh-user %q", c.info.uprof.LoginName, c.localUser.Username)
  543. ss.run()
  544. }
  545. // resolveNextAction starts at c.currentAction and makes it way through the
  546. // action chain one step at a time. An action without a HoldAndDelegate is
  547. // considered the final action. Once a final action is reached, this function
  548. // will keep returning that action. It updates c.currentAction to the next
  549. // action in the chain. When the final action is reached, it also sets
  550. // c.finalAction to the final action.
  551. func (c *conn) resolveNextAction(sctx ssh.Context) (action *tailcfg.SSHAction, err error) {
  552. if c.finalAction != nil || c.finalActionErr != nil {
  553. return c.finalAction, c.finalActionErr
  554. }
  555. defer func() {
  556. if action != nil {
  557. c.currentAction = action
  558. if action.Accept || action.Reject {
  559. c.finalAction = action
  560. }
  561. }
  562. if err != nil {
  563. c.finalActionErr = err
  564. }
  565. }()
  566. ctx, cancel := context.WithCancel(sctx)
  567. defer cancel()
  568. // Loop processing/fetching Actions until one reaches a
  569. // terminal state (Accept, Reject, or invalid Action), or
  570. // until fetchSSHAction times out due to the context being
  571. // done (client disconnect) or its 30 minute timeout passes.
  572. // (Which is a long time for somebody to see login
  573. // instructions and go to a URL to do something.)
  574. action = c.currentAction
  575. if action.Accept || action.Reject {
  576. if action.Reject {
  577. metricTerminalReject.Add(1)
  578. } else {
  579. metricTerminalAccept.Add(1)
  580. }
  581. return action, nil
  582. }
  583. url := action.HoldAndDelegate
  584. if url == "" {
  585. metricTerminalMalformed.Add(1)
  586. return nil, errors.New("reached Action that lacked Accept, Reject, and HoldAndDelegate")
  587. }
  588. metricHolds.Add(1)
  589. url = c.expandDelegateURLLocked(url)
  590. nextAction, err := c.fetchSSHAction(ctx, url)
  591. if err != nil {
  592. metricTerminalFetchError.Add(1)
  593. return nil, fmt.Errorf("fetching SSHAction from %s: %w", url, err)
  594. }
  595. return nextAction, nil
  596. }
  597. func (c *conn) expandDelegateURLLocked(actionURL string) string {
  598. nm := c.srv.lb.NetMap()
  599. ci := c.info
  600. lu := c.localUser
  601. var dstNodeID string
  602. if nm != nil {
  603. dstNodeID = fmt.Sprint(int64(nm.SelfNode.ID()))
  604. }
  605. return strings.NewReplacer(
  606. "$SRC_NODE_IP", url.QueryEscape(ci.src.Addr().String()),
  607. "$SRC_NODE_ID", fmt.Sprint(int64(ci.node.ID())),
  608. "$DST_NODE_IP", url.QueryEscape(ci.dst.Addr().String()),
  609. "$DST_NODE_ID", dstNodeID,
  610. "$SSH_USER", url.QueryEscape(ci.sshUser),
  611. "$LOCAL_USER", url.QueryEscape(lu.Username),
  612. ).Replace(actionURL)
  613. }
  // sshSession is an accepted Tailscale SSH session.
  type sshSession struct {
  	ssh.Session
  	sharedID string // ID that's shared with control
  	logf     logger.Logf // logs with an "ssh-session(<sharedID>): " prefix

  	// ctx is derived from the underlying ssh.Session's context; cancelCtx
  	// cancels it with a cause, which is reported when the session's process
  	// is killed.
  	ctx           context.Context
  	cancelCtx     context.CancelCauseFunc
  	conn          *conn
  	agentListener net.Listener // non-nil if agent-forwarding requested+allowed

  	// initialized by launchProcess:
  	cmd      *exec.Cmd
  	wrStdin  io.WriteCloser
  	rdStdout io.ReadCloser
  	rdStderr io.ReadCloser // rdStderr is nil for pty sessions
  	ptyReq   *ssh.Pty      // non-nil for pty sessions

  	// childPipes is a list of pipes that need to be closed when the process exits.
  	// For pty sessions, this is the tty fd.
  	// For non-pty sessions, this is the stdin, stdout, stderr fds.
  	childPipes []io.Closer

  	// We use this sync.Once to ensure that we only terminate the process once,
  	// either it exits itself or is terminated
  	exitOnce sync.Once
  }
  637. func (ss *sshSession) vlogf(format string, args ...any) {
  638. if sshVerboseLogging() {
  639. ss.logf(format, args...)
  640. }
  641. }
  642. func (c *conn) newSSHSession(s ssh.Session) *sshSession {
  643. sharedID := fmt.Sprintf("sess-%s-%02x", c.srv.now().UTC().Format("20060102T150405"), randBytes(5))
  644. c.logf("starting session: %v", sharedID)
  645. ctx, cancel := context.WithCancelCause(s.Context())
  646. return &sshSession{
  647. Session: s,
  648. sharedID: sharedID,
  649. ctx: ctx,
  650. cancelCtx: cancel,
  651. conn: c,
  652. logf: logger.WithPrefix(c.srv.logf, "ssh-session("+sharedID+"): "),
  653. }
  654. }
  655. // isStillValid reports whether the conn is still valid.
  656. func (c *conn) isStillValid() bool {
  657. a, localUser, _, err := c.evaluatePolicy()
  658. c.vlogf("stillValid: %+v %v %v", a, localUser, err)
  659. if err != nil {
  660. return false
  661. }
  662. if !a.Accept && a.HoldAndDelegate == "" {
  663. return false
  664. }
  665. return c.localUser.Username == localUser
  666. }
  667. // checkStillValid checks that the conn is still valid per the latest SSHPolicy.
  668. // If not, it terminates all sessions associated with the conn.
  669. func (c *conn) checkStillValid() {
  670. if c.isStillValid() {
  671. return
  672. }
  673. metricPolicyChangeKick.Add(1)
  674. c.logf("session no longer valid per new SSH policy; closing")
  675. c.mu.Lock()
  676. defer c.mu.Unlock()
  677. for _, s := range c.sessions {
  678. s.cancelCtx(userVisibleError{
  679. fmt.Sprintf("Access revoked.\r\n"),
  680. context.Canceled,
  681. })
  682. }
  683. }
  684. func (c *conn) fetchSSHAction(ctx context.Context, url string) (*tailcfg.SSHAction, error) {
  685. ctx, cancel := context.WithTimeout(ctx, 30*time.Minute)
  686. defer cancel()
  687. bo := backoff.NewBackoff("fetch-ssh-action", c.logf, 10*time.Second)
  688. for {
  689. if err := ctx.Err(); err != nil {
  690. return nil, err
  691. }
  692. req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
  693. if err != nil {
  694. return nil, err
  695. }
  696. res, err := c.srv.lb.DoNoiseRequest(req)
  697. if err != nil {
  698. bo.BackOff(ctx, err)
  699. continue
  700. }
  701. if res.StatusCode != 200 {
  702. body, _ := io.ReadAll(res.Body)
  703. res.Body.Close()
  704. if len(body) > 1<<10 {
  705. body = body[:1<<10]
  706. }
  707. c.logf("fetch of %v: %s, %s", url, res.Status, body)
  708. bo.BackOff(ctx, fmt.Errorf("unexpected status: %v", res.Status))
  709. continue
  710. }
  711. a := new(tailcfg.SSHAction)
  712. err = json.NewDecoder(res.Body).Decode(a)
  713. res.Body.Close()
  714. if err != nil {
  715. c.logf("invalid next SSHAction JSON from %v: %v", url, err)
  716. bo.BackOff(ctx, err)
  717. continue
  718. }
  719. return a, nil
  720. }
  721. }
  722. // killProcessOnContextDone waits for ss.ctx to be done and kills the process,
  723. // unless the process has already exited.
  724. func (ss *sshSession) killProcessOnContextDone() {
  725. <-ss.ctx.Done()
  726. // Either the process has already exited, in which case this does nothing.
  727. // Or, the process is still running in which case this will kill it.
  728. ss.exitOnce.Do(func() {
  729. err := context.Cause(ss.ctx)
  730. if serr, ok := err.(SSHTerminationError); ok {
  731. msg := serr.SSHTerminationMessage()
  732. if msg != "" {
  733. io.WriteString(ss.Stderr(), "\r\n\r\n"+msg+"\r\n\r\n")
  734. }
  735. }
  736. ss.logf("terminating SSH session from %v: %v", ss.conn.info.src.Addr(), err)
  737. // We don't need to Process.Wait here, sshSession.run() does
  738. // the waiting regardless of termination reason.
  739. // TODO(maisem): should this be a SIGTERM followed by a SIGKILL?
  740. ss.cmd.Process.Kill()
  741. })
  742. }
  743. // attachSession registers ss as an active session.
  744. func (c *conn) attachSession(ss *sshSession) {
  745. c.srv.sessionWaitGroup.Add(1)
  746. if ss.sharedID == "" {
  747. panic("empty sharedID")
  748. }
  749. c.mu.Lock()
  750. defer c.mu.Unlock()
  751. c.sessions = append(c.sessions, ss)
  752. }
  753. // detachSession unregisters s from the list of active sessions.
  754. func (c *conn) detachSession(ss *sshSession) {
  755. defer c.srv.sessionWaitGroup.Done()
  756. c.mu.Lock()
  757. defer c.mu.Unlock()
  758. for i, s := range c.sessions {
  759. if s == ss {
  760. c.sessions = append(c.sessions[:i], c.sessions[i+1:]...)
  761. break
  762. }
  763. }
  764. }
// errSessionDone is the cause that sshSession.run passes to ss.cancelCtx
// when it returns.
var errSessionDone = errors.New("session is done")
  766. // handleSSHAgentForwarding starts a Unix socket listener and in the background
  767. // forwards agent connections between the listener and the ssh.Session.
  768. // On success, it assigns ss.agentListener.
  769. func (ss *sshSession) handleSSHAgentForwarding(s ssh.Session, lu *userMeta) error {
  770. if !ssh.AgentRequested(ss) || !ss.conn.finalAction.AllowAgentForwarding {
  771. return nil
  772. }
  773. if sshDisableForwarding() {
  774. // TODO(bradfitz): or do we want to return an error here instead so the user
  775. // gets an error if they ran with ssh -A? But for now we just silently
  776. // don't work, like the condition above.
  777. return nil
  778. }
  779. ss.logf("ssh: agent forwarding requested")
  780. ln, err := ssh.NewAgentListener()
  781. if err != nil {
  782. return err
  783. }
  784. defer func() {
  785. if err != nil && ln != nil {
  786. ln.Close()
  787. }
  788. }()
  789. uid, err := strconv.ParseUint(lu.Uid, 10, 32)
  790. if err != nil {
  791. return err
  792. }
  793. gid, err := strconv.ParseUint(lu.Gid, 10, 32)
  794. if err != nil {
  795. return err
  796. }
  797. socket := ln.Addr().String()
  798. dir := filepath.Dir(socket)
  799. // Make sure the socket is accessible only by the user.
  800. if err := os.Chmod(socket, 0600); err != nil {
  801. return err
  802. }
  803. if err := os.Chown(socket, int(uid), int(gid)); err != nil {
  804. return err
  805. }
  806. // Make sure the dir is also accessible.
  807. if err := os.Chmod(dir, 0755); err != nil {
  808. return err
  809. }
  810. go ssh.ForwardAgentConnections(ln, s)
  811. ss.agentListener = ln
  812. return nil
  813. }
  814. // run is the entrypoint for a newly accepted SSH session.
  815. //
  816. // It handles ss once it's been accepted and determined
  817. // that it should run.
  818. func (ss *sshSession) run() {
  819. metricActiveSessions.Add(1)
  820. defer metricActiveSessions.Add(-1)
  821. defer ss.cancelCtx(errSessionDone)
  822. if attached := ss.conn.srv.attachSessionToConnIfNotShutdown(ss); !attached {
  823. fmt.Fprintf(ss, "Tailscale SSH is shutting down\r\n")
  824. ss.Exit(1)
  825. return
  826. }
  827. defer ss.conn.detachSession(ss)
  828. lu := ss.conn.localUser
  829. logf := ss.logf
  830. if ss.conn.finalAction.SessionDuration != 0 {
  831. t := time.AfterFunc(ss.conn.finalAction.SessionDuration, func() {
  832. ss.cancelCtx(userVisibleError{
  833. fmt.Sprintf("Session timeout of %v elapsed.", ss.conn.finalAction.SessionDuration),
  834. context.DeadlineExceeded,
  835. })
  836. })
  837. defer t.Stop()
  838. }
  839. if euid := os.Geteuid(); euid != 0 {
  840. if lu.Uid != fmt.Sprint(euid) {
  841. ss.logf("can't switch to user %q from process euid %v", lu.Username, euid)
  842. fmt.Fprintf(ss, "can't switch user\r\n")
  843. ss.Exit(1)
  844. return
  845. }
  846. }
  847. // Take control of the PTY so that we can configure it below.
  848. // See https://github.com/tailscale/tailscale/issues/4146
  849. ss.DisablePTYEmulation()
  850. var rec *recording // or nil if disabled
  851. if ss.Subsystem() != "sftp" {
  852. if err := ss.handleSSHAgentForwarding(ss, lu); err != nil {
  853. ss.logf("agent forwarding failed: %v", err)
  854. } else if ss.agentListener != nil {
  855. // TODO(maisem/bradfitz): add a way to close all session resources
  856. defer ss.agentListener.Close()
  857. }
  858. if ss.shouldRecord() {
  859. var err error
  860. rec, err = ss.startNewRecording()
  861. if err != nil {
  862. var uve userVisibleError
  863. if errors.As(err, &uve) {
  864. fmt.Fprintf(ss, "%s\r\n", uve.SSHTerminationMessage())
  865. } else {
  866. fmt.Fprintf(ss, "can't start new recording\r\n")
  867. }
  868. ss.logf("startNewRecording: %v", err)
  869. ss.Exit(1)
  870. return
  871. }
  872. ss.logf("startNewRecording: <nil>")
  873. if rec != nil {
  874. defer rec.Close()
  875. }
  876. }
  877. }
  878. err := ss.launchProcess()
  879. if err != nil {
  880. logf("start failed: %v", err.Error())
  881. if errors.Is(err, context.Canceled) {
  882. err := context.Cause(ss.ctx)
  883. var uve userVisibleError
  884. if errors.As(err, &uve) {
  885. fmt.Fprintf(ss, "%s\r\n", uve)
  886. }
  887. }
  888. ss.Exit(1)
  889. return
  890. }
  891. go ss.killProcessOnContextDone()
  892. var processDone atomic.Bool
  893. go func() {
  894. defer ss.wrStdin.Close()
  895. if _, err := io.Copy(rec.writer("i", ss.wrStdin), ss); err != nil {
  896. logf("stdin copy: %v", err)
  897. ss.cancelCtx(err)
  898. }
  899. }()
  900. outputDone := make(chan struct{})
  901. var openOutputStreams atomic.Int32
  902. if ss.rdStderr != nil {
  903. openOutputStreams.Store(2)
  904. } else {
  905. openOutputStreams.Store(1)
  906. }
  907. go func() {
  908. defer ss.rdStdout.Close()
  909. _, err := io.Copy(rec.writer("o", ss), ss.rdStdout)
  910. if err != nil && !errors.Is(err, io.EOF) {
  911. isErrBecauseProcessExited := processDone.Load() && errors.Is(err, syscall.EIO)
  912. if !isErrBecauseProcessExited {
  913. logf("stdout copy: %v", err)
  914. ss.cancelCtx(err)
  915. }
  916. }
  917. if openOutputStreams.Add(-1) == 0 {
  918. ss.CloseWrite()
  919. close(outputDone)
  920. }
  921. }()
  922. // rdStderr is nil for ptys.
  923. if ss.rdStderr != nil {
  924. go func() {
  925. defer ss.rdStderr.Close()
  926. _, err := io.Copy(ss.Stderr(), ss.rdStderr)
  927. if err != nil {
  928. logf("stderr copy: %v", err)
  929. }
  930. if openOutputStreams.Add(-1) == 0 {
  931. ss.CloseWrite()
  932. close(outputDone)
  933. }
  934. }()
  935. }
  936. err = ss.cmd.Wait()
  937. processDone.Store(true)
  938. // This will either make the SSH Termination goroutine be a no-op,
  939. // or itself will be a no-op because the process was killed by the
  940. // aforementioned goroutine.
  941. ss.exitOnce.Do(func() {})
  942. // Close the process-side of all pipes to signal the asynchronous
  943. // io.Copy routines reading/writing from the pipes to terminate.
  944. // Block for the io.Copy to finish before calling ss.Exit below.
  945. closeAll(ss.childPipes...)
  946. select {
  947. case <-outputDone:
  948. case <-ss.ctx.Done():
  949. }
  950. if err == nil {
  951. ss.logf("Session complete")
  952. ss.Exit(0)
  953. return
  954. }
  955. if ee, ok := err.(*exec.ExitError); ok {
  956. code := ee.ProcessState.ExitCode()
  957. ss.logf("Wait: code=%v", code)
  958. ss.Exit(code)
  959. return
  960. }
  961. ss.logf("Wait: %v", err)
  962. ss.Exit(1)
  963. return
  964. }
// recordSSHToLocalDisk is a deprecated dev knob to allow recording SSH sessions
// to local storage. It is only used if there is no recording configured by the
// coordination server. This will be removed in the future.
//
// It is registered as the boolean envknob TS_DEBUG_LOG_SSH.
var recordSSHToLocalDisk = envknob.RegisterBool("TS_DEBUG_LOG_SSH")
  969. // recorders returns the list of recorders to use for this session.
  970. // If the final action has a non-empty list of recorders, that list is
  971. // returned. Otherwise, the list of recorders from the initial action
  972. // is returned.
  973. func (ss *sshSession) recorders() ([]netip.AddrPort, *tailcfg.SSHRecorderFailureAction) {
  974. if len(ss.conn.finalAction.Recorders) > 0 {
  975. return ss.conn.finalAction.Recorders, ss.conn.finalAction.OnRecordingFailure
  976. }
  977. return ss.conn.action0.Recorders, ss.conn.action0.OnRecordingFailure
  978. }
  979. func (ss *sshSession) shouldRecord() bool {
  980. recs, _ := ss.recorders()
  981. return len(recs) > 0 || recordSSHToLocalDisk()
  982. }
// sshConnInfo holds the requested user, the endpoints and the source
// node identity of an SSH connection.
type sshConnInfo struct {
	// sshUser is the requested local SSH username ("root", "alice", etc).
	sshUser string

	// src is the Tailscale IP and port that the connection came from.
	src netip.AddrPort

	// dst is the Tailscale IP and port that the connection came for.
	dst netip.AddrPort

	// node is the node that the source IP (src) belongs to.
	node tailcfg.NodeView

	// uprof is node's UserProfile.
	uprof tailcfg.UserProfile
}
  995. func (ci *sshConnInfo) String() string {
  996. return fmt.Sprintf("%v->%v@%v", ci.src, ci.sshUser, ci.dst)
  997. }
  998. func (c *conn) ruleExpired(r *tailcfg.SSHRule) bool {
  999. if r.RuleExpires == nil {
  1000. return false
  1001. }
  1002. return r.RuleExpires.Before(c.srv.now())
  1003. }
  1004. func (c *conn) evalSSHPolicy(pol *tailcfg.SSHPolicy) (a *tailcfg.SSHAction, localUser string, acceptEnv []string, ok bool) {
  1005. for _, r := range pol.Rules {
  1006. if a, localUser, acceptEnv, err := c.matchRule(r); err == nil {
  1007. return a, localUser, acceptEnv, true
  1008. }
  1009. }
  1010. return nil, "", nil, false
  1011. }
// Internal errors returned by matchRule to say why a rule did not match.
// They exist for testing; evalSSHPolicy only checks them against nil.
var (
	errNilRule        = errors.New("nil rule")                 // the rule itself is nil
	errNilAction      = errors.New("nil action")               // the rule has no Action
	errRuleExpired    = errors.New("rule expired")             // the rule's RuleExpires has passed
	errPrincipalMatch = errors.New("principal didn't match")   // no principal matched the conn
	errUserMatch      = errors.New("user didn't match")        // no local user mapping for the SSH user
	errInvalidConn    = errors.New("invalid connection state") // the conn or its info is nil
)
  1021. func (c *conn) matchRule(r *tailcfg.SSHRule) (a *tailcfg.SSHAction, localUser string, acceptEnv []string, err error) {
  1022. defer func() {
  1023. c.vlogf("matchRule(%+v): %v", r, err)
  1024. }()
  1025. if c == nil {
  1026. return nil, "", nil, errInvalidConn
  1027. }
  1028. if c.info == nil {
  1029. c.logf("invalid connection state")
  1030. return nil, "", nil, errInvalidConn
  1031. }
  1032. if r == nil {
  1033. return nil, "", nil, errNilRule
  1034. }
  1035. if r.Action == nil {
  1036. return nil, "", nil, errNilAction
  1037. }
  1038. if c.ruleExpired(r) {
  1039. return nil, "", nil, errRuleExpired
  1040. }
  1041. if !r.Action.Reject {
  1042. // For all but Reject rules, SSHUsers is required.
  1043. // If SSHUsers is nil or empty, mapLocalUser will return an
  1044. // empty string anyway.
  1045. localUser = mapLocalUser(r.SSHUsers, c.info.sshUser)
  1046. if localUser == "" {
  1047. return nil, "", nil, errUserMatch
  1048. }
  1049. }
  1050. if !c.anyPrincipalMatches(r.Principals) {
  1051. return nil, "", nil, errPrincipalMatch
  1052. }
  1053. return r.Action, localUser, r.AcceptEnv, nil
  1054. }
  1055. func mapLocalUser(ruleSSHUsers map[string]string, reqSSHUser string) (localUser string) {
  1056. v, ok := ruleSSHUsers[reqSSHUser]
  1057. if !ok {
  1058. v = ruleSSHUsers["*"]
  1059. }
  1060. if v == "=" {
  1061. return reqSSHUser
  1062. }
  1063. return v
  1064. }
  1065. func (c *conn) anyPrincipalMatches(ps []*tailcfg.SSHPrincipal) bool {
  1066. for _, p := range ps {
  1067. if p == nil {
  1068. continue
  1069. }
  1070. if c.principalMatchesTailscaleIdentity(p) {
  1071. return true
  1072. }
  1073. }
  1074. return false
  1075. }
  1076. // principalMatchesTailscaleIdentity reports whether one of p's four fields
  1077. // that match the Tailscale identity match (Node, NodeIP, UserLogin, Any).
  1078. func (c *conn) principalMatchesTailscaleIdentity(p *tailcfg.SSHPrincipal) bool {
  1079. ci := c.info
  1080. if p.Any {
  1081. return true
  1082. }
  1083. if !p.Node.IsZero() && ci.node.Valid() && p.Node == ci.node.StableID() {
  1084. return true
  1085. }
  1086. if p.NodeIP != "" {
  1087. if ip, _ := netip.ParseAddr(p.NodeIP); ip == ci.src.Addr() {
  1088. return true
  1089. }
  1090. }
  1091. if p.UserLogin != "" && ci.uprof.LoginName == p.UserLogin {
  1092. return true
  1093. }
  1094. return false
  1095. }
  1096. func randBytes(n int) []byte {
  1097. b := make([]byte, n)
  1098. if _, err := rand.Read(b); err != nil {
  1099. panic(err)
  1100. }
  1101. return b
  1102. }
  1103. func (ss *sshSession) openFileForRecording(now time.Time) (_ io.WriteCloser, err error) {
  1104. varRoot := ss.conn.srv.lb.TailscaleVarRoot()
  1105. if varRoot == "" {
  1106. return nil, errors.New("no var root for recording storage")
  1107. }
  1108. dir := filepath.Join(varRoot, "ssh-sessions")
  1109. if err := os.MkdirAll(dir, 0700); err != nil {
  1110. return nil, err
  1111. }
  1112. f, err := os.CreateTemp(dir, fmt.Sprintf("ssh-session-%v-*.cast", now.UnixNano()))
  1113. if err != nil {
  1114. return nil, err
  1115. }
  1116. return f, nil
  1117. }
// startNewRecording starts a new SSH session recording.
// It may return a nil recording if recording is not available.
//
// The recording goes either to the configured recorders or, when none are
// configured and the recordSSHToLocalDisk knob is set, to a local file.
// Once the output is open, a cast header describing the session is
// written to it.
//
// If connecting to the recorders fails, the result depends on the
// on-failure action: with a RejectSessionWithMessage a userVisibleError
// is returned, otherwise the failure is logged and (nil, nil) is returned
// ("failing open").
func (ss *sshSession) startNewRecording() (_ *recording, err error) {
	// We store the node key as soon as possible when creating
	// a new recording in case of FUS (presumably fast user
	// switching; TODO confirm).
	nodeKey := ss.conn.srv.lb.NodeKey()
	if nodeKey.IsZero() {
		return nil, errors.New("ssh server is unavailable: no node key")
	}

	recorders, onFailure := ss.recorders()
	var localRecording bool
	if len(recorders) == 0 {
		if recordSSHToLocalDisk() {
			localRecording = true
		} else {
			return nil, errors.New("no recorders configured")
		}
	}

	// w stays the zero Window when no PTY was requested.
	var w ssh.Window
	if ptyReq, _, isPtyReq := ss.Pty(); isPtyReq {
		w = ptyReq.Window
	}

	term := envValFromList(ss.Environ(), "TERM")
	if term == "" {
		term = "xterm-256color" // something non-empty
	}

	now := time.Now()
	rec := &recording{
		ss:    ss,
		start: now,
		// Fail open unless the policy asks to terminate the session
		// when recording fails.
		failOpen: onFailure == nil || onFailure.TerminateSessionWithMessage == "",
	}

	// We want to use a background context for uploading and not ss.ctx.
	// ss.ctx is closed when the session closes, but we don't want to break the upload at that time.
	// Instead we want to wait for the session to close the writer when it finishes.
	ctx := context.Background()
	if localRecording {
		rec.out, err = ss.openFileForRecording(now)
		if err != nil {
			return nil, err
		}
	} else {
		var errChan <-chan error
		var attempts []*tailcfg.SSHRecordingAttempt
		rec.out, attempts, errChan, err = sessionrecording.ConnectToRecorder(ctx, recorders, ss.conn.srv.lb.Dialer().UserDial)
		if err != nil {
			// Tell control about the failed attempts, if it asked to be notified.
			if onFailure != nil && onFailure.NotifyURL != "" && len(attempts) > 0 {
				eventType := tailcfg.SSHSessionRecordingFailed
				if onFailure.RejectSessionWithMessage != "" {
					eventType = tailcfg.SSHSessionRecordingRejected
				}
				ss.notifyControl(ctx, nodeKey, eventType, attempts, onFailure.NotifyURL)
			}

			if onFailure != nil && onFailure.RejectSessionWithMessage != "" {
				ss.logf("recording: error starting recording (rejecting session): %v", err)
				return nil, userVisibleError{
					error: err,
					msg:   onFailure.RejectSessionWithMessage,
				}
			}
			ss.logf("recording: error starting recording (failing open): %v", err)
			return nil, nil
		}
		// Watch the upload in the background. This goroutine ends once
		// errChan delivers a value.
		go func() {
			err := <-errChan
			if err == nil {
				select {
				case <-ss.ctx.Done():
					// Success.
					ss.logf("recording: finished uploading recording")
					return
				default:
					// The upload finished cleanly but the session is
					// still running, which is treated as a failure.
					err = errors.New("recording upload ended before the SSH session")
				}
			}
			if onFailure != nil && onFailure.NotifyURL != "" && len(attempts) > 0 {
				lastAttempt := attempts[len(attempts)-1]
				lastAttempt.FailureMessage = err.Error()

				eventType := tailcfg.SSHSessionRecordingFailed
				if onFailure.TerminateSessionWithMessage != "" {
					eventType = tailcfg.SSHSessionRecordingTerminated
				}

				ss.notifyControl(ctx, nodeKey, eventType, attempts, onFailure.NotifyURL)
			}
			if onFailure != nil && onFailure.TerminateSessionWithMessage != "" {
				ss.logf("recording: error uploading recording (closing session): %v", err)
				ss.cancelCtx(userVisibleError{
					error: err,
					msg:   onFailure.TerminateSessionWithMessage,
				})
				return
			}
			ss.logf("recording: error uploading recording (failing open): %v", err)
		}()
	}

	ch := sessionrecording.CastHeader{
		Version:   2,
		Width:     w.Width,
		Height:    w.Height,
		Timestamp: now.Unix(),
		Command:   strings.Join(ss.Command(), " "),
		Env: map[string]string{
			"TERM": term,
			// TODO(bradfitz): anything else important?
			// including all seems noisy, but maybe we should
			// for auditing. But first need to break
			// launchProcess's startWithStdPipes and
			// startWithPTY up so that they first return the cmd
			// without starting it, and then a step that starts
			// it. Then we can (1) make the cmd, (2) start the
			// recording, (3) start the process.
		},
		SSHUser:      ss.conn.info.sshUser,
		LocalUser:    ss.conn.localUser.Username,
		SrcNode:      strings.TrimSuffix(ss.conn.info.node.Name(), "."),
		SrcNodeID:    ss.conn.info.node.StableID(),
		ConnectionID: ss.conn.connID,
	}
	// Untagged nodes are attributed to their user; tagged nodes to their tags.
	if !ss.conn.info.node.IsTagged() {
		ch.SrcNodeUser = ss.conn.info.uprof.LoginName
		ch.SrcNodeUserID = ss.conn.info.node.User()
	} else {
		ch.SrcNodeTags = ss.conn.info.node.Tags().AsSlice()
	}
	// NOTE(review): rec.out is already open here and is not closed on
	// the error returns below.
	j, err := json.Marshal(ch)
	if err != nil {
		return nil, err
	}
	j = append(j, '\n')
	if _, err := rec.out.Write(j); err != nil {
		if errors.Is(err, io.ErrClosedPipe) && ss.ctx.Err() != nil {
			// If we got an io.ErrClosedPipe, it's likely because
			// the recording server closed the connection on us. Return
			// the original context error instead.
			return nil, context.Cause(ss.ctx)
		}
		return nil, err
	}
	return rec, nil
}
  1258. // notifyControl sends a SSHEventNotifyRequest to control over noise.
  1259. // A SSHEventNotifyRequest is sent when an action or state reached during
  1260. // an SSH session is a defined EventType.
  1261. func (ss *sshSession) notifyControl(ctx context.Context, nodeKey key.NodePublic, notifyType tailcfg.SSHEventType, attempts []*tailcfg.SSHRecordingAttempt, url string) {
  1262. re := tailcfg.SSHEventNotifyRequest{
  1263. EventType: notifyType,
  1264. ConnectionID: ss.conn.connID,
  1265. CapVersion: tailcfg.CurrentCapabilityVersion,
  1266. NodeKey: nodeKey,
  1267. SrcNode: ss.conn.info.node.ID(),
  1268. SSHUser: ss.conn.info.sshUser,
  1269. LocalUser: ss.conn.localUser.Username,
  1270. RecordingAttempts: attempts,
  1271. }
  1272. body, err := json.Marshal(re)
  1273. if err != nil {
  1274. ss.logf("notifyControl: unable to marshal SSHNotifyRequest:", err)
  1275. return
  1276. }
  1277. req, err := http.NewRequestWithContext(ctx, httpm.POST, url, bytes.NewReader(body))
  1278. if err != nil {
  1279. ss.logf("notifyControl: unable to create request:", err)
  1280. return
  1281. }
  1282. resp, err := ss.conn.srv.lb.DoNoiseRequest(req)
  1283. if err != nil {
  1284. ss.logf("notifyControl: unable to send noise request:", err)
  1285. return
  1286. }
  1287. if resp.StatusCode != http.StatusCreated {
  1288. ss.logf("notifyControl: noise request returned status code %v", resp.StatusCode)
  1289. return
  1290. }
  1291. }
// recording is the state for an SSH session recording.
type recording struct {
	ss    *sshSession // the session being recorded
	start time.Time   // recording start; cast line timestamps are relative to it

	// failOpen specifies whether the session should be allowed to
	// continue if writing to the recording fails.
	failOpen bool

	mu  sync.Mutex     // guards writes to, close of out
	out io.WriteCloser // recording destination; set to nil by Close
}
  1302. func (r *recording) Close() error {
  1303. r.mu.Lock()
  1304. defer r.mu.Unlock()
  1305. if r.out == nil {
  1306. return nil
  1307. }
  1308. err := r.out.Close()
  1309. r.out = nil
  1310. return err
  1311. }
  1312. // writer returns an io.Writer around w that first records the write.
  1313. //
  1314. // The dir should be "i" for input or "o" for output.
  1315. //
  1316. // If r is nil, it returns w unchanged.
  1317. //
  1318. // Currently (2023-03-21) we only record output, not input.
  1319. func (r *recording) writer(dir string, w io.Writer) io.Writer {
  1320. if r == nil {
  1321. return w
  1322. }
  1323. if dir == "i" {
  1324. // TODO: record input? Maybe not, since it might contain
  1325. // passwords.
  1326. return w
  1327. }
  1328. return &loggingWriter{r: r, dir: dir, w: w}
  1329. }
// loggingWriter is an io.Writer wrapper that writes first an
// asciinema JSON cast format recording line, and then writes to w.
type loggingWriter struct {
	r   *recording // recording that receives the cast lines
	dir string     // "i" or "o" (input or output)
	w   io.Writer  // underlying Writer, after writing to r.out

	// recordingFailedOpen specifies whether we've failed to write to
	// r.out and should stop trying. It is set to true if we fail to write
	// to r.out and r.failOpen is set.
	recordingFailedOpen bool
}
  1341. func (w *loggingWriter) Write(p []byte) (n int, err error) {
  1342. if !w.recordingFailedOpen {
  1343. j, err := json.Marshal([]any{
  1344. time.Since(w.r.start).Seconds(),
  1345. w.dir,
  1346. string(p),
  1347. })
  1348. if err != nil {
  1349. return 0, err
  1350. }
  1351. j = append(j, '\n')
  1352. if err := w.writeCastLine(j); err != nil {
  1353. if !w.r.failOpen {
  1354. return 0, err
  1355. }
  1356. w.recordingFailedOpen = true
  1357. }
  1358. }
  1359. return w.w.Write(p)
  1360. }
  1361. func (w loggingWriter) writeCastLine(j []byte) error {
  1362. w.r.mu.Lock()
  1363. defer w.r.mu.Unlock()
  1364. if w.r.out == nil {
  1365. return errors.New("logger closed")
  1366. }
  1367. _, err := w.r.out.Write(j)
  1368. if err != nil {
  1369. return fmt.Errorf("logger Write: %w", err)
  1370. }
  1371. return nil
  1372. }
  1373. func envValFromList(env []string, wantKey string) (v string) {
  1374. for _, kv := range env {
  1375. if thisKey, v, ok := strings.Cut(kv, "="); ok && envEq(thisKey, wantKey) {
  1376. return v
  1377. }
  1378. }
  1379. return ""
  1380. }
  1381. // envEq reports whether environment variable a == b for the current
  1382. // operating system.
  1383. func envEq(a, b string) bool {
  1384. //lint:ignore SA4032 in case this func moves elsewhere, permit the GOOS check
  1385. if runtime.GOOS == "windows" {
  1386. return strings.EqualFold(a, b)
  1387. }
  1388. return a == b
  1389. }
// Client metrics for the SSH server. The quoted strings are the names the
// metrics are registered under.
var (
	metricActiveSessions      = clientmetric.NewGauge("ssh_active_sessions") // raised and lowered by sshSession.run
	metricIncomingConnections = clientmetric.NewCounter("ssh_incoming_connections")
	metricTerminalAccept      = clientmetric.NewCounter("ssh_terminalaction_accept")
	metricTerminalReject      = clientmetric.NewCounter("ssh_terminalaction_reject")
	metricTerminalMalformed   = clientmetric.NewCounter("ssh_terminalaction_malformed")
	metricTerminalFetchError  = clientmetric.NewCounter("ssh_terminalaction_fetch_error")
	metricHolds               = clientmetric.NewCounter("ssh_holds")
	metricPolicyChangeKick    = clientmetric.NewCounter("ssh_policy_change_kick") // bumped by conn.checkStillValid
	metricSFTP                = clientmetric.NewCounter("ssh_sftp_sessions")
	metricLocalPortForward    = clientmetric.NewCounter("ssh_local_port_forward_requests")
	metricRemotePortForward   = clientmetric.NewCounter("ssh_remote_port_forward_requests")
)
// userVisibleError is a wrapper around an error that implements
// SSHTerminationError, so that msg can be written to the user's session.
type userVisibleError struct {
	msg   string // text returned by SSHTerminationMessage
	error        // wrapped error; supplies the Error method
}

// SSHTerminationMessage returns the message intended for the user.
func (ue userVisibleError) SSHTerminationMessage() string { return ue.msg }
// SSHTerminationError is implemented by errors that terminate an SSH
// session and should be written to user's sessions.
type SSHTerminationError interface {
	error

	// SSHTerminationMessage returns the text to write to the user's
	// session. An empty message means nothing is written.
	SSHTerminationMessage() string
}
  1416. func closeAll(cs ...io.Closer) {
  1417. for _, c := range cs {
  1418. if c != nil {
  1419. c.Close()
  1420. }
  1421. }
  1422. }