tailssh.go 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. //go:build (linux && !android) || (darwin && !ios) || freebsd || openbsd || plan9
  4. // Package tailssh is an SSH server integrated into Tailscale.
  5. package tailssh
  6. import (
  7. "bytes"
  8. "context"
  9. "crypto/rand"
  10. "encoding/json"
  11. "errors"
  12. "fmt"
  13. "io"
  14. "net"
  15. "net/http"
  16. "net/netip"
  17. "net/url"
  18. "os"
  19. "os/exec"
  20. "path/filepath"
  21. "runtime"
  22. "strconv"
  23. "strings"
  24. "sync"
  25. "sync/atomic"
  26. "syscall"
  27. "time"
  28. gossh "golang.org/x/crypto/ssh"
  29. "tailscale.com/envknob"
  30. "tailscale.com/ipn/ipnlocal"
  31. "tailscale.com/net/tsaddr"
  32. "tailscale.com/net/tsdial"
  33. "tailscale.com/sessionrecording"
  34. "tailscale.com/tailcfg"
  35. "tailscale.com/tempfork/gliderlabs/ssh"
  36. "tailscale.com/types/key"
  37. "tailscale.com/types/logger"
  38. "tailscale.com/types/netmap"
  39. "tailscale.com/util/backoff"
  40. "tailscale.com/util/clientmetric"
  41. "tailscale.com/util/httpm"
  42. "tailscale.com/util/mak"
  43. )
  44. var (
  45. sshVerboseLogging = envknob.RegisterBool("TS_DEBUG_SSH_VLOG")
  46. sshDisableSFTP = envknob.RegisterBool("TS_SSH_DISABLE_SFTP")
  47. sshDisableForwarding = envknob.RegisterBool("TS_SSH_DISABLE_FORWARDING")
  48. sshDisablePTY = envknob.RegisterBool("TS_SSH_DISABLE_PTY")
  49. // errTerminal is an empty gossh.PartialSuccessError (with no 'Next'
  50. // authentication methods that may proceed), which results in the SSH
  51. // server immediately disconnecting the client.
  52. errTerminal = &gossh.PartialSuccessError{}
  53. )
  // forcePasswordSuffix is the username suffix that forces Tailscale SSH into
  // password authentication mode. It exists to work around buggy SSH clients
  // that get confused by successful replies to auth type "none".
  const forcePasswordSuffix = "+password"
  60. // ipnLocalBackend is the subset of ipnlocal.LocalBackend that we use.
  61. // It is used for testing.
  62. type ipnLocalBackend interface {
  63. GetSSH_HostKeys() ([]gossh.Signer, error)
  64. ShouldRunSSH() bool
  65. NetMap() *netmap.NetworkMap
  66. WhoIs(proto string, ipp netip.AddrPort) (n tailcfg.NodeView, u tailcfg.UserProfile, ok bool)
  67. DoNoiseRequest(req *http.Request) (*http.Response, error)
  68. Dialer() *tsdial.Dialer
  69. TailscaleVarRoot() string
  70. NodeKey() key.NodePublic
  71. }
  72. type server struct {
  73. lb ipnLocalBackend
  74. logf logger.Logf
  75. tailscaledPath string
  76. timeNow func() time.Time // or nil for time.Now
  77. sessionWaitGroup sync.WaitGroup
  78. // mu protects the following
  79. mu sync.Mutex
  80. activeConns map[*conn]bool // set; value is always true
  81. shutdownCalled bool
  82. }
  83. func (srv *server) now() time.Time {
  84. if srv != nil && srv.timeNow != nil {
  85. return srv.timeNow()
  86. }
  87. return time.Now()
  88. }
  89. func init() {
  90. ipnlocal.RegisterNewSSHServer(func(logf logger.Logf, lb *ipnlocal.LocalBackend) (ipnlocal.SSHServer, error) {
  91. tsd, err := os.Executable()
  92. if err != nil {
  93. return nil, err
  94. }
  95. srv := &server{
  96. lb: lb,
  97. logf: logf,
  98. tailscaledPath: tsd,
  99. timeNow: func() time.Time {
  100. return lb.ControlNow(time.Now())
  101. },
  102. }
  103. return srv, nil
  104. })
  105. }
  106. // attachSessionToConnIfNotShutdown ensures that srv is not shutdown before
  107. // attaching the session to the conn. This ensures that once Shutdown is called,
  108. // new sessions are not allowed and existing ones are cleaned up.
  109. // It reports whether ss was attached to the conn.
  110. func (srv *server) attachSessionToConnIfNotShutdown(ss *sshSession) bool {
  111. srv.mu.Lock()
  112. defer srv.mu.Unlock()
  113. if srv.shutdownCalled {
  114. // Do not start any new sessions.
  115. return false
  116. }
  117. ss.conn.attachSession(ss)
  118. return true
  119. }
  120. func (srv *server) trackActiveConn(c *conn, add bool) {
  121. srv.mu.Lock()
  122. defer srv.mu.Unlock()
  123. if add {
  124. mak.Set(&srv.activeConns, c, true)
  125. return
  126. }
  127. delete(srv.activeConns, c)
  128. }
  129. // NumActiveConns returns the number of active SSH connections.
  130. func (srv *server) NumActiveConns() int {
  131. srv.mu.Lock()
  132. defer srv.mu.Unlock()
  133. return len(srv.activeConns)
  134. }
  135. // HandleSSHConn handles a Tailscale SSH connection from c.
  136. // This is the entry point for all SSH connections.
  137. // When this returns, the connection is closed.
  138. func (srv *server) HandleSSHConn(nc net.Conn) error {
  139. metricIncomingConnections.Add(1)
  140. c, err := srv.newConn()
  141. if err != nil {
  142. return err
  143. }
  144. srv.trackActiveConn(c, true) // add
  145. defer srv.trackActiveConn(c, false) // remove
  146. c.HandleConn(nc)
  147. // Return nil to signal to netstack's interception that it doesn't need to
  148. // log. If ss.HandleConn had problems, it can log itself (ideally on an
  149. // sshSession.logf).
  150. return nil
  151. }
  152. // Shutdown terminates all active sessions.
  153. func (srv *server) Shutdown() {
  154. srv.mu.Lock()
  155. srv.shutdownCalled = true
  156. for c := range srv.activeConns {
  157. c.Close()
  158. }
  159. srv.mu.Unlock()
  160. srv.sessionWaitGroup.Wait()
  161. }
  162. // OnPolicyChange terminates any active sessions that no longer match
  163. // the SSH access policy.
  164. func (srv *server) OnPolicyChange() {
  165. srv.mu.Lock()
  166. defer srv.mu.Unlock()
  167. for c := range srv.activeConns {
  168. if c.info == nil {
  169. // c.info is nil when the connection hasn't been authenticated yet.
  170. // In that case, the connection will be terminated when it is.
  171. continue
  172. }
  173. go c.checkStillValid()
  174. }
  175. }
  176. // conn represents a single SSH connection and its associated
  177. // ssh.Server.
  178. //
  179. // During the lifecycle of a connection, the following are called in order:
  180. // Setup and discover server info
  181. // - ServerConfigCallback
  182. //
  183. // Get access to a ServerPreAuthConn (useful for sending banners)
  184. //
  185. // Do the user auth with a NoClientAuthCallback. If user specified
  186. // a username ending in "+password", follow this with password auth
  187. // (to work around buggy SSH clients that don't work with noauth).
  188. //
  189. // Once auth is done, the conn can be multiplexed with multiple sessions and
  190. // channels concurrently. At which point any of the following can be called
  191. // in any order.
  192. // - c.handleSessionPostSSHAuth
  193. // - c.mayForwardLocalPortTo followed by ssh.DirectTCPIPHandler
  194. type conn struct {
  195. *ssh.Server
  196. srv *server
  197. insecureSkipTailscaleAuth bool // used by tests.
  198. // idH is the RFC4253 sec8 hash H. It is used to identify the connection,
  199. // and is shared among all sessions. It should not be shared outside
  200. // process. It is confusingly referred to as SessionID by the gliderlabs/ssh
  201. // library.
  202. idH string
  203. connID string // ID that's shared with control
  204. // spac is a [gossh.ServerPreAuthConn] used for sending auth banners.
  205. // Banners cannot be sent after auth completes.
  206. spac gossh.ServerPreAuthConn
  207. action0 *tailcfg.SSHAction // set by clientAuth
  208. finalAction *tailcfg.SSHAction // set by clientAuth
  209. info *sshConnInfo // set by setInfo
  210. localUser *userMeta // set by clientAuth
  211. userGroupIDs []string // set by clientAuth
  212. acceptEnv []string
  213. // mu protects the following fields.
  214. //
  215. // srv.mu should be acquired prior to mu.
  216. // It is safe to just acquire mu, but unsafe to
  217. // acquire mu and then srv.mu.
  218. mu sync.Mutex // protects the following
  219. sessions []*sshSession
  220. }
  221. func (c *conn) logf(format string, args ...any) {
  222. format = fmt.Sprintf("%v: %v", c.connID, format)
  223. c.srv.logf(format, args...)
  224. }
  225. func (c *conn) vlogf(format string, args ...any) {
  226. if sshVerboseLogging() {
  227. c.logf(format, args...)
  228. }
  229. }
  230. // errDenied is returned by auth callbacks when a connection is denied by the
  231. // policy. It writes the message to an auth banner and then returns an empty
  232. // gossh.PartialSuccessError in order to stop processing authentication
  233. // attempts and immediately disconnect the client.
  234. func (c *conn) errDenied(message string) error {
  235. if message == "" {
  236. message = "tailscale: access denied"
  237. }
  238. if err := c.spac.SendAuthBanner(message); err != nil {
  239. c.logf("failed to send auth banner: %s", err)
  240. }
  241. return errTerminal
  242. }
  243. // errBanner writes the given message to an auth banner and then returns an
  244. // empty gossh.PartialSuccessError in order to stop processing authentication
  245. // attempts and immediately disconnect the client. The contents of err is not
  246. // leaked in the auth banner, but it is logged to the server's log.
  247. func (c *conn) errBanner(message string, err error) error {
  248. if err != nil {
  249. c.logf("%s: %s", message, err)
  250. }
  251. if err := c.spac.SendAuthBanner("tailscale: " + message + "\n"); err != nil {
  252. c.logf("failed to send auth banner: %s", err)
  253. }
  254. return errTerminal
  255. }
  256. // errUnexpected is returned by auth callbacks that encounter an unexpected
  257. // error, such as being unable to send an auth banner. It sends an empty
  258. // gossh.PartialSuccessError to tell gossh.Server to stop processing
  259. // authentication attempts and instead disconnect immediately.
  260. func (c *conn) errUnexpected(err error) error {
  261. c.logf("terminal error: %s", err)
  262. return errTerminal
  263. }
  264. // clientAuth is responsible for performing client authentication.
  265. //
  266. // If policy evaluation fails, it returns an error.
  267. // If access is denied, it returns an error. This must always be an empty
  268. // gossh.PartialSuccessError to prevent further authentication methods from
  269. // being tried.
  270. func (c *conn) clientAuth(cm gossh.ConnMetadata) (perms *gossh.Permissions, retErr error) {
  271. defer func() {
  272. if pse, ok := retErr.(*gossh.PartialSuccessError); ok {
  273. if pse.Next.GSSAPIWithMICConfig != nil ||
  274. pse.Next.KeyboardInteractiveCallback != nil ||
  275. pse.Next.PasswordCallback != nil ||
  276. pse.Next.PublicKeyCallback != nil {
  277. panic("clientAuth attempted to return a non-empty PartialSuccessError")
  278. }
  279. } else if retErr != nil {
  280. panic(fmt.Sprintf("clientAuth attempted to return a non-PartialSuccessError error of type: %t", retErr))
  281. }
  282. }()
  283. if c.insecureSkipTailscaleAuth {
  284. return &gossh.Permissions{}, nil
  285. }
  286. if err := c.setInfo(cm); err != nil {
  287. return nil, c.errBanner("failed to get connection info", err)
  288. }
  289. action, localUser, acceptEnv, result := c.evaluatePolicy()
  290. switch result {
  291. case accepted:
  292. // do nothing
  293. case rejectedUser:
  294. return nil, c.errBanner(fmt.Sprintf("tailnet policy does not permit you to SSH as user %q", c.info.sshUser), nil)
  295. case rejected, noPolicy:
  296. return nil, c.errBanner("tailnet policy does not permit you to SSH to this node", fmt.Errorf("failed to evaluate policy, result: %s", result))
  297. default:
  298. return nil, c.errBanner("failed to evaluate tailnet policy", fmt.Errorf("failed to evaluate policy, result: %s", result))
  299. }
  300. c.action0 = action
  301. if action.Accept || action.HoldAndDelegate != "" {
  302. // Immediately look up user information for purposes of generating
  303. // hold and delegate URL (if necessary).
  304. lu, err := userLookup(localUser)
  305. if err != nil {
  306. return nil, c.errBanner(fmt.Sprintf("failed to look up local user %q ", localUser), err)
  307. }
  308. gids, err := lu.GroupIds()
  309. if err != nil {
  310. return nil, c.errBanner("failed to look up local user's group IDs", err)
  311. }
  312. c.userGroupIDs = gids
  313. c.localUser = lu
  314. c.acceptEnv = acceptEnv
  315. }
  316. for {
  317. switch {
  318. case action.Accept:
  319. metricTerminalAccept.Add(1)
  320. if action.Message != "" {
  321. if err := c.spac.SendAuthBanner(action.Message); err != nil {
  322. return nil, c.errUnexpected(fmt.Errorf("error sending auth welcome message: %w", err))
  323. }
  324. }
  325. c.finalAction = action
  326. return &gossh.Permissions{}, nil
  327. case action.Reject:
  328. metricTerminalReject.Add(1)
  329. c.finalAction = action
  330. return nil, c.errDenied(action.Message)
  331. case action.HoldAndDelegate != "":
  332. if action.Message != "" {
  333. if err := c.spac.SendAuthBanner(action.Message); err != nil {
  334. return nil, c.errUnexpected(fmt.Errorf("error sending hold and delegate message: %w", err))
  335. }
  336. }
  337. url := action.HoldAndDelegate
  338. ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
  339. defer cancel()
  340. metricHolds.Add(1)
  341. url = c.expandDelegateURLLocked(url)
  342. var err error
  343. action, err = c.fetchSSHAction(ctx, url)
  344. if err != nil {
  345. metricTerminalFetchError.Add(1)
  346. return nil, c.errBanner("failed to fetch next SSH action", fmt.Errorf("fetch failed from %s: %w", url, err))
  347. }
  348. default:
  349. metricTerminalMalformed.Add(1)
  350. return nil, c.errBanner("reached Action that had neither Accept, Reject, nor HoldAndDelegate", nil)
  351. }
  352. }
  353. }
  354. // ServerConfig implements ssh.ServerConfigCallback.
  355. func (c *conn) ServerConfig(ctx ssh.Context) *gossh.ServerConfig {
  356. return &gossh.ServerConfig{
  357. PreAuthConnCallback: func(spac gossh.ServerPreAuthConn) {
  358. c.spac = spac
  359. },
  360. NoClientAuth: true, // required for the NoClientAuthCallback to run
  361. NoClientAuthCallback: func(cm gossh.ConnMetadata) (*gossh.Permissions, error) {
  362. // First perform client authentication, which can potentially
  363. // involve multiple steps (for example prompting user to log in to
  364. // Tailscale admin panel to confirm identity).
  365. perms, err := c.clientAuth(cm)
  366. if err != nil {
  367. return nil, err
  368. }
  369. // Authentication succeeded. Buggy SSH clients get confused by
  370. // success from the "none" auth method. As a workaround, let users
  371. // specify a username ending in "+password" to force password auth.
  372. // The actual value of the password doesn't matter.
  373. if strings.HasSuffix(cm.User(), forcePasswordSuffix) {
  374. return nil, &gossh.PartialSuccessError{
  375. Next: gossh.ServerAuthCallbacks{
  376. PasswordCallback: func(_ gossh.ConnMetadata, password []byte) (*gossh.Permissions, error) {
  377. return &gossh.Permissions{}, nil
  378. },
  379. },
  380. }
  381. }
  382. return perms, nil
  383. },
  384. PasswordCallback: func(cm gossh.ConnMetadata, pword []byte) (*gossh.Permissions, error) {
  385. // Some clients don't request 'none' authentication. Instead, they
  386. // immediately supply a password. We humor them by accepting the
  387. // password, but authenticate as usual, ignoring the actual value of
  388. // the password.
  389. return c.clientAuth(cm)
  390. },
  391. PublicKeyCallback: func(cm gossh.ConnMetadata, key gossh.PublicKey) (*gossh.Permissions, error) {
  392. // Some clients don't request 'none' authentication. Instead, they
  393. // immediately supply a public key. We humor them by accepting the
  394. // key, but authenticate as usual, ignoring the actual content of
  395. // the key.
  396. return c.clientAuth(cm)
  397. },
  398. }
  399. }
  400. func (srv *server) newConn() (*conn, error) {
  401. srv.mu.Lock()
  402. if srv.shutdownCalled {
  403. srv.mu.Unlock()
  404. // Stop accepting new connections.
  405. // Connections in the auth phase are handled in handleConnPostSSHAuth.
  406. // Existing sessions are terminated by Shutdown.
  407. return nil, errors.New("server is shutting down")
  408. }
  409. srv.mu.Unlock()
  410. c := &conn{srv: srv}
  411. now := srv.now()
  412. c.connID = fmt.Sprintf("ssh-conn-%s-%02x", now.UTC().Format("20060102T150405"), randBytes(5))
  413. fwdHandler := &ssh.ForwardedTCPHandler{}
  414. c.Server = &ssh.Server{
  415. Version: "Tailscale",
  416. ServerConfigCallback: c.ServerConfig,
  417. Handler: c.handleSessionPostSSHAuth,
  418. LocalPortForwardingCallback: c.mayForwardLocalPortTo,
  419. ReversePortForwardingCallback: c.mayReversePortForwardTo,
  420. SubsystemHandlers: map[string]ssh.SubsystemHandler{
  421. "sftp": c.handleSessionPostSSHAuth,
  422. },
  423. // Note: the direct-tcpip channel handler and LocalPortForwardingCallback
  424. // only adds support for forwarding ports from the local machine.
  425. // TODO(maisem/bradfitz): add remote port forwarding support.
  426. ChannelHandlers: map[string]ssh.ChannelHandler{
  427. "direct-tcpip": ssh.DirectTCPIPHandler,
  428. },
  429. RequestHandlers: map[string]ssh.RequestHandler{
  430. "tcpip-forward": fwdHandler.HandleSSHRequest,
  431. "cancel-tcpip-forward": fwdHandler.HandleSSHRequest,
  432. },
  433. }
  434. ss := c.Server
  435. for k, v := range ssh.DefaultRequestHandlers {
  436. ss.RequestHandlers[k] = v
  437. }
  438. for k, v := range ssh.DefaultChannelHandlers {
  439. ss.ChannelHandlers[k] = v
  440. }
  441. for k, v := range ssh.DefaultSubsystemHandlers {
  442. ss.SubsystemHandlers[k] = v
  443. }
  444. keys, err := srv.lb.GetSSH_HostKeys()
  445. if err != nil {
  446. return nil, err
  447. }
  448. for _, signer := range keys {
  449. ss.AddHostKey(signer)
  450. }
  451. return c, nil
  452. }
  453. // mayReversePortPortForwardTo reports whether the ctx should be allowed to port forward
  454. // to the specified host and port.
  455. // TODO(bradfitz/maisem): should we have more checks on host/port?
  456. func (c *conn) mayReversePortForwardTo(ctx ssh.Context, destinationHost string, destinationPort uint32) bool {
  457. if sshDisableForwarding() {
  458. return false
  459. }
  460. if c.finalAction != nil && c.finalAction.AllowRemotePortForwarding {
  461. metricRemotePortForward.Add(1)
  462. return true
  463. }
  464. return false
  465. }
  466. // mayForwardLocalPortTo reports whether the ctx should be allowed to port forward
  467. // to the specified host and port.
  468. // TODO(bradfitz/maisem): should we have more checks on host/port?
  469. func (c *conn) mayForwardLocalPortTo(ctx ssh.Context, destinationHost string, destinationPort uint32) bool {
  470. if sshDisableForwarding() {
  471. return false
  472. }
  473. if c.finalAction != nil && c.finalAction.AllowLocalPortForwarding {
  474. metricLocalPortForward.Add(1)
  475. return true
  476. }
  477. return false
  478. }
  479. // sshPolicy returns the SSHPolicy for current node.
  480. // If there is no SSHPolicy in the netmap, it returns a debugPolicy
  481. // if one is defined.
  482. func (c *conn) sshPolicy() (_ *tailcfg.SSHPolicy, ok bool) {
  483. lb := c.srv.lb
  484. if !lb.ShouldRunSSH() {
  485. return nil, false
  486. }
  487. nm := lb.NetMap()
  488. if nm == nil {
  489. return nil, false
  490. }
  491. if pol := nm.SSHPolicy; pol != nil && !envknob.SSHIgnoreTailnetPolicy() {
  492. return pol, true
  493. }
  494. debugPolicyFile := envknob.SSHPolicyFile()
  495. if debugPolicyFile != "" {
  496. c.logf("reading debug SSH policy file: %v", debugPolicyFile)
  497. f, err := os.ReadFile(debugPolicyFile)
  498. if err != nil {
  499. c.logf("error reading debug SSH policy file: %v", err)
  500. return nil, false
  501. }
  502. p := new(tailcfg.SSHPolicy)
  503. if err := json.Unmarshal(f, p); err != nil {
  504. c.logf("invalid JSON in %v: %v", debugPolicyFile, err)
  505. return nil, false
  506. }
  507. return p, true
  508. }
  509. return nil, false
  510. }
  // toIPPort converts a to a netip.AddrPort.
  //
  // Only *net.TCPAddr values are supported. IPv4-mapped IPv6 addresses are
  // unmapped to plain IPv4. The zero AddrPort is returned when a is not a
  // *net.TCPAddr, is a nil *net.TCPAddr, or carries an IP that is not a valid
  // 4- or 16-byte address.
  func toIPPort(a net.Addr) (ipp netip.AddrPort) {
  	ta, ok := a.(*net.TCPAddr)
  	if !ok || ta == nil {
  		// A typed-nil *net.TCPAddr passes the type assertion; guard against
  		// it so that reading ta.IP below cannot panic.
  		return netip.AddrPort{}
  	}
  	addr, ok := netip.AddrFromSlice(ta.IP)
  	if !ok {
  		return netip.AddrPort{}
  	}
  	return netip.AddrPortFrom(addr.Unmap(), uint16(ta.Port))
  }
  522. // connInfo populates the sshConnInfo from the provided arguments,
  523. // validating only that they represent a known Tailscale identity.
  524. func (c *conn) setInfo(cm gossh.ConnMetadata) error {
  525. if c.info != nil {
  526. return nil
  527. }
  528. ci := &sshConnInfo{
  529. sshUser: strings.TrimSuffix(cm.User(), forcePasswordSuffix),
  530. src: toIPPort(cm.RemoteAddr()),
  531. dst: toIPPort(cm.LocalAddr()),
  532. }
  533. if !tsaddr.IsTailscaleIP(ci.dst.Addr()) {
  534. return fmt.Errorf("tailssh: rejecting non-Tailscale local address %v", ci.dst)
  535. }
  536. if !tsaddr.IsTailscaleIP(ci.src.Addr()) {
  537. return fmt.Errorf("tailssh: rejecting non-Tailscale remote address %v", ci.src)
  538. }
  539. node, uprof, ok := c.srv.lb.WhoIs("tcp", ci.src)
  540. if !ok {
  541. return fmt.Errorf("unknown Tailscale identity from src %v", ci.src)
  542. }
  543. ci.node = node
  544. ci.uprof = uprof
  545. c.idH = string(cm.SessionID())
  546. c.info = ci
  547. c.logf("handling conn: %v", ci.String())
  548. return nil
  549. }
  // evalResult is the outcome of evaluating the SSH policy for a connection.
  type evalResult string

  // Possible evalResult values. clientAuth proceeds only on accepted; every
  // other value results in the connection being refused.
  const (
  	noPolicy     evalResult = "no policy"     // no usable SSH policy was found
  	rejected     evalResult = "rejected"      // policy does not permit SSH to this node
  	rejectedUser evalResult = "rejected user" // policy does not permit the requested SSH user
  	accepted     evalResult = "accept"        // policy matched
  )
  557. // evaluatePolicy returns the SSHAction and localUser after evaluating
  558. // the SSHPolicy for this conn.
  559. func (c *conn) evaluatePolicy() (_ *tailcfg.SSHAction, localUser string, acceptEnv []string, result evalResult) {
  560. pol, ok := c.sshPolicy()
  561. if !ok {
  562. return nil, "", nil, noPolicy
  563. }
  564. return c.evalSSHPolicy(pol)
  565. }
  566. // handleSessionPostSSHAuth runs an SSH session after the SSH-level authentication,
  567. // but not necessarily before all the Tailscale-level extra verification has
  568. // completed. It also handles SFTP requests.
  569. func (c *conn) handleSessionPostSSHAuth(s ssh.Session) {
  570. // Do this check after auth, but before starting the session.
  571. switch s.Subsystem() {
  572. case "sftp":
  573. if sshDisableSFTP() {
  574. fmt.Fprintf(s.Stderr(), "sftp disabled\r\n")
  575. s.Exit(1)
  576. return
  577. }
  578. metricSFTP.Add(1)
  579. case "":
  580. // Regular SSH session.
  581. default:
  582. fmt.Fprintf(s.Stderr(), "Unsupported subsystem %q\r\n", s.Subsystem())
  583. s.Exit(1)
  584. return
  585. }
  586. ss := c.newSSHSession(s)
  587. ss.logf("handling new SSH connection from %v (%v) to ssh-user %q", c.info.uprof.LoginName, c.info.src.Addr(), c.localUser.Username)
  588. ss.logf("access granted to %v as ssh-user %q", c.info.uprof.LoginName, c.localUser.Username)
  589. ss.run()
  590. }
  591. func (c *conn) expandDelegateURLLocked(actionURL string) string {
  592. nm := c.srv.lb.NetMap()
  593. ci := c.info
  594. lu := c.localUser
  595. var dstNodeID string
  596. if nm != nil {
  597. dstNodeID = fmt.Sprint(int64(nm.SelfNode.ID()))
  598. }
  599. return strings.NewReplacer(
  600. "$SRC_NODE_IP", url.QueryEscape(ci.src.Addr().String()),
  601. "$SRC_NODE_ID", fmt.Sprint(int64(ci.node.ID())),
  602. "$DST_NODE_IP", url.QueryEscape(ci.dst.Addr().String()),
  603. "$DST_NODE_ID", dstNodeID,
  604. "$SSH_USER", url.QueryEscape(ci.sshUser),
  605. "$LOCAL_USER", url.QueryEscape(lu.Username),
  606. ).Replace(actionURL)
  607. }
  608. // sshSession is an accepted Tailscale SSH session.
  609. type sshSession struct {
  610. ssh.Session
  611. sharedID string // ID that's shared with control
  612. logf logger.Logf
  613. ctx context.Context
  614. cancelCtx context.CancelCauseFunc
  615. conn *conn
  616. agentListener net.Listener // non-nil if agent-forwarding requested+allowed
  617. // initialized by launchProcess:
  618. cmd *exec.Cmd
  619. wrStdin io.WriteCloser
  620. rdStdout io.ReadCloser
  621. rdStderr io.ReadCloser // rdStderr is nil for pty sessions
  622. ptyReq *ssh.Pty // non-nil for pty sessions
  623. // childPipes is a list of pipes that need to be closed when the process exits.
  624. // For pty sessions, this is the tty fd.
  625. // For non-pty sessions, this is the stdin, stdout, stderr fds.
  626. childPipes []io.Closer
  627. // We use this sync.Once to ensure that we only terminate the process once,
  628. // either it exits itself or is terminated
  629. exitOnce sync.Once
  630. }
  631. func (ss *sshSession) vlogf(format string, args ...any) {
  632. if sshVerboseLogging() {
  633. ss.logf(format, args...)
  634. }
  635. }
  636. func (c *conn) newSSHSession(s ssh.Session) *sshSession {
  637. sharedID := fmt.Sprintf("sess-%s-%02x", c.srv.now().UTC().Format("20060102T150405"), randBytes(5))
  638. c.logf("starting session: %v", sharedID)
  639. ctx, cancel := context.WithCancelCause(s.Context())
  640. return &sshSession{
  641. Session: s,
  642. sharedID: sharedID,
  643. ctx: ctx,
  644. cancelCtx: cancel,
  645. conn: c,
  646. logf: logger.WithPrefix(c.srv.logf, "ssh-session("+sharedID+"): "),
  647. }
  648. }
  649. // isStillValid reports whether the conn is still valid.
  650. func (c *conn) isStillValid() bool {
  651. a, localUser, _, result := c.evaluatePolicy()
  652. c.vlogf("stillValid: %+v %v %v", a, localUser, result)
  653. if result != accepted {
  654. return false
  655. }
  656. if !a.Accept && a.HoldAndDelegate == "" {
  657. return false
  658. }
  659. return c.localUser.Username == localUser
  660. }
  661. // checkStillValid checks that the conn is still valid per the latest SSHPolicy.
  662. // If not, it terminates all sessions associated with the conn.
  663. func (c *conn) checkStillValid() {
  664. if c.isStillValid() {
  665. return
  666. }
  667. metricPolicyChangeKick.Add(1)
  668. c.logf("session no longer valid per new SSH policy; closing")
  669. c.mu.Lock()
  670. defer c.mu.Unlock()
  671. for _, s := range c.sessions {
  672. s.cancelCtx(userVisibleError{
  673. fmt.Sprintf("Access revoked.\r\n"),
  674. context.Canceled,
  675. })
  676. }
  677. }
  678. func (c *conn) fetchSSHAction(ctx context.Context, url string) (*tailcfg.SSHAction, error) {
  679. ctx, cancel := context.WithTimeout(ctx, 30*time.Minute)
  680. defer cancel()
  681. bo := backoff.NewBackoff("fetch-ssh-action", c.logf, 10*time.Second)
  682. for {
  683. if err := ctx.Err(); err != nil {
  684. return nil, err
  685. }
  686. req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
  687. if err != nil {
  688. return nil, err
  689. }
  690. res, err := c.srv.lb.DoNoiseRequest(req)
  691. if err != nil {
  692. bo.BackOff(ctx, err)
  693. continue
  694. }
  695. if res.StatusCode != 200 {
  696. body, _ := io.ReadAll(res.Body)
  697. res.Body.Close()
  698. if len(body) > 1<<10 {
  699. body = body[:1<<10]
  700. }
  701. c.logf("fetch of %v: %s, %s", url, res.Status, body)
  702. bo.BackOff(ctx, fmt.Errorf("unexpected status: %v", res.Status))
  703. continue
  704. }
  705. a := new(tailcfg.SSHAction)
  706. err = json.NewDecoder(res.Body).Decode(a)
  707. res.Body.Close()
  708. if err != nil {
  709. c.logf("invalid next SSHAction JSON from %v: %v", url, err)
  710. bo.BackOff(ctx, err)
  711. continue
  712. }
  713. return a, nil
  714. }
  715. }
  716. // killProcessOnContextDone waits for ss.ctx to be done and kills the process,
  717. // unless the process has already exited.
  718. func (ss *sshSession) killProcessOnContextDone() {
  719. <-ss.ctx.Done()
  720. // Either the process has already exited, in which case this does nothing.
  721. // Or, the process is still running in which case this will kill it.
  722. ss.exitOnce.Do(func() {
  723. err := context.Cause(ss.ctx)
  724. if serr, ok := err.(SSHTerminationError); ok {
  725. msg := serr.SSHTerminationMessage()
  726. if msg != "" {
  727. io.WriteString(ss.Stderr(), "\r\n\r\n"+msg+"\r\n\r\n")
  728. }
  729. }
  730. ss.logf("terminating SSH session from %v: %v", ss.conn.info.src.Addr(), err)
  731. // We don't need to Process.Wait here, sshSession.run() does
  732. // the waiting regardless of termination reason.
  733. // TODO(maisem): should this be a SIGTERM followed by a SIGKILL?
  734. ss.cmd.Process.Kill()
  735. })
  736. }
  737. // attachSession registers ss as an active session.
  738. func (c *conn) attachSession(ss *sshSession) {
  739. c.srv.sessionWaitGroup.Add(1)
  740. if ss.sharedID == "" {
  741. panic("empty sharedID")
  742. }
  743. c.mu.Lock()
  744. defer c.mu.Unlock()
  745. c.sessions = append(c.sessions, ss)
  746. }
  747. // detachSession unregisters s from the list of active sessions.
  748. func (c *conn) detachSession(ss *sshSession) {
  749. defer c.srv.sessionWaitGroup.Done()
  750. c.mu.Lock()
  751. defer c.mu.Unlock()
  752. for i, s := range c.sessions {
  753. if s == ss {
  754. c.sessions = append(c.sessions[:i], c.sessions[i+1:]...)
  755. break
  756. }
  757. }
  758. }
// errSessionDone is the cancellation cause that sshSession.run passes to the
// session's cancelCtx when run returns.
var errSessionDone = errors.New("session is done")
  760. // handleSSHAgentForwarding starts a Unix socket listener and in the background
  761. // forwards agent connections between the listener and the ssh.Session.
  762. // On success, it assigns ss.agentListener.
  763. func (ss *sshSession) handleSSHAgentForwarding(s ssh.Session, lu *userMeta) error {
  764. if !ssh.AgentRequested(ss) || !ss.conn.finalAction.AllowAgentForwarding {
  765. return nil
  766. }
  767. if sshDisableForwarding() {
  768. // TODO(bradfitz): or do we want to return an error here instead so the user
  769. // gets an error if they ran with ssh -A? But for now we just silently
  770. // don't work, like the condition above.
  771. return nil
  772. }
  773. ss.logf("ssh: agent forwarding requested")
  774. ln, err := ssh.NewAgentListener()
  775. if err != nil {
  776. return err
  777. }
  778. defer func() {
  779. if err != nil && ln != nil {
  780. ln.Close()
  781. }
  782. }()
  783. uid, err := strconv.ParseUint(lu.Uid, 10, 32)
  784. if err != nil {
  785. return err
  786. }
  787. gid, err := strconv.ParseUint(lu.Gid, 10, 32)
  788. if err != nil {
  789. return err
  790. }
  791. socket := ln.Addr().String()
  792. dir := filepath.Dir(socket)
  793. // Make sure the socket is accessible only by the user.
  794. if err := os.Chmod(socket, 0600); err != nil {
  795. return err
  796. }
  797. if err := os.Chown(socket, int(uid), int(gid)); err != nil {
  798. return err
  799. }
  800. // Make sure the dir is also accessible.
  801. if err := os.Chmod(dir, 0755); err != nil {
  802. return err
  803. }
  804. go ssh.ForwardAgentConnections(ln, s)
  805. ss.agentListener = ln
  806. return nil
  807. }
  808. // run is the entrypoint for a newly accepted SSH session.
  809. //
  810. // It handles ss once it's been accepted and determined
  811. // that it should run.
  812. func (ss *sshSession) run() {
  813. metricActiveSessions.Add(1)
  814. defer metricActiveSessions.Add(-1)
  815. defer ss.cancelCtx(errSessionDone)
  816. if attached := ss.conn.srv.attachSessionToConnIfNotShutdown(ss); !attached {
  817. fmt.Fprintf(ss, "Tailscale SSH is shutting down\r\n")
  818. ss.Exit(1)
  819. return
  820. }
  821. defer ss.conn.detachSession(ss)
  822. lu := ss.conn.localUser
  823. logf := ss.logf
  824. if ss.conn.finalAction.SessionDuration != 0 {
  825. t := time.AfterFunc(ss.conn.finalAction.SessionDuration, func() {
  826. ss.cancelCtx(userVisibleError{
  827. fmt.Sprintf("Session timeout of %v elapsed.", ss.conn.finalAction.SessionDuration),
  828. context.DeadlineExceeded,
  829. })
  830. })
  831. defer t.Stop()
  832. }
  833. if euid := os.Geteuid(); euid != 0 && runtime.GOOS != "plan9" {
  834. if lu.Uid != fmt.Sprint(euid) {
  835. ss.logf("can't switch to user %q from process euid %v", lu.Username, euid)
  836. fmt.Fprintf(ss, "can't switch user\r\n")
  837. ss.Exit(1)
  838. return
  839. }
  840. }
  841. // Take control of the PTY so that we can configure it below.
  842. // See https://github.com/tailscale/tailscale/issues/4146
  843. ss.DisablePTYEmulation()
  844. var rec *recording // or nil if disabled
  845. if ss.Subsystem() != "sftp" {
  846. if err := ss.handleSSHAgentForwarding(ss, lu); err != nil {
  847. ss.logf("agent forwarding failed: %v", err)
  848. } else if ss.agentListener != nil {
  849. // TODO(maisem/bradfitz): add a way to close all session resources
  850. defer ss.agentListener.Close()
  851. }
  852. if ss.shouldRecord() {
  853. var err error
  854. rec, err = ss.startNewRecording()
  855. if err != nil {
  856. var uve userVisibleError
  857. if errors.As(err, &uve) {
  858. fmt.Fprintf(ss, "%s\r\n", uve.SSHTerminationMessage())
  859. } else {
  860. fmt.Fprintf(ss, "can't start new recording\r\n")
  861. }
  862. ss.logf("startNewRecording: %v", err)
  863. ss.Exit(1)
  864. return
  865. }
  866. ss.logf("startNewRecording: <nil>")
  867. if rec != nil {
  868. defer rec.Close()
  869. }
  870. }
  871. }
  872. err := ss.launchProcess()
  873. if err != nil {
  874. logf("start failed: %v", err.Error())
  875. if errors.Is(err, context.Canceled) {
  876. err := context.Cause(ss.ctx)
  877. var uve userVisibleError
  878. if errors.As(err, &uve) {
  879. fmt.Fprintf(ss, "%s\r\n", uve)
  880. }
  881. }
  882. ss.Exit(1)
  883. return
  884. }
  885. go ss.killProcessOnContextDone()
  886. var processDone atomic.Bool
  887. go func() {
  888. defer ss.wrStdin.Close()
  889. if _, err := io.Copy(rec.writer("i", ss.wrStdin), ss); err != nil {
  890. logf("stdin copy: %v", err)
  891. ss.cancelCtx(err)
  892. }
  893. }()
  894. outputDone := make(chan struct{})
  895. var openOutputStreams atomic.Int32
  896. if ss.rdStderr != nil {
  897. openOutputStreams.Store(2)
  898. } else {
  899. openOutputStreams.Store(1)
  900. }
  901. go func() {
  902. defer ss.rdStdout.Close()
  903. _, err := io.Copy(rec.writer("o", ss), ss.rdStdout)
  904. if err != nil && !errors.Is(err, io.EOF) {
  905. isErrBecauseProcessExited := processDone.Load() && errors.Is(err, syscall.EIO)
  906. if !isErrBecauseProcessExited {
  907. logf("stdout copy: %v", err)
  908. ss.cancelCtx(err)
  909. }
  910. }
  911. if openOutputStreams.Add(-1) == 0 {
  912. ss.CloseWrite()
  913. close(outputDone)
  914. }
  915. }()
  916. // rdStderr is nil for ptys.
  917. if ss.rdStderr != nil {
  918. go func() {
  919. defer ss.rdStderr.Close()
  920. _, err := io.Copy(ss.Stderr(), ss.rdStderr)
  921. if err != nil {
  922. logf("stderr copy: %v", err)
  923. }
  924. if openOutputStreams.Add(-1) == 0 {
  925. ss.CloseWrite()
  926. close(outputDone)
  927. }
  928. }()
  929. }
  930. err = ss.cmd.Wait()
  931. processDone.Store(true)
  932. // This will either make the SSH Termination goroutine be a no-op,
  933. // or itself will be a no-op because the process was killed by the
  934. // aforementioned goroutine.
  935. ss.exitOnce.Do(func() {})
  936. // Close the process-side of all pipes to signal the asynchronous
  937. // io.Copy routines reading/writing from the pipes to terminate.
  938. // Block for the io.Copy to finish before calling ss.Exit below.
  939. closeAll(ss.childPipes...)
  940. select {
  941. case <-outputDone:
  942. case <-ss.ctx.Done():
  943. }
  944. if err == nil {
  945. ss.logf("Session complete")
  946. ss.Exit(0)
  947. return
  948. }
  949. if ee, ok := err.(*exec.ExitError); ok {
  950. code := ee.ProcessState.ExitCode()
  951. ss.logf("Wait: code=%v", code)
  952. ss.Exit(code)
  953. return
  954. }
  955. ss.logf("Wait: %v", err)
  956. ss.Exit(1)
  957. return
  958. }
// recordSSHToLocalDisk is a deprecated dev knob to allow recording SSH sessions
// to local storage. It is only used if there is no recording configured by the
// coordination server. This will be removed in the future.
//
// The knob is registered as the boolean envknob TS_DEBUG_LOG_SSH.
var recordSSHToLocalDisk = envknob.RegisterBool("TS_DEBUG_LOG_SSH")
  963. // recorders returns the list of recorders to use for this session.
  964. // If the final action has a non-empty list of recorders, that list is
  965. // returned. Otherwise, the list of recorders from the initial action
  966. // is returned.
  967. func (ss *sshSession) recorders() ([]netip.AddrPort, *tailcfg.SSHRecorderFailureAction) {
  968. if len(ss.conn.finalAction.Recorders) > 0 {
  969. return ss.conn.finalAction.Recorders, ss.conn.finalAction.OnRecordingFailure
  970. }
  971. return ss.conn.action0.Recorders, ss.conn.action0.OnRecordingFailure
  972. }
  973. func (ss *sshSession) shouldRecord() bool {
  974. recs, _ := ss.recorders()
  975. return len(recs) > 0 || recordSSHToLocalDisk()
  976. }
// sshConnInfo holds the identifying details of one incoming SSH connection:
// the requested user, both endpoints, and the source node and its profile.
type sshConnInfo struct {
	// sshUser is the requested local SSH username ("root", "alice", etc).
	sshUser string

	// src is the Tailscale IP and port that the connection came from.
	src netip.AddrPort

	// dst is the Tailscale IP and port that the connection came for.
	dst netip.AddrPort

	// node is the node that src's IP belongs to.
	node tailcfg.NodeView

	// uprof is node's UserProfile.
	uprof tailcfg.UserProfile
}
  989. func (ci *sshConnInfo) String() string {
  990. return fmt.Sprintf("%v->%v@%v", ci.src, ci.sshUser, ci.dst)
  991. }
  992. func (c *conn) ruleExpired(r *tailcfg.SSHRule) bool {
  993. if r.RuleExpires == nil {
  994. return false
  995. }
  996. return r.RuleExpires.Before(c.srv.now())
  997. }
  998. func (c *conn) evalSSHPolicy(pol *tailcfg.SSHPolicy) (a *tailcfg.SSHAction, localUser string, acceptEnv []string, result evalResult) {
  999. failedOnUser := false
  1000. for _, r := range pol.Rules {
  1001. if a, localUser, acceptEnv, err := c.matchRule(r); err == nil {
  1002. return a, localUser, acceptEnv, accepted
  1003. } else if errors.Is(err, errUserMatch) {
  1004. failedOnUser = true
  1005. }
  1006. }
  1007. result = rejected
  1008. if failedOnUser {
  1009. result = rejectedUser
  1010. }
  1011. return nil, "", nil, result
  1012. }
// internal errors for testing; they don't escape to callers or logs.
//
// Each one is returned by matchRule for a specific reason a rule did not
// match.
var (
	errNilRule        = errors.New("nil rule")                 // the rule itself is nil
	errNilAction      = errors.New("nil action")               // the rule has no Action
	errRuleExpired    = errors.New("rule expired")             // ruleExpired reported true
	errPrincipalMatch = errors.New("principal didn't match")   // no principal matched the conn
	errUserMatch      = errors.New("user didn't match")        // no local user mapping for the requested SSH user
	errInvalidConn    = errors.New("invalid connection state") // the conn or its info is nil
)
  1022. func (c *conn) matchRule(r *tailcfg.SSHRule) (a *tailcfg.SSHAction, localUser string, acceptEnv []string, err error) {
  1023. defer func() {
  1024. c.vlogf("matchRule(%+v): %v", r, err)
  1025. }()
  1026. if c == nil {
  1027. return nil, "", nil, errInvalidConn
  1028. }
  1029. if c.info == nil {
  1030. c.logf("invalid connection state")
  1031. return nil, "", nil, errInvalidConn
  1032. }
  1033. if r == nil {
  1034. return nil, "", nil, errNilRule
  1035. }
  1036. if r.Action == nil {
  1037. return nil, "", nil, errNilAction
  1038. }
  1039. if c.ruleExpired(r) {
  1040. return nil, "", nil, errRuleExpired
  1041. }
  1042. if !c.anyPrincipalMatches(r.Principals) {
  1043. return nil, "", nil, errPrincipalMatch
  1044. }
  1045. if !r.Action.Reject {
  1046. // For all but Reject rules, SSHUsers is required.
  1047. // If SSHUsers is nil or empty, mapLocalUser will return an
  1048. // empty string anyway.
  1049. localUser = mapLocalUser(r.SSHUsers, c.info.sshUser)
  1050. if localUser == "" {
  1051. return nil, "", nil, errUserMatch
  1052. }
  1053. }
  1054. return r.Action, localUser, r.AcceptEnv, nil
  1055. }
  1056. func mapLocalUser(ruleSSHUsers map[string]string, reqSSHUser string) (localUser string) {
  1057. v, ok := ruleSSHUsers[reqSSHUser]
  1058. if !ok {
  1059. v = ruleSSHUsers["*"]
  1060. }
  1061. if v == "=" {
  1062. return reqSSHUser
  1063. }
  1064. return v
  1065. }
  1066. func (c *conn) anyPrincipalMatches(ps []*tailcfg.SSHPrincipal) bool {
  1067. for _, p := range ps {
  1068. if p == nil {
  1069. continue
  1070. }
  1071. if c.principalMatchesTailscaleIdentity(p) {
  1072. return true
  1073. }
  1074. }
  1075. return false
  1076. }
  1077. // principalMatchesTailscaleIdentity reports whether one of p's four fields
  1078. // that match the Tailscale identity match (Node, NodeIP, UserLogin, Any).
  1079. func (c *conn) principalMatchesTailscaleIdentity(p *tailcfg.SSHPrincipal) bool {
  1080. ci := c.info
  1081. if p.Any {
  1082. return true
  1083. }
  1084. if !p.Node.IsZero() && ci.node.Valid() && p.Node == ci.node.StableID() {
  1085. return true
  1086. }
  1087. if p.NodeIP != "" {
  1088. if ip, _ := netip.ParseAddr(p.NodeIP); ip == ci.src.Addr() {
  1089. return true
  1090. }
  1091. }
  1092. if p.UserLogin != "" && ci.uprof.LoginName == p.UserLogin {
  1093. return true
  1094. }
  1095. return false
  1096. }
  1097. func randBytes(n int) []byte {
  1098. b := make([]byte, n)
  1099. if _, err := rand.Read(b); err != nil {
  1100. panic(err)
  1101. }
  1102. return b
  1103. }
  1104. func (ss *sshSession) openFileForRecording(now time.Time) (_ io.WriteCloser, err error) {
  1105. varRoot := ss.conn.srv.lb.TailscaleVarRoot()
  1106. if varRoot == "" {
  1107. return nil, errors.New("no var root for recording storage")
  1108. }
  1109. dir := filepath.Join(varRoot, "ssh-sessions")
  1110. if err := os.MkdirAll(dir, 0700); err != nil {
  1111. return nil, err
  1112. }
  1113. f, err := os.CreateTemp(dir, fmt.Sprintf("ssh-session-%v-*.cast", now.UnixNano()))
  1114. if err != nil {
  1115. return nil, err
  1116. }
  1117. return f, nil
  1118. }
  1119. // startNewRecording starts a new SSH session recording.
  1120. // It may return a nil recording if recording is not available.
  1121. func (ss *sshSession) startNewRecording() (_ *recording, err error) {
  1122. // We store the node key as soon as possible when creating
  1123. // a new recording incase of FUS.
  1124. nodeKey := ss.conn.srv.lb.NodeKey()
  1125. if nodeKey.IsZero() {
  1126. return nil, errors.New("ssh server is unavailable: no node key")
  1127. }
  1128. recorders, onFailure := ss.recorders()
  1129. var localRecording bool
  1130. if len(recorders) == 0 {
  1131. if recordSSHToLocalDisk() {
  1132. localRecording = true
  1133. } else {
  1134. return nil, errors.New("no recorders configured")
  1135. }
  1136. }
  1137. var w ssh.Window
  1138. if ptyReq, _, isPtyReq := ss.Pty(); isPtyReq {
  1139. w = ptyReq.Window
  1140. }
  1141. term := envValFromList(ss.Environ(), "TERM")
  1142. if term == "" {
  1143. term = "xterm-256color" // something non-empty
  1144. }
  1145. now := time.Now()
  1146. rec := &recording{
  1147. ss: ss,
  1148. start: now,
  1149. failOpen: onFailure == nil || onFailure.TerminateSessionWithMessage == "",
  1150. }
  1151. // We want to use a background context for uploading and not ss.ctx.
  1152. // ss.ctx is closed when the session closes, but we don't want to break the upload at that time.
  1153. // Instead we want to wait for the session to close the writer when it finishes.
  1154. ctx := context.Background()
  1155. if localRecording {
  1156. rec.out, err = ss.openFileForRecording(now)
  1157. if err != nil {
  1158. return nil, err
  1159. }
  1160. } else {
  1161. var errChan <-chan error
  1162. var attempts []*tailcfg.SSHRecordingAttempt
  1163. rec.out, attempts, errChan, err = sessionrecording.ConnectToRecorder(ctx, recorders, ss.conn.srv.lb.Dialer().UserDial)
  1164. if err != nil {
  1165. if onFailure != nil && onFailure.NotifyURL != "" && len(attempts) > 0 {
  1166. eventType := tailcfg.SSHSessionRecordingFailed
  1167. if onFailure.RejectSessionWithMessage != "" {
  1168. eventType = tailcfg.SSHSessionRecordingRejected
  1169. }
  1170. ss.notifyControl(ctx, nodeKey, eventType, attempts, onFailure.NotifyURL)
  1171. }
  1172. if onFailure != nil && onFailure.RejectSessionWithMessage != "" {
  1173. ss.logf("recording: error starting recording (rejecting session): %v", err)
  1174. return nil, userVisibleError{
  1175. error: err,
  1176. msg: onFailure.RejectSessionWithMessage,
  1177. }
  1178. }
  1179. ss.logf("recording: error starting recording (failing open): %v", err)
  1180. return nil, nil
  1181. }
  1182. go func() {
  1183. err := <-errChan
  1184. if err == nil {
  1185. select {
  1186. case <-ss.ctx.Done():
  1187. // Success.
  1188. ss.logf("recording: finished uploading recording")
  1189. return
  1190. default:
  1191. err = errors.New("recording upload ended before the SSH session")
  1192. }
  1193. }
  1194. if onFailure != nil && onFailure.NotifyURL != "" && len(attempts) > 0 {
  1195. lastAttempt := attempts[len(attempts)-1]
  1196. lastAttempt.FailureMessage = err.Error()
  1197. eventType := tailcfg.SSHSessionRecordingFailed
  1198. if onFailure.TerminateSessionWithMessage != "" {
  1199. eventType = tailcfg.SSHSessionRecordingTerminated
  1200. }
  1201. ss.notifyControl(ctx, nodeKey, eventType, attempts, onFailure.NotifyURL)
  1202. }
  1203. if onFailure != nil && onFailure.TerminateSessionWithMessage != "" {
  1204. ss.logf("recording: error uploading recording (closing session): %v", err)
  1205. ss.cancelCtx(userVisibleError{
  1206. error: err,
  1207. msg: onFailure.TerminateSessionWithMessage,
  1208. })
  1209. return
  1210. }
  1211. ss.logf("recording: error uploading recording (failing open): %v", err)
  1212. }()
  1213. }
  1214. ch := sessionrecording.CastHeader{
  1215. Version: 2,
  1216. Width: w.Width,
  1217. Height: w.Height,
  1218. Timestamp: now.Unix(),
  1219. Command: strings.Join(ss.Command(), " "),
  1220. Env: map[string]string{
  1221. "TERM": term,
  1222. // TODO(bradfitz): anything else important?
  1223. // including all seems noisey, but maybe we should
  1224. // for auditing. But first need to break
  1225. // launchProcess's startWithStdPipes and
  1226. // startWithPTY up so that they first return the cmd
  1227. // without starting it, and then a step that starts
  1228. // it. Then we can (1) make the cmd, (2) start the
  1229. // recording, (3) start the process.
  1230. },
  1231. SSHUser: ss.conn.info.sshUser,
  1232. LocalUser: ss.conn.localUser.Username,
  1233. SrcNode: strings.TrimSuffix(ss.conn.info.node.Name(), "."),
  1234. SrcNodeID: ss.conn.info.node.StableID(),
  1235. ConnectionID: ss.conn.connID,
  1236. }
  1237. if !ss.conn.info.node.IsTagged() {
  1238. ch.SrcNodeUser = ss.conn.info.uprof.LoginName
  1239. ch.SrcNodeUserID = ss.conn.info.node.User()
  1240. } else {
  1241. ch.SrcNodeTags = ss.conn.info.node.Tags().AsSlice()
  1242. }
  1243. j, err := json.Marshal(ch)
  1244. if err != nil {
  1245. return nil, err
  1246. }
  1247. j = append(j, '\n')
  1248. if _, err := rec.out.Write(j); err != nil {
  1249. if errors.Is(err, io.ErrClosedPipe) && ss.ctx.Err() != nil {
  1250. // If we got an io.ErrClosedPipe, it's likely because
  1251. // the recording server closed the connection on us. Return
  1252. // the original context error instead.
  1253. return nil, context.Cause(ss.ctx)
  1254. }
  1255. return nil, err
  1256. }
  1257. return rec, nil
  1258. }
  1259. // notifyControl sends a SSHEventNotifyRequest to control over noise.
  1260. // A SSHEventNotifyRequest is sent when an action or state reached during
  1261. // an SSH session is a defined EventType.
  1262. func (ss *sshSession) notifyControl(ctx context.Context, nodeKey key.NodePublic, notifyType tailcfg.SSHEventType, attempts []*tailcfg.SSHRecordingAttempt, url string) {
  1263. re := tailcfg.SSHEventNotifyRequest{
  1264. EventType: notifyType,
  1265. ConnectionID: ss.conn.connID,
  1266. CapVersion: tailcfg.CurrentCapabilityVersion,
  1267. NodeKey: nodeKey,
  1268. SrcNode: ss.conn.info.node.ID(),
  1269. SSHUser: ss.conn.info.sshUser,
  1270. LocalUser: ss.conn.localUser.Username,
  1271. RecordingAttempts: attempts,
  1272. }
  1273. body, err := json.Marshal(re)
  1274. if err != nil {
  1275. ss.logf("notifyControl: unable to marshal SSHNotifyRequest:", err)
  1276. return
  1277. }
  1278. req, err := http.NewRequestWithContext(ctx, httpm.POST, url, bytes.NewReader(body))
  1279. if err != nil {
  1280. ss.logf("notifyControl: unable to create request:", err)
  1281. return
  1282. }
  1283. resp, err := ss.conn.srv.lb.DoNoiseRequest(req)
  1284. if err != nil {
  1285. ss.logf("notifyControl: unable to send noise request:", err)
  1286. return
  1287. }
  1288. if resp.StatusCode != http.StatusCreated {
  1289. ss.logf("notifyControl: noise request returned status code %v", resp.StatusCode)
  1290. return
  1291. }
  1292. }
// recording is the state for an SSH session recording.
type recording struct {
	// ss is the session being recorded.
	ss *sshSession

	// start is when the recording began; loggingWriter timestamps each
	// recorded line relative to it.
	start time.Time

	// failOpen specifies whether the session should be allowed to
	// continue if writing to the recording fails.
	failOpen bool

	mu sync.Mutex // guards writes to, close of out

	// out is where recording lines are written. Close sets it to nil.
	out io.WriteCloser
}
  1303. func (r *recording) Close() error {
  1304. r.mu.Lock()
  1305. defer r.mu.Unlock()
  1306. if r.out == nil {
  1307. return nil
  1308. }
  1309. err := r.out.Close()
  1310. r.out = nil
  1311. return err
  1312. }
  1313. // writer returns an io.Writer around w that first records the write.
  1314. //
  1315. // The dir should be "i" for input or "o" for output.
  1316. //
  1317. // If r is nil, it returns w unchanged.
  1318. //
  1319. // Currently (2023-03-21) we only record output, not input.
  1320. func (r *recording) writer(dir string, w io.Writer) io.Writer {
  1321. if r == nil {
  1322. return w
  1323. }
  1324. if dir == "i" {
  1325. // TODO: record input? Maybe not, since it might contain
  1326. // passwords.
  1327. return w
  1328. }
  1329. return &loggingWriter{r: r, dir: dir, w: w}
  1330. }
// loggingWriter is an io.Writer wrapper that writes first an
// asciinema JSON cast format recording line, and then writes to w.
type loggingWriter struct {
	r   *recording // recording that receives the cast lines
	dir string     // "i" or "o" (input or output)
	w   io.Writer  // underlying Writer, after writing to r.out

	// recordingFailedOpen specifies whether we've failed to write to
	// r.out and should stop trying. It is set to true if we fail to write
	// to r.out and r.failOpen is set.
	recordingFailedOpen bool
}
  1342. func (w *loggingWriter) Write(p []byte) (n int, err error) {
  1343. if !w.recordingFailedOpen {
  1344. j, err := json.Marshal([]any{
  1345. time.Since(w.r.start).Seconds(),
  1346. w.dir,
  1347. string(p),
  1348. })
  1349. if err != nil {
  1350. return 0, err
  1351. }
  1352. j = append(j, '\n')
  1353. if err := w.writeCastLine(j); err != nil {
  1354. if !w.r.failOpen {
  1355. return 0, err
  1356. }
  1357. w.recordingFailedOpen = true
  1358. }
  1359. }
  1360. return w.w.Write(p)
  1361. }
  1362. func (w loggingWriter) writeCastLine(j []byte) error {
  1363. w.r.mu.Lock()
  1364. defer w.r.mu.Unlock()
  1365. if w.r.out == nil {
  1366. return errors.New("logger closed")
  1367. }
  1368. _, err := w.r.out.Write(j)
  1369. if err != nil {
  1370. return fmt.Errorf("logger Write: %w", err)
  1371. }
  1372. return nil
  1373. }
  1374. func envValFromList(env []string, wantKey string) (v string) {
  1375. for _, kv := range env {
  1376. if thisKey, v, ok := strings.Cut(kv, "="); ok && envEq(thisKey, wantKey) {
  1377. return v
  1378. }
  1379. }
  1380. return ""
  1381. }
  1382. // envEq reports whether environment variable a == b for the current
  1383. // operating system.
  1384. func envEq(a, b string) bool {
  1385. //lint:ignore SA4032 in case this func moves elsewhere, permit the GOOS check
  1386. if runtime.GOOS == "windows" {
  1387. return strings.EqualFold(a, b)
  1388. }
  1389. return a == b
  1390. }
// Client metrics for the SSH server. metricActiveSessions is a gauge; the
// rest are counters.
var (
	metricActiveSessions      = clientmetric.NewGauge("ssh_active_sessions")
	metricIncomingConnections = clientmetric.NewCounter("ssh_incoming_connections")
	metricTerminalAccept      = clientmetric.NewCounter("ssh_terminalaction_accept")
	metricTerminalReject      = clientmetric.NewCounter("ssh_terminalaction_reject")
	metricTerminalMalformed   = clientmetric.NewCounter("ssh_terminalaction_malformed")
	metricTerminalFetchError  = clientmetric.NewCounter("ssh_terminalaction_fetch_error")
	metricHolds               = clientmetric.NewCounter("ssh_holds")
	metricPolicyChangeKick    = clientmetric.NewCounter("ssh_policy_change_kick")
	metricSFTP                = clientmetric.NewCounter("ssh_sftp_sessions")
	metricLocalPortForward    = clientmetric.NewCounter("ssh_local_port_forward_requests")
	metricRemotePortForward   = clientmetric.NewCounter("ssh_remote_port_forward_requests")
)
  1404. // userVisibleError is a wrapper around an error that implements
  1405. // SSHTerminationError, so msg is written to their session.
  1406. type userVisibleError struct {
  1407. msg string
  1408. error
  1409. }
  1410. func (ue userVisibleError) SSHTerminationMessage() string { return ue.msg }
// SSHTerminationError is implemented by errors that terminate an SSH
// session and should be written to user's sessions.
type SSHTerminationError interface {
	error

	// SSHTerminationMessage returns the text to show the user. An empty
	// string means there is nothing to show.
	SSHTerminationMessage() string
}
  1417. func closeAll(cs ...io.Closer) {
  1418. for _, c := range cs {
  1419. if c != nil {
  1420. c.Close()
  1421. }
  1422. }
  1423. }