tailssh.go 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. //go:build linux || (darwin && !ios) || freebsd || openbsd
  4. // Package tailssh is an SSH server integrated into Tailscale.
  5. package tailssh
  6. import (
  7. "bytes"
  8. "context"
  9. "crypto/rand"
  10. "encoding/base64"
  11. "encoding/json"
  12. "errors"
  13. "fmt"
  14. "io"
  15. "net"
  16. "net/http"
  17. "net/http/httptrace"
  18. "net/netip"
  19. "net/url"
  20. "os"
  21. "os/exec"
  22. "os/user"
  23. "path/filepath"
  24. "runtime"
  25. "strconv"
  26. "strings"
  27. "sync"
  28. "sync/atomic"
  29. "time"
  30. gossh "github.com/tailscale/golang-x-crypto/ssh"
  31. "tailscale.com/envknob"
  32. "tailscale.com/ipn/ipnlocal"
  33. "tailscale.com/logtail/backoff"
  34. "tailscale.com/net/tsaddr"
  35. "tailscale.com/net/tsdial"
  36. "tailscale.com/tailcfg"
  37. "tailscale.com/tempfork/gliderlabs/ssh"
  38. "tailscale.com/types/logger"
  39. "tailscale.com/types/netmap"
  40. "tailscale.com/util/clientmetric"
  41. "tailscale.com/util/mak"
  42. "tailscale.com/util/multierr"
  43. "tailscale.com/version/distro"
  44. )
  var (
  	// sshVerboseLogging is the boolean envknob registered under the
  	// TS_DEBUG_SSH_VLOG environment variable.
  	// NOTE(review): presumably gates verbose SSH debug logging; its readers
  	// are not visible in this part of the file — confirm.
  	sshVerboseLogging = envknob.RegisterBool("TS_DEBUG_SSH_VLOG")
  )
  const (
  	// forcePasswordSuffix is the suffix at the end of a username that forces
  	// Tailscale SSH into password authentication mode to work around buggy SSH
  	// clients that get confused by successful replies to auth type "none".
  	//
  	// The suffix is stripped from the requested SSH user in setInfo and is
  	// checked for in NoClientAuthCallback.
  	forcePasswordSuffix = "+password"
  )
  // ipnLocalBackend is the subset of ipnlocal.LocalBackend that we use.
  // It is used for testing.
  type ipnLocalBackend interface {
  	// GetSSH_HostKeys returns the host key signers that newConn installs on
  	// each connection's ssh.Server.
  	GetSSH_HostKeys() ([]gossh.Signer, error)
  	// ShouldRunSSH is consulted by sshPolicy; when it reports false no
  	// policy is returned.
  	ShouldRunSSH() bool
  	// NetMap returns the current network map, or nil. sshPolicy reads the
  	// SSH policy from it.
  	NetMap() *netmap.NetworkMap
  	// WhoIs maps a source address to a node and user profile. setInfo uses
  	// it to identify the connecting peer.
  	WhoIs(ipp netip.AddrPort) (n *tailcfg.Node, u tailcfg.UserProfile, ok bool)
  	// DoNoiseRequest sends req and returns the response.
  	// NOTE(review): callers are outside this part of the file; presumably
  	// used to talk to the control plane — confirm.
  	DoNoiseRequest(req *http.Request) (*http.Response, error)
  	// Dialer returns the backend's dialer.
  	// NOTE(review): callers are outside this part of the file.
  	Dialer() *tsdial.Dialer
  }
  // server is the Tailscale SSH server. One server handles many connections;
  // each accepted connection is represented by a conn.
  type server struct {
  	lb               ipnLocalBackend // backend used for host keys, netmap, and WhoIs lookups
  	logf             logger.Logf     // destination for log output
  	tailscaledPath   string          // path of the running executable, as set by init
  	pubKeyHTTPClient *http.Client    // or nil for http.DefaultClient
  	timeNow          func() time.Time // or nil for time.Now
  	sessionWaitGroup sync.WaitGroup  // waited on by Shutdown

  	// mu protects the following
  	mu                   sync.Mutex
  	activeConns          map[*conn]bool              // set; value is always true
  	fetchPublicKeysCache map[string]pubKeyCacheEntry // by https URL
  	shutdownCalled       bool                        // set by Shutdown; blocks new conns and sessions
  }
  77. func (srv *server) now() time.Time {
  78. if srv != nil && srv.timeNow != nil {
  79. return srv.timeNow()
  80. }
  81. return time.Now()
  82. }
  83. func init() {
  84. ipnlocal.RegisterNewSSHServer(func(logf logger.Logf, lb *ipnlocal.LocalBackend) (ipnlocal.SSHServer, error) {
  85. tsd, err := os.Executable()
  86. if err != nil {
  87. return nil, err
  88. }
  89. srv := &server{
  90. lb: lb,
  91. logf: logf,
  92. tailscaledPath: tsd,
  93. }
  94. return srv, nil
  95. })
  96. }
  97. // attachSessionToConnIfNotShutdown ensures that srv is not shutdown before
  98. // attaching the session to the conn. This ensures that once Shutdown is called,
  99. // new sessions are not allowed and existing ones are cleaned up.
  100. // It reports whether ss was attached to the conn.
  101. func (srv *server) attachSessionToConnIfNotShutdown(ss *sshSession) bool {
  102. srv.mu.Lock()
  103. defer srv.mu.Unlock()
  104. if srv.shutdownCalled {
  105. // Do not start any new sessions.
  106. return false
  107. }
  108. ss.conn.attachSession(ss)
  109. return true
  110. }
  111. func (srv *server) trackActiveConn(c *conn, add bool) {
  112. srv.mu.Lock()
  113. defer srv.mu.Unlock()
  114. if add {
  115. mak.Set(&srv.activeConns, c, true)
  116. return
  117. }
  118. delete(srv.activeConns, c)
  119. }
  120. // HandleSSHConn handles a Tailscale SSH connection from c.
  121. // This is the entry point for all SSH connections.
  122. // When this returns, the connection is closed.
  123. func (srv *server) HandleSSHConn(nc net.Conn) error {
  124. metricIncomingConnections.Add(1)
  125. c, err := srv.newConn()
  126. if err != nil {
  127. return err
  128. }
  129. srv.trackActiveConn(c, true) // add
  130. defer srv.trackActiveConn(c, false) // remove
  131. c.HandleConn(nc)
  132. // Return nil to signal to netstack's interception that it doesn't need to
  133. // log. If ss.HandleConn had problems, it can log itself (ideally on an
  134. // sshSession.logf).
  135. return nil
  136. }
  137. // Shutdown terminates all active sessions.
  138. func (srv *server) Shutdown() {
  139. srv.mu.Lock()
  140. srv.shutdownCalled = true
  141. for c := range srv.activeConns {
  142. c.Close()
  143. }
  144. srv.mu.Unlock()
  145. srv.sessionWaitGroup.Wait()
  146. }
  147. // OnPolicyChange terminates any active sessions that no longer match
  148. // the SSH access policy.
  149. func (srv *server) OnPolicyChange() {
  150. srv.mu.Lock()
  151. defer srv.mu.Unlock()
  152. for c := range srv.activeConns {
  153. if c.info == nil {
  154. // c.info is nil when the connection hasn't been authenticated yet.
  155. // In that case, the connection will be terminated when it is.
  156. continue
  157. }
  158. go c.checkStillValid()
  159. }
  160. }
  // conn represents a single SSH connection and its associated
  // ssh.Server.
  //
  // During the lifecycle of a connection, the following are called in order:
  // Setup and discover server info
  //   - ServerConfigCallback
  //
  // Do the user auth
  //   - NoClientAuthHandler
  //   - PublicKeyHandler (only if NoClientAuthHandler returns errPubKeyRequired)
  //
  // Once auth is done, the conn can be multiplexed with multiple sessions and
  // channels concurrently. At which point any of the following can be called
  // in any order.
  //   - c.handleSessionPostSSHAuth
  //   - c.mayForwardLocalPortTo followed by ssh.DirectTCPIPHandler
  type conn struct {
  	// Server is the per-connection ssh.Server; newConn wires its callbacks
  	// to the methods of this conn.
  	*ssh.Server
  	srv *server // the owning server

  	insecureSkipTailscaleAuth bool // used by tests.

  	// idH is the RFC4253 sec8 hash H. It is used to identify the connection,
  	// and is shared among all sessions. It should not be shared outside
  	// process. It is confusingly referred to as SessionID by the gliderlabs/ssh
  	// library.
  	idH    string
  	connID string // ID that's shared with control

  	// anyPasswordIsOkay is whether the client is authorized but has requested
  	// password-based auth to work around their buggy SSH client. When set, we
  	// accept any password in the PasswordHandler.
  	anyPasswordIsOkay bool // set by NoClientAuthCallback

  	action0        *tailcfg.SSHAction // set by doPolicyAuth; first matching action
  	currentAction  *tailcfg.SSHAction // set by doPolicyAuth, updated by resolveNextAction
  	finalAction    *tailcfg.SSHAction // set by doPolicyAuth or resolveNextAction
  	finalActionErr error              // set by doPolicyAuth or resolveNextAction

  	info         *sshConnInfo    // set by setInfo
  	localUser    *user.User      // set by doPolicyAuth
  	userGroupIDs []string        // set by doPolicyAuth
  	pubKey       gossh.PublicKey // set by doPolicyAuth

  	// mu protects the following fields.
  	//
  	// srv.mu should be acquired prior to mu.
  	// It is safe to just acquire mu, but unsafe to
  	// acquire mu and then srv.mu.
  	mu       sync.Mutex // protects the following
  	sessions []*sshSession
  }
  207. func (c *conn) logf(format string, args ...any) {
  208. format = fmt.Sprintf("%v: %v", c.connID, format)
  209. c.srv.logf(format, args...)
  210. }
  211. // isAuthorized walks through the action chain and returns nil if the connection
  212. // is authorized. If the connection is not authorized, it returns
  213. // gossh.ErrDenied. If the action chain resolution fails, it returns the
  214. // resolution error.
  215. func (c *conn) isAuthorized(ctx ssh.Context) error {
  216. action := c.currentAction
  217. for {
  218. if action.Accept {
  219. if c.pubKey != nil {
  220. metricPublicKeyAccepts.Add(1)
  221. }
  222. return nil
  223. }
  224. if action.Reject || action.HoldAndDelegate == "" {
  225. return gossh.ErrDenied
  226. }
  227. var err error
  228. action, err = c.resolveNextAction(ctx)
  229. if err != nil {
  230. return err
  231. }
  232. if action.Message != "" {
  233. if err := ctx.SendAuthBanner(action.Message); err != nil {
  234. return err
  235. }
  236. }
  237. }
  238. }
  // errPubKeyRequired is returned by NoClientAuthCallback to make the client
  // resort to public-key auth; not user visible.
  //
  // doPolicyAuth produces it when no policy matched without a key but some rule
  // could match with one, and nextAuthMethodCallback looks for it to offer the
  // "publickey" method.
  var errPubKeyRequired = errors.New("ssh publickey required")
  242. // NoClientAuthCallback implements gossh.NoClientAuthCallback and is called by
  243. // the ssh.Server when the client first connects with the "none"
  244. // authentication method.
  245. //
  246. // It is responsible for continuing policy evaluation from BannerCallback (or
  247. // starting it afresh). It returns an error if the policy evaluation fails, or
  248. // if the decision is "reject"
  249. //
  250. // It either returns nil (accept) or errPubKeyRequired or gossh.ErrDenied
  251. // (reject). The errors may be wrapped.
  252. func (c *conn) NoClientAuthCallback(ctx ssh.Context) error {
  253. if c.insecureSkipTailscaleAuth {
  254. return nil
  255. }
  256. if err := c.doPolicyAuth(ctx, nil /* no pub key */); err != nil {
  257. return err
  258. }
  259. if err := c.isAuthorized(ctx); err != nil {
  260. return err
  261. }
  262. // Let users specify a username ending in +password to force password auth.
  263. // This exists for buggy SSH clients that get confused by success from
  264. // "none" auth.
  265. if strings.HasSuffix(ctx.User(), forcePasswordSuffix) {
  266. c.anyPasswordIsOkay = true
  267. return errors.New("any password please") // not shown to users
  268. }
  269. return nil
  270. }
  271. func (c *conn) nextAuthMethodCallback(cm gossh.ConnMetadata, prevErrors []error) (nextMethod []string) {
  272. switch {
  273. case c.anyPasswordIsOkay:
  274. nextMethod = append(nextMethod, "password")
  275. case len(prevErrors) > 0 && prevErrors[len(prevErrors)-1] == errPubKeyRequired:
  276. nextMethod = append(nextMethod, "publickey")
  277. }
  278. // The fake "tailscale" method is always appended to next so OpenSSH renders
  279. // that in parens as the final failure. (It also shows up in "ssh -v", etc)
  280. nextMethod = append(nextMethod, "tailscale")
  281. return
  282. }
  // fakePasswordHandler is our implementation of the PasswordHandler hook that
  // checks whether the user's password is correct. But we don't actually use
  // passwords. This exists only for when the user's username ends in "+password"
  // to signal that their SSH client is buggy and gets confused by auth type
  // "none" succeeding and they want our SSH server to require a dummy password
  // prompt instead. We then accept any password since we've already authenticated
  // & authorized them.
  //
  // The supplied password is never inspected: the result is simply
  // c.anyPasswordIsOkay, which NoClientAuthCallback sets only after the
  // connection has been authorized.
  func (c *conn) fakePasswordHandler(ctx ssh.Context, password string) bool {
  	return c.anyPasswordIsOkay
  }
  293. // PublicKeyHandler implements ssh.PublicKeyHandler is called by the
  294. // ssh.Server when the client presents a public key.
  295. func (c *conn) PublicKeyHandler(ctx ssh.Context, pubKey ssh.PublicKey) error {
  296. if err := c.doPolicyAuth(ctx, pubKey); err != nil {
  297. // TODO(maisem/bradfitz): surface the error here.
  298. c.logf("rejecting SSH public key %s: %v", bytes.TrimSpace(gossh.MarshalAuthorizedKey(pubKey)), err)
  299. return err
  300. }
  301. if err := c.isAuthorized(ctx); err != nil {
  302. return err
  303. }
  304. c.logf("accepting SSH public key %s", bytes.TrimSpace(gossh.MarshalAuthorizedKey(pubKey)))
  305. return nil
  306. }
  307. // doPolicyAuth verifies that conn can proceed with the specified (optional)
  308. // pubKey. It returns nil if the matching policy action is Accept or
  309. // HoldAndDelegate. If pubKey is nil, there was no policy match but there is a
  310. // policy that might match a public key it returns errPubKeyRequired. Otherwise,
  311. // it returns gossh.ErrDenied.
  312. func (c *conn) doPolicyAuth(ctx ssh.Context, pubKey ssh.PublicKey) error {
  313. if err := c.setInfo(ctx); err != nil {
  314. c.logf("failed to get conninfo: %v", err)
  315. return gossh.ErrDenied
  316. }
  317. a, localUser, err := c.evaluatePolicy(pubKey)
  318. if err != nil {
  319. if pubKey == nil && c.havePubKeyPolicy() {
  320. return errPubKeyRequired
  321. }
  322. return fmt.Errorf("%w: %v", gossh.ErrDenied, err)
  323. }
  324. c.action0 = a
  325. c.currentAction = a
  326. c.pubKey = pubKey
  327. if a.Message != "" {
  328. if err := ctx.SendAuthBanner(a.Message); err != nil {
  329. return fmt.Errorf("SendBanner: %w", err)
  330. }
  331. }
  332. if a.Accept || a.HoldAndDelegate != "" {
  333. if a.Accept {
  334. c.finalAction = a
  335. }
  336. if runtime.GOOS == "linux" && distro.Get() == distro.Gokrazy {
  337. // Gokrazy is a single-user appliance with ~no userspace.
  338. // There aren't users to look up (no /etc/passwd, etc)
  339. // so rather than fail below, just hardcode root.
  340. // TODO(bradfitz): fix os/user upstream instead?
  341. c.userGroupIDs = []string{"0"}
  342. c.localUser = &user.User{Uid: "0", Gid: "0", Username: "root"}
  343. return nil
  344. }
  345. lu, err := user.Lookup(localUser)
  346. if err != nil {
  347. c.logf("failed to look up %v: %v", localUser, err)
  348. ctx.SendAuthBanner(fmt.Sprintf("failed to look up %v\r\n", localUser))
  349. return err
  350. }
  351. gids, err := lu.GroupIds()
  352. if err != nil {
  353. c.logf("failed to look up local user's group IDs: %v", err)
  354. return err
  355. }
  356. c.userGroupIDs = gids
  357. c.localUser = lu
  358. return nil
  359. }
  360. if a.Reject {
  361. c.finalAction = a
  362. return gossh.ErrDenied
  363. }
  364. // Shouldn't get here, but:
  365. return gossh.ErrDenied
  366. }
  367. // ServerConfig implements ssh.ServerConfigCallback.
  368. func (c *conn) ServerConfig(ctx ssh.Context) *gossh.ServerConfig {
  369. return &gossh.ServerConfig{
  370. NoClientAuth: true, // required for the NoClientAuthCallback to run
  371. NextAuthMethodCallback: c.nextAuthMethodCallback,
  372. }
  373. }
  374. func (srv *server) newConn() (*conn, error) {
  375. srv.mu.Lock()
  376. if srv.shutdownCalled {
  377. srv.mu.Unlock()
  378. // Stop accepting new connections.
  379. // Connections in the auth phase are handled in handleConnPostSSHAuth.
  380. // Existing sessions are terminated by Shutdown.
  381. return nil, gossh.ErrDenied
  382. }
  383. srv.mu.Unlock()
  384. c := &conn{srv: srv}
  385. now := srv.now()
  386. c.connID = fmt.Sprintf("ssh-conn-%s-%02x", now.UTC().Format("20060102T150405"), randBytes(5))
  387. c.Server = &ssh.Server{
  388. Version: "Tailscale",
  389. ServerConfigCallback: c.ServerConfig,
  390. NoClientAuthHandler: c.NoClientAuthCallback,
  391. PublicKeyHandler: c.PublicKeyHandler,
  392. PasswordHandler: c.fakePasswordHandler,
  393. Handler: c.handleSessionPostSSHAuth,
  394. LocalPortForwardingCallback: c.mayForwardLocalPortTo,
  395. SubsystemHandlers: map[string]ssh.SubsystemHandler{
  396. "sftp": c.handleSessionPostSSHAuth,
  397. },
  398. // Note: the direct-tcpip channel handler and LocalPortForwardingCallback
  399. // only adds support for forwarding ports from the local machine.
  400. // TODO(maisem/bradfitz): add remote port forwarding support.
  401. ChannelHandlers: map[string]ssh.ChannelHandler{
  402. "direct-tcpip": ssh.DirectTCPIPHandler,
  403. },
  404. RequestHandlers: map[string]ssh.RequestHandler{},
  405. }
  406. ss := c.Server
  407. for k, v := range ssh.DefaultRequestHandlers {
  408. ss.RequestHandlers[k] = v
  409. }
  410. for k, v := range ssh.DefaultChannelHandlers {
  411. ss.ChannelHandlers[k] = v
  412. }
  413. for k, v := range ssh.DefaultSubsystemHandlers {
  414. ss.SubsystemHandlers[k] = v
  415. }
  416. keys, err := srv.lb.GetSSH_HostKeys()
  417. if err != nil {
  418. return nil, err
  419. }
  420. for _, signer := range keys {
  421. ss.AddHostKey(signer)
  422. }
  423. return c, nil
  424. }
  425. // mayForwardLocalPortTo reports whether the ctx should be allowed to port forward
  426. // to the specified host and port.
  427. // TODO(bradfitz/maisem): should we have more checks on host/port?
  428. func (c *conn) mayForwardLocalPortTo(ctx ssh.Context, destinationHost string, destinationPort uint32) bool {
  429. if c.finalAction != nil && c.finalAction.AllowLocalPortForwarding {
  430. metricLocalPortForward.Add(1)
  431. return true
  432. }
  433. return false
  434. }
  435. // havePubKeyPolicy reports whether any policy rule may provide access by means
  436. // of a ssh.PublicKey.
  437. func (c *conn) havePubKeyPolicy() bool {
  438. if c.info == nil {
  439. panic("havePubKeyPolicy called before setInfo")
  440. }
  441. // Is there any rule that looks like it'd require a public key for this
  442. // sshUser?
  443. pol, ok := c.sshPolicy()
  444. if !ok {
  445. return false
  446. }
  447. for _, r := range pol.Rules {
  448. if c.ruleExpired(r) {
  449. continue
  450. }
  451. if mapLocalUser(r.SSHUsers, c.info.sshUser) == "" {
  452. continue
  453. }
  454. for _, p := range r.Principals {
  455. if len(p.PubKeys) > 0 && c.principalMatchesTailscaleIdentity(p) {
  456. return true
  457. }
  458. }
  459. }
  460. return false
  461. }
  462. // sshPolicy returns the SSHPolicy for current node.
  463. // If there is no SSHPolicy in the netmap, it returns a debugPolicy
  464. // if one is defined.
  465. func (c *conn) sshPolicy() (_ *tailcfg.SSHPolicy, ok bool) {
  466. lb := c.srv.lb
  467. if !lb.ShouldRunSSH() {
  468. return nil, false
  469. }
  470. nm := lb.NetMap()
  471. if nm == nil {
  472. return nil, false
  473. }
  474. if pol := nm.SSHPolicy; pol != nil && !envknob.SSHIgnoreTailnetPolicy() {
  475. return pol, true
  476. }
  477. debugPolicyFile := envknob.SSHPolicyFile()
  478. if debugPolicyFile != "" {
  479. c.logf("reading debug SSH policy file: %v", debugPolicyFile)
  480. f, err := os.ReadFile(debugPolicyFile)
  481. if err != nil {
  482. c.logf("error reading debug SSH policy file: %v", err)
  483. return nil, false
  484. }
  485. p := new(tailcfg.SSHPolicy)
  486. if err := json.Unmarshal(f, p); err != nil {
  487. c.logf("invalid JSON in %v: %v", debugPolicyFile, err)
  488. return nil, false
  489. }
  490. return p, true
  491. }
  492. return nil, false
  493. }
  // toIPPort converts a to a netip.AddrPort. It returns the zero AddrPort if a
  // is not a *net.TCPAddr or its IP is not a valid 4- or 16-byte address.
  // IPv4-mapped IPv6 addresses are unmapped to plain IPv4.
  func toIPPort(a net.Addr) (ipp netip.AddrPort) {
  	if tcp, isTCP := a.(*net.TCPAddr); isTCP {
  		if ip, valid := netip.AddrFromSlice(tcp.IP); valid {
  			ipp = netip.AddrPortFrom(ip.Unmap(), uint16(tcp.Port))
  		}
  	}
  	return ipp
  }
  505. // connInfo returns a populated sshConnInfo from the provided arguments,
  506. // validating only that they represent a known Tailscale identity.
  507. func (c *conn) setInfo(ctx ssh.Context) error {
  508. if c.info != nil {
  509. return nil
  510. }
  511. ci := &sshConnInfo{
  512. sshUser: strings.TrimSuffix(ctx.User(), forcePasswordSuffix),
  513. src: toIPPort(ctx.RemoteAddr()),
  514. dst: toIPPort(ctx.LocalAddr()),
  515. }
  516. if !tsaddr.IsTailscaleIP(ci.dst.Addr()) {
  517. return fmt.Errorf("tailssh: rejecting non-Tailscale local address %v", ci.dst)
  518. }
  519. if !tsaddr.IsTailscaleIP(ci.src.Addr()) {
  520. return fmt.Errorf("tailssh: rejecting non-Tailscale remote address %v", ci.src)
  521. }
  522. node, uprof, ok := c.srv.lb.WhoIs(ci.src)
  523. if !ok {
  524. return fmt.Errorf("unknown Tailscale identity from src %v", ci.src)
  525. }
  526. ci.node = node
  527. ci.uprof = uprof
  528. c.idH = ctx.SessionID()
  529. c.info = ci
  530. c.logf("handling conn: %v", ci.String())
  531. return nil
  532. }
  533. // evaluatePolicy returns the SSHAction and localUser after evaluating
  534. // the SSHPolicy for this conn. The pubKey may be nil for "none" auth.
  535. func (c *conn) evaluatePolicy(pubKey gossh.PublicKey) (_ *tailcfg.SSHAction, localUser string, _ error) {
  536. pol, ok := c.sshPolicy()
  537. if !ok {
  538. return nil, "", fmt.Errorf("tailssh: rejecting connection; no SSH policy")
  539. }
  540. a, localUser, ok := c.evalSSHPolicy(pol, pubKey)
  541. if !ok {
  542. return nil, "", fmt.Errorf("tailssh: rejecting connection; no matching policy")
  543. }
  544. return a, localUser, nil
  545. }
  // pubKeyCacheEntry is the cache value for an HTTPS URL of public keys (like
  // "https://github.com/foo.keys")
  type pubKeyCacheEntry struct {
  	lines []string  // lines of the response body; empty if nothing usable was fetched
  	etag  string    // if sent by server
  	at    time.Time // when the entry was stored
  }
  // Freshness windows applied by fetchPublicKeysURLCached.
  const (
  	pubKeyCacheDuration      = time.Minute      // how long to cache non-empty public keys
  	pubKeyCacheEmptyDuration = 15 * time.Second // how long to cache empty responses
  )
  557. func (srv *server) fetchPublicKeysURLCached(url string) (ce pubKeyCacheEntry, ok bool) {
  558. srv.mu.Lock()
  559. defer srv.mu.Unlock()
  560. // Mostly don't care about the size of this cache. Clean rarely.
  561. if m := srv.fetchPublicKeysCache; len(m) > 50 {
  562. tooOld := srv.now().Add(pubKeyCacheDuration * 10)
  563. for k, ce := range m {
  564. if ce.at.Before(tooOld) {
  565. delete(m, k)
  566. }
  567. }
  568. }
  569. ce, ok = srv.fetchPublicKeysCache[url]
  570. if !ok {
  571. return ce, false
  572. }
  573. maxAge := pubKeyCacheDuration
  574. if len(ce.lines) == 0 {
  575. maxAge = pubKeyCacheEmptyDuration
  576. }
  577. return ce, srv.now().Sub(ce.at) < maxAge
  578. }
  579. func (srv *server) pubKeyClient() *http.Client {
  580. if srv.pubKeyHTTPClient != nil {
  581. return srv.pubKeyHTTPClient
  582. }
  583. return http.DefaultClient
  584. }
  585. // fetchPublicKeysURL fetches the public keys from a URL. The strings are in the
  586. // the typical public key "type base64-string [comment]" format seen at e.g.
  587. // https://github.com/USER.keys
  588. func (srv *server) fetchPublicKeysURL(url string) ([]string, error) {
  589. if !strings.HasPrefix(url, "https://") {
  590. return nil, errors.New("invalid URL scheme")
  591. }
  592. ce, ok := srv.fetchPublicKeysURLCached(url)
  593. if ok {
  594. return ce.lines, nil
  595. }
  596. ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
  597. defer cancel()
  598. req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
  599. if err != nil {
  600. return nil, err
  601. }
  602. if ce.etag != "" {
  603. req.Header.Add("If-None-Match", ce.etag)
  604. }
  605. res, err := srv.pubKeyClient().Do(req)
  606. if err != nil {
  607. return nil, err
  608. }
  609. defer res.Body.Close()
  610. var lines []string
  611. var etag string
  612. switch res.StatusCode {
  613. default:
  614. err = fmt.Errorf("unexpected status %v", res.Status)
  615. srv.logf("fetching public keys from %s: %v", url, err)
  616. case http.StatusNotModified:
  617. lines = ce.lines
  618. etag = ce.etag
  619. case http.StatusOK:
  620. var all []byte
  621. all, err = io.ReadAll(io.LimitReader(res.Body, 4<<10))
  622. if s := strings.TrimSpace(string(all)); s != "" {
  623. lines = strings.Split(s, "\n")
  624. }
  625. etag = res.Header.Get("Etag")
  626. }
  627. srv.mu.Lock()
  628. defer srv.mu.Unlock()
  629. mak.Set(&srv.fetchPublicKeysCache, url, pubKeyCacheEntry{
  630. at: srv.now(),
  631. lines: lines,
  632. etag: etag,
  633. })
  634. return lines, err
  635. }
  636. // handleSessionPostSSHAuth runs an SSH session after the SSH-level authentication,
  637. // but not necessarily before all the Tailscale-level extra verification has
  638. // completed. It also handles SFTP requests.
  639. func (c *conn) handleSessionPostSSHAuth(s ssh.Session) {
  640. // Do this check after auth, but before starting the session.
  641. switch s.Subsystem() {
  642. case "sftp", "":
  643. metricSFTP.Add(1)
  644. default:
  645. fmt.Fprintf(s.Stderr(), "Unsupported subsystem %q\r\n", s.Subsystem())
  646. s.Exit(1)
  647. return
  648. }
  649. ss := c.newSSHSession(s)
  650. ss.logf("handling new SSH connection from %v (%v) to ssh-user %q", c.info.uprof.LoginName, c.info.src.Addr(), c.localUser.Username)
  651. ss.logf("access granted to %v as ssh-user %q", c.info.uprof.LoginName, c.localUser.Username)
  652. ss.run()
  653. }
  654. // resolveNextAction starts at c.currentAction and makes it way through the
  655. // action chain one step at a time. An action without a HoldAndDelegate is
  656. // considered the final action. Once a final action is reached, this function
  657. // will keep returning that action. It updates c.currentAction to the next
  658. // action in the chain. When the final action is reached, it also sets
  659. // c.finalAction to the final action.
  660. func (c *conn) resolveNextAction(sctx ssh.Context) (action *tailcfg.SSHAction, err error) {
  661. if c.finalAction != nil || c.finalActionErr != nil {
  662. return c.finalAction, c.finalActionErr
  663. }
  664. defer func() {
  665. if action != nil {
  666. c.currentAction = action
  667. if action.Accept || action.Reject {
  668. c.finalAction = action
  669. }
  670. }
  671. if err != nil {
  672. c.finalActionErr = err
  673. }
  674. }()
  675. ctx, cancel := context.WithCancel(sctx)
  676. defer cancel()
  677. // Loop processing/fetching Actions until one reaches a
  678. // terminal state (Accept, Reject, or invalid Action), or
  679. // until fetchSSHAction times out due to the context being
  680. // done (client disconnect) or its 30 minute timeout passes.
  681. // (Which is a long time for somebody to see login
  682. // instructions and go to a URL to do something.)
  683. action = c.currentAction
  684. if action.Accept || action.Reject {
  685. if action.Reject {
  686. metricTerminalReject.Add(1)
  687. } else {
  688. metricTerminalAccept.Add(1)
  689. }
  690. return action, nil
  691. }
  692. url := action.HoldAndDelegate
  693. if url == "" {
  694. metricTerminalMalformed.Add(1)
  695. return nil, errors.New("reached Action that lacked Accept, Reject, and HoldAndDelegate")
  696. }
  697. metricHolds.Add(1)
  698. url = c.expandDelegateURLLocked(url)
  699. nextAction, err := c.fetchSSHAction(ctx, url)
  700. if err != nil {
  701. metricTerminalFetchError.Add(1)
  702. return nil, fmt.Errorf("fetching SSHAction from %s: %w", url, err)
  703. }
  704. return nextAction, nil
  705. }
  706. func (c *conn) expandDelegateURLLocked(actionURL string) string {
  707. nm := c.srv.lb.NetMap()
  708. ci := c.info
  709. lu := c.localUser
  710. var dstNodeID string
  711. if nm != nil {
  712. dstNodeID = fmt.Sprint(int64(nm.SelfNode.ID))
  713. }
  714. return strings.NewReplacer(
  715. "$SRC_NODE_IP", url.QueryEscape(ci.src.Addr().String()),
  716. "$SRC_NODE_ID", fmt.Sprint(int64(ci.node.ID)),
  717. "$DST_NODE_IP", url.QueryEscape(ci.dst.Addr().String()),
  718. "$DST_NODE_ID", dstNodeID,
  719. "$SSH_USER", url.QueryEscape(ci.sshUser),
  720. "$LOCAL_USER", url.QueryEscape(lu.Username),
  721. ).Replace(actionURL)
  722. }
  723. func (c *conn) expandPublicKeyURL(pubKeyURL string) string {
  724. if !strings.Contains(pubKeyURL, "$") {
  725. return pubKeyURL
  726. }
  727. loginName := c.info.uprof.LoginName
  728. localPart, _, _ := strings.Cut(loginName, "@")
  729. return strings.NewReplacer(
  730. "$LOGINNAME_EMAIL", loginName,
  731. "$LOGINNAME_LOCALPART", localPart,
  732. ).Replace(pubKeyURL)
  733. }
// sshSession is an accepted Tailscale SSH session.
//
// It embeds the underlying ssh.Session and adds the per-session state used
// while the session runs: its context, its owning conn, and the launched
// process together with its I/O pipes.
type sshSession struct {
	ssh.Session
	sharedID string // ID that's shared with control

	logf      logger.Logf             // session logger
	ctx       context.Context         // session context; canceled via cancelCtx
	cancelCtx context.CancelCauseFunc // cancels ctx and records the cause
	conn      *conn                   // the conn this session belongs to

	agentListener net.Listener // non-nil if agent-forwarding requested+allowed

	// initialized by launchProcess:
	cmd    *exec.Cmd
	stdin  io.WriteCloser
	stdout io.ReadCloser
	stderr io.Reader // nil for pty sessions
	ptyReq *ssh.Pty  // non-nil for pty sessions

	// We use this sync.Once to ensure that we only terminate the process once,
	// either it exits itself or is terminated
	exitOnce sync.Once
}
  753. func (ss *sshSession) vlogf(format string, args ...any) {
  754. if sshVerboseLogging() {
  755. ss.logf(format, args...)
  756. }
  757. }
  758. func (c *conn) newSSHSession(s ssh.Session) *sshSession {
  759. sharedID := fmt.Sprintf("sess-%s-%02x", c.srv.now().UTC().Format("20060102T150405"), randBytes(5))
  760. c.logf("starting session: %v", sharedID)
  761. ctx, cancel := context.WithCancelCause(s.Context())
  762. return &sshSession{
  763. Session: s,
  764. sharedID: sharedID,
  765. ctx: ctx,
  766. cancelCtx: cancel,
  767. conn: c,
  768. logf: logger.WithPrefix(c.srv.logf, "ssh-session("+sharedID+"): "),
  769. }
  770. }
  771. // isStillValid reports whether the conn is still valid.
  772. func (c *conn) isStillValid() bool {
  773. a, localUser, err := c.evaluatePolicy(c.pubKey)
  774. if err != nil {
  775. return false
  776. }
  777. if !a.Accept && a.HoldAndDelegate == "" {
  778. return false
  779. }
  780. return c.localUser.Username == localUser
  781. }
  782. // checkStillValid checks that the conn is still valid per the latest SSHPolicy.
  783. // If not, it terminates all sessions associated with the conn.
  784. func (c *conn) checkStillValid() {
  785. if c.isStillValid() {
  786. return
  787. }
  788. metricPolicyChangeKick.Add(1)
  789. c.logf("session no longer valid per new SSH policy; closing")
  790. c.mu.Lock()
  791. defer c.mu.Unlock()
  792. for _, s := range c.sessions {
  793. s.cancelCtx(userVisibleError{
  794. fmt.Sprintf("Access revoked.\r\n"),
  795. context.Canceled,
  796. })
  797. }
  798. }
  799. func (c *conn) fetchSSHAction(ctx context.Context, url string) (*tailcfg.SSHAction, error) {
  800. ctx, cancel := context.WithTimeout(ctx, 30*time.Minute)
  801. defer cancel()
  802. bo := backoff.NewBackoff("fetch-ssh-action", c.logf, 10*time.Second)
  803. for {
  804. if err := ctx.Err(); err != nil {
  805. return nil, err
  806. }
  807. req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
  808. if err != nil {
  809. return nil, err
  810. }
  811. res, err := c.srv.lb.DoNoiseRequest(req)
  812. if err != nil {
  813. bo.BackOff(ctx, err)
  814. continue
  815. }
  816. if res.StatusCode != 200 {
  817. body, _ := io.ReadAll(res.Body)
  818. res.Body.Close()
  819. if len(body) > 1<<10 {
  820. body = body[:1<<10]
  821. }
  822. c.logf("fetch of %v: %s, %s", url, res.Status, body)
  823. bo.BackOff(ctx, fmt.Errorf("unexpected status: %v", res.Status))
  824. continue
  825. }
  826. a := new(tailcfg.SSHAction)
  827. err = json.NewDecoder(res.Body).Decode(a)
  828. res.Body.Close()
  829. if err != nil {
  830. c.logf("invalid next SSHAction JSON from %v: %v", url, err)
  831. bo.BackOff(ctx, err)
  832. continue
  833. }
  834. return a, nil
  835. }
  836. }
  837. // killProcessOnContextDone waits for ss.ctx to be done and kills the process,
  838. // unless the process has already exited.
  839. func (ss *sshSession) killProcessOnContextDone() {
  840. <-ss.ctx.Done()
  841. // Either the process has already exited, in which case this does nothing.
  842. // Or, the process is still running in which case this will kill it.
  843. ss.exitOnce.Do(func() {
  844. err := context.Cause(ss.ctx)
  845. if serr, ok := err.(SSHTerminationError); ok {
  846. msg := serr.SSHTerminationMessage()
  847. if msg != "" {
  848. io.WriteString(ss.Stderr(), "\r\n\r\n"+msg+"\r\n\r\n")
  849. }
  850. }
  851. ss.logf("terminating SSH session from %v: %v", ss.conn.info.src.Addr(), err)
  852. // We don't need to Process.Wait here, sshSession.run() does
  853. // the waiting regardless of termination reason.
  854. // TODO(maisem): should this be a SIGTERM followed by a SIGKILL?
  855. ss.cmd.Process.Kill()
  856. })
  857. }
  858. // attachSession registers ss as an active session.
  859. func (c *conn) attachSession(ss *sshSession) {
  860. c.srv.sessionWaitGroup.Add(1)
  861. if ss.sharedID == "" {
  862. panic("empty sharedID")
  863. }
  864. c.mu.Lock()
  865. defer c.mu.Unlock()
  866. c.sessions = append(c.sessions, ss)
  867. }
  868. // detachSession unregisters s from the list of active sessions.
  869. func (c *conn) detachSession(ss *sshSession) {
  870. defer c.srv.sessionWaitGroup.Done()
  871. c.mu.Lock()
  872. defer c.mu.Unlock()
  873. for i, s := range c.sessions {
  874. if s == ss {
  875. c.sessions = append(c.sessions[:i], c.sessions[i+1:]...)
  876. break
  877. }
  878. }
  879. }
// errSessionDone is the cancel cause that sshSession.run passes to
// ss.cancelCtx when it returns.
var errSessionDone = errors.New("session is done")
  881. // handleSSHAgentForwarding starts a Unix socket listener and in the background
  882. // forwards agent connections between the listener and the ssh.Session.
  883. // On success, it assigns ss.agentListener.
  884. func (ss *sshSession) handleSSHAgentForwarding(s ssh.Session, lu *user.User) error {
  885. if !ssh.AgentRequested(ss) || !ss.conn.finalAction.AllowAgentForwarding {
  886. return nil
  887. }
  888. ss.logf("ssh: agent forwarding requested")
  889. ln, err := ssh.NewAgentListener()
  890. if err != nil {
  891. return err
  892. }
  893. defer func() {
  894. if err != nil && ln != nil {
  895. ln.Close()
  896. }
  897. }()
  898. uid, err := strconv.ParseUint(lu.Uid, 10, 32)
  899. if err != nil {
  900. return err
  901. }
  902. gid, err := strconv.ParseUint(lu.Gid, 10, 32)
  903. if err != nil {
  904. return err
  905. }
  906. socket := ln.Addr().String()
  907. dir := filepath.Dir(socket)
  908. // Make sure the socket is accessible only by the user.
  909. if err := os.Chmod(socket, 0600); err != nil {
  910. return err
  911. }
  912. if err := os.Chown(socket, int(uid), int(gid)); err != nil {
  913. return err
  914. }
  915. // Make sure the dir is also accessible.
  916. if err := os.Chmod(dir, 0755); err != nil {
  917. return err
  918. }
  919. go ssh.ForwardAgentConnections(ln, s)
  920. ss.agentListener = ln
  921. return nil
  922. }
  923. // run is the entrypoint for a newly accepted SSH session.
  924. //
  925. // It handles ss once it's been accepted and determined
  926. // that it should run.
  927. func (ss *sshSession) run() {
  928. metricActiveSessions.Add(1)
  929. defer metricActiveSessions.Add(-1)
  930. defer ss.cancelCtx(errSessionDone)
  931. if attached := ss.conn.srv.attachSessionToConnIfNotShutdown(ss); !attached {
  932. fmt.Fprintf(ss, "Tailscale SSH is shutting down\r\n")
  933. ss.Exit(1)
  934. return
  935. }
  936. defer ss.conn.detachSession(ss)
  937. lu := ss.conn.localUser
  938. logf := ss.logf
  939. if ss.conn.finalAction.SessionDuration != 0 {
  940. t := time.AfterFunc(ss.conn.finalAction.SessionDuration, func() {
  941. ss.cancelCtx(userVisibleError{
  942. fmt.Sprintf("Session timeout of %v elapsed.", ss.conn.finalAction.SessionDuration),
  943. context.DeadlineExceeded,
  944. })
  945. })
  946. defer t.Stop()
  947. }
  948. if euid := os.Geteuid(); euid != 0 {
  949. if lu.Uid != fmt.Sprint(euid) {
  950. ss.logf("can't switch to user %q from process euid %v", lu.Username, euid)
  951. fmt.Fprintf(ss, "can't switch user\r\n")
  952. ss.Exit(1)
  953. return
  954. }
  955. }
  956. // Take control of the PTY so that we can configure it below.
  957. // See https://github.com/tailscale/tailscale/issues/4146
  958. ss.DisablePTYEmulation()
  959. var rec *recording // or nil if disabled
  960. if ss.Subsystem() != "sftp" {
  961. if err := ss.handleSSHAgentForwarding(ss, lu); err != nil {
  962. ss.logf("agent forwarding failed: %v", err)
  963. } else if ss.agentListener != nil {
  964. // TODO(maisem/bradfitz): add a way to close all session resources
  965. defer ss.agentListener.Close()
  966. }
  967. if ss.shouldRecord() {
  968. var err error
  969. rec, err = ss.startNewRecording()
  970. if err != nil {
  971. var uve userVisibleError
  972. if errors.As(err, &uve) {
  973. fmt.Fprintf(ss, "%s\r\n", uve.SSHTerminationMessage())
  974. } else {
  975. fmt.Fprintf(ss, "can't start new recording\r\n")
  976. }
  977. ss.logf("startNewRecording: %v", err)
  978. ss.Exit(1)
  979. return
  980. }
  981. if rec != nil {
  982. defer rec.Close()
  983. }
  984. }
  985. }
  986. err := ss.launchProcess()
  987. if err != nil {
  988. logf("start failed: %v", err.Error())
  989. if errors.Is(err, context.Canceled) {
  990. err := context.Cause(ss.ctx)
  991. var uve userVisibleError
  992. if errors.As(err, &uve) {
  993. fmt.Fprintf(ss, "%s\r\n", uve)
  994. }
  995. }
  996. ss.Exit(1)
  997. return
  998. }
  999. go ss.killProcessOnContextDone()
  1000. go func() {
  1001. defer ss.stdin.Close()
  1002. if _, err := io.Copy(rec.writer("i", ss.stdin), ss); err != nil {
  1003. logf("stdin copy: %v", err)
  1004. ss.cancelCtx(err)
  1005. }
  1006. }()
  1007. var openOutputStreams atomic.Int32
  1008. if ss.stderr != nil {
  1009. openOutputStreams.Store(2)
  1010. } else {
  1011. openOutputStreams.Store(1)
  1012. }
  1013. go func() {
  1014. defer ss.stdout.Close()
  1015. _, err := io.Copy(rec.writer("o", ss), ss.stdout)
  1016. if err != nil && !errors.Is(err, io.EOF) {
  1017. logf("stdout copy: %v", err)
  1018. ss.cancelCtx(err)
  1019. }
  1020. if openOutputStreams.Add(-1) == 0 {
  1021. ss.CloseWrite()
  1022. }
  1023. }()
  1024. // stderr is nil for ptys.
  1025. if ss.stderr != nil {
  1026. go func() {
  1027. _, err := io.Copy(ss.Stderr(), ss.stderr)
  1028. if err != nil {
  1029. logf("stderr copy: %v", err)
  1030. }
  1031. if openOutputStreams.Add(-1) == 0 {
  1032. ss.CloseWrite()
  1033. }
  1034. }()
  1035. }
  1036. err = ss.cmd.Wait()
  1037. // This will either make the SSH Termination goroutine be a no-op,
  1038. // or itself will be a no-op because the process was killed by the
  1039. // aforementioned goroutine.
  1040. ss.exitOnce.Do(func() {})
  1041. if err == nil {
  1042. ss.logf("Session complete")
  1043. ss.Exit(0)
  1044. return
  1045. }
  1046. if ee, ok := err.(*exec.ExitError); ok {
  1047. code := ee.ProcessState.ExitCode()
  1048. ss.logf("Wait: code=%v", code)
  1049. ss.Exit(code)
  1050. return
  1051. }
  1052. ss.logf("Wait: %v", err)
  1053. ss.Exit(1)
  1054. return
  1055. }
  1056. // recorders returns the list of recorders to use for this session.
  1057. // If the final action has a non-empty list of recorders, that list is
  1058. // returned. Otherwise, the list of recorders from the initial action
  1059. // is returned.
  1060. func (ss *sshSession) recorders() ([]netip.AddrPort, *tailcfg.SSHRecorderFailureAction) {
  1061. if len(ss.conn.finalAction.Recorders) > 0 {
  1062. return ss.conn.finalAction.Recorders, ss.conn.finalAction.OnRecordingFailure
  1063. }
  1064. return ss.conn.action0.Recorders, ss.conn.action0.OnRecordingFailure
  1065. }
  1066. func (ss *sshSession) shouldRecord() bool {
  1067. recs, _ := ss.recorders()
  1068. return len(recs) > 0
  1069. }
// sshConnInfo describes the two endpoints of an incoming SSH connection:
// who connected, from where, to where, and as which requested user.
type sshConnInfo struct {
	// sshUser is the requested local SSH username ("root", "alice", etc).
	sshUser string

	// src is the Tailscale IP and port that the connection came from.
	src netip.AddrPort

	// dst is the Tailscale IP and port that the connection came for.
	dst netip.AddrPort

	// node is srcIP's node.
	node *tailcfg.Node

	// uprof is node's UserProfile.
	uprof tailcfg.UserProfile
}
  1082. func (ci *sshConnInfo) String() string {
  1083. return fmt.Sprintf("%v->%v@%v", ci.src, ci.sshUser, ci.dst)
  1084. }
  1085. func (c *conn) ruleExpired(r *tailcfg.SSHRule) bool {
  1086. if r.RuleExpires == nil {
  1087. return false
  1088. }
  1089. return r.RuleExpires.Before(c.srv.now())
  1090. }
  1091. func (c *conn) evalSSHPolicy(pol *tailcfg.SSHPolicy, pubKey gossh.PublicKey) (a *tailcfg.SSHAction, localUser string, ok bool) {
  1092. for _, r := range pol.Rules {
  1093. if a, localUser, err := c.matchRule(r, pubKey); err == nil {
  1094. return a, localUser, true
  1095. }
  1096. }
  1097. return nil, "", false
  1098. }
// internal errors for testing; they don't escape to callers or logs.
//
// Each one names the reason matchRule declined a rule.
var (
	errNilRule        = errors.New("nil rule")                 // the rule itself was nil
	errNilAction      = errors.New("nil action")               // the rule had no Action
	errRuleExpired    = errors.New("rule expired")             // the rule's RuleExpires has passed
	errPrincipalMatch = errors.New("principal didn't match")   // no principal matched the connection
	errUserMatch      = errors.New("user didn't match")        // SSHUsers mapped to no local user
	errInvalidConn    = errors.New("invalid connection state") // nil conn or nil conn info
)
  1108. func (c *conn) matchRule(r *tailcfg.SSHRule, pubKey gossh.PublicKey) (a *tailcfg.SSHAction, localUser string, err error) {
  1109. if c == nil {
  1110. return nil, "", errInvalidConn
  1111. }
  1112. if c.info == nil {
  1113. c.logf("invalid connection state")
  1114. return nil, "", errInvalidConn
  1115. }
  1116. if r == nil {
  1117. return nil, "", errNilRule
  1118. }
  1119. if r.Action == nil {
  1120. return nil, "", errNilAction
  1121. }
  1122. if c.ruleExpired(r) {
  1123. return nil, "", errRuleExpired
  1124. }
  1125. if !r.Action.Reject {
  1126. // For all but Reject rules, SSHUsers is required.
  1127. // If SSHUsers is nil or empty, mapLocalUser will return an
  1128. // empty string anyway.
  1129. localUser = mapLocalUser(r.SSHUsers, c.info.sshUser)
  1130. if localUser == "" {
  1131. return nil, "", errUserMatch
  1132. }
  1133. }
  1134. if ok, err := c.anyPrincipalMatches(r.Principals, pubKey); err != nil {
  1135. return nil, "", err
  1136. } else if !ok {
  1137. return nil, "", errPrincipalMatch
  1138. }
  1139. return r.Action, localUser, nil
  1140. }
  1141. func mapLocalUser(ruleSSHUsers map[string]string, reqSSHUser string) (localUser string) {
  1142. v, ok := ruleSSHUsers[reqSSHUser]
  1143. if !ok {
  1144. v = ruleSSHUsers["*"]
  1145. }
  1146. if v == "=" {
  1147. return reqSSHUser
  1148. }
  1149. return v
  1150. }
  1151. func (c *conn) anyPrincipalMatches(ps []*tailcfg.SSHPrincipal, pubKey gossh.PublicKey) (bool, error) {
  1152. for _, p := range ps {
  1153. if p == nil {
  1154. continue
  1155. }
  1156. if ok, err := c.principalMatches(p, pubKey); err != nil {
  1157. return false, err
  1158. } else if ok {
  1159. return true, nil
  1160. }
  1161. }
  1162. return false, nil
  1163. }
  1164. func (c *conn) principalMatches(p *tailcfg.SSHPrincipal, pubKey gossh.PublicKey) (bool, error) {
  1165. if !c.principalMatchesTailscaleIdentity(p) {
  1166. return false, nil
  1167. }
  1168. return c.principalMatchesPubKey(p, pubKey)
  1169. }
  1170. // principalMatchesTailscaleIdentity reports whether one of p's four fields
  1171. // that match the Tailscale identity match (Node, NodeIP, UserLogin, Any).
  1172. // This function does not consider PubKeys.
  1173. func (c *conn) principalMatchesTailscaleIdentity(p *tailcfg.SSHPrincipal) bool {
  1174. ci := c.info
  1175. if p.Any {
  1176. return true
  1177. }
  1178. if !p.Node.IsZero() && ci.node != nil && p.Node == ci.node.StableID {
  1179. return true
  1180. }
  1181. if p.NodeIP != "" {
  1182. if ip, _ := netip.ParseAddr(p.NodeIP); ip == ci.src.Addr() {
  1183. return true
  1184. }
  1185. }
  1186. if p.UserLogin != "" && ci.uprof.LoginName == p.UserLogin {
  1187. return true
  1188. }
  1189. return false
  1190. }
  1191. func (c *conn) principalMatchesPubKey(p *tailcfg.SSHPrincipal, clientPubKey gossh.PublicKey) (bool, error) {
  1192. if len(p.PubKeys) == 0 {
  1193. return true, nil
  1194. }
  1195. if clientPubKey == nil {
  1196. return false, nil
  1197. }
  1198. knownKeys := p.PubKeys
  1199. if len(knownKeys) == 1 && strings.HasPrefix(knownKeys[0], "https://") {
  1200. var err error
  1201. knownKeys, err = c.srv.fetchPublicKeysURL(c.expandPublicKeyURL(knownKeys[0]))
  1202. if err != nil {
  1203. return false, err
  1204. }
  1205. }
  1206. for _, knownKey := range knownKeys {
  1207. if pubKeyMatchesAuthorizedKey(clientPubKey, knownKey) {
  1208. return true, nil
  1209. }
  1210. }
  1211. return false, nil
  1212. }
  1213. func pubKeyMatchesAuthorizedKey(pubKey ssh.PublicKey, wantKey string) bool {
  1214. wantKeyType, rest, ok := strings.Cut(wantKey, " ")
  1215. if !ok {
  1216. return false
  1217. }
  1218. if pubKey.Type() != wantKeyType {
  1219. return false
  1220. }
  1221. wantKeyB64, _, _ := strings.Cut(rest, " ")
  1222. wantKeyData, _ := base64.StdEncoding.DecodeString(wantKeyB64)
  1223. return len(wantKeyData) > 0 && bytes.Equal(pubKey.Marshal(), wantKeyData)
  1224. }
  1225. func randBytes(n int) []byte {
  1226. b := make([]byte, n)
  1227. if _, err := rand.Read(b); err != nil {
  1228. panic(err)
  1229. }
  1230. return b
  1231. }
// CastHeader is the header of an asciinema file.
//
// It is marshaled as a single JSON line at the start of a recording. The
// first group of fields follows the asciinema format; the second group is
// Tailscale-specific metadata about the connection.
type CastHeader struct {
	// Version is the asciinema file format version.
	Version int `json:"version"`

	// Width is the terminal width in characters.
	// It is non-zero for Pty sessions.
	Width int `json:"width"`

	// Height is the terminal height in characters.
	// It is non-zero for Pty sessions.
	Height int `json:"height"`

	// Timestamp is the unix timestamp of when the recording started.
	Timestamp int64 `json:"timestamp"`

	// Env is the environment variables of the session.
	// Only "TERM" is set (2023-03-22).
	Env map[string]string `json:"env"`

	// Command is the command that was executed.
	// Typically empty for shell sessions.
	Command string `json:"command,omitempty"`

	// Tailscale-specific fields:

	// SrcNode is the FQDN of the node originating the connection.
	// It is also the MagicDNS name for the node.
	// It does not have a trailing dot.
	// e.g. "host.tail-scale.ts.net"
	SrcNode string `json:"srcNode"`

	// SrcNodeID is the node ID of the node originating the connection.
	SrcNodeID tailcfg.StableNodeID `json:"srcNodeID"`

	// SrcNodeTags is the list of tags on the node originating the connection (if any).
	SrcNodeTags []string `json:"srcNodeTags,omitempty"`

	// SrcNodeUserID is the user ID of the node originating the connection (if not tagged).
	SrcNodeUserID tailcfg.UserID `json:"srcNodeUserID,omitempty"` // if not tagged

	// SrcNodeUser is the LoginName of the node originating the connection (if not tagged).
	SrcNodeUser string `json:"srcNodeUser,omitempty"`

	// SSHUser is the username as presented by the client.
	SSHUser string `json:"sshUser"` // as presented by the client

	// LocalUser is the effective username on the server.
	LocalUser string `json:"localUser"`
}
  1269. // sessionRecordingClient returns an http.Client that uses srv.lb.Dialer() to
  1270. // dial connections. This is used to make requests to the session recording
  1271. // server to upload session recordings.
  1272. // It uses the provided dialCtx to dial connections, and limits a single dial
  1273. // to 5 seconds.
  1274. func (ss *sshSession) sessionRecordingClient(dialCtx context.Context) (*http.Client, error) {
  1275. dialer := ss.conn.srv.lb.Dialer()
  1276. if dialer == nil {
  1277. return nil, errors.New("no peer API transport")
  1278. }
  1279. tr := dialer.PeerAPITransport().Clone()
  1280. dialContextFn := tr.DialContext
  1281. tr.DialContext = func(ctx context.Context, network, addr string) (net.Conn, error) {
  1282. perAttemptCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
  1283. defer cancel()
  1284. go func() {
  1285. select {
  1286. case <-perAttemptCtx.Done():
  1287. case <-dialCtx.Done():
  1288. cancel()
  1289. }
  1290. }()
  1291. return dialContextFn(perAttemptCtx, network, addr)
  1292. }
  1293. return &http.Client{
  1294. Transport: tr,
  1295. }, nil
  1296. }
  1297. // connectToRecorder connects to the recorder at any of the provided addresses.
  1298. // It returns the first successful response, or a multierr if all attempts fail.
  1299. //
  1300. // On success, it returns a WriteCloser that can be used to upload the
  1301. // recording, and a channel that will be sent an error (or nil) when the upload
  1302. // fails or completes.
  1303. func (ss *sshSession) connectToRecorder(ctx context.Context, recs []netip.AddrPort) (io.WriteCloser, <-chan error, error) {
  1304. if len(recs) == 0 {
  1305. return nil, nil, errors.New("no recorders configured")
  1306. }
  1307. // We use a special context for dialing the recorder, so that we can
  1308. // limit the time we spend dialing to 30 seconds and still have an
  1309. // unbounded context for the upload.
  1310. dialCtx, dialCancel := context.WithTimeout(ctx, 30*time.Second)
  1311. defer dialCancel()
  1312. hc, err := ss.sessionRecordingClient(dialCtx)
  1313. if err != nil {
  1314. return nil, nil, err
  1315. }
  1316. var errs []error
  1317. for _, ap := range recs {
  1318. // We dial the recorder and wait for it to send a 100-continue
  1319. // response before returning from this function. This ensures that
  1320. // the recorder is ready to accept the recording.
  1321. // got100 is closed when we receive the 100-continue response.
  1322. got100 := make(chan struct{})
  1323. ctx = httptrace.WithClientTrace(ctx, &httptrace.ClientTrace{
  1324. Got100Continue: func() {
  1325. close(got100)
  1326. },
  1327. })
  1328. pr, pw := io.Pipe()
  1329. req, err := http.NewRequestWithContext(ctx, "POST", fmt.Sprintf("http://%s:%d/record", ap.Addr(), ap.Port()), pr)
  1330. if err != nil {
  1331. errs = append(errs, fmt.Errorf("recording: error starting recording: %w", err))
  1332. continue
  1333. }
  1334. // We set the Expect header to 100-continue, so that the recorder
  1335. // will send a 100-continue response before it starts reading the
  1336. // request body.
  1337. req.Header.Set("Expect", "100-continue")
  1338. // errChan is used to indicate the result of the request.
  1339. errChan := make(chan error, 1)
  1340. go func() {
  1341. resp, err := hc.Do(req)
  1342. if err != nil {
  1343. errChan <- fmt.Errorf("recording: error starting recording: %w", err)
  1344. return
  1345. }
  1346. if resp.StatusCode != 200 {
  1347. errChan <- fmt.Errorf("recording: unexpected status: %v", resp.Status)
  1348. return
  1349. }
  1350. errChan <- nil
  1351. }()
  1352. select {
  1353. case <-got100:
  1354. case err := <-errChan:
  1355. // If we get an error before we get the 100-continue response,
  1356. // we need to try another recorder.
  1357. if err == nil {
  1358. // If the error is nil, we got a 200 response, which
  1359. // is unexpected as we haven't sent any data yet.
  1360. err = errors.New("recording: unexpected EOF")
  1361. }
  1362. errs = append(errs, err)
  1363. continue
  1364. }
  1365. return pw, errChan, nil
  1366. }
  1367. return nil, nil, multierr.New(errs...)
  1368. }
  1369. // startNewRecording starts a new SSH session recording.
  1370. // It may return a nil recording if recording is not available.
  1371. func (ss *sshSession) startNewRecording() (_ *recording, err error) {
  1372. recorders, onFailure := ss.recorders()
  1373. if len(recorders) == 0 {
  1374. return nil, errors.New("no recorders configured")
  1375. }
  1376. var w ssh.Window
  1377. if ptyReq, _, isPtyReq := ss.Pty(); isPtyReq {
  1378. w = ptyReq.Window
  1379. }
  1380. term := envValFromList(ss.Environ(), "TERM")
  1381. if term == "" {
  1382. term = "xterm-256color" // something non-empty
  1383. }
  1384. now := time.Now()
  1385. rec := &recording{
  1386. ss: ss,
  1387. start: now,
  1388. failOpen: onFailure == nil || onFailure.TerminateSessionWithMessage == "",
  1389. }
  1390. // We want to use a background context for uploading and not ss.ctx.
  1391. // ss.ctx is closed when the session closes, but we don't want to break the upload at that time.
  1392. // Instead we want to wait for the session to close the writer when it finishes.
  1393. ctx := context.Background()
  1394. wc, errChan, err := ss.connectToRecorder(ctx, recorders)
  1395. if err != nil {
  1396. // TODO(catzkorn): notify control here.
  1397. if onFailure != nil && onFailure.RejectSessionWithMessage != "" {
  1398. ss.logf("recording: error starting recording (rejecting session): %v", err)
  1399. return nil, userVisibleError{
  1400. error: err,
  1401. msg: onFailure.RejectSessionWithMessage,
  1402. }
  1403. }
  1404. ss.logf("recording: error starting recording (failing open): %v", err)
  1405. return nil, nil
  1406. }
  1407. go func() {
  1408. err := <-errChan
  1409. if err == nil {
  1410. // Success.
  1411. return
  1412. }
  1413. // TODO(catzkorn): notify control here.
  1414. if onFailure != nil && onFailure.TerminateSessionWithMessage != "" {
  1415. ss.logf("recording: error uploading recording (closing session): %v", err)
  1416. ss.cancelCtx(userVisibleError{
  1417. error: err,
  1418. msg: onFailure.TerminateSessionWithMessage,
  1419. })
  1420. return
  1421. }
  1422. ss.logf("recording: error uploading recording (failing open): %v", err)
  1423. }()
  1424. rec.out = wc
  1425. ch := CastHeader{
  1426. Version: 2,
  1427. Width: w.Width,
  1428. Height: w.Height,
  1429. Timestamp: now.Unix(),
  1430. Command: strings.Join(ss.Command(), " "),
  1431. Env: map[string]string{
  1432. "TERM": term,
  1433. // TODO(bradfitz): anything else important?
  1434. // including all seems noisey, but maybe we should
  1435. // for auditing. But first need to break
  1436. // launchProcess's startWithStdPipes and
  1437. // startWithPTY up so that they first return the cmd
  1438. // without starting it, and then a step that starts
  1439. // it. Then we can (1) make the cmd, (2) start the
  1440. // recording, (3) start the process.
  1441. },
  1442. SSHUser: ss.conn.info.sshUser,
  1443. LocalUser: ss.conn.localUser.Username,
  1444. SrcNode: strings.TrimSuffix(ss.conn.info.node.Name, "."),
  1445. SrcNodeID: ss.conn.info.node.StableID,
  1446. }
  1447. if !ss.conn.info.node.IsTagged() {
  1448. ch.SrcNodeUser = ss.conn.info.uprof.LoginName
  1449. ch.SrcNodeUserID = ss.conn.info.node.User
  1450. } else {
  1451. ch.SrcNodeTags = ss.conn.info.node.Tags
  1452. }
  1453. j, err := json.Marshal(ch)
  1454. if err != nil {
  1455. return nil, err
  1456. }
  1457. j = append(j, '\n')
  1458. if _, err := rec.out.Write(j); err != nil {
  1459. if errors.Is(err, io.ErrClosedPipe) && ss.ctx.Err() != nil {
  1460. // If we got an io.ErrClosedPipe, it's likely because
  1461. // the recording server closed the connection on us. Return
  1462. // the original context error instead.
  1463. return nil, context.Cause(ss.ctx)
  1464. }
  1465. return nil, err
  1466. }
  1467. return rec, nil
  1468. }
// recording is the state for an SSH session recording.
type recording struct {
	ss    *sshSession // the session being recorded
	start time.Time   // when the recording began; cast line times are relative to it

	// failOpen specifies whether the session should be allowed to
	// continue if writing to the recording fails.
	failOpen bool

	mu  sync.Mutex // guards writes to, close of out
	out io.WriteCloser // destination for the recording; nil once closed
}
  1479. func (r *recording) Close() error {
  1480. r.mu.Lock()
  1481. defer r.mu.Unlock()
  1482. if r.out == nil {
  1483. return nil
  1484. }
  1485. err := r.out.Close()
  1486. r.out = nil
  1487. return err
  1488. }
  1489. // writer returns an io.Writer around w that first records the write.
  1490. //
  1491. // The dir should be "i" for input or "o" for output.
  1492. //
  1493. // If r is nil, it returns w unchanged.
  1494. //
  1495. // Currently (2023-03-21) we only record output, not input.
  1496. func (r *recording) writer(dir string, w io.Writer) io.Writer {
  1497. if r == nil {
  1498. return w
  1499. }
  1500. if dir == "i" {
  1501. // TODO: record input? Maybe not, since it might contain
  1502. // passwords.
  1503. return w
  1504. }
  1505. return &loggingWriter{r: r, dir: dir, w: w}
  1506. }
// loggingWriter is an io.Writer wrapper that writes first an
// asciinema JSON cast format recording line, and then writes to w.
type loggingWriter struct {
	r   *recording // recording that receives the cast lines
	dir string     // "i" or "o" (input or output)
	w   io.Writer  // underlying Writer, after writing to r.out

	// recordingFailedOpen specifies whether we've failed to write to
	// r.out and should stop trying. It is set to true if we fail to write
	// to r.out and r.failOpen is set.
	recordingFailedOpen bool
}
  1518. func (w *loggingWriter) Write(p []byte) (n int, err error) {
  1519. if !w.recordingFailedOpen {
  1520. j, err := json.Marshal([]any{
  1521. time.Since(w.r.start).Seconds(),
  1522. w.dir,
  1523. string(p),
  1524. })
  1525. if err != nil {
  1526. return 0, err
  1527. }
  1528. j = append(j, '\n')
  1529. if err := w.writeCastLine(j); err != nil {
  1530. if !w.r.failOpen {
  1531. return 0, err
  1532. }
  1533. w.recordingFailedOpen = true
  1534. }
  1535. }
  1536. return w.w.Write(p)
  1537. }
  1538. func (w loggingWriter) writeCastLine(j []byte) error {
  1539. w.r.mu.Lock()
  1540. defer w.r.mu.Unlock()
  1541. if w.r.out == nil {
  1542. return errors.New("logger closed")
  1543. }
  1544. _, err := w.r.out.Write(j)
  1545. if err != nil {
  1546. return fmt.Errorf("logger Write: %w", err)
  1547. }
  1548. return nil
  1549. }
  1550. func envValFromList(env []string, wantKey string) (v string) {
  1551. for _, kv := range env {
  1552. if thisKey, v, ok := strings.Cut(kv, "="); ok && envEq(thisKey, wantKey) {
  1553. return v
  1554. }
  1555. }
  1556. return ""
  1557. }
  1558. // envEq reports whether environment variable a == b for the current
  1559. // operating system.
  1560. func envEq(a, b string) bool {
  1561. if runtime.GOOS == "windows" {
  1562. return strings.EqualFold(a, b)
  1563. }
  1564. return a == b
  1565. }
// Client metrics for the SSH server. The string passed to each constructor
// is the metric's name.
var (
	metricActiveSessions       = clientmetric.NewGauge("ssh_active_sessions") // incremented while sshSession.run is executing
	metricIncomingConnections  = clientmetric.NewCounter("ssh_incoming_connections")
	metricPublicKeyConnections = clientmetric.NewCounter("ssh_publickey_connections") // total
	metricPublicKeyAccepts     = clientmetric.NewCounter("ssh_publickey_accepts")     // accepted subset of ssh_publickey_connections
	metricTerminalAccept       = clientmetric.NewCounter("ssh_terminalaction_accept")      // action chain reached an Accept
	metricTerminalReject       = clientmetric.NewCounter("ssh_terminalaction_reject")      // action chain reached a Reject
	metricTerminalInterrupt    = clientmetric.NewCounter("ssh_terminalaction_interrupt")
	metricTerminalMalformed    = clientmetric.NewCounter("ssh_terminalaction_malformed")   // action lacked Accept, Reject, and HoldAndDelegate
	metricTerminalFetchError   = clientmetric.NewCounter("ssh_terminalaction_fetch_error") // fetching the delegated action failed
	metricHolds                = clientmetric.NewCounter("ssh_holds")                      // HoldAndDelegate actions followed
	metricPolicyChangeKick     = clientmetric.NewCounter("ssh_policy_change_kick")         // conns found invalid by checkStillValid
	metricSFTP                 = clientmetric.NewCounter("ssh_sftp_requests")
	metricLocalPortForward     = clientmetric.NewCounter("ssh_local_port_forward_requests")
)
  1581. // userVisibleError is a wrapper around an error that implements
  1582. // SSHTerminationError, so msg is written to their session.
  1583. type userVisibleError struct {
  1584. msg string
  1585. error
  1586. }
  1587. func (ue userVisibleError) SSHTerminationMessage() string { return ue.msg }
// SSHTerminationError is implemented by errors that terminate an SSH
// session and should be written to user's sessions.
type SSHTerminationError interface {
	error
	// SSHTerminationMessage returns the text to show to the user.
	// An empty message means nothing is written.
	SSHTerminationMessage() string
}