Browse Source

ssh/tailssh: add a new sshSession type to clean up existing+future code

Updates #3802

Change-Id: I7054dca387f5e5aee1185937ecf41b77a5a07f1a
Signed-off-by: Brad Fitzpatrick <[email protected]>
Co-authored-by: Maisem Ali <[email protected]>
Brad Fitzpatrick 4 years ago
parent
commit
6e86bbcb06
3 changed files with 188 additions and 89 deletions
  1. 60 28
      ssh/tailssh/incubator.go
  2. 126 57
      ssh/tailssh/tailssh.go
  3. 2 4
      ssh/tailssh/tailssh_test.go

+ 60 - 28
ssh/tailssh/incubator.go

@@ -14,6 +14,7 @@ package tailssh
 
 import (
 	"context"
+	"errors"
 	"flag"
 	"fmt"
 	"io"
@@ -54,11 +55,15 @@ var maybeStartLoginSession = func(logf logger.Logf, uid uint32, localUser, remot
 
 // newIncubatorCommand returns a new exec.Cmd configured with
 // `tailscaled be-child ssh` as the entrypoint.
-// If tailscaled is empty, the desired cmd is executed directly.
-func newIncubatorCommand(ctx context.Context, ci *sshConnInfo, lu *user.User, tailscaled, name string, args []string) *exec.Cmd {
-	if tailscaled == "" {
+//
+// If ss.srv.tailscaledPath is empty, this method is equivalent to
+// exec.CommandContext.
+func (ss *sshSession) newIncubatorCommand(ctx context.Context, name string, args []string) *exec.Cmd {
+	if ss.srv.tailscaledPath == "" {
 		return exec.CommandContext(ctx, name, args...)
 	}
+	lu := ss.localUser
+	ci := ss.connInfo
 	remoteUser := ci.uprof.LoginName
 	if len(ci.node.Tags) > 0 {
 		remoteUser = strings.Join(ci.node.Tags, ",")
@@ -80,7 +85,7 @@ func newIncubatorCommand(ctx context.Context, ci *sshConnInfo, lu *user.User, ta
 		incubatorArgs = append(incubatorArgs, args...)
 	}
 
-	return exec.CommandContext(ctx, tailscaled, incubatorArgs...)
+	return exec.CommandContext(ctx, ss.srv.tailscaledPath, incubatorArgs...)
 }
 
 const debugIncubator = false
@@ -158,37 +163,44 @@ func beIncubator(args []string) error {
 // launchProcess launches an incubator process for the provided session.
 // It is responsible for configuring the process execution environment.
 // The caller can wait for the process to exit by calling cmd.Wait().
-func (srv *server) launchProcess(ctx context.Context, s ssh.Session, ci *sshConnInfo, lu *user.User) (cmd *exec.Cmd, stdin io.WriteCloser, stdout, stderr io.Reader, err error) {
-	shell := loginShell(lu.Uid)
+//
+// It sets ss.cmd, stdin, stdout, and stderr.
+func (ss *sshSession) launchProcess(ctx context.Context) error {
+	shell := loginShell(ss.localUser.Uid)
 	var args []string
-	if rawCmd := s.RawCommand(); rawCmd != "" {
+	if rawCmd := ss.RawCommand(); rawCmd != "" {
 		args = append(args, "-c", rawCmd)
 	} else {
 		args = append(args, "-l") // login shell
 	}
-	ptyReq, winCh, isPty := s.Pty()
 
-	cmd = newIncubatorCommand(ctx, ci, lu, srv.tailscaledPath, shell, args)
-	cmd.Dir = lu.HomeDir
-	cmd.Env = append(cmd.Env, envForUser(lu)...)
-	cmd.Env = append(cmd.Env, s.Environ()...)
+	ci := ss.connInfo
+	cmd := ss.newIncubatorCommand(ctx, shell, args)
+	cmd.Dir = ss.localUser.HomeDir
+	cmd.Env = append(cmd.Env, envForUser(ss.localUser)...)
+	cmd.Env = append(cmd.Env, ss.Environ()...)
 	cmd.Env = append(cmd.Env,
 		fmt.Sprintf("SSH_CLIENT=%s %d %d", ci.src.IP(), ci.src.Port(), ci.dst.Port()),
 		fmt.Sprintf("SSH_CONNECTION=%s %d %s %d", ci.src.IP(), ci.src.Port(), ci.dst.IP(), ci.dst.Port()),
 	)
-	srv.logf("ssh: starting: %+v", cmd.Args)
 
+	ss.cmd = cmd
+
+	ptyReq, winCh, isPty := ss.Pty()
 	if !isPty {
-		stdin, stdout, stderr, err = startWithStdPipes(cmd)
-		return
+		ss.logf("starting non-pty command: %+v", cmd.Args)
+		return ss.startWithStdPipes()
 	}
-	pty, err := srv.startWithPTY(cmd, ptyReq)
+	ss.ptyReq = &ptyReq
+	ss.logf("starting pty command: %+v", cmd.Args)
+	pty, err := ss.startWithPTY()
 	if err != nil {
-		return nil, nil, nil, nil, err
+		return err
 	}
 	go resizeWindow(pty, winCh)
-	// When using a pty we don't get a separate reader for stderr.
-	return cmd, pty, pty, nil, nil
+	ss.stdout = pty // no stderr for a pty
+	ss.stdin = pty
+	return nil
 }
 
 func resizeWindow(f *os.File, winCh <-chan ssh.Window) {
@@ -263,7 +275,16 @@ var opcodeShortName = map[uint8]string{
 }
 
 // startWithPTY starts cmd with a psuedo-terminal attached to Stdin, Stdout and Stderr.
-func (srv *server) startWithPTY(cmd *exec.Cmd, ptyReq ssh.Pty) (ptyFile *os.File, err error) {
+func (ss *sshSession) startWithPTY() (ptyFile *os.File, err error) {
+	ptyReq := ss.ptyReq
+	cmd := ss.cmd
+	if cmd == nil {
+		return nil, errors.New("nil ss.cmd")
+	}
+	if ptyReq == nil {
+		return nil, errors.New("nil ss.ptyReq")
+	}
+
 	var tty *os.File
 	ptyFile, tty, err = pty.Open()
 	if err != nil {
@@ -305,7 +326,7 @@ func (srv *server) startWithPTY(cmd *exec.Cmd, ptyReq ssh.Pty) (ptyFile *os.File
 			}
 			k, ok := opcodeShortName[c]
 			if !ok {
-				srv.logf("unknown opcode: %d", c)
+				ss.logf("unknown opcode: %d", c)
 				continue
 			}
 			if _, ok := tios.CC[k]; ok {
@@ -316,7 +337,7 @@ func (srv *server) startWithPTY(cmd *exec.Cmd, ptyReq ssh.Pty) (ptyFile *os.File
 				tios.Opts[k] = v > 0
 				continue
 			}
-			srv.logf("unsupported opcode: %v(%d)=%v", k, c, v)
+			ss.logf("unsupported opcode: %v(%d)=%v", k, c, v)
 		}
 
 		// Save PTY settings.
@@ -355,7 +376,9 @@ func (srv *server) startWithPTY(cmd *exec.Cmd, ptyReq ssh.Pty) (ptyFile *os.File
 }
 
 // startWithStdPipes starts cmd with os.Pipe for Stdin, Stdout and Stderr.
-func startWithStdPipes(cmd *exec.Cmd) (stdin io.WriteCloser, stdout, stderr io.ReadCloser, err error) {
+func (ss *sshSession) startWithStdPipes() (err error) {
+	var stdin io.WriteCloser
+	var stdout, stderr io.ReadCloser
 	defer func() {
 		if err != nil {
 			for _, c := range []io.Closer{stdin, stdout, stderr} {
@@ -365,20 +388,29 @@ func startWithStdPipes(cmd *exec.Cmd) (stdin io.WriteCloser, stdout, stderr io.R
 			}
 		}
 	}()
+	cmd := ss.cmd
+	if cmd == nil {
+		return errors.New("nil cmd")
+	}
 	stdin, err = cmd.StdinPipe()
 	if err != nil {
-		return
+		return err
 	}
 	stdout, err = cmd.StdoutPipe()
 	if err != nil {
-		return
+		return err
 	}
 	stderr, err = cmd.StderrPipe()
 	if err != nil {
-		return
+		return err
+	}
+	if err := cmd.Start(); err != nil {
+		return err
 	}
-	err = cmd.Start()
-	return
+	ss.stdin = stdin
+	ss.stdout = stdout
+	ss.stderr = stderr
+	return nil
 }
 
 func loginShell(uid string) string {

+ 126 - 57
ssh/tailssh/tailssh.go

@@ -10,6 +10,7 @@ package tailssh
 
 import (
 	"context"
+	"crypto/rand"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -95,8 +96,9 @@ type server struct {
 	tailscaledPath string
 
 	// mu protects activeSessions.
-	mu             sync.Mutex
-	activeSessions map[string]bool
+	mu                      sync.Mutex
+	activeSessionByH        map[string]*sshSession // ssh.SessionID (DH H) => that session
+	activeSessionBySharedID map[string]*sshSession // yyymmddThhmmss-XXXXX => session
 }
 
 var debugPolicyFile = envknob.String("TS_DEBUG_SSH_POLICY_FILE")
@@ -244,19 +246,48 @@ ProcessAction:
 		return
 	}
 
-	var ctx context.Context = context.Background()
-	if action.SesssionDuration != 0 {
-		sctx := newSSHContext()
-		ctx = sctx
-		t := time.AfterFunc(action.SesssionDuration, func() {
-			sctx.CloseWithError(userVisibleError{
-				fmt.Sprintf("Session timeout of %v elapsed.", action.SesssionDuration),
-				context.DeadlineExceeded,
-			})
-		})
-		defer t.Stop()
+	ss := srv.newSSHSession(s, ci, lu, action)
+	ss.run()
+}
+
+// sshSession is an accepted Tailscale SSH session.
+type sshSession struct {
+	ssh.Session
+	idH      string // the RFC4253 sec8 hash H; don't share outside process
+	sharedID string // ID that's shared with control
+	logf     logger.Logf
+
+	ctx       *sshContext // implements context.Context
+	srv       *server
+	connInfo  *sshConnInfo
+	action    *tailcfg.SSHAction
+	localUser *user.User
+
+	// initialized by launchProcess:
+	cmd    *exec.Cmd
+	stdin  io.WriteCloser
+	stdout io.Reader
+	stderr io.Reader // nil for pty sessions
+	ptyReq *ssh.Pty  // non-nil for pty sessions
+
+	// We use this sync.Once to ensure that we only terminate the process once,
+	// either it exits itself or is terminated
+	exitOnce sync.Once
+}
+
+func (srv *server) newSSHSession(s ssh.Session, ci *sshConnInfo, lu *user.User, action *tailcfg.SSHAction) *sshSession {
+	sharedID := fmt.Sprintf("%s-%02x", ci.now.UTC().Format("20060102T150405"), randBytes(5))
+	return &sshSession{
+		Session:   s,
+		idH:       s.Context().(ssh.Context).SessionID(),
+		sharedID:  sharedID,
+		ctx:       newSSHContext(),
+		srv:       srv,
+		action:    action,
+		localUser: lu,
+		connInfo:  ci,
+		logf:      logger.WithPrefix(srv.logf, "ssh-session("+sharedID+"): "),
 	}
-	srv.handleAcceptedSSH(ctx, s, ci, lu)
 }
 
 func (srv *server) fetchSSHAction(ctx context.Context, url string) (*tailcfg.SSHAction, error) {
@@ -290,20 +321,22 @@ func (srv *server) fetchSSHAction(ctx context.Context, url string) (*tailcfg.SSH
 	}
 }
 
-func (srv *server) handleSessionTermination(ctx context.Context, s ssh.Session, ci *sshConnInfo, cmd *exec.Cmd, exitOnce *sync.Once) {
-	<-ctx.Done()
+// killProcessOnContextDone waits for ss.ctx to be done and kills the process,
+// unless the process has already exited.
+func (ss *sshSession) killProcessOnContextDone() {
+	<-ss.ctx.Done()
 	// Either the process has already existed, in which case this does nothing.
 	// Or, the process is still running in which case this will kill it.
-	exitOnce.Do(func() {
-		err := ctx.Err()
+	ss.exitOnce.Do(func() {
+		err := ss.ctx.Err()
 		if serr, ok := err.(SSHTerminationError); ok {
 			msg := serr.SSHTerminationMessage()
 			if msg != "" {
-				io.WriteString(s.Stderr(), "\r\n\r\n"+msg+"\r\n\r\n")
+				io.WriteString(ss.Stderr(), "\r\n\r\n"+msg+"\r\n\r\n")
 			}
 		}
-		srv.logf("terminating SSH session from %v: %v", ci.src.IP(), err)
-		cmd.Process.Kill()
+		ss.logf("terminating SSH session from %v: %v", ss.connInfo.src.IP(), err)
+		ss.cmd.Process.Kill()
 	})
 }
 
@@ -312,110 +345,138 @@ func (srv *server) handleSessionTermination(ctx context.Context, s ssh.Session,
 func (srv *server) isActiveSession(sctx ssh.Context) bool {
 	srv.mu.Lock()
 	defer srv.mu.Unlock()
-	return srv.activeSessions[sctx.SessionID()]
+	_, ok := srv.activeSessionByH[sctx.SessionID()]
+	return ok
 }
 
-// startSession registers s as an active session.
-func (srv *server) startSession(s ssh.Session) {
+// startSession registers ss as an active session.
+func (srv *server) startSession(ss *sshSession) {
 	srv.mu.Lock()
 	defer srv.mu.Unlock()
-	if srv.activeSessions == nil {
-		srv.activeSessions = make(map[string]bool)
+	if srv.activeSessionByH == nil {
+		srv.activeSessionByH = make(map[string]*sshSession)
+	}
+	if srv.activeSessionBySharedID == nil {
+		srv.activeSessionBySharedID = make(map[string]*sshSession)
+	}
+	if ss.idH == "" {
+		panic("empty idH")
 	}
-	srv.activeSessions[s.Context().(ssh.Context).SessionID()] = true
+	if _, dup := srv.activeSessionByH[ss.idH]; dup {
+		panic("dup idH")
+	}
+	if ss.sharedID == "" {
+		panic("empty sharedID")
+	}
+	if _, dup := srv.activeSessionBySharedID[ss.sharedID]; dup {
+		panic("dup sharedID")
+	}
+	srv.activeSessionByH[ss.idH] = ss
+	srv.activeSessionBySharedID[ss.sharedID] = ss
 }
 
 // endSession unregisters s from the list of active sessions.
-func (srv *server) endSession(s ssh.Session) {
+func (srv *server) endSession(ss *sshSession) {
 	srv.mu.Lock()
 	defer srv.mu.Unlock()
-	delete(srv.activeSessions, s.Context().(ssh.Context).SessionID())
+	delete(srv.activeSessionByH, ss.idH)
+	delete(srv.activeSessionBySharedID, ss.sharedID)
 }
 
-// handleAcceptedSSH handles s once it's been accepted and determined
-// that it should run as local system user lu.
+var errSessionDone = errors.New("session is done")
+
+// run is the entrypoint for a newly accepted SSH session.
 //
 // When ctx is done, the session is forcefully terminated. If its Err
 // is an SSHTerminationError, its SSHTerminationMessage is sent to the
 // user.
-func (srv *server) handleAcceptedSSH(ctx context.Context, s ssh.Session, ci *sshConnInfo, lu *user.User) {
-	srv.startSession(s)
-	defer srv.endSession(s)
+func (ss *sshSession) run() {
+	srv := ss.srv
+	srv.startSession(ss)
+	defer srv.endSession(ss)
+
+	defer ss.ctx.CloseWithError(errSessionDone)
+
+	if ss.action.SesssionDuration != 0 {
+		t := time.AfterFunc(ss.action.SesssionDuration, func() {
+			ss.ctx.CloseWithError(userVisibleError{
+				fmt.Sprintf("Session timeout of %v elapsed.", ss.action.SesssionDuration),
+				context.DeadlineExceeded,
+			})
+		})
+		defer t.Stop()
+	}
+
 	logf := srv.logf
+	lu := ss.localUser
 	localUser := lu.Username
 
 	if euid := os.Geteuid(); euid != 0 {
 		if lu.Uid != fmt.Sprint(euid) {
 			logf("ssh: can't switch to user %q from process euid %v", localUser, euid)
-			fmt.Fprintf(s, "can't switch user\n")
-			s.Exit(1)
+			fmt.Fprintf(ss, "can't switch user\n")
+			ss.Exit(1)
 			return
 		}
 	}
 
 	// Take control of the PTY so that we can configure it below.
 	// See https://github.com/tailscale/tailscale/issues/4146
-	s.DisablePTYEmulation()
+	ss.DisablePTYEmulation()
 
-	cmd, stdin, stdout, stderr, err := srv.launchProcess(ctx, s, ci, lu)
+	err := ss.launchProcess(ss.ctx)
 	if err != nil {
 		logf("start failed: %v", err.Error())
-		s.Exit(1)
+		ss.Exit(1)
 		return
 	}
-	// We use this sync.Once to ensure that we only terminate the process once,
-	// either it exits itself or is terminated
-	var exitOnce sync.Once
-	if ctx.Done() != nil {
-		ctx, cancel := context.WithCancel(ctx)
-		defer cancel()
-		go srv.handleSessionTermination(ctx, s, ci, cmd, &exitOnce)
-	}
+	go ss.killProcessOnContextDone()
+
 	go func() {
-		_, err := io.Copy(stdin, s)
+		_, err := io.Copy(ss.stdin, ss)
 		if err != nil {
 			// TODO: don't log in the success case.
 			logf("ssh: stdin copy: %v", err)
 		}
-		stdin.Close()
+		ss.stdin.Close()
 	}()
 	go func() {
-		_, err := io.Copy(s, stdout)
+		_, err := io.Copy(ss, ss.stdout)
 		if err != nil {
 			// TODO: don't log in the success case.
 			logf("ssh: stdout copy: %v", err)
 		}
 	}()
 	// stderr is nil for ptys.
-	if stderr != nil {
+	if ss.stderr != nil {
 		go func() {
-			_, err := io.Copy(s.Stderr(), stderr)
+			_, err := io.Copy(ss.Stderr(), ss.stderr)
 			if err != nil {
 				// TODO: don't log in the success case.
 				logf("ssh: stderr copy: %v", err)
 			}
 		}()
 	}
-	err = cmd.Wait()
+	err = ss.cmd.Wait()
 	// This will either make the SSH Termination goroutine be a no-op,
 	// or itself will be a no-op because the process was killed by the
 	// aforementioned goroutine.
-	exitOnce.Do(func() {})
+	ss.exitOnce.Do(func() {})
 
 	if err == nil {
 		logf("ssh: Wait: ok")
-		s.Exit(0)
+		ss.Exit(0)
 		return
 	}
 	if ee, ok := err.(*exec.ExitError); ok {
 		code := ee.ProcessState.ExitCode()
 		logf("ssh: Wait: code=%v", code)
-		s.Exit(code)
+		ss.Exit(code)
 		return
 	}
 
 	logf("ssh: Wait: %v", err)
-	s.Exit(1)
+	ss.Exit(1)
 	return
 }
 
@@ -509,3 +570,11 @@ func matchesPrincipal(ps []*tailcfg.SSHPrincipal, ci *sshConnInfo) bool {
 	}
 	return false
 }
+
+func randBytes(n int) []byte {
+	b := make([]byte, n)
+	if _, err := rand.Read(b); err != nil {
+		panic(err)
+	}
+	return b
+}

+ 2 - 4
ssh/tailssh/tailssh_test.go

@@ -9,7 +9,6 @@ package tailssh
 
 import (
 	"bytes"
-	"context"
 	"errors"
 	"fmt"
 	"net"
@@ -211,10 +210,9 @@ func TestSSH(t *testing.T) {
 		uprof:   &tailcfg.UserProfile{},
 	}
 
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
 	ss.Handler = func(s ssh.Session) {
-		srv.handleAcceptedSSH(ctx, s, ci, u)
+		ss := srv.newSSHSession(s, ci, u, &tailcfg.SSHAction{Accept: true})
+		ss.run()
 	}
 
 	ln, err := net.Listen("tcp4", "127.0.0.1:0")