Browse Source

all: implement lock revoke-keys command

The revoke-keys command allows nodes with tailnet lock keys
to collaborate to erase the use of a compromised key, and remove trust
in it.

Signed-off-by: Tom DNetto <[email protected]>
Updates ENG-1848
Tom DNetto 2 years ago
parent
commit
767e839db5

+ 36 - 0
client/tailscale/localclient.go

@@ -961,6 +961,42 @@ func (lc *LocalClient) NetworkLockVerifySigningDeeplink(ctx context.Context, url
 	return decodeJSON[*tka.DeeplinkValidationResult](body)
 }
 
+// NetworkLockGenRecoveryAUM asks the local daemon to generate an AUM that
+// recovers from a tailnet-lock key compromise by removing trust in removeKeys.
+//
+// forkFrom is forwarded in its string form as the ForkFrom field of the
+// JSON request. On success, the bytes returned by lc.send are handed back
+// to the caller unmodified.
+func (lc *LocalClient) NetworkLockGenRecoveryAUM(ctx context.Context, removeKeys []tkatype.KeyID, forkFrom tka.AUMHash) ([]byte, error) {
+	type genRequest struct {
+		Keys     []tkatype.KeyID
+		ForkFrom string
+	}
+	req := genRequest{
+		Keys:     removeKeys,
+		ForkFrom: forkFrom.String(),
+	}
+
+	resp, err := lc.send(ctx, "POST", "/localapi/v0/tka/generate-recovery-aum", 200, jsonBody(req))
+	if err != nil {
+		return nil, fmt.Errorf("sending generate-recovery-aum: %w", err)
+	}
+	return resp, nil
+}
+
+// NetworkLockCosignRecoveryAUM posts the serialized recovery AUM to the local
+// daemon's cosign-recovery-aum endpoint, where it is co-signed using the
+// node's tailnet lock key. The bytes returned by lc.send are handed back.
+func (lc *LocalClient) NetworkLockCosignRecoveryAUM(ctx context.Context, aum tka.AUM) ([]byte, error) {
+	payload := aum.Serialize()
+
+	resp, err := lc.send(ctx, "POST", "/localapi/v0/tka/cosign-recovery-aum", 200, bytes.NewReader(payload))
+	if err != nil {
+		return nil, fmt.Errorf("sending cosign-recovery-aum: %w", err)
+	}
+	return resp, nil
+}
+
+// NetworkLockSubmitRecoveryAUM submits a recovery AUM to the control plane.
+// The serialized AUM is posted to the local daemon's submit-recovery-aum endpoint.
+func (lc *LocalClient) NetworkLockSubmitRecoveryAUM(ctx context.Context, aum tka.AUM) error {
+	r := bytes.NewReader(aum.Serialize())
+	_, err := lc.send(ctx, "POST", "/localapi/v0/tka/submit-recovery-aum", 200, r)
+	if err != nil {
+		return fmt.Errorf("sending submit-recovery-aum: %w", err)
+	}
+	return nil
+}
+
 // SetServeConfig sets or replaces the serving settings.
 // If config is nil, settings are cleared and serving is disabled.
 func (lc *LocalClient) SetServeConfig(ctx context.Context, config *ipn.ServeConfig) error {

+ 113 - 0
cmd/tailscale/cli/network-lock.go

@@ -23,6 +23,7 @@ import (
 	"tailscale.com/ipn/ipnstate"
 	"tailscale.com/tka"
 	"tailscale.com/types/key"
+	"tailscale.com/types/tkatype"
 )
 
 var netlockCmd = &ffcli.Command{
@@ -40,6 +41,7 @@ var netlockCmd = &ffcli.Command{
 		nlDisablementKDFCmd,
 		nlLogCmd,
 		nlLocalDisableCmd,
+		nlRevokeKeysCmd,
 	},
 	Exec: runNetworkLockNoSubcommand,
 }
@@ -711,3 +713,114 @@ func wrapAuthKey(ctx context.Context, keyStr string, status *ipnstate.Status) er
 	fmt.Println(wrapped)
 	return nil
 }
+
+// nlRevokeKeysArgs holds the flag values registered on the revoke-keys
+// command's FlagSet.
+var nlRevokeKeysArgs struct {
+	// Set by --cosign.
+	cosign   bool
+	// Set by --finish.
+	finish   bool
+	// Set by --fork-from; empty when the flag is not given.
+	forkFrom string
+}
+
+// nlRevokeKeysCmd is the `lock revoke-keys` subcommand. It is executed by
+// runNetworkLockRevokeKeys and its flags are stored in nlRevokeKeysArgs.
+var nlRevokeKeysCmd = &ffcli.Command{
+	Name:       "revoke-keys",
+	ShortUsage: "revoke-keys <tailnet-lock-key>...\n  revoke-keys [--cosign] [--finish] <recovery-blob>",
+	ShortHelp:  "Revoke compromised tailnet-lock keys",
+	LongHelp: `Retroactively revoke the specified tailnet lock keys (tlpub:abc).
+
+Revoked keys are prevented from being used in the future. Any nodes previously signed
+by revoked keys lose their authorization and must be signed again.
+
+Revocation is a multi-step process that requires several signing nodes to ` + "`--cosign`" + ` the revocation. Use ` + "`tailscale lock remove`" + ` instead if the key has not been compromised.
+
+1. To start, run ` + "`tailscale lock revoke-keys <tlpub-keys>`" + ` with the tailnet lock keys to revoke.
+2. Re-run the ` + "`--cosign`" + ` command output by ` + "`revoke-keys`" + ` on other signing nodes. Use the
+   most recent command output on the next signing node in sequence.
+3. Once the number of ` + "`--cosign`" + `s is greater than the number of keys being revoked,
+   run the command one final time with ` + "`--finish`" + ` instead of ` + "`--cosign`" + `.`,
+	Exec: runNetworkLockRevokeKeys,
+	FlagSet: (func() *flag.FlagSet {
+		fs := newFlagSet("lock revoke-keys")
+		fs.BoolVar(&nlRevokeKeysArgs.cosign, "cosign", false, "continue generating the recovery using the tailnet lock key on this device and the provided recovery blob")
+		fs.BoolVar(&nlRevokeKeysArgs.finish, "finish", false, "finish the recovery process by transmitting the revocation")
+		fs.StringVar(&nlRevokeKeysArgs.forkFrom, "fork-from", "", "parent AUM hash to rewrite from (advanced users only)")
+		return fs
+	})(),
+}
+
+// runNetworkLockRevokeKeys implements the `lock revoke-keys` subcommand.
+//
+// With neither --cosign nor --finish set, args are parsed as the tailnet lock
+// keys to revoke, a recovery AUM is generated through the local client, and
+// the command to run on the next signing node is printed.
+//
+// Otherwise args must contain exactly one hex-encoded recovery AUM, which is
+// co-signed (--cosign) and/or submitted (--finish). When both flags are set,
+// the co-signed AUM is the one submitted.
+func runNetworkLockRevokeKeys(ctx context.Context, args []string) error {
+	// First step in the process
+	if !nlRevokeKeysArgs.cosign && !nlRevokeKeysArgs.finish {
+		removeKeys, _, err := parseNLArgs(args, true, false)
+		if err != nil {
+			return err
+		}
+
+		keyIDs := make([]tkatype.KeyID, len(removeKeys))
+		for i, k := range removeKeys {
+			keyIDs[i], err = k.ID()
+			if err != nil {
+				return fmt.Errorf("generating keyID: %v", err)
+			}
+		}
+
+		var forkFrom tka.AUMHash
+		if nlRevokeKeysArgs.forkFrom != "" {
+			if len(nlRevokeKeysArgs.forkFrom) == (len(forkFrom) * 2) {
+				// Hex-encoded: like the output of the lock log command.
+				b, err := hex.DecodeString(nlRevokeKeysArgs.forkFrom)
+				if err != nil {
+					return fmt.Errorf("invalid fork-from hash: %v", err)
+				}
+				copy(forkFrom[:], b)
+			} else {
+				if err := forkFrom.UnmarshalText([]byte(nlRevokeKeysArgs.forkFrom)); err != nil {
+					return fmt.Errorf("invalid fork-from hash: %v", err)
+				}
+			}
+		}
+
+		aumBytes, err := localClient.NetworkLockGenRecoveryAUM(ctx, keyIDs, forkFrom)
+		if err != nil {
+			return fmt.Errorf("generation of recovery AUM failed: %w", err)
+		}
+
+		fmt.Printf(`Run the following command on another machine with a trusted tailnet lock key:
+	%s lock revoke-keys --cosign %X
+`, os.Args[0], aumBytes)
+		return nil
+	}
+
+	// If we got this far, we need to co-sign the AUM and/or transmit it for distribution.
+	// Exactly one recovery blob is required; indexing args[0] without this
+	// check would panic when the blob is omitted.
+	if len(args) != 1 {
+		return fmt.Errorf("expected exactly one recovery blob argument, got %d", len(args))
+	}
+	b, err := hex.DecodeString(args[0])
+	if err != nil {
+		return fmt.Errorf("parsing hex: %v", err)
+	}
+	var recoveryAUM tka.AUM
+	if err := recoveryAUM.Unserialize(b); err != nil {
+		return fmt.Errorf("decoding recovery AUM: %v", err)
+	}
+
+	if nlRevokeKeysArgs.cosign {
+		aumBytes, err := localClient.NetworkLockCosignRecoveryAUM(ctx, recoveryAUM)
+		if err != nil {
+			return fmt.Errorf("co-signing recovery AUM failed: %w", err)
+		}
+
+		fmt.Printf(`Co-signing completed successfully.
+
+To accumulate an additional signature, run the following command on another machine with a trusted tailnet lock key:
+	%s lock revoke-keys --cosign %X
+
+Alternatively if you are done with co-signing, complete recovery by running the following command:
+	%s lock revoke-keys --finish %X
+`, os.Args[0], aumBytes, os.Args[0], aumBytes)
+
+		if nlRevokeKeysArgs.finish {
+			// Submit the co-signed AUM rather than the blob given on the
+			// command line, so the signature just added is not dropped.
+			var cosigned tka.AUM
+			if err := cosigned.Unserialize(aumBytes); err != nil {
+				return fmt.Errorf("decoding co-signed recovery AUM: %v", err)
+			}
+			recoveryAUM = cosigned
+		}
+	}
+
+	if nlRevokeKeysArgs.finish {
+		if err := localClient.NetworkLockSubmitRecoveryAUM(ctx, recoveryAUM); err != nil {
+			return fmt.Errorf("submitting recovery AUM failed: %w", err)
+		}
+		fmt.Println("Recovery completed.")
+	}
+
+	return nil
+}

+ 87 - 0
ipn/ipnlocal/network-lock.go

@@ -845,6 +845,93 @@ func (b *LocalBackend) NetworkLockAffectedSigs(keyID tkatype.KeyID) ([]tkatype.M
 	return resp.Signatures, nil
 }
 
+// NetworkLockGenerateRecoveryAUM builds an AUM that retroactively removes
+// trust in removeKeys, signs it with this node's tailnet lock key, and
+// returns it.
+//
+// A zero forkFrom lets the parent AUM be chosen automatically; any other
+// value is used as the parent to fork from.
+//
+// It returns errNetworkLockNotActive when b.tka is nil, and errMissingNetmap
+// when no tailnet lock key is available from the current prefs.
+func (b *LocalBackend) NetworkLockGenerateRecoveryAUM(removeKeys []tkatype.KeyID, forkFrom tka.AUMHash) (*tka.AUM, error) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	if b.tka == nil {
+		return nil, errNetworkLockNotActive
+	}
+
+	var signer key.NLPrivate
+	if prefs := b.pm.CurrentPrefs(); prefs.Valid() && prefs.Persist().Valid() {
+		signer = prefs.Persist().NetworkLockKey()
+	}
+	if signer.IsZero() {
+		return nil, errMissingNetmap
+	}
+
+	recovery, err := b.tka.authority.MakeRetroactiveRevocation(b.tka.storage, removeKeys, signer.KeyID(), forkFrom)
+	if err != nil {
+		return nil, err
+	}
+
+	// Attach our own signature as the first (and so far only) one.
+	sigs, err := signer.SignAUM(recovery.SigHash())
+	if err != nil {
+		return nil, fmt.Errorf("signing failed: %w", err)
+	}
+	recovery.Signatures = sigs
+
+	return recovery, nil
+}
+
+// NetworkLockCosignRecoveryAUM appends this node's signature to the given
+// recovery AUM and returns the same, now updated, AUM.
+//
+// The input is expected to come from an earlier call to
+// NetworkLockGenerateRecoveryAUM or NetworkLockCosignRecoveryAUM. An AUM that
+// already carries a signature from this node's key is rejected.
+func (b *LocalBackend) NetworkLockCosignRecoveryAUM(aum *tka.AUM) (*tka.AUM, error) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	if b.tka == nil {
+		return nil, errNetworkLockNotActive
+	}
+
+	var signer key.NLPrivate
+	if prefs := b.pm.CurrentPrefs(); prefs.Valid() && prefs.Persist().Valid() {
+		signer = prefs.Persist().NetworkLockKey()
+	}
+	if signer.IsZero() {
+		return nil, errMissingNetmap
+	}
+
+	// Refuse to sign twice with the same key.
+	for i := range aum.Signatures {
+		if bytes.Equal(aum.Signatures[i].KeyID, signer.KeyID()) {
+			return nil, errors.New("this node has already signed this recovery AUM")
+		}
+	}
+
+	ourSigs, err := signer.SignAUM(aum.SigHash())
+	if err != nil {
+		return nil, fmt.Errorf("signing failed: %w", err)
+	}
+	aum.Signatures = append(aum.Signatures, ourSigs...)
+
+	return aum, nil
+}
+
+// NetworkLockSubmitRecoveryAUM sends the given recovery AUM via
+// tkaDoSyncSend, using the AUM's own hash as the head and the AUM itself
+// as the only update.
+//
+// It returns errNetworkLockNotActive when b.tka is nil, and an error when
+// the current prefs do not provide a node key.
+func (b *LocalBackend) NetworkLockSubmitRecoveryAUM(aum *tka.AUM) error {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	if b.tka == nil {
+		return errNetworkLockNotActive
+	}
+	var ourNodeKey key.NodePublic
+	if p := b.pm.CurrentPrefs(); p.Valid() && p.Persist().Valid() && !p.Persist().PrivateNodeKey().IsZero() {
+		ourNodeKey = p.Persist().PublicNodeKey()
+	}
+	if ourNodeKey.IsZero() {
+		return errors.New("no node-key: is tailscale logged in?")
+	}
+
+	// b.mu is released for the duration of the send and re-acquired
+	// afterwards so that the deferred Unlock above stays balanced.
+	b.mu.Unlock()
+	_, err := b.tkaDoSyncSend(ourNodeKey, aum.Hash(), []tka.AUM{*aum}, false)
+	b.mu.Lock()
+	return err
+}
+
 var tkaSuffixEncoder = base64.RawStdEncoding
 
 // NetworkLockWrapPreauthKey wraps a pre-auth key with information to

+ 126 - 0
ipn/ipnlocal/network-lock_test.go

@@ -994,3 +994,129 @@ func TestTKAAffectedSigs(t *testing.T) {
 		})
 	}
 }
+
+// TestTKARecoverCompromisedKeyFlow exercises the full recovery flow on
+// LocalBackend: generate a recovery AUM that removes a compromised key,
+// co-sign it from a second backend holding a different tailnet lock key,
+// then submit it. The fake control server applies the submitted AUM to the
+// authority and checks that the compromised key is no longer trusted.
+func TestTKARecoverCompromisedKeyFlow(t *testing.T) {
+	nodePriv := key.NewNode()
+	nlPriv := key.NewNLPrivate()
+	cosignPriv := key.NewNLPrivate()
+	compromisedPriv := key.NewNLPrivate()
+
+	pm := must.Get(newProfileManager(new(mem.Store), t.Logf))
+	must.Do(pm.SetPrefs((&ipn.Prefs{
+		Persist: &persist.Persist{
+			PrivateNodeKey: nodePriv,
+			NetworkLockKey: nlPriv,
+		},
+	}).View()))
+
+	// Make a fake TKA authority, to seed local state.
+	// The two signing keys carry 2 votes each; the compromised key carries 1.
+	disablementSecret := bytes.Repeat([]byte{0xa5}, 32)
+	key := tka.Key{Kind: tka.Key25519, Public: nlPriv.Public().Verifier(), Votes: 2}
+	cosignKey := tka.Key{Kind: tka.Key25519, Public: cosignPriv.Public().Verifier(), Votes: 2}
+	compromisedKey := tka.Key{Kind: tka.Key25519, Public: compromisedPriv.Public().Verifier(), Votes: 1}
+
+	temp := t.TempDir()
+	tkaPath := filepath.Join(temp, "tka-profile", string(pm.CurrentProfile().ID))
+	os.Mkdir(tkaPath, 0755)
+	chonk, err := tka.ChonkDir(tkaPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	authority, _, err := tka.Create(chonk, tka.State{
+		Keys:               []tka.Key{key, compromisedKey, cosignKey},
+		DisablementSecrets: [][]byte{tka.DisablementKDF(disablementSecret)},
+	}, nlPriv)
+	if err != nil {
+		t.Fatalf("tka.Create() failed: %v", err)
+	}
+
+	// Fake control server: only the sync/send endpoint is handled.
+	ts, client := fakeNoiseServer(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		defer r.Body.Close()
+		switch r.URL.Path {
+		case "/machine/tka/sync/send":
+			body := new(tailcfg.TKASyncSendRequest)
+			if err := json.NewDecoder(r.Body).Decode(body); err != nil {
+				t.Fatal(err)
+			}
+			t.Logf("got sync send:\n%+v", body)
+
+			var remoteHead tka.AUMHash
+			if err := remoteHead.UnmarshalText([]byte(body.Head)); err != nil {
+				t.Fatalf("head unmarshal: %v", err)
+			}
+			toApply := make([]tka.AUM, len(body.MissingAUMs))
+			for i, a := range body.MissingAUMs {
+				if err := toApply[i].Unserialize(a); err != nil {
+					t.Fatalf("decoding missingAUM[%d]: %v", i, err)
+				}
+			}
+
+			// Apply the recovery AUM to an authority to make sure it works.
+			if err := authority.Inform(chonk, toApply); err != nil {
+				t.Errorf("recovery AUM could not be applied: %v", err)
+			}
+			// Make sure the key we removed isn't trusted.
+			if authority.KeyTrusted(compromisedPriv.KeyID()) {
+				t.Error("compromised key was not removed from tka")
+			}
+
+			w.WriteHeader(200)
+			if err := json.NewEncoder(w).Encode(tailcfg.TKASubmitSignatureResponse{}); err != nil {
+				t.Fatal(err)
+			}
+
+		default:
+			t.Errorf("unhandled endpoint path: %v", r.URL.Path)
+			w.WriteHeader(404)
+		}
+	}))
+	defer ts.Close()
+	cc := fakeControlClient(t, client)
+	b := LocalBackend{
+		varRoot: temp,
+		cc:      cc,
+		ccAuto:  cc,
+		logf:    t.Logf,
+		tka: &tkaState{
+			authority: authority,
+			storage:   chonk,
+		},
+		pm:    pm,
+		store: pm.Store(),
+	}
+
+	aum, err := b.NetworkLockGenerateRecoveryAUM([]tkatype.KeyID{compromisedPriv.KeyID()}, tka.AUMHash{})
+	if err != nil {
+		t.Fatalf("NetworkLockGenerateRecoveryAUM() failed: %v", err)
+	}
+
+	// Cosign using the cosigning key.
+	// A second backend is built whose prefs hold cosignPriv as the tailnet
+	// lock key; pm and b are shadowed inside this block only.
+	{
+		pm := must.Get(newProfileManager(new(mem.Store), t.Logf))
+		must.Do(pm.SetPrefs((&ipn.Prefs{
+			Persist: &persist.Persist{
+				PrivateNodeKey: nodePriv,
+				NetworkLockKey: cosignPriv,
+			},
+		}).View()))
+		b := LocalBackend{
+			varRoot: temp,
+			logf:    t.Logf,
+			tka: &tkaState{
+				authority: authority,
+				storage:   chonk,
+			},
+			pm:    pm,
+			store: pm.Store(),
+		}
+		if aum, err = b.NetworkLockCosignRecoveryAUM(aum); err != nil {
+			t.Fatalf("NetworkLockCosignRecoveryAUM() failed: %v", err)
+		}
+	}
+
+	// Finally, submit the recovery AUM. Validation is done
+	// in the fake control handler.
+	if err := b.NetworkLockSubmitRecoveryAUM(aum); err != nil {
+		t.Errorf("NetworkLockSubmitRecoveryAUM() failed: %v", err)
+	}
+}

+ 101 - 0
ipn/localapi/localapi.go

@@ -44,6 +44,7 @@ import (
 	"tailscale.com/types/logger"
 	"tailscale.com/types/logid"
 	"tailscale.com/types/ptr"
+	"tailscale.com/types/tkatype"
 	"tailscale.com/util/clientmetric"
 	"tailscale.com/util/httpm"
 	"tailscale.com/util/mak"
@@ -106,6 +107,9 @@ var handler = map[string]localAPIHandler{
 	"tka/affected-sigs":           (*Handler).serveTKAAffectedSigs,
 	"tka/wrap-preauth-key":        (*Handler).serveTKAWrapPreauthKey,
 	"tka/verify-deeplink":         (*Handler).serveTKAVerifySigningDeeplink,
+	"tka/generate-recovery-aum":   (*Handler).serveTKAGenerateRecoveryAUM,
+	"tka/cosign-recovery-aum":     (*Handler).serveTKACosignRecoveryAUM,
+	"tka/submit-recovery-aum":     (*Handler).serveTKASubmitRecoveryAUM,
 	"upload-client-metrics":       (*Handler).serveUploadClientMetrics,
 	"watch-ipn-bus":               (*Handler).serveWatchIPNBus,
 	"whois":                       (*Handler).serveWhoIs,
@@ -1747,6 +1751,103 @@ func (h *Handler) serveTKAAffectedSigs(w http.ResponseWriter, r *http.Request) {
 	w.Write(j)
 }
 
+// serveTKAGenerateRecoveryAUM handles POST requests carrying a JSON body with
+// the key IDs to revoke (Keys) and an optional parent hash (ForkFrom). It
+// requires write permission, asks the backend to generate a recovery AUM,
+// and writes the serialized AUM back as application/octet-stream.
+func (h *Handler) serveTKAGenerateRecoveryAUM(w http.ResponseWriter, r *http.Request) {
+	switch {
+	case !h.PermitWrite:
+		http.Error(w, "access denied", http.StatusForbidden)
+		return
+	case r.Method != httpm.POST:
+		http.Error(w, "use POST", http.StatusMethodNotAllowed)
+		return
+	}
+
+	var req struct {
+		Keys     []tkatype.KeyID
+		ForkFrom string
+	}
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		http.Error(w, "invalid JSON for verifyRequest body", http.StatusBadRequest)
+		return
+	}
+
+	// An empty ForkFrom leaves the hash at its zero value.
+	var parent tka.AUMHash
+	if req.ForkFrom != "" {
+		if err := parent.UnmarshalText([]byte(req.ForkFrom)); err != nil {
+			http.Error(w, "decoding fork-from: "+err.Error(), http.StatusBadRequest)
+			return
+		}
+	}
+
+	recovery, err := h.b.NetworkLockGenerateRecoveryAUM(req.Keys, parent)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	w.Header().Set("Content-Type", "application/octet-stream")
+	w.Write(recovery.Serialize())
+}
+
+// serveTKACosignRecoveryAUM handles POST requests whose body is a serialized
+// recovery AUM. It requires write permission, reads at most 1 MiB of the
+// body, asks the backend to co-sign the AUM, and writes the serialized
+// result back as application/octet-stream.
+func (h *Handler) serveTKACosignRecoveryAUM(w http.ResponseWriter, r *http.Request) {
+	if !h.PermitWrite {
+		http.Error(w, "access denied", http.StatusForbidden)
+		return
+	}
+	if r.Method != httpm.POST {
+		http.Error(w, "use POST", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// io.ReadAll replaces the deprecated ioutil.ReadAll.
+	body := io.LimitReader(r.Body, 1024*1024)
+	aumBytes, err := io.ReadAll(body)
+	if err != nil {
+		http.Error(w, "reading AUM", http.StatusBadRequest)
+		return
+	}
+	var aum tka.AUM
+	if err := aum.Unserialize(aumBytes); err != nil {
+		http.Error(w, "decoding AUM", http.StatusBadRequest)
+		return
+	}
+
+	res, err := h.b.NetworkLockCosignRecoveryAUM(&aum)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	w.Header().Set("Content-Type", "application/octet-stream")
+	w.Write(res.Serialize())
+}
+
+// serveTKASubmitRecoveryAUM handles POST requests whose body is a serialized
+// recovery AUM. It requires write permission, reads at most 1 MiB of the
+// body, and hands the decoded AUM to the backend for submission. A 200
+// status with no body is written on success.
+func (h *Handler) serveTKASubmitRecoveryAUM(w http.ResponseWriter, r *http.Request) {
+	if !h.PermitWrite {
+		http.Error(w, "access denied", http.StatusForbidden)
+		return
+	}
+	if r.Method != httpm.POST {
+		http.Error(w, "use POST", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// io.ReadAll replaces the deprecated ioutil.ReadAll.
+	body := io.LimitReader(r.Body, 1024*1024)
+	aumBytes, err := io.ReadAll(body)
+	if err != nil {
+		http.Error(w, "reading AUM", http.StatusBadRequest)
+		return
+	}
+	var aum tka.AUM
+	if err := aum.Unserialize(aumBytes); err != nil {
+		http.Error(w, "decoding AUM", http.StatusBadRequest)
+		return
+	}
+
+	if err := h.b.NetworkLockSubmitRecoveryAUM(&aum); err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	w.WriteHeader(http.StatusOK)
+}
+
 // serveProfiles serves profile switching-related endpoints. Supported methods
 // and paths are:
 //   - GET /profiles/: list all profiles (JSON-encoded array of ipn.LoginProfiles)

+ 118 - 3
tka/tka.go

@@ -28,6 +28,9 @@ var cborDecOpts = cbor.DecOptions{
 	MaxMapPairs:      1024,
 }
 
+// maxScanIterations is the iteration limit passed to computeActiveChain and
+// computeStateAt when scanning AUM trees. The value is arbitrarily chosen.
+const maxScanIterations = 2000
+
 // Authority is a Tailnet Key Authority. This type is the main coupling
 // point to the rest of the tailscale client.
 //
@@ -471,7 +474,7 @@ func Open(storage Chonk) (*Authority, error) {
 		return nil, fmt.Errorf("reading last ancestor: %v", err)
 	}
 
-	c, err := computeActiveChain(storage, a, 2000)
+	c, err := computeActiveChain(storage, a, maxScanIterations)
 	if err != nil {
 		return nil, fmt.Errorf("active chain: %v", err)
 	}
@@ -604,7 +607,7 @@ func (a *Authority) InformIdempotent(storage Chonk, updates []AUM) (Authority, e
 		state, hasState := stateAt[parent]
 		var err error
 		if !hasState {
-			if state, err = computeStateAt(storage, 2000, parent); err != nil {
+			if state, err = computeStateAt(storage, maxScanIterations, parent); err != nil {
 				return Authority{}, fmt.Errorf("update %d computing state: %v", i, err)
 			}
 			stateAt[parent] = state
@@ -639,7 +642,7 @@ func (a *Authority) InformIdempotent(storage Chonk, updates []AUM) (Authority, e
 	}
 
 	oldestAncestor := a.oldestAncestor.Hash()
-	c, err := computeActiveChain(storage, &oldestAncestor, 2000)
+	c, err := computeActiveChain(storage, &oldestAncestor, maxScanIterations)
 	if err != nil {
 		return Authority{}, fmt.Errorf("recomputing active chain: %v", err)
 	}
@@ -721,3 +724,115 @@ func (a *Authority) Compact(storage CompactableChonk, o CompactionOptions) error
 	a.oldestAncestor = ancestor
 	return nil
 }
+
+// findParentForRewrite walks backwards from the current head to pick the AUM
+// that a retroactive revocation of removeKeys should use as its parent.
+//
+// The walk stops at the first AUM whose state trusts none of removeKeys while
+// still trusting ourKey (so that we are able to sign a fork from there). If no
+// such AUM exists, the walk ends at the oldest known ancestor or at the
+// genesis AUM, whichever is reached first, and that hash is returned.
+func (a *Authority) findParentForRewrite(storage Chonk, removeKeys []tkatype.KeyID, ourKey tkatype.KeyID) (AUMHash, error) {
+	cursor := a.Head()
+
+	// Once the oldest ancestor is reached there is no earlier history
+	// available, so the rewrite has to start there.
+	for cursor != a.oldestAncestor.Hash() {
+		aum, err := storage.AUM(cursor)
+		if err != nil {
+			return AUMHash{}, fmt.Errorf("reading AUM %v: %w", cursor, err)
+		}
+
+		state, err := computeStateAt(storage, maxScanIterations, cursor)
+		if err != nil {
+			return AUMHash{}, fmt.Errorf("computing state for %v: %w", cursor, err)
+		}
+
+		// The ideal parent trusts none of the keys being removed.
+		revokedStillTrusted := false
+		for _, k := range removeKeys {
+			if _, err := state.GetKey(k); err == nil {
+				revokedStillTrusted = true
+				break
+			}
+		}
+		if !revokedStillTrusted {
+			// Our own key must be trusted here, otherwise we could
+			// not sign a fork from this point.
+			if _, err := state.GetKey(ourKey); err == nil {
+				return cursor, nil
+			}
+		}
+
+		parent, ok := aum.Parent()
+		if !ok {
+			// Genesis AUM: nothing earlier to fork from.
+			return cursor, nil
+		}
+		cursor = parent
+	}
+
+	return cursor, nil
+}
+
+// MakeRetroactiveRevocation builds a forking checkpoint AUM whose state is
+// the authority's current state with removeKeys taken out, so that any
+// malicious use of those keys is erased.
+//
+// A zero forkFrom means the parent is located automatically; in that case at
+// least one of removeKeys must currently be trusted. Any other forkFrom is
+// used as the parent as-is.
+//
+// The returned AUM is unsigned. It must be signed with more signatures than
+// the sum of key votes that were compromised, before being consumed by
+// tka.Authority methods.
+func (a *Authority) MakeRetroactiveRevocation(storage Chonk, removeKeys []tkatype.KeyID, ourKey tkatype.KeyID, forkFrom AUMHash) (*AUM, error) {
+	parent := forkFrom
+	if forkFrom == (AUMHash{}) {
+		// Revoking only keys that are already untrusted is rejected.
+		anyTrusted := false
+		for _, k := range removeKeys {
+			if _, err := a.state.GetKey(k); err == nil {
+				anyTrusted = true
+				break
+			}
+		}
+		if !anyTrusted {
+			return nil, errors.New("no provided key is currently trusted")
+		}
+
+		found, err := a.findParentForRewrite(storage, removeKeys, ourKey)
+		if err != nil {
+			return nil, fmt.Errorf("finding parent: %v", err)
+		}
+		parent = found
+	}
+
+	// Start from a copy of the current state and drop each revoked key.
+	state := a.state.Clone()
+	for _, keyToRevoke := range removeKeys {
+		for i := range state.Keys {
+			keyID, err := state.Keys[i].ID()
+			if err != nil {
+				return nil, fmt.Errorf("computing keyID: %v", err)
+			}
+			if !bytes.Equal(keyToRevoke, keyID) {
+				continue
+			}
+			// Remove the match and stop scanning for this key.
+			state.Keys = append(state.Keys[:i], state.Keys[i+1:]...)
+			break
+		}
+	}
+	if len(state.Keys) == 0 {
+		return nil, errors.New("cannot revoke all trusted keys")
+	}
+	state.LastAUMHash = nil // checkpoints can't specify a LastAUMHash
+
+	forkingAUM := &AUM{
+		MessageKind: AUMCheckpoint,
+		State:       &state,
+		PrevAUMHash: parent[:],
+	}
+
+	err := forkingAUM.StaticValidate()
+	return forkingAUM, err
+}

+ 128 - 0
tka/tka_test.go

@@ -524,3 +524,131 @@ func TestAuthorityCompact(t *testing.T) {
 		t.Errorf("ancestor = %v, want %v", anc, c.AUMHashes["C"])
 	}
 }
+
+// TestFindParentForRewrite checks the rewrite parent chosen for several key
+// sets on a chain where k2 is added at B, k3 is added at C, and k2 is
+// removed again at D. k1 is used as the signing key in every case.
+func TestFindParentForRewrite(t *testing.T) {
+	pub, _ := testingKey25519(t, 1)
+	k1 := Key{Kind: Key25519, Public: pub, Votes: 1}
+
+	pub2, _ := testingKey25519(t, 2)
+	k2 := Key{Kind: Key25519, Public: pub2, Votes: 1}
+	k2ID, _ := k2.ID()
+	pub3, _ := testingKey25519(t, 3)
+	k3 := Key{Kind: Key25519, Public: pub3, Votes: 1}
+
+	c := newTestchain(t, `
+        A -> B -> C -> D -> E
+        A.template = genesis
+        B.template = add2
+        C.template = add3
+        D.template = remove2
+    `,
+		optTemplate("genesis", AUM{MessageKind: AUMCheckpoint, State: &State{
+			Keys:               []Key{k1},
+			DisablementSecrets: [][]byte{DisablementKDF([]byte{1, 2, 3})},
+		}}),
+		optTemplate("add2", AUM{MessageKind: AUMAddKey, Key: &k2}),
+		optTemplate("add3", AUM{MessageKind: AUMAddKey, Key: &k3}),
+		optTemplate("remove2", AUM{MessageKind: AUMRemoveKey, KeyID: k2ID}))
+
+	a, err := Open(c.Chonk())
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// k1 was trusted at genesis, so there's no better rewrite parent
+	// than the genesis.
+	k1ID, _ := k1.ID()
+	k1P, err := a.findParentForRewrite(c.Chonk(), []tkatype.KeyID{k1ID}, k1ID)
+	if err != nil {
+		t.Fatalf("FindParentForRewrite(k1) failed: %v", err)
+	}
+	if k1P != a.oldestAncestor.Hash() {
+		t.Errorf("FindParentForRewrite(k1) = %v, want %v", k1P, a.oldestAncestor.Hash())
+	}
+
+	// k3 was trusted at C, so B would be an ideal rewrite point.
+	k3ID, _ := k3.ID()
+	k3P, err := a.findParentForRewrite(c.Chonk(), []tkatype.KeyID{k3ID}, k1ID)
+	if err != nil {
+		t.Fatalf("FindParentForRewrite(k3) failed: %v", err)
+	}
+	if k3P != c.AUMHashes["B"] {
+		t.Errorf("FindParentForRewrite(k3) = %v, want %v", k3P, c.AUMHashes["B"])
+	}
+
+	// k2 was added but then removed, so HEAD is an appropriate rewrite point.
+	k2P, err := a.findParentForRewrite(c.Chonk(), []tkatype.KeyID{k2ID}, k1ID)
+	if err != nil {
+		t.Fatalf("FindParentForRewrite(k2) failed: %v", err)
+	}
+	if k2P != a.Head() {
+		t.Errorf("FindParentForRewrite(k2) = %v, want %v", k2P, a.Head())
+	}
+
+	// There's no appropriate point where both k2 and k3 are simultaneously not trusted,
+	// so the best rewrite point is the genesis AUM.
+	doubleP, err := a.findParentForRewrite(c.Chonk(), []tkatype.KeyID{k2ID, k3ID}, k1ID)
+	if err != nil {
+		t.Fatalf("FindParentForRewrite({k2, k3}) failed: %v", err)
+	}
+	if doubleP != a.oldestAncestor.Hash() {
+		t.Errorf("FindParentForRewrite({k2, k3}) = %v, want %v", doubleP, a.oldestAncestor.Hash())
+	}
+}
+
+// TestMakeRetroactiveRevocation checks that revoking k2 produces a forking
+// AUM parented at B whose state trusts k1 and k3 but not k2, and that
+// attempting to revoke every trusted key is rejected with an error.
+func TestMakeRetroactiveRevocation(t *testing.T) {
+	pub, _ := testingKey25519(t, 1)
+	k1 := Key{Kind: Key25519, Public: pub, Votes: 1}
+
+	pub2, _ := testingKey25519(t, 2)
+	k2 := Key{Kind: Key25519, Public: pub2, Votes: 1}
+	pub3, _ := testingKey25519(t, 3)
+	k3 := Key{Kind: Key25519, Public: pub3, Votes: 1}
+
+	c := newTestchain(t, `
+        A -> B -> C -> D
+        A.template = genesis
+        C.template = add2
+        D.template = add3
+    `,
+		optTemplate("genesis", AUM{MessageKind: AUMCheckpoint, State: &State{
+			Keys:               []Key{k1},
+			DisablementSecrets: [][]byte{DisablementKDF([]byte{1, 2, 3})},
+		}}),
+		optTemplate("add2", AUM{MessageKind: AUMAddKey, Key: &k2}),
+		optTemplate("add3", AUM{MessageKind: AUMAddKey, Key: &k3}))
+
+	a, err := Open(c.Chonk())
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// k2 was added by C, so a forking revocation should:
+	// - have B as a parent
+	// - trust the remaining keys at the time, k1 & k3.
+	k1ID, _ := k1.ID()
+	k2ID, _ := k2.ID()
+	k3ID, _ := k3.ID()
+	forkingAUM, err := a.MakeRetroactiveRevocation(c.Chonk(), []tkatype.KeyID{k2ID}, k1ID, AUMHash{})
+	if err != nil {
+		t.Fatalf("MakeRetroactiveRevocation(k2) failed: %v", err)
+	}
+	if bHash := c.AUMHashes["B"]; !bytes.Equal(forkingAUM.PrevAUMHash, bHash[:]) {
+		t.Errorf("forking AUM has parent %v, want %v", forkingAUM.PrevAUMHash, bHash[:])
+	}
+	if _, err := forkingAUM.State.GetKey(k1ID); err != nil {
+		t.Error("Forked state did not trust k1")
+	}
+	if _, err := forkingAUM.State.GetKey(k3ID); err != nil {
+		t.Error("Forked state did not trust k3")
+	}
+	if _, err := forkingAUM.State.GetKey(k2ID); err == nil {
+		t.Error("Forked state trusted removed-key k2")
+	}
+
+	// Test that removing all trusted keys results in an error.
+	_, err = a.MakeRetroactiveRevocation(c.Chonk(), []tkatype.KeyID{k1ID, k2ID, k3ID}, k1ID, AUMHash{})
+	if wantErr := "cannot revoke all trusted keys"; err == nil || err.Error() != wantErr {
+		t.Fatalf("MakeRetroactiveRevocation({k1, k2, k3}) returned %v, expected %q", err, wantErr)
+	}
+}