Przeglądaj źródła

feat: add --sanitize flag to opencode export to strip PII or confidential info (#22489)

Aiden Cline 3 dni temu
rodzic
commit
3695057bee
1 zmieniony plik z 226 dodaniami i 19 usunięciami
  1. 226 19
      packages/opencode/src/cli/cmd/export.ts

+ 226 - 19
packages/opencode/src/cli/cmd/export.ts

@@ -1,5 +1,6 @@
 import type { Argv } from "yargs"
 import { Session } from "../../session"
+import { MessageV2 } from "../../session/message-v2"
 import { SessionID } from "../../session/schema"
 import { cmd } from "./cmd"
 import { bootstrap } from "../bootstrap"
@@ -7,16 +8,231 @@ import { UI } from "../ui"
 import * as prompts from "@clack/prompts"
 import { EOL } from "os"
 import { AppRuntime } from "@/effect/app-runtime"
-import { Effect } from "effect"
+
+function redact(kind: string, id: string, value: string) { // Swap a string for a `[redacted:<kind>:<id>]` placeholder.
+  return value.trim() ? `[redacted:${kind}:${id}]` : value // Empty or whitespace-only strings are returned unchanged.
+}
+
+function data(kind: string, id: string, value: Record<string, unknown> | undefined) { // Redact an object payload as a whole rather than field by field.
+  if (!value) return value // A missing payload is passed through as-is.
+  return Object.keys(value).length ? { redacted: `${kind}:${id}` } : value // Non-empty objects collapse to a single `redacted` marker; empty objects are kept.
+}
+
+function span(id: string, value: { value: string; start: number; end: number }) { // Redact the text held by a source span.
+  return {
+    ...value, // `start` and `end` are carried over unchanged.
+    value: redact("file-text", id, value.value),
+  }
+}
+
+function diff(kind: string, diffs: { file: string; patch: string }[] | undefined) { // Redact the file name and patch text of every diff entry.
+  return diffs?.map((item, i) => ({ // Yields undefined when `diffs` is undefined.
+    ...item, // Other fields of the entry are copied unchanged.
+    file: redact(`${kind}-file`, String(i), item.file), // The array index serves as the placeholder id.
+    patch: redact(`${kind}-patch`, String(i), item.patch),
+  }))
+}
+
+function source(part: MessageV2.FilePart) { // Redact the origin details attached to a file part; placeholders use the part id.
+  if (!part.source) return part.source // Nothing to redact when no source is attached.
+  if (part.source.type === "symbol") { // Symbol sources: hide path, symbol name and text.
+    return {
+      ...part.source,
+      path: redact("file-path", part.id, part.source.path),
+      name: redact("file-symbol", part.id, part.source.name),
+      text: span(part.id, part.source.text),
+    }
+  }
+  if (part.source.type === "resource") { // Resource sources: hide client name, URI and text.
+    return {
+      ...part.source,
+      clientName: redact("file-client", part.id, part.source.clientName),
+      uri: redact("file-uri", part.id, part.source.uri),
+      text: span(part.id, part.source.text),
+    }
+  }
+  return { // Any remaining source type: hide path and text (presumably the plain file source; confirm against MessageV2).
+    ...part.source,
+    path: redact("file-path", part.id, part.source.path),
+    text: span(part.id, part.source.text),
+  }
+}
+
+function filepart(part: MessageV2.FilePart): MessageV2.FilePart { // Redact a file part's URL, filename and source.
+  return {
+    ...part, // Remaining fields are copied unchanged.
+    url: redact("file-url", part.id, part.url),
+    filename: part.filename === undefined ? undefined : redact("file-name", part.id, part.filename), // An absent filename stays undefined.
+    source: source(part),
+  }
+}
+
+function part(part: MessageV2.Part): MessageV2.Part { // Redact one message part by type; note the parameter shadows this function's own name inside the body.
+  switch (part.type) {
+    case "text": // Text parts: hide the text and collapse metadata.
+      return {
+        ...part,
+        text: redact("text", part.id, part.text),
+        metadata: data("text-metadata", part.id, part.metadata),
+      }
+    case "reasoning": // Reasoning parts: same treatment as text.
+      return {
+        ...part,
+        text: redact("reasoning", part.id, part.text),
+        metadata: data("reasoning-metadata", part.id, part.metadata),
+      }
+    case "file":
+      return filepart(part)
+    case "subtask": // Subtask parts: hide prompt, description and the optional command.
+      return {
+        ...part,
+        prompt: redact("subtask-prompt", part.id, part.prompt),
+        description: redact("subtask-description", part.id, part.description),
+        command: part.command === undefined ? undefined : redact("subtask-command", part.id, part.command),
+      }
+    case "tool": // Tool parts: the fields redacted depend on the state's status.
+      return {
+        ...part,
+        metadata: data("tool-metadata", part.id, part.metadata),
+        state:
+          part.state.status === "pending"
+            ? {
+                ...part.state,
+                input: data("tool-input", part.id, part.state.input) ?? part.state.input, // `data` only yields undefined for a falsy input, so the fallback keeps the original value.
+                raw: redact("tool-raw", part.id, part.state.raw),
+              }
+            : part.state.status === "running"
+              ? {
+                  ...part.state,
+                  input: data("tool-input", part.id, part.state.input) ?? part.state.input,
+                  title: part.state.title === undefined ? undefined : redact("tool-title", part.id, part.state.title),
+                  metadata: data("tool-state-metadata", part.id, part.state.metadata),
+                }
+              : part.state.status === "completed"
+                ? {
+                    ...part.state,
+                    input: data("tool-input", part.id, part.state.input) ?? part.state.input,
+                    output: redact("tool-output", part.id, part.state.output),
+                    title: redact("tool-title", part.id, part.state.title),
+                    metadata: data("tool-state-metadata", part.id, part.state.metadata) ?? part.state.metadata,
+                    attachments: part.state.attachments?.map(filepart), // Each attachment is redacted like a file part.
+                  }
+                : { // Every remaining status. NOTE(review): only `input` and `metadata` are overridden here; any other field of this state (e.g. an error message, if one exists) is exported unredacted. Confirm that is intended.
+                    ...part.state,
+                    input: data("tool-input", part.id, part.state.input) ?? part.state.input,
+                    metadata: data("tool-state-metadata", part.id, part.state.metadata),
+                  },
+      }
+    case "patch": // Patch parts: hide the hash and every file entry.
+      return {
+        ...part,
+        hash: redact("patch", part.id, part.hash),
+        files: part.files.map((item: string, i: number) => redact("patch-file", `${part.id}-${i}`, item)), // Placeholder id is `<part id>-<index>`.
+      }
+    case "snapshot":
+      return {
+        ...part,
+        snapshot: redact("snapshot", part.id, part.snapshot),
+      }
+    case "step-start": // The snapshot is optional here; an absent one stays undefined.
+      return {
+        ...part,
+        snapshot: part.snapshot === undefined ? undefined : redact("snapshot", part.id, part.snapshot),
+      }
+    case "step-finish": // Same optional-snapshot handling as "step-start".
+      return {
+        ...part,
+        snapshot: part.snapshot === undefined ? undefined : redact("snapshot", part.id, part.snapshot),
+      }
+    case "agent": // Agent parts: only the source's `value` is hidden, and only when a source is present.
+      return {
+        ...part,
+        source: !part.source
+          ? part.source
+          : {
+              ...part.source,
+              value: redact("agent-source", part.id, part.source.value),
+            },
+      }
+    default: // Part types not listed above are returned unchanged.
+      return part
+  }
+}
+
+const partFn = part // Alias of `part`; `sanitize` maps message parts through it.
+
+function sanitize(data: { info: Session.Info; messages: MessageV2.WithParts[] }) { // Build a redacted copy of an export payload; the parameter `data` shadows the `data` helper inside this function.
+  return {
+    info: { // Session info: hide title, directory, summary diffs and revert details.
+      ...data.info,
+      title: redact("session-title", data.info.id, data.info.title),
+      directory: redact("session-directory", data.info.id, data.info.directory),
+      summary: !data.info.summary
+        ? data.info.summary
+        : {
+            ...data.info.summary,
+            diffs: diff("session-diff", data.info.summary.diffs),
+          },
+      revert: !data.info.revert
+        ? data.info.revert
+        : {
+            ...data.info.revert,
+            snapshot:
+              data.info.revert.snapshot === undefined
+                ? undefined
+                : redact("revert-snapshot", data.info.id, data.info.revert.snapshot),
+            diff:
+              data.info.revert.diff === undefined
+                ? undefined
+                : redact("revert-diff", data.info.id, data.info.revert.diff),
+          },
+    },
+    messages: data.messages.map((msg) => ({
+      info:
+        msg.info.role === "user"
+          ? { // User messages: hide the optional system text and the summary's title, body and diffs.
+              ...msg.info,
+              system: msg.info.system === undefined ? undefined : redact("system", msg.info.id, msg.info.system),
+              summary: !msg.info.summary
+                ? msg.info.summary
+                : {
+                    ...msg.info.summary,
+                    title:
+                      msg.info.summary.title === undefined
+                        ? undefined
+                        : redact("summary-title", msg.info.id, msg.info.summary.title),
+                    body:
+                      msg.info.summary.body === undefined
+                        ? undefined
+                        : redact("summary-body", msg.info.id, msg.info.summary.body),
+                    diffs: diff("message-diff", msg.info.summary.diffs),
+                  },
+            }
+          : { // Any other role: hide the working paths; other info fields are copied unchanged.
+              ...msg.info,
+              path: { // Rebuilt with only `cwd` and `root`; any other key on the original `path` is dropped.
+                cwd: redact("cwd", msg.info.id, msg.info.path.cwd),
+                root: redact("root", msg.info.id, msg.info.path.root),
+              },
+            },
+      parts: msg.parts.map(partFn), // Every part is redacted through the `part` function via its alias.
+    })),
+  }
+}
 
 export const ExportCommand = cmd({
   command: "export [sessionID]",
   describe: "export session data as JSON",
   builder: (yargs: Argv) => {
-    return yargs.positional("sessionID", {
-      describe: "session id to export",
-      type: "string",
-    })
+    return yargs
+      .positional("sessionID", {
+        describe: "session id to export",
+        type: "string",
+      })
+      .option("sanitize", {
+        describe: "redact sensitive transcript and file data",
+        type: "boolean",
+      })
   },
   handler: async (args) => {
     await bootstrap(process.cwd(), async () => {
@@ -69,26 +285,17 @@ export const ExportCommand = cmd({
       }
 
       try {
-        const { sessionInfo, messages } = await AppRuntime.runPromise(
-          Effect.gen(function* () {
-            const session = yield* Session.Service
-            const sessionInfo = yield* session.get(sessionID!)
-            return {
-              sessionInfo,
-              messages: yield* session.messages({ sessionID: sessionInfo.id }),
-            }
-          }),
+        const sessionInfo = await AppRuntime.runPromise(Session.Service.use((svc) => svc.get(sessionID!)))
+        const messages = await AppRuntime.runPromise(
+          Session.Service.use((svc) => svc.messages({ sessionID: sessionInfo.id })),
         )
 
         const exportData = {
           info: sessionInfo,
-          messages: messages.map((msg) => ({
-            info: msg.info,
-            parts: msg.parts,
-          })),
+          messages,
         }
 
-        process.stdout.write(JSON.stringify(exportData, null, 2))
+        process.stdout.write(JSON.stringify(args.sanitize ? sanitize(exportData) : exportData, null, 2))
         process.stdout.write(EOL)
       } catch (error) {
         UI.error(`Session not found: ${sessionID!}`)