Procházet zdrojové kódy

pdf support in read tool (#5222)

Co-authored-by: ammi1378 <[email protected]>
Aiden Cline před 2 měsíci
rodič
revize
a3bb4a3c85

+ 36 - 0
packages/opencode/src/provider/transform.ts

@@ -2,6 +2,17 @@ import type { APICallError, ModelMessage } from "ai"
 import { unique } from "remeda"
 import type { JSONSchema } from "zod/v4/core"
 import type { Provider } from "./provider"
+import type { ModelsDev } from "./models"
+
+type Modality = NonNullable<ModelsDev.Model["modalities"]>["input"][number]
+
+/**
+ * Maps a MIME type to the input modality it represents.
+ *
+ * The type/subtype is compared case-insensitively and anything after the first
+ * `;` is ignored, so `Application/PDF; charset=binary` is treated as `pdf`.
+ *
+ * @param mime - MIME type string, optionally followed by `;`-separated parameters.
+ * @returns `"image"`, `"audio"`, `"video"` or `"pdf"` for a matching type,
+ *   otherwise `undefined`.
+ */
+function mimeToModality(mime: string): Modality | undefined {
+  // Media type names are case-insensitive and may be followed by parameters;
+  // reduce the input to its bare lower-cased type/subtype before matching.
+  const essence = (mime.split(";")[0] ?? "").trim().toLowerCase()
+  if (essence.startsWith("image/")) return "image"
+  if (essence.startsWith("audio/")) return "audio"
+  if (essence.startsWith("video/")) return "video"
+  if (essence === "application/pdf") return "pdf"
+  return undefined
+}
 
 export namespace ProviderTransform {
   function normalizeMessages(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
@@ -148,7 +159,32 @@ export namespace ProviderTransform {
     return msgs
   }
 
+  /**
+   * Replaces user attachments the model cannot accept with an explanatory text part.
+   *
+   * Only messages with role `user` and array content are inspected. Each `file`
+   * or `image` part is mapped to a modality; when `model.capabilities.input`
+   * does not enable that modality the part is swapped for a text part carrying
+   * an error notice. All other parts and messages are returned unchanged.
+   *
+   * @param msgs - Messages to inspect.
+   * @param model - Model whose `capabilities.input` flags decide what is kept.
+   * @returns A new array; inspected messages are shallow copies with new content.
+   */
+  function unsupportedParts(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
+    return msgs.map((msg) => {
+      if (msg.role !== "user" || !Array.isArray(msg.content)) return msg
+
+      const filtered = msg.content.map((part) => {
+        if (part.type !== "file" && part.type !== "image") return part
+
+        // Prefer the declared media type. Image parts may omit it, in which case
+        // a data URL is the only payload form that states one; raw bytes, bare
+        // base64 and remote URLs are never stringified or parsed.
+        let mime: string | undefined = part.mediaType
+        if (!mime && part.type === "image") {
+          const source = part.image instanceof URL ? part.image.href : part.image
+          if (typeof source === "string" && source.startsWith("data:")) {
+            mime = source.slice("data:".length).split(/[;,]/)[0]
+          }
+        }
+        const filename = part.type === "file" ? part.filename : undefined
+        // An image part is image input even when no MIME type can be determined,
+        // so it must not slip past the capability check.
+        const modality = (mime ? mimeToModality(mime) : undefined) ?? (part.type === "image" ? "image" : undefined)
+        if (!modality) return part
+        if (model.capabilities.input[modality]) return part
+
+        const name = filename ? `"${filename}"` : modality
+        return {
+          type: "text" as const,
+          text: `ERROR: Cannot read ${name} (this model does not support ${modality} input). Inform the user.`,
+        }
+      })
+
+      return { ...msg, content: filtered }
+    })
+  }
+
   export function message(msgs: ModelMessage[], model: Provider.Model) {
+    msgs = unsupportedParts(msgs, model)
     msgs = normalizeMessages(msgs, model)
     if (model.providerID === "anthropic" || model.api.id.includes("anthropic") || model.api.id.includes("claude")) {
       msgs = applyCaching(msgs, model.providerID)

+ 0 - 141
packages/opencode/src/session/message-v2.ts

@@ -411,147 +411,6 @@ export namespace MessageV2 {
   })
   export type WithParts = z.infer<typeof WithParts>
 
-  export function fromV1(v1: Message.Info) {
-    if (v1.role === "assistant") {
-      const info: Assistant = {
-        id: v1.id,
-        parentID: "",
-        sessionID: v1.metadata.sessionID,
-        role: "assistant",
-        time: {
-          created: v1.metadata.time.created,
-          completed: v1.metadata.time.completed,
-        },
-        cost: v1.metadata.assistant!.cost,
-        path: v1.metadata.assistant!.path,
-        summary: v1.metadata.assistant!.summary,
-        tokens: v1.metadata.assistant!.tokens,
-        modelID: v1.metadata.assistant!.modelID,
-        providerID: v1.metadata.assistant!.providerID,
-        mode: "build",
-        error: v1.metadata.error,
-      }
-      const parts = v1.parts.flatMap((part): Part[] => {
-        const base = {
-          id: Identifier.ascending("part"),
-          messageID: v1.id,
-          sessionID: v1.metadata.sessionID,
-        }
-        if (part.type === "text") {
-          return [
-            {
-              ...base,
-              type: "text",
-              text: part.text,
-            },
-          ]
-        }
-        if (part.type === "step-start") {
-          return [
-            {
-              ...base,
-              type: "step-start",
-            },
-          ]
-        }
-        if (part.type === "tool-invocation") {
-          return [
-            {
-              ...base,
-              type: "tool",
-              callID: part.toolInvocation.toolCallId,
-              tool: part.toolInvocation.toolName,
-              state: (() => {
-                if (part.toolInvocation.state === "partial-call") {
-                  return {
-                    status: "pending",
-                    input: {},
-                    raw: "",
-                  }
-                }
-
-                const { title, time, ...metadata } = v1.metadata.tool[part.toolInvocation.toolCallId] ?? {}
-                if (part.toolInvocation.state === "call") {
-                  return {
-                    status: "running",
-                    input: part.toolInvocation.args,
-                    time: {
-                      start: time?.start,
-                    },
-                  }
-                }
-
-                if (part.toolInvocation.state === "result") {
-                  return {
-                    status: "completed",
-                    input: part.toolInvocation.args,
-                    output: part.toolInvocation.result,
-                    title,
-                    time,
-                    metadata,
-                  }
-                }
-                throw new Error("unknown tool invocation state")
-              })(),
-            },
-          ]
-        }
-        return []
-      })
-      return {
-        info,
-        parts,
-      }
-    }
-
-    if (v1.role === "user") {
-      const info: User = {
-        id: v1.id,
-        sessionID: v1.metadata.sessionID,
-        role: "user",
-        time: {
-          created: v1.metadata.time.created,
-        },
-        agent: "build",
-        model: {
-          providerID: "opencode",
-          modelID: "opencode",
-        },
-      }
-      const parts = v1.parts.flatMap((part): Part[] => {
-        const base = {
-          id: Identifier.ascending("part"),
-          messageID: v1.id,
-          sessionID: v1.metadata.sessionID,
-        }
-        if (part.type === "text") {
-          return [
-            {
-              ...base,
-              type: "text",
-              text: part.text,
-            },
-          ]
-        }
-        if (part.type === "file") {
-          return [
-            {
-              ...base,
-              type: "file",
-              mime: part.mediaType,
-              filename: part.filename,
-              url: part.url,
-            },
-          ]
-        }
-        return []
-      })
-      return { info, parts }
-    }
-
-    throw new Error("unknown message type")
-  }
-
   export function toModelMessage(
     input: {
       info: Info

+ 4 - 28
packages/opencode/src/tool/read.ts

@@ -7,7 +7,6 @@ import { FileTime } from "../file/time"
 import DESCRIPTION from "./read.txt"
 import { Filesystem } from "../util/filesystem"
 import { Instance } from "../project/instance"
-import { Provider } from "../provider/provider"
 import { Identifier } from "../id/id"
 import { Permission } from "../permission"
 import { Agent } from "@/agent/agent"
@@ -94,15 +93,11 @@ export const ReadTool = Tool.define("read", {
       throw new Error(`File not found: ${filepath}`)
     }
 
-    const isImage = isImageFile(filepath)
-    const model = ctx.extra?.model as Provider.Model | undefined
-    const supportsImages = model?.capabilities.input.image ?? false
-    if (isImage) {
-      if (!supportsImages) {
-        throw new Error(`Failed to read image: ${filepath}, model may not be able to read images`)
-      }
+    const isImage = file.type.startsWith("image/")
+    const isPdf = file.type === "application/pdf"
+    if (isImage || isPdf) {
       const mime = file.type
-      const msg = "Image read successfully"
+      const msg = `${isImage ? "Image" : "PDF"} read successfully`
       return {
         title,
         output: msg,
@@ -164,25 +159,6 @@ export const ReadTool = Tool.define("read", {
   },
 })
 
-function isImageFile(filePath: string): string | false {
-  const ext = path.extname(filePath).toLowerCase()
-  switch (ext) {
-    case ".jpg":
-    case ".jpeg":
-      return "JPEG"
-    case ".png":
-      return "PNG"
-    case ".gif":
-      return "GIF"
-    case ".bmp":
-      return "BMP"
-    case ".webp":
-      return "WebP"
-    default:
-      return false
-  }
-}
-
 async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
   const ext = path.extname(filepath).toLowerCase()
   // binary check for common non-text extensions