
feat(vscode-lm): add image support for VS Code LM API provider (#11065)

Co-authored-by: Roo Code <[email protected]>
Co-authored-by: Hannes Rudolph <[email protected]>
Co-authored-by: Matt Rubens <[email protected]>
roomote[bot] 2 weeks ago
Parent
Commit
49aac7ea00

+ 7 - 2
pnpm-lock.yaml

@@ -1060,8 +1060,8 @@ importers:
         specifier: ^5.0.5
         version: 5.0.5
       '@types/vscode':
-        specifier: ^1.84.0
-        version: 1.100.0
+        specifier: ^1.106.0
+        version: 1.108.1
       '@vscode/test-electron':
         specifier: ^2.5.2
         version: 2.5.2
@@ -4454,6 +4454,9 @@ packages:
   '@types/[email protected]':
     resolution: {integrity: sha512-o4hanZAQdNfsKecexq9L3eHICd0AAvdbLk6hA60UzGXbGH/q8b/9xv2RgR7vV3ZcHuyKVq7b37IGd/+gM4Tu+Q==}
 
+  '@types/[email protected]':
+    resolution: {integrity: sha512-DerV0BbSzt87TbrqmZ7lRDIYaMiqvP8tmJTzW2p49ZBVtGUnGAu2RGQd1Wv4XMzEVUpaHbsemVM5nfuQJj7H6w==}
+
   '@types/[email protected]':
     resolution: {integrity: sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==}
 
@@ -14488,6 +14491,8 @@ snapshots:
 
   '@types/[email protected]': {}
 
+  '@types/[email protected]': {}
+
   '@types/[email protected]':
     dependencies:
       '@types/node': 24.2.1

+ 90 - 0
src/api/providers/__tests__/vscode-lm.spec.ts

@@ -1,4 +1,5 @@
 import type { Mock } from "vitest"
+import { checkModelSupportsImages, IMAGE_CAPABLE_MODEL_PREFIXES } from "../vscode-lm"
 
 // Mocks must come first, before imports
 vi.mock("vscode", () => {
@@ -537,3 +538,92 @@ describe("VsCodeLmHandler", () => {
 		})
 	})
 })
+
+describe("checkModelSupportsImages", () => {
+	describe("OpenAI GPT models", () => {
+		it("should return true for all gpt-* models (GitHub Copilot)", () => {
+			// All GPT models in GitHub Copilot support images
+			expect(checkModelSupportsImages("gpt", "gpt-4o")).toBe(true)
+			expect(checkModelSupportsImages("gpt", "gpt-4.1")).toBe(true)
+			expect(checkModelSupportsImages("gpt", "gpt-5")).toBe(true)
+			expect(checkModelSupportsImages("gpt", "gpt-5.1")).toBe(true)
+			expect(checkModelSupportsImages("gpt", "gpt-5.2")).toBe(true)
+			expect(checkModelSupportsImages("gpt-mini", "gpt-5-mini")).toBe(true)
+			expect(checkModelSupportsImages("gpt-codex", "gpt-5.1-codex")).toBe(true)
+			expect(checkModelSupportsImages("gpt-codex", "gpt-5.2-codex")).toBe(true)
+			expect(checkModelSupportsImages("gpt-codex", "gpt-5.1-codex-max")).toBe(true)
+			expect(checkModelSupportsImages("gpt-codex", "gpt-5.1-codex-mini")).toBe(true)
+		})
+
+		it("should return true for o1 and o3 reasoning models", () => {
+			expect(checkModelSupportsImages("o1", "o1-preview")).toBe(true)
+			expect(checkModelSupportsImages("o1", "o1-mini")).toBe(true)
+			expect(checkModelSupportsImages("o3", "o3")).toBe(true)
+		})
+	})
+
+	describe("Anthropic Claude models", () => {
+		it("should return true for all claude-* models (GitHub Copilot)", () => {
+			// All Claude models in GitHub Copilot support images
+			expect(checkModelSupportsImages("claude-haiku", "claude-haiku-4.5")).toBe(true)
+			expect(checkModelSupportsImages("claude-opus", "claude-opus-4.5")).toBe(true)
+			expect(checkModelSupportsImages("claude-sonnet", "claude-sonnet-4")).toBe(true)
+			expect(checkModelSupportsImages("claude-sonnet", "claude-sonnet-4.5")).toBe(true)
+		})
+	})
+
+	describe("Google Gemini models", () => {
+		it("should return true for all gemini-* models (GitHub Copilot)", () => {
+			// All Gemini models in GitHub Copilot support images
+			expect(checkModelSupportsImages("gemini-pro", "gemini-2.5-pro")).toBe(true)
+			expect(checkModelSupportsImages("gemini-flash", "gemini-3-flash-preview")).toBe(true)
+			expect(checkModelSupportsImages("gemini-pro", "gemini-3-pro-preview")).toBe(true)
+		})
+	})
+
+	describe("non-vision models", () => {
+		it("should return false for grok models (text-only in GitHub Copilot)", () => {
+			// Grok is the only model family in GitHub Copilot that doesn't support images
+			expect(checkModelSupportsImages("grok", "grok-code-fast-1")).toBe(false)
+		})
+
+		it("should return false for models with non-matching prefixes", () => {
+			// Models that don't start with gpt, claude, gemini, o1, or o3
+			expect(checkModelSupportsImages("mistral", "mistral-large")).toBe(false)
+			expect(checkModelSupportsImages("llama", "llama-3-70b")).toBe(false)
+			expect(checkModelSupportsImages("unknown", "some-random-model")).toBe(false)
+		})
+	})
+
+	describe("case insensitivity", () => {
+		it("should match regardless of case", () => {
+			expect(checkModelSupportsImages("GPT", "GPT-4O")).toBe(true)
+			expect(checkModelSupportsImages("CLAUDE", "CLAUDE-SONNET-4")).toBe(true)
+			expect(checkModelSupportsImages("GEMINI", "GEMINI-2.5-PRO")).toBe(true)
+		})
+	})
+
+	describe("prefix matching", () => {
+		it("should only match IDs that start with known prefixes", () => {
+			// ID must START with the prefix, not just contain it
+			expect(checkModelSupportsImages("custom", "gpt-4o")).toBe(true) // ID starts with gpt
+			expect(checkModelSupportsImages("custom", "my-gpt-model")).toBe(false) // gpt not at start
+			expect(checkModelSupportsImages("custom", "not-claude-model")).toBe(false) // claude not at start
+		})
+	})
+})
+
+describe("IMAGE_CAPABLE_MODEL_PREFIXES", () => {
+	it("should export the model prefixes array", () => {
+		expect(Array.isArray(IMAGE_CAPABLE_MODEL_PREFIXES)).toBe(true)
+		expect(IMAGE_CAPABLE_MODEL_PREFIXES.length).toBeGreaterThan(0)
+	})
+
+	it("should include key model prefixes", () => {
+		expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("gpt")
+		expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("claude")
+		expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("gemini")
+		expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("o1")
+		expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("o3")
+	})
+})

+ 42 - 3
src/api/providers/vscode-lm.ts

@@ -529,6 +529,10 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 
 			const modelId = this.client.id || modelParts.join(SELECTOR_SEPARATOR)
 
+			// Check if the model supports images based on known model families
+			// VS Code Language Model API 1.106+ supports image inputs via LanguageModelDataPart
+			const supportsImages = checkModelSupportsImages(this.client.family, this.client.id)
+
 			// Build model info with conservative defaults for missing values
 			const modelInfo: ModelInfo = {
 				maxTokens: -1, // Unlimited tokens by default
@@ -536,7 +540,7 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 					typeof this.client.maxInputTokens === "number"
 						? Math.max(0, this.client.maxInputTokens)
 						: openAiModelInfoSaneDefaults.contextWindow,
-				supportsImages: false, // VSCode Language Model API currently doesn't support image inputs
+				supportsImages,
 				supportsPromptCache: true,
 				inputPrice: 0,
 				outputPrice: 0,
@@ -586,8 +590,43 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 	}
 }
 
-// Static blacklist of VS Code Language Model IDs that should be excluded from the model list e.g. because they will never work
-const VSCODE_LM_STATIC_BLACKLIST: string[] = ["claude-3.7-sonnet", "claude-3.7-sonnet-thought"]
+/**
+ * Model ID prefixes that support image inputs via VS Code Language Model API.
+ * These models support the LanguageModelDataPart.image() API introduced in VS Code 1.106+.
+ *
+ * All GitHub Copilot models with these prefixes support images.
+ * Only grok-* models don't support images (text only).
+ *
+ * Source: https://models.dev/api.json (github-copilot provider models)
+ */
+export const IMAGE_CAPABLE_MODEL_PREFIXES = [
+	"gpt", // All GPT models (gpt-4o, gpt-4.1, gpt-5, gpt-5.1, gpt-5.2, gpt-5-mini, gpt-5.1-codex, etc.)
+	"claude", // All Claude models (claude-haiku-4.5, claude-opus-4.5, claude-sonnet-4, claude-sonnet-4.5)
+	"gemini", // All Gemini models (gemini-2.5-pro, gemini-3-flash-preview, gemini-3-pro-preview)
+	"o1", // OpenAI o1 reasoning models
+	"o3", // OpenAI o3 reasoning models
+]
+
+/**
+ * Checks if a model supports image inputs based on its model ID.
+ * Uses prefix matching against known image-capable model families.
+ *
+ * @param _family The model family (unused, kept for API compatibility)
+ * @param id The model ID
+ * @returns true if the model supports image inputs
+ */
+export function checkModelSupportsImages(_family: string, id: string): boolean {
+	const idLower = id.toLowerCase()
+	return IMAGE_CAPABLE_MODEL_PREFIXES.some((prefix) => idLower.startsWith(prefix))
+}
+
+// Static blacklist of VS Code Language Model IDs that should be excluded from the model list
+// e.g. because they don't support native tool calling or will never work
+const VSCODE_LM_STATIC_BLACKLIST: string[] = [
+	"claude-3.7-sonnet",
+	"claude-3.7-sonnet-thought",
+	"claude-opus-41", // Does not support native tool calling
+]
 
 export async function getVsCodeLmModels() {
 	try {

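A minimal usage sketch (not part of this commit; the relative import path is assumed) of the new exports from src/api/providers/vscode-lm.ts, illustrating that checkModelSupportsImages ignores its family argument and matches the lowercased model ID against IMAGE_CAPABLE_MODEL_PREFIXES:

import { checkModelSupportsImages, IMAGE_CAPABLE_MODEL_PREFIXES } from "./vscode-lm" // path assumed

// Case-insensitive prefix match: only the model ID matters, the family argument is ignored.
for (const id of ["gpt-5.1-codex", "claude-sonnet-4.5", "GEMINI-2.5-PRO", "grok-code-fast-1"]) {
	console.log(id, checkModelSupportsImages("any-family", id))
}
// => true, true, true, false — grok-* remains text-only

console.log(IMAGE_CAPABLE_MODEL_PREFIXES) // ["gpt", "claude", "gemini", "o1", "o3"]
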
+ 149 - 5
src/api/transform/__tests__/vscode-lm-format.spec.ts

@@ -26,7 +26,13 @@ interface MockLanguageModelToolCallPart {
 interface MockLanguageModelToolResultPart {
 	type: "tool_result"
 	callId: string
-	content: MockLanguageModelTextPart[]
+	content: (MockLanguageModelTextPart | MockLanguageModelDataPart)[]
+}
+
+interface MockLanguageModelDataPart {
+	type: "data"
+	data: Uint8Array
+	mimeType: string
 }
 
 // Mock vscode namespace
@@ -54,10 +60,32 @@ vitest.mock("vscode", () => {
 		type = "tool_result"
 		constructor(
 			public callId: string,
-			public content: MockLanguageModelTextPart[],
+			public content: (MockLanguageModelTextPart | MockLanguageModelDataPart)[],
 		) {}
 	}
 
+	class MockLanguageModelDataPart {
+		type = "data"
+		constructor(
+			public data: Uint8Array,
+			public mimeType: string,
+		) {}
+
+		static image(data: Uint8Array, mime: string) {
+			return new MockLanguageModelDataPart(data, mime)
+		}
+
+		static json(value: any, mime?: string) {
+			const bytes = new TextEncoder().encode(JSON.stringify(value))
+			return new MockLanguageModelDataPart(bytes, mime || "application/json")
+		}
+
+		static text(value: string, mime?: string) {
+			const bytes = new TextEncoder().encode(value)
+			return new MockLanguageModelDataPart(bytes, mime || "text/plain")
+		}
+	}
+
 	return {
 		LanguageModelChatMessage: {
 			Assistant: vitest.fn((content) => ({
@@ -75,6 +103,7 @@ vitest.mock("vscode", () => {
 		LanguageModelTextPart: MockLanguageModelTextPart,
 		LanguageModelToolCallPart: MockLanguageModelToolCallPart,
 		LanguageModelToolResultPart: MockLanguageModelToolResultPart,
+		LanguageModelDataPart: MockLanguageModelDataPart,
 	}
 })
 
@@ -150,7 +179,7 @@ describe("convertToVsCodeLmMessages", () => {
 		expect(toolCall.type).toBe("tool_call")
 	})
 
-	it("should handle image blocks with appropriate placeholders", () => {
+	it("should convert image blocks to LanguageModelDataPart", () => {
 		const messages: Anthropic.Messages.MessageParam[] = [
 			{
 				role: "user",
@@ -161,7 +190,7 @@ describe("convertToVsCodeLmMessages", () => {
 						source: {
 							type: "base64",
 							media_type: "image/png",
-							data: "base64data",
+							data: "dGVzdA==", // "test" in base64
 						},
 					},
 				],
@@ -171,8 +200,123 @@ describe("convertToVsCodeLmMessages", () => {
 		const result = convertToVsCodeLmMessages(messages)
 
 		expect(result).toHaveLength(1)
+		expect(result[0].content).toHaveLength(2)
+
+		// First part should be text
+		const textPart = result[0].content[0] as MockLanguageModelTextPart
+		expect(textPart.type).toBe("text")
+		expect(textPart.value).toBe("Look at this:")
+
+		// Second part should be a LanguageModelDataPart for the image
+		const imagePart = result[0].content[1] as unknown as MockLanguageModelDataPart
+		expect(imagePart.type).toBe("data")
+		expect(imagePart.mimeType).toBe("image/png")
+		expect(imagePart.data).toBeInstanceOf(Uint8Array)
+	})
+
+	it("should handle images in tool results", () => {
+		const messages: Anthropic.Messages.MessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{
+						type: "tool_result",
+						tool_use_id: "tool-1",
+						content: [
+							{ type: "text", text: "Screenshot result:" },
+							{
+								type: "image",
+								source: {
+									type: "base64",
+									media_type: "image/jpeg",
+									data: "dGVzdA==",
+								},
+							},
+						],
+					},
+				],
+			},
+		]
+
+		const result = convertToVsCodeLmMessages(messages)
+
+		expect(result).toHaveLength(1)
+		expect(result[0].content).toHaveLength(1)
+
+		const toolResult = result[0].content[0] as MockLanguageModelToolResultPart
+		expect(toolResult.type).toBe("tool_result")
+		expect(toolResult.content).toHaveLength(2)
+
+		// First item in tool result should be text
+		const textPart = toolResult.content[0] as MockLanguageModelTextPart
+		expect(textPart.type).toBe("text")
+
+		// Second item should be image data
+		const imagePart = toolResult.content[1] as MockLanguageModelDataPart
+		expect(imagePart.type).toBe("data")
+		expect(imagePart.mimeType).toBe("image/jpeg")
+	})
+
+	it("should return text placeholder for URL-based images", () => {
+		const messages: Anthropic.Messages.MessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{ type: "text", text: "Check this image:" },
+					{
+						type: "image",
+						source: {
+							type: "url",
+							url: "https://example.com/image.png",
+						} as any,
+					},
+				],
+			},
+		]
+
+		const result = convertToVsCodeLmMessages(messages)
+
+		expect(result).toHaveLength(1)
+		expect(result[0].content).toHaveLength(2)
+
+		// First part should be text
+		const textPart = result[0].content[0] as MockLanguageModelTextPart
+		expect(textPart.type).toBe("text")
+		expect(textPart.value).toBe("Check this image:")
+
+		// Second part should be a text placeholder (not an empty DataPart)
 		const imagePlaceholder = result[0].content[1] as MockLanguageModelTextPart
-		expect(imagePlaceholder.value).toContain("[Image (base64): image/png not supported by VSCode LM API]")
+		expect(imagePlaceholder.type).toBe("text")
+		expect(imagePlaceholder.value).toContain("URL not supported")
+		expect(imagePlaceholder.value).toContain("https://example.com/image.png")
+	})
+
+	it("should return text placeholder for unknown image source types", () => {
+		const messages: Anthropic.Messages.MessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{
+						type: "image",
+						source: {
+							type: "unknown",
+							media_type: "image/png",
+							data: "", // Required by type but ignored for unknown source types
+						} as any,
+					},
+				],
+			},
+		]
+
+		const result = convertToVsCodeLmMessages(messages)
+
+		expect(result).toHaveLength(1)
+		expect(result[0].content).toHaveLength(1)
+
+		// Should return a text placeholder for unknown source types
+		const placeholder = result[0].content[0] as MockLanguageModelTextPart
+		expect(placeholder.type).toBe("text")
+		expect(placeholder.value).toContain("unsupported source type")
 	})
 })
 

+ 45 - 9
src/api/transform/vscode-lm-format.ts

@@ -28,6 +28,46 @@ function asObjectSafe(value: any): object {
 	}
 }
 
+/**
+ * Converts an Anthropic image block to a VS Code LanguageModelDataPart or TextPart.
+ * Uses the new LanguageModelDataPart.image() API available in VS Code 1.106+.
+ * @param imageBlock The Anthropic image block param
+ * @returns A LanguageModelDataPart for the image, or TextPart if the image cannot be converted
+ */
+function convertImageToDataPart(
+	imageBlock: Anthropic.ImageBlockParam,
+): vscode.LanguageModelDataPart | vscode.LanguageModelTextPart {
+	const source = imageBlock.source
+	const mediaType = source.media_type || "image/png"
+
+	if (source.type === "base64") {
+		// Convert base64 string to Uint8Array
+		const binaryString = atob(source.data)
+		const bytes = new Uint8Array(binaryString.length)
+		for (let i = 0; i < binaryString.length; i++) {
+			bytes[i] = binaryString.charCodeAt(i)
+		}
+		return vscode.LanguageModelDataPart.image(bytes, mediaType)
+	} else if (source.type === "url") {
+		// URL-based images cannot be directly converted - return a text placeholder
+		// explaining the limitation. URL images should be fetched and converted to base64 upstream.
+		console.warn(
+			"Roo Code <Language Model API>: URL-based images are not supported by the VS Code LM API. " +
+				"Images must be provided as base64 data.",
+		)
+		return new vscode.LanguageModelTextPart(
+			`[Image from URL not supported: ${(source as any).url || "unknown URL"}. ` +
+				`VS Code LM API requires base64-encoded image data.]`,
+		)
+	}
+
+	// Fallback for unknown source types - return a text placeholder
+	console.warn(`Roo Code <Language Model API>: Unknown image source type: ${(source as any).type}`)
+	return new vscode.LanguageModelTextPart(
+		`[Image with unsupported source type "${(source as any).type}" cannot be displayed]`,
+	)
+}
+
 export function convertToVsCodeLmMessages(
 	anthropicMessages: Anthropic.Messages.MessageParam[],
 ): vscode.LanguageModelChatMessage[] {
@@ -66,15 +106,13 @@ export function convertToVsCodeLmMessages(
 				const contentParts = [
 					// Convert tool messages to ToolResultParts
 					...toolMessages.map((toolMessage) => {
-						// Process tool result content into TextParts
-						const toolContentParts: vscode.LanguageModelTextPart[] =
+						// Process tool result content into TextParts or DataParts
+						const toolContentParts: (vscode.LanguageModelTextPart | vscode.LanguageModelDataPart)[] =
 							typeof toolMessage.content === "string"
 								? [new vscode.LanguageModelTextPart(toolMessage.content)]
 								: (toolMessage.content?.map((part) => {
 										if (part.type === "image") {
-											return new vscode.LanguageModelTextPart(
-												`[Image (${part.source?.type || "Unknown source-type"}): ${part.source?.media_type || "unknown media-type"} not supported by VSCode LM API]`,
-											)
+											return convertImageToDataPart(part)
 										}
 										return new vscode.LanguageModelTextPart(part.text)
 									}) ?? [new vscode.LanguageModelTextPart("")])
@@ -82,12 +120,10 @@ export function convertToVsCodeLmMessages(
 						return new vscode.LanguageModelToolResultPart(toolMessage.tool_use_id, toolContentParts)
 					}),
 
-					// Convert non-tool messages to TextParts after tool messages
+					// Convert non-tool messages to TextParts or DataParts after tool messages
 					...nonToolMessages.map((part) => {
 						if (part.type === "image") {
-							return new vscode.LanguageModelTextPart(
-								`[Image (${part.source?.type || "Unknown source-type"}): ${part.source?.media_type || "unknown media-type"} not supported by VSCode LM API]`,
-							)
+							return convertImageToDataPart(part)
 						}
 						return new vscode.LanguageModelTextPart(part.text)
 					}),

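A minimal sketch (not part of this commit) of the base64 decoding step that convertImageToDataPart performs before handing bytes to LanguageModelDataPart.image(); "dGVzdA==" is the same sample payload used in the spec above:

const base64Data = "dGVzdA==" // decodes to "test"
// atob is a global in the extension host; Buffer.from(base64Data, "base64") would also work in Node.
const binaryString = atob(base64Data)
const bytes = new Uint8Array(binaryString.length)
for (let i = 0; i < binaryString.length; i++) {
	bytes[i] = binaryString.charCodeAt(i)
}
// bytes is Uint8Array [116, 101, 115, 116] — this is what gets passed to
// vscode.LanguageModelDataPart.image(bytes, "image/png")
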
+ 1 - 1
src/package.json

@@ -555,7 +555,7 @@
 		"@types/string-similarity": "^4.0.2",
 		"@types/tmp": "^0.2.6",
 		"@types/turndown": "^5.0.5",
-		"@types/vscode": "^1.84.0",
+		"@types/vscode": "^1.106.0",
 		"@vscode/test-electron": "^2.5.2",
 		"@vscode/vsce": "3.3.2",
 		"ai": "^6.0.0",

+ 2 - 1
webview-ui/src/components/ui/hooks/useSelectedModel.ts

@@ -312,7 +312,8 @@ function getSelectedModel({
 				: vscodeLlmDefaultModelId
 			const modelFamily = apiConfiguration?.vsCodeLmModelSelector?.family ?? vscodeLlmDefaultModelId
 			const info = vscodeLlmModels[modelFamily as keyof typeof vscodeLlmModels]
-			return { id, info: { ...openAiModelInfoSaneDefaults, ...info, supportsImages: false } } // VSCode LM API currently doesn't support images.
+			// VS Code LM API 1.106+ supports images via LanguageModelDataPart - use model's supportsImages capability
+			return { id, info: { ...openAiModelInfoSaneDefaults, ...info } }
 		}
 		case "cerebras": {
 			const id = apiConfiguration.apiModelId ?? defaultModelId
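
For context on the useSelectedModel.ts change, a minimal sketch (values are illustrative stand-ins, not the real openAiModelInfoSaneDefaults or vscodeLlmModels entries) of why dropping the trailing override matters: with object spread the last property wins, so the old code always forced supportsImages to false regardless of the model entry.

const defaults = { supportsImages: false, contextWindow: 128000 } // stand-in for openAiModelInfoSaneDefaults
const info = { supportsImages: true } // e.g. a vision-capable Copilot model entry
const before = { ...defaults, ...info, supportsImages: false } // old behavior: always false
const after = { ...defaults, ...info } // new behavior: reflects the model entry
console.log(before.supportsImages, after.supportsImages) // false true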