Przeglądaj źródła

feat: add image support documentation to read_file native tool description (#10442)

Co-authored-by: Roo Code <[email protected]>
roomote[bot] 3 tygodnie temu
rodzic
commit
cbc0ae4726

+ 85 - 0
src/core/prompts/tools/native-tools/__tests__/read_file.spec.ts

@@ -96,6 +96,48 @@ describe("createReadFileTool", () => {
 		})
 		})
 	})
 	})
 
 
+	describe("supportsImages option", () => {
+		it("should include image format documentation when supportsImages is true", () => {
+			const tool = createReadFileTool({ supportsImages: true })
+			const description = getFunctionDef(tool).description
+
+			expect(description).toContain(
+				"Automatically processes and returns image files (PNG, JPG, JPEG, GIF, BMP, SVG, WEBP, ICO, AVIF) for visual analysis",
+			)
+		})
+
+		it("should not include image format documentation when supportsImages is false", () => {
+			const tool = createReadFileTool({ supportsImages: false })
+			const description = getFunctionDef(tool).description
+
+			expect(description).not.toContain(
+				"Automatically processes and returns image files (PNG, JPG, JPEG, GIF, BMP, SVG, WEBP, ICO, AVIF) for visual analysis",
+			)
+			expect(description).toContain("may not handle other binary files properly")
+		})
+
+		it("should default supportsImages to false", () => {
+			const tool = createReadFileTool({})
+			const description = getFunctionDef(tool).description
+
+			expect(description).not.toContain(
+				"Automatically processes and returns image files (PNG, JPG, JPEG, GIF, BMP, SVG, WEBP, ICO, AVIF) for visual analysis",
+			)
+		})
+
+		it("should always include PDF and DOCX support in description", () => {
+			const toolWithImages = createReadFileTool({ supportsImages: true })
+			const toolWithoutImages = createReadFileTool({ supportsImages: false })
+
+			expect(getFunctionDef(toolWithImages).description).toContain(
+				"Supports text extraction from PDF and DOCX files",
+			)
+			expect(getFunctionDef(toolWithoutImages).description).toContain(
+				"Supports text extraction from PDF and DOCX files",
+			)
+		})
+	})
+
 	describe("combined options", () => {
 	describe("combined options", () => {
 		it("should correctly combine low maxConcurrentFileReads with partialReadsEnabled", () => {
 		it("should correctly combine low maxConcurrentFileReads with partialReadsEnabled", () => {
 			const tool = createReadFileTool({
 			const tool = createReadFileTool({
@@ -120,6 +162,49 @@ describe("createReadFileTool", () => {
 			expect(description).not.toContain("line_ranges")
 			expect(description).not.toContain("line_ranges")
 			expect(description).not.toContain("Example multiple files")
 			expect(description).not.toContain("Example multiple files")
 		})
 		})
+
+		it("should correctly combine partialReadsEnabled and supportsImages", () => {
+			const tool = createReadFileTool({
+				partialReadsEnabled: true,
+				supportsImages: true,
+			})
+			const description = getFunctionDef(tool).description
+
+			// Should have both line_ranges and image support
+			expect(description).toContain("line_ranges")
+			expect(description).toContain(
+				"Automatically processes and returns image files (PNG, JPG, JPEG, GIF, BMP, SVG, WEBP, ICO, AVIF) for visual analysis",
+			)
+		})
+
+		it("should work with partialReadsEnabled=false and supportsImages=true", () => {
+			const tool = createReadFileTool({
+				partialReadsEnabled: false,
+				supportsImages: true,
+			})
+			const description = getFunctionDef(tool).description
+
+			// Should have image support but no line_ranges
+			expect(description).not.toContain("line_ranges")
+			expect(description).toContain(
+				"Automatically processes and returns image files (PNG, JPG, JPEG, GIF, BMP, SVG, WEBP, ICO, AVIF) for visual analysis",
+			)
+		})
+
+		it("should correctly combine all three options", () => {
+			const tool = createReadFileTool({
+				maxConcurrentFileReads: 3,
+				partialReadsEnabled: true,
+				supportsImages: true,
+			})
+			const description = getFunctionDef(tool).description
+
+			expect(description).toContain("maximum of 3 files")
+			expect(description).toContain("line_ranges")
+			expect(description).toContain(
+				"Automatically processes and returns image files (PNG, JPG, JPEG, GIF, BMP, SVG, WEBP, ICO, AVIF) for visual analysis",
+			)
+		})
 	})
 	})
 
 
 	describe("tool structure", () => {
 	describe("tool structure", () => {

+ 4 - 1
src/core/prompts/tools/native-tools/index.ts

@@ -33,6 +33,8 @@ export interface NativeToolsOptions {
 	partialReadsEnabled?: boolean
 	partialReadsEnabled?: boolean
 	/** Maximum number of files that can be read in a single read_file request (default: 5) */
 	/** Maximum number of files that can be read in a single read_file request (default: 5) */
 	maxConcurrentFileReads?: number
 	maxConcurrentFileReads?: number
+	/** Whether the model supports image processing (default: false) */
+	supportsImages?: boolean
 }
 }
 
 
 /**
 /**
@@ -42,11 +44,12 @@ export interface NativeToolsOptions {
  * @returns Array of native tool definitions
  * @returns Array of native tool definitions
  */
  */
 export function getNativeTools(options: NativeToolsOptions = {}): OpenAI.Chat.ChatCompletionTool[] {
 export function getNativeTools(options: NativeToolsOptions = {}): OpenAI.Chat.ChatCompletionTool[] {
-	const { partialReadsEnabled = true, maxConcurrentFileReads = 5 } = options
+	const { partialReadsEnabled = true, maxConcurrentFileReads = 5, supportsImages = false } = options
 
 
 	const readFileOptions: ReadFileToolOptions = {
 	const readFileOptions: ReadFileToolOptions = {
 		partialReadsEnabled,
 		partialReadsEnabled,
 		maxConcurrentFileReads,
 		maxConcurrentFileReads,
+		supportsImages,
 	}
 	}
 
 
 	return [
 	return [

+ 17 - 3
src/core/prompts/tools/native-tools/read_file.ts

@@ -1,6 +1,17 @@
 import type OpenAI from "openai"
 import type OpenAI from "openai"
 
 
-const READ_FILE_SUPPORTS_NOTE = `Supports text extraction from PDF and DOCX files, but may not handle other binary files properly.`
+/**
+ * Builds the file-type support sentence that createReadFileTool embeds in the read_file description.
+ *
+ * @param supportsImages - When true, the note also lists the image formats returned for visual analysis
+ * @returns Note text; both variants mention PDF/DOCX text extraction and the binary-file caveat
+ */
+function getReadFileSupportsNote(supportsImages: boolean): string {
+	if (supportsImages) {
+		return `Supports text extraction from PDF and DOCX files. Automatically processes and returns image files (PNG, JPG, JPEG, GIF, BMP, SVG, WEBP, ICO, AVIF) for visual analysis. May not handle other binary files properly.`
+	}
+	return `Supports text extraction from PDF and DOCX files, but may not handle other binary files properly.`
+}
 
 
 /**
 /**
  * Options for creating the read_file tool definition.
  * Options for creating the read_file tool definition.
@@ -10,6 +21,8 @@ export interface ReadFileToolOptions {
 	partialReadsEnabled?: boolean
 	partialReadsEnabled?: boolean
 	/** Maximum number of files that can be read in a single request (default: 5) */
 	/** Maximum number of files that can be read in a single request (default: 5) */
 	maxConcurrentFileReads?: number
 	maxConcurrentFileReads?: number
+	/** Whether the model supports image processing (default: false) */
+	supportsImages?: boolean
 }
 }
 
 
 /**
 /**
@@ -20,7 +33,7 @@ export interface ReadFileToolOptions {
  * @returns Native tool definition for read_file
  * @returns Native tool definition for read_file
  */
  */
 export function createReadFileTool(options: ReadFileToolOptions = {}): OpenAI.Chat.ChatCompletionTool {
 export function createReadFileTool(options: ReadFileToolOptions = {}): OpenAI.Chat.ChatCompletionTool {
-	const { partialReadsEnabled = true, maxConcurrentFileReads = 5 } = options
+	const { partialReadsEnabled = true, maxConcurrentFileReads = 5, supportsImages = false } = options
 	const isMultipleReadsEnabled = maxConcurrentFileReads > 1
 	const isMultipleReadsEnabled = maxConcurrentFileReads > 1
 
 
 	// Build description intro with concurrent reads limit message
 	// Build description intro with concurrent reads limit message
@@ -50,7 +63,8 @@ export function createReadFileTool(options: ReadFileToolOptions = {}): OpenAI.Ch
 				? `Example multiple files (within ${maxConcurrentFileReads}-file limit): { files: [{ path: 'file1.ts' }, { path: 'file2.ts' }] }`
 				? `Example multiple files (within ${maxConcurrentFileReads}-file limit): { files: [{ path: 'file1.ts' }, { path: 'file2.ts' }] }`
 				: "")
 				: "")
 
 
-	const description = baseDescription + optionalRangesDescription + READ_FILE_SUPPORTS_NOTE + " " + examples
+	const description =
+		baseDescription + optionalRangesDescription + getReadFileSupportsNote(supportsImages) + " " + examples
 
 
 	// Build the properties object conditionally
 	// Build the properties object conditionally
 	const fileProperties: Record<string, any> = {
 	const fileProperties: Record<string, any> = {

+ 4 - 0
src/core/task/build-tools.ts

@@ -64,10 +64,14 @@ export async function buildNativeToolsArray(options: BuildToolsOptions): Promise
 	// Determine if partial reads are enabled based on maxReadFileLine setting.
 	// Determine if partial reads are enabled based on maxReadFileLine setting.
 	const partialReadsEnabled = maxReadFileLine !== -1
 	const partialReadsEnabled = maxReadFileLine !== -1
 
 
+	// Check if the model supports images for read_file tool description.
+	const supportsImages = modelInfo?.supportsImages ?? false
+
 	// Build native tools with dynamic read_file tool based on settings.
 	// Build native tools with dynamic read_file tool based on settings.
 	const nativeTools = getNativeTools({
 	const nativeTools = getNativeTools({
 		partialReadsEnabled,
 		partialReadsEnabled,
 		maxConcurrentFileReads,
 		maxConcurrentFileReads,
+		supportsImages,
 	})
 	})
 
 
 	// Filter native tools based on mode restrictions.
 	// Filter native tools based on mode restrictions.