Просмотр исходного кода

feat(agent-manager): add image support

- Paste images from clipboard (Ctrl/Cmd+V) or select via file browser
- Works in new agent prompts, follow-up messages, and resumed sessions
- Support for PNG, JPEG, WebP, and GIF formats (up to 20 images)
- Click thumbnails to preview, hover to remove
- New `newTask` stdin message type for initial prompts with images
- Temp image files cleaned up when extension deactivates
Jérémy Beutin 3 недель назад
Родитель
Commit
52ea3c5a91

+ 14 - 0
.changeset/agent-manager-image-paste.md

@@ -0,0 +1,14 @@
+---
+"kilo-code": minor
+"@kilocode/cli": minor
+"@kilocode/core-schemas": patch
+---
+
+Add image support to Agent Manager
+
+- Paste images from clipboard (Ctrl/Cmd+V) or select via file browser button
+- Works in new agent prompts, follow-up messages, and resumed sessions
+- Support for PNG, JPEG, WebP, and GIF formats (up to 20 images per message)
+- Click thumbnails to preview, hover to remove
+- New `newTask` stdin message type for initial prompts with images
+- Temp image files are automatically cleaned up when extension deactivates

+ 3 - 1
cli/src/index.ts

@@ -101,8 +101,10 @@ program
 		}
 		}
 
 
 		// Read from stdin if no prompt argument is provided and stdin is piped
 		// Read from stdin if no prompt argument is provided and stdin is piped
+		// BUT NOT in json-io mode, where stdin is used for bidirectional communication
+		// and the prompt will come via a "newTask" message
 		let finalPrompt = prompt || ""
 		let finalPrompt = prompt || ""
-		if (!finalPrompt && !process.stdin.isTTY) {
+		if (!finalPrompt && !process.stdin.isTTY && !options.jsonIo) {
 			// Read from stdin
 			// Read from stdin
 			const chunks: Buffer[] = []
 			const chunks: Buffer[] = []
 			for await (const chunk of process.stdin) {
 			for await (const chunk of process.stdin) {

+ 17 - 2
cli/src/state/hooks/useStdinJsonHandler.ts

@@ -6,7 +6,7 @@
 import { useEffect } from "react"
 import { useEffect } from "react"
 import { useSetAtom } from "jotai"
 import { useSetAtom } from "jotai"
 import { createInterface } from "readline"
 import { createInterface } from "readline"
-import { sendAskResponseAtom, cancelTaskAtom, respondToToolAtom } from "../atoms/actions.js"
+import { sendAskResponseAtom, sendTaskAtom, cancelTaskAtom, respondToToolAtom } from "../atoms/actions.js"
 import { logs } from "../../services/logs.js"
 import { logs } from "../../services/logs.js"
 
 
 export interface StdinMessage {
 export interface StdinMessage {
@@ -19,6 +19,7 @@ export interface StdinMessage {
 
 
 export interface StdinMessageHandlers {
 export interface StdinMessageHandlers {
 	sendAskResponse: (params: { response: "messageResponse"; text?: string; images?: string[] }) => Promise<void>
 	sendAskResponse: (params: { response: "messageResponse"; text?: string; images?: string[] }) => Promise<void>
+	sendTask: (params: { text: string; images?: string[] }) => Promise<void>
 	cancelTask: () => Promise<void>
 	cancelTask: () => Promise<void>
 	respondToTool: (params: {
 	respondToTool: (params: {
 		response: "yesButtonClicked" | "noButtonClicked"
 		response: "yesButtonClicked" | "noButtonClicked"
@@ -36,6 +37,16 @@ export async function handleStdinMessage(
 	handlers: StdinMessageHandlers,
 	handlers: StdinMessageHandlers,
 ): Promise<{ handled: boolean; error?: string }> {
 ): Promise<{ handled: boolean; error?: string }> {
 	switch (message.type) {
 	switch (message.type) {
+		case "newTask":
+			// Start a new task with prompt and optional images
+			// This allows the Agent Manager to send the initial prompt via stdin
+			// instead of CLI args, enabling images to be included with the first message
+			await handlers.sendTask({
+				text: message.text || "",
+				...(message.images !== undefined && { images: message.images }),
+			})
+			return { handled: true }
+
 		case "askResponse":
 		case "askResponse":
 			// Handle ask response (user message, approval response, etc.)
 			// Handle ask response (user message, approval response, etc.)
 			if (message.askResponse === "yesButtonClicked" || message.askResponse === "noButtonClicked") {
 			if (message.askResponse === "yesButtonClicked" || message.askResponse === "noButtonClicked") {
@@ -80,6 +91,7 @@ export async function handleStdinMessage(
 
 
 export function useStdinJsonHandler(enabled: boolean) {
 export function useStdinJsonHandler(enabled: boolean) {
 	const sendAskResponse = useSetAtom(sendAskResponseAtom)
 	const sendAskResponse = useSetAtom(sendAskResponseAtom)
+	const sendTask = useSetAtom(sendTaskAtom)
 	const cancelTask = useSetAtom(cancelTaskAtom)
 	const cancelTask = useSetAtom(cancelTaskAtom)
 	const respondToTool = useSetAtom(respondToToolAtom)
 	const respondToTool = useSetAtom(respondToToolAtom)
 
 
@@ -99,6 +111,9 @@ export function useStdinJsonHandler(enabled: boolean) {
 			sendAskResponse: async (params) => {
 			sendAskResponse: async (params) => {
 				await sendAskResponse(params)
 				await sendAskResponse(params)
 			},
 			},
+			sendTask: async (params) => {
+				await sendTask(params)
+			},
 			cancelTask: async () => {
 			cancelTask: async () => {
 				await cancelTask()
 				await cancelTask()
 			},
 			},
@@ -142,5 +157,5 @@ export function useStdinJsonHandler(enabled: boolean) {
 		return () => {
 		return () => {
 			rl.close()
 			rl.close()
 		}
 		}
-	}, [enabled, sendAskResponse, cancelTask, respondToTool])
+	}, [enabled, sendAskResponse, sendTask, cancelTask, respondToTool])
 }
 }

+ 1 - 0
packages/core-schemas/src/agent-manager/types.ts

@@ -81,6 +81,7 @@ export const startSessionMessageSchema = z.object({
 	model: z.string().optional(), // Model ID to use for this session
 	model: z.string().optional(), // Model ID to use for this session
 	versions: z.number().optional(), // Number of versions for multi-version mode
 	versions: z.number().optional(), // Number of versions for multi-version mode
 	labels: z.array(z.string()).optional(), // Labels for multi-version sessions
 	labels: z.array(z.string()).optional(), // Labels for multi-version sessions
+	images: z.array(z.string()).optional(), // Image data URLs to include with the prompt
 })
 })
 
 
 export const agentManagerMessageSchema = z.discriminatedUnion("type", [
 export const agentManagerMessageSchema = z.discriminatedUnion("type", [

+ 152 - 25
src/core/kilocode/agent-manager/AgentManagerProvider.ts

@@ -1,6 +1,8 @@
 import * as vscode from "vscode"
 import * as vscode from "vscode"
 import * as fs from "node:fs"
 import * as fs from "node:fs"
 import * as path from "node:path"
 import * as path from "node:path"
+import * as os from "node:os"
+import { randomUUID } from "node:crypto"
 import { t } from "i18next"
 import { t } from "i18next"
 import { AgentRegistry } from "./AgentRegistry"
 import { AgentRegistry } from "./AgentRegistry"
 import { renameMapKey } from "./mapUtils"
 import { renameMapKey } from "./mapUtils"
@@ -44,6 +46,21 @@ import { WorkspaceGitService } from "./WorkspaceGitService"
 import { SessionTerminalManager } from "./SessionTerminalManager"
 import { SessionTerminalManager } from "./SessionTerminalManager"
 import { fetchAvailableModels, type ModelsApiResponse } from "./CliModelsFetcher"
 import { fetchAvailableModels, type ModelsApiResponse } from "./CliModelsFetcher"
 import { startSessionMessageSchema, type StartSessionMessage } from "./types"
 import { startSessionMessageSchema, type StartSessionMessage } from "./types"
+import { openImage } from "../../../integrations/misc/image-handler"
+
+/**
+ * Message format for sending responses to the CLI via stdin.
+ * Used for user messages, approval responses, and other interactions.
+ *
+ * Fields:
+ * - `type`: discriminator, always "askResponse".
+ * - `askResponse`: which kind of response this is; plain user messages use
+ *   "messageResponse", the button values are the approve/reject answers.
+ * - `text`: the message content.
+ * - `images`: optional image references. When the message is produced by
+ *   buildAskResponseMessage() these are temp-file paths, not data URLs.
+ */
+interface StdinAskResponseMessage {
+	type: "askResponse"
+	askResponse: "messageResponse" | "yesButtonClicked" | "noButtonClicked"
+	text: string
+	images?: string[]
+}
+
+/** Directory name for temporary image files */
+const TEMP_IMAGES_DIR = "kilo-code-agent-manager-images"
 
 
 /**
 /**
  * AgentManagerProvider
  * AgentManagerProvider
@@ -176,6 +193,84 @@ export class AgentManagerProvider implements vscode.Disposable {
 		})
 		})
 	}
 	}
 
 
+	/**
+	 * Persist base64 `data:image/...` URLs as files in the temp image directory.
+	 *
+	 * Each URL is handled independently. An entry is logged to the output channel and
+	 * skipped when it does not match `data:image/<format>;base64,<payload>`, when its
+	 * payload decodes to zero bytes, or when the file cannot be written. The returned
+	 * array can therefore be shorter than the input.
+	 *
+	 * The directory is removed again by cleanupTempImages().
+	 *
+	 * @param dataUrls Base64 image data URLs.
+	 * @returns Paths of the files that were written, in input order.
+	 */
+	private async saveImagesToTempFiles(dataUrls: string[]): Promise<string[]> {
+		if (!dataUrls || dataUrls.length === 0) {
+			return []
+		}
+
+		const tempDir = path.join(os.tmpdir(), TEMP_IMAGES_DIR)
+
+		// Ensure temp directory exists
+		await fs.promises.mkdir(tempDir, { recursive: true })
+
+		// Expected shape: data:image/<format>;base64,<payload>
+		// Compiled once here instead of on every loop iteration.
+		const dataUrlPattern = /^data:image\/(\w+);base64,(.+)$/
+		const savedPaths: string[] = []
+
+		for (const dataUrl of dataUrls) {
+			try {
+				const match = dataUrlPattern.exec(dataUrl)
+				if (!match) {
+					this.outputChannel.appendLine(`[AgentManager] Invalid image data URL format`)
+					continue
+				}
+
+				const [, format, base64Data] = match
+
+				// Buffer.from() never throws on malformed base64; it silently drops what it
+				// cannot decode. An empty result means there is no image to save, so skip it
+				// rather than handing the CLI a path to a zero-byte file.
+				const buffer = Buffer.from(base64Data, "base64")
+				if (buffer.length === 0) {
+					this.outputChannel.appendLine(`[AgentManager] Image data URL contained no decodable data`)
+					continue
+				}
+
+				const ext = format === "jpeg" ? "jpg" : format
+				// Timestamp plus a random suffix keeps names unique within the directory.
+				const filename = `clipboard-${Date.now()}-${randomUUID().slice(0, 8)}.${ext}`
+				const filepath = path.join(tempDir, filename)
+
+				// Write the image file
+				await fs.promises.writeFile(filepath, buffer)
+
+				savedPaths.push(filepath)
+				this.outputChannel.appendLine(`[AgentManager] Saved image to temp file: ${filepath}`)
+			} catch (error) {
+				// One failed image must not prevent the remaining ones from being saved.
+				this.outputChannel.appendLine(`[AgentManager] Failed to save image: ${error}`)
+			}
+		}
+
+		return savedPaths
+	}
+
+	/**
+	 * Assemble the "messageResponse" stdin payload for a user message.
+	 *
+	 * When images are supplied they are first written to temp files; the resulting
+	 * paths are attached as `images`. The property is omitted entirely when no image
+	 * was supplied or none could be saved.
+	 *
+	 * @param content Message text to send.
+	 * @param images Optional image data URLs to accompany the message.
+	 * @returns The message object ready to be written to the CLI's stdin.
+	 */
+	private async buildAskResponseMessage(content: string, images?: string[]): Promise<StdinAskResponseMessage> {
+		const savedPaths = images?.length ? await this.saveImagesToTempFiles(images) : []
+
+		return {
+			type: "askResponse",
+			askResponse: "messageResponse",
+			text: content,
+			...(savedPaths.length > 0 ? { images: savedPaths } : {}),
+		}
+	}
+
+	/**
+	 * Remove the temp image directory and everything in it.
+	 *
+	 * `force: true` already turns a missing directory into a no-op, so an error that
+	 * reaches the catch block is a real failure (permissions, a locked file, ...). It is
+	 * logged instead of rethrown because this runs fire-and-forget during disposal.
+	 *
+	 * NOTE(review): the directory name is a fixed constant under os.tmpdir(), so this
+	 * removes every file in it, not only the ones this instance wrote. Confirm that no
+	 * other running instance can still be using the directory.
+	 */
+	private async cleanupTempImages(): Promise<void> {
+		const tempDir = path.join(os.tmpdir(), TEMP_IMAGES_DIR)
+		try {
+			await fs.promises.rm(tempDir, { recursive: true, force: true })
+			this.outputChannel.appendLine(`[AgentManager] Cleaned up temp images directory`)
+		} catch (error) {
+			const reason = error instanceof Error ? error.message : String(error)
+			this.outputChannel.appendLine(`[AgentManager] Failed to clean up temp images directory: ${reason}`)
+		}
+	}
+
 	/**
 	/**
 	 * Open or focus the Agent Manager panel
 	 * Open or focus the Agent Manager panel
 	 */
 	 */
@@ -265,6 +360,7 @@ export class AgentManagerProvider implements vscode.Disposable {
 						message.sessionId as string,
 						message.sessionId as string,
 						message.content as string,
 						message.content as string,
 						message.sessionLabel as string | undefined,
 						message.sessionLabel as string | undefined,
+						message.images as string[] | undefined,
 					)
 					)
 					break
 					break
 				case "agentManager.messageQueued":
 				case "agentManager.messageQueued":
@@ -273,10 +369,15 @@ export class AgentManagerProvider implements vscode.Disposable {
 						message.messageId as string,
 						message.messageId as string,
 						message.content as string,
 						message.content as string,
 						message.sessionLabel as string | undefined,
 						message.sessionLabel as string | undefined,
+						message.images as string[] | undefined,
 					)
 					)
 					break
 					break
 				case "agentManager.resumeSession":
 				case "agentManager.resumeSession":
-					void this.resumeSession(message.sessionId as string, message.content as string)
+					void this.resumeSession(
+						message.sessionId as string,
+						message.content as string,
+						message.images as string[] | undefined,
+					)
 					break
 					break
 				case "agentManager.cancelSession":
 				case "agentManager.cancelSession":
 					void this.cancelSession(message.sessionId as string)
 					void this.cancelSession(message.sessionId as string)
@@ -325,6 +426,10 @@ export class AgentManagerProvider implements vscode.Disposable {
 							vscode.window.showErrorMessage(`Failed to share session: ${errorMessage}`)
 							vscode.window.showErrorMessage(`Failed to share session: ${errorMessage}`)
 						})
 						})
 					break
 					break
+				case "openImage":
+					// Handle image click from ImageThumbnail component
+					void openImage(message.text as string)
+					break
 			}
 			}
 		} catch (error) {
 		} catch (error) {
 			this.outputChannel.appendLine(`Error handling message: ${error}`)
 			this.outputChannel.appendLine(`Error handling message: ${error}`)
@@ -349,7 +454,14 @@ export class AgentManagerProvider implements vscode.Disposable {
 		}
 		}
 
 
 		const validatedMessage: StartSessionMessage = parseResult.data
 		const validatedMessage: StartSessionMessage = parseResult.data
-		const { prompt, parallelMode = false, existingBranch, model } = validatedMessage
+		const { prompt, parallelMode = false, existingBranch, model, images } = validatedMessage
+
+		// Save images to temp files if provided
+		let imagePaths: string[] | undefined
+		if (images && images.length > 0) {
+			imagePaths = await this.saveImagesToTempFiles(images)
+			this.outputChannel.appendLine(`[AgentManager] Saved ${imagePaths.length} images for new session`)
+		}
 
 
 		// Clamp versions to valid range to prevent runaway process spawning
 		// Clamp versions to valid range to prevent runaway process spawning
 		const rawVersions = validatedMessage.versions ?? 1
 		const rawVersions = validatedMessage.versions ?? 1
@@ -369,12 +481,14 @@ export class AgentManagerProvider implements vscode.Disposable {
 				labelOverride: config.label,
 				labelOverride: config.label,
 				existingBranch: config.existingBranch,
 				existingBranch: config.existingBranch,
 				model,
 				model,
+				images: imagePaths,
 			})
 			})
 			return
 			return
 		}
 		}
 
 
 		// Multi-version mode: spawn sessions sequentially
 		// Multi-version mode: spawn sessions sequentially
 		// We need to wait for each pending session to clear before starting the next
 		// We need to wait for each pending session to clear before starting the next
+		// Note: Images are only sent to the first session in multi-version mode
 		this.outputChannel.appendLine(`[AgentManager] Starting ${configs.length} versions in multi-version mode`)
 		this.outputChannel.appendLine(`[AgentManager] Starting ${configs.length} versions in multi-version mode`)
 
 
 		for (let i = 0; i < configs.length; i++) {
 		for (let i = 0; i < configs.length; i++) {
@@ -386,6 +500,7 @@ export class AgentManagerProvider implements vscode.Disposable {
 				labelOverride: config.label,
 				labelOverride: config.label,
 				existingBranch: config.existingBranch,
 				existingBranch: config.existingBranch,
 				model,
 				model,
+				images: i === 0 ? imagePaths : undefined, // Only send images to first version
 			})
 			})
 
 
 			// Wait for the pending session to transition to active before spawning the next
 			// Wait for the pending session to transition to active before spawning the next
@@ -477,6 +592,7 @@ export class AgentManagerProvider implements vscode.Disposable {
 			labelOverride?: string
 			labelOverride?: string
 			existingBranch?: string
 			existingBranch?: string
 			model?: string
 			model?: string
+			images?: string[] // Image file paths to include with the initial prompt
 		},
 		},
 	): Promise<void> {
 	): Promise<void> {
 		if (!prompt) {
 		if (!prompt) {
@@ -536,6 +652,7 @@ export class AgentManagerProvider implements vscode.Disposable {
 				worktreeInfo,
 				worktreeInfo,
 				effectiveWorkspace,
 				effectiveWorkspace,
 				model: options?.model,
 				model: options?.model,
+				images: options?.images, // Images are sent with prompt via stdin newTask message
 			},
 			},
 			onSetupFailed,
 			onSetupFailed,
 		)
 		)
@@ -589,6 +706,7 @@ export class AgentManagerProvider implements vscode.Disposable {
 			worktreeInfo?: { branch: string; path: string; parentBranch: string }
 			worktreeInfo?: { branch: string; path: string; parentBranch: string }
 			effectiveWorkspace?: string
 			effectiveWorkspace?: string
 			model?: string
 			model?: string
+			images?: string[] // Image file paths to include with the initial prompt
 		},
 		},
 		onSetupFailed?: () => void,
 		onSetupFailed?: () => void,
 	): Promise<boolean> {
 	): Promise<boolean> {
@@ -984,12 +1102,8 @@ export class AgentManagerProvider implements vscode.Disposable {
 	/**
 	/**
 	 * Send a message to a session's stdin (for agent instructions)
 	 * Send a message to a session's stdin (for agent instructions)
 	 */
 	 */
-	private async sendMessageToStdin(sessionId: string, content: string): Promise<void> {
-		const message = {
-			type: "askResponse",
-			askResponse: "messageResponse",
-			text: content,
-		}
+	private async sendMessageToStdin(sessionId: string, content: string, images?: string[]): Promise<void> {
+		const message = await this.buildAskResponseMessage(content, images)
 		await this.processHandler.writeToStdin(sessionId, message)
 		await this.processHandler.writeToStdin(sessionId, message)
 	}
 	}
 
 
@@ -1010,19 +1124,19 @@ export class AgentManagerProvider implements vscode.Disposable {
 	/**
 	/**
 	 * Send a follow-up message to a running agent session via stdin.
 	 * Send a follow-up message to a running agent session via stdin.
 	 */
 	 */
-	public async sendMessage(sessionId: string, content: string, sessionLabel?: string): Promise<void> {
+	public async sendMessage(
+		sessionId: string,
+		content: string,
+		sessionLabel?: string,
+		images?: string[],
+	): Promise<void> {
 		if (!this.processHandler.hasStdin(sessionId)) {
 		if (!this.processHandler.hasStdin(sessionId)) {
 			// Session is not running - ignore the message
 			// Session is not running - ignore the message
 			this.outputChannel.appendLine(`[AgentManager] Session ${sessionId} not running, ignoring follow-up message`)
 			this.outputChannel.appendLine(`[AgentManager] Session ${sessionId} not running, ignoring follow-up message`)
 			return
 			return
 		}
 		}
 
 
-		const message = {
-			type: "askResponse",
-			askResponse: "messageResponse",
-			text: content,
-		}
-
+		const message = await this.buildAskResponseMessage(content, images)
 		await this.safeWriteToStdin(sessionId, message, "message")
 		await this.safeWriteToStdin(sessionId, message, "message")
 	}
 	}
 
 
@@ -1035,13 +1149,14 @@ export class AgentManagerProvider implements vscode.Disposable {
 		messageId: string,
 		messageId: string,
 		content: string,
 		content: string,
 		_sessionLabel?: string,
 		_sessionLabel?: string,
+		images?: string[],
 	): Promise<void> {
 	): Promise<void> {
 		// Validate the session and message prerequisites
 		// Validate the session and message prerequisites
 		const validationError = this.validateMessagePrerequisites(sessionId, messageId)
 		const validationError = this.validateMessagePrerequisites(sessionId, messageId)
 		if (validationError) return
 		if (validationError) return
 
 
 		// Attempt to send the message
 		// Attempt to send the message
-		await this.sendQueuedMessage(sessionId, messageId, content)
+		await this.sendQueuedMessage(sessionId, messageId, content, images)
 	}
 	}
 
 
 	/**
 	/**
@@ -1070,18 +1185,18 @@ export class AgentManagerProvider implements vscode.Disposable {
 	 * Send a validated queued message to the CLI.
 	 * Send a validated queued message to the CLI.
 	 * Handles marking as sending, actual send, and error handling.
 	 * Handles marking as sending, actual send, and error handling.
 	 */
 	 */
-	private async sendQueuedMessage(sessionId: string, messageId: string, content: string): Promise<void> {
+	private async sendQueuedMessage(
+		sessionId: string,
+		messageId: string,
+		content: string,
+		images?: string[],
+	): Promise<void> {
 		// Mark as sending
 		// Mark as sending
 		this.sendingMessageMap.set(sessionId, messageId)
 		this.sendingMessageMap.set(sessionId, messageId)
 		this.notifyMessageStatus(sessionId, messageId, "sending")
 		this.notifyMessageStatus(sessionId, messageId, "sending")
 
 
 		try {
 		try {
-			const message = {
-				type: "askResponse",
-				askResponse: "messageResponse",
-				text: content,
-			}
-
+			const message = await this.buildAskResponseMessage(content, images)
 			await this.safeWriteToStdin(sessionId, message, "message")
 			await this.safeWriteToStdin(sessionId, message, "message")
 			this.log(sessionId, `Message ${messageId} sent successfully`)
 			this.log(sessionId, `Message ${messageId} sent successfully`)
 			this.notifyMessageStatus(sessionId, messageId, "sent")
 			this.notifyMessageStatus(sessionId, messageId, "sent")
@@ -1116,7 +1231,7 @@ export class AgentManagerProvider implements vscode.Disposable {
 	/**
 	/**
 	 * Resume a completed session by spawning a new CLI process with --session flag.
 	 * Resume a completed session by spawning a new CLI process with --session flag.
 	 */
 	 */
-	public async resumeSession(sessionId: string, content: string): Promise<void> {
+	public async resumeSession(sessionId: string, content: string, images?: string[]): Promise<void> {
 		const session = this.registry.getSession(sessionId)
 		const session = this.registry.getSession(sessionId)
 		if (!session) {
 		if (!session) {
 			this.outputChannel.appendLine(`[AgentManager] Session ${sessionId} not found, cannot resume`)
 			this.outputChannel.appendLine(`[AgentManager] Session ${sessionId} not found, cannot resume`)
@@ -1125,10 +1240,17 @@ export class AgentManagerProvider implements vscode.Disposable {
 
 
 		// If session is still running, send as regular message instead
 		// If session is still running, send as regular message instead
 		if (this.processHandler.hasStdin(sessionId)) {
 		if (this.processHandler.hasStdin(sessionId)) {
-			await this.sendMessage(sessionId, content)
+			await this.sendMessage(sessionId, content, undefined, images)
 			return
 			return
 		}
 		}
 
 
+		// Save images to temp files if provided (for the initial prompt via stdin newTask)
+		let imagePaths: string[] | undefined
+		if (images && images.length > 0) {
+			imagePaths = await this.saveImagesToTempFiles(images)
+			this.outputChannel.appendLine(`[AgentManager] Saved ${imagePaths.length} images for resumed session`)
+		}
+
 		this.outputChannel.appendLine(`[AgentManager] Resuming session ${sessionId} with new prompt`)
 		this.outputChannel.appendLine(`[AgentManager] Resuming session ${sessionId} with new prompt`)
 
 
 		// Handle parallel mode session resumption
 		// Handle parallel mode session resumption
@@ -1141,6 +1263,7 @@ export class AgentManagerProvider implements vscode.Disposable {
 					gitUrl: session.gitUrl,
 					gitUrl: session.gitUrl,
 					worktreeInfo,
 					worktreeInfo,
 					effectiveWorkspace: worktreeInfo.path,
 					effectiveWorkspace: worktreeInfo.path,
+					images: imagePaths, // Images sent with prompt via stdin newTask message
 				})
 				})
 				return
 				return
 			}
 			}
@@ -1152,6 +1275,7 @@ export class AgentManagerProvider implements vscode.Disposable {
 			sessionId, // This triggers --session=<id> flag
 			sessionId, // This triggers --session=<id> flag
 			parallelMode: session.parallelMode?.enabled,
 			parallelMode: session.parallelMode?.enabled,
 			gitUrl: session.gitUrl,
 			gitUrl: session.gitUrl,
+			images: imagePaths, // Images sent with prompt via stdin newTask message
 		})
 		})
 	}
 	}
 
 
@@ -1496,6 +1620,9 @@ export class AgentManagerProvider implements vscode.Disposable {
 		this.sessionMessages.clear()
 		this.sessionMessages.clear()
 		this.firstApiReqStarted.clear()
 		this.firstApiReqStarted.clear()
 
 
+		// Clean up temporary image files
+		void this.cleanupTempImages()
+
 		this.panel?.dispose()
 		this.panel?.dispose()
 		this.disposables.forEach((d) => d.dispose())
 		this.disposables.forEach((d) => d.dispose())
 	}
 	}

+ 6 - 2
src/core/kilocode/agent-manager/CliArgsBuilder.ts

@@ -1,6 +1,8 @@
 export interface BuildCliArgsOptions {
 export interface BuildCliArgsOptions {
 	sessionId?: string
 	sessionId?: string
 	model?: string
 	model?: string
+	/** When true, prompt will be sent via stdin (for multimodal messages with images) */
+	promptViaStdin?: boolean
 }
 }
 
 
 /**
 /**
@@ -22,9 +24,11 @@ export function buildCliArgs(workspace: string, prompt: string, options?: BuildC
 		args.push(`--session=${options.sessionId}`)
 		args.push(`--session=${options.sessionId}`)
 	}
 	}
 
 
-	// Only add prompt if non-empty
+	// Only add prompt if non-empty and not being sent via stdin
 	// When resuming with --session, an empty prompt means "continue from where we left off"
 	// When resuming with --session, an empty prompt means "continue from where we left off"
-	if (prompt) {
+	// When promptViaStdin is true, prompt will be sent as a newTask message via stdin
+	// (used for multimodal messages with images)
+	if (prompt && !options?.promptViaStdin) {
 		args.push(prompt)
 		args.push(prompt)
 	}
 	}
 
 

+ 48 - 0
src/core/kilocode/agent-manager/CliProcessHandler.ts

@@ -28,6 +28,12 @@ const PENDING_SESSION_TIMEOUT_MS = 30_000
  */
  */
 const MAX_STDOUT_BUFFER_SIZE = 64 * 1024
 const MAX_STDOUT_BUFFER_SIZE = 64 * 1024
 
 
+/**
+ * Delay before sending initial prompt with images via stdin (ms).
+ * Allows the CLI process to initialize and be ready to receive stdin input.
+ *
+ * NOTE(review): this is a fixed wait, not a readiness handshake. Confirm that 100 ms
+ * is enough on slow machines, or that the CLI buffers stdin until it starts reading.
+ */
+const STDIN_READY_DELAY_MS = 100
+
 /**
 /**
  * Tracks a pending session while waiting for CLI's session_created event.
  * Tracks a pending session while waiting for CLI's session_created event.
  * Note: This is only used for NEW sessions. Resume sessions go directly to activeSessions.
  * Note: This is only used for NEW sessions. Resume sessions go directly to activeSessions.
@@ -54,6 +60,7 @@ interface PendingProcessInfo {
 	cliPath?: string // CLI path for error telemetry
 	cliPath?: string // CLI path for error telemetry
 	configurationError?: string // Captured from welcome event instructions (indicates misconfigured CLI)
 	configurationError?: string // Captured from welcome event instructions (indicates misconfigured CLI)
 	model?: string // Model ID used for this session
 	model?: string // Model ID used for this session
+	images?: string[] // Image paths to send with initial prompt via stdin
 }
 }
 
 
 interface ActiveProcessInfo {
 interface ActiveProcessInfo {
@@ -154,6 +161,8 @@ export class CliProcessHandler {
 					worktreeInfo?: { branch: string; path: string; parentBranch: string }
 					worktreeInfo?: { branch: string; path: string; parentBranch: string }
 					/** Model ID to use for this session (overrides CLI default) */
 					/** Model ID to use for this session (overrides CLI default) */
 					model?: string
 					model?: string
+					/** Image paths to send with initial prompt (requires prompt to be sent via stdin) */
+					images?: string[]
 			  }
 			  }
 			| undefined,
 			| undefined,
 		onCliEvent: (sessionId: string, event: StreamEvent) => void,
 		onCliEvent: (sessionId: string, event: StreamEvent) => void,
@@ -191,9 +200,12 @@ export class CliProcessHandler {
 		// Build CLI command
 		// Build CLI command
 		// Note: Worktree/parallel mode is handled by AgentManagerProvider creating the worktree
 		// Note: Worktree/parallel mode is handled by AgentManagerProvider creating the worktree
 		// and passing the worktree path as the workspace. CLI is unaware of worktrees.
 		// and passing the worktree path as the workspace. CLI is unaware of worktrees.
+		// When images are present, prompt is sent via stdin (as newTask message) instead of CLI args
+		const hasImages = options?.images && options.images.length > 0
 		const cliArgs = buildCliArgs(workspace, prompt, {
 		const cliArgs = buildCliArgs(workspace, prompt, {
 			sessionId: options?.sessionId,
 			sessionId: options?.sessionId,
 			model: options?.model,
 			model: options?.model,
+			promptViaStdin: hasImages,
 		})
 		})
 		const env = this.buildEnvWithApiConfiguration(options?.apiConfiguration, options?.shellPath)
 		const env = this.buildEnvWithApiConfiguration(options?.apiConfiguration, options?.shellPath)
 
 
@@ -258,6 +270,7 @@ export class CliProcessHandler {
 				hadShellPath: !!options?.shellPath, // Track for telemetry
 				hadShellPath: !!options?.shellPath, // Track for telemetry
 				cliPath,
 				cliPath,
 				model: options?.model,
 				model: options?.model,
+				images: options?.images, // Store images to send with prompt via stdin
 			}
 			}
 		}
 		}
 
 
@@ -303,6 +316,41 @@ export class CliProcessHandler {
 		})
 		})
 
 
 		this.debugLog(`spawned CLI process pid=${proc.pid}`)
 		this.debugLog(`spawned CLI process pid=${proc.pid}`)
+
+		// If images are present, send the initial prompt with images via stdin
+		// This is done here because the prompt was not passed as CLI arg
+		if (hasImages && proc.stdin) {
+			this.sendInitialPromptWithImages(proc, prompt, options!.images!)
+		}
+	}
+
+	/**
+	 * Send the initial prompt with images via stdin as a newTask message.
+	 * This is used when images are present, since images can't be passed via CLI args.
+	 *
+	 * The write is deferred by STDIN_READY_DELAY_MS. If the process has gone away or its
+	 * stdin is no longer writable by then, the prompt is dropped and the reason is logged.
+	 *
+	 * @param proc The spawned CLI child process.
+	 * @param prompt Prompt text for the new task.
+	 * @param images Image file paths to send along with the prompt.
+	 */
+	private sendInitialPromptWithImages(proc: ChildProcess, prompt: string, images: string[]): void {
+		// Small delay to ensure CLI is ready to receive stdin
+		setTimeout(() => {
+			const stdin = proc.stdin
+
+			// `proc.killed` only reports that a signal was sent through proc.kill(). A CLI
+			// that exited or crashed on its own during the delay shows up in
+			// exitCode/signalCode and in the stream's `writable` flag instead.
+			const hasExited = proc.exitCode !== null || proc.signalCode !== null
+			if (!stdin || !stdin.writable || proc.killed || hasExited) {
+				this.callbacks.onLog(`Cannot send initial prompt: process stdin not available`)
+				return
+			}
+
+			const message = {
+				type: "newTask",
+				text: prompt,
+				images,
+			}
+
+			// The CLI reads stdin line by line, so each message is one JSON document per line.
+			const jsonLine = JSON.stringify(message) + "\n"
+			stdin.write(jsonLine, (error) => {
+				if (error) {
+					this.callbacks.onLog(`Failed to send initial prompt with images: ${error.message}`)
+				} else {
+					this.debugLog(`Sent initial prompt with ${images.length} images via stdin`)
+				}
+			})
+		}, STDIN_READY_DELAY_MS)
 	}
 	}
 
 
 	public stopProcess(sessionId: string): void {
 	public stopProcess(sessionId: string): void {

+ 3 - 1
webview-ui/src/i18n/locales/en/agentManager.json

@@ -86,7 +86,9 @@
 		"createPRTitle": "Create pull request",
 		"createPRTitle": "Create pull request",
 		"sendTitle": "Send message",
 		"sendTitle": "Send message",
 		"resumeTitle": "Resume agent",
 		"resumeTitle": "Resume agent",
-		"hint": "Press Enter to send, Shift+Enter for new line"
+		"hint": "Press Enter to send, Shift+Enter for new line. Paste images with Ctrl/Cmd+V",
+		"addImage": "Add image",
+		"removeImage": "Remove image"
 	},
 	},
 	"status": {
 	"status": {
 		"running": "running",
 		"running": "running",

+ 64 - 0
webview-ui/src/kilocode/agent-manager/components/AddImageButton.tsx

@@ -0,0 +1,64 @@
+import React from "react"
+import { useTranslation } from "react-i18next"
+import { ImageIcon } from "lucide-react"
+import { cn } from "../../../lib/utils"
+import { StandardTooltip } from "../../../components/ui"
+
+interface AddImageButtonProps {
+	onClick: () => void
+	onFileSelect: (e: React.ChangeEvent<HTMLInputElement>) => void
+	fileInputRef: React.RefObject<HTMLInputElement>
+	acceptedMimeTypes: string
+	disabled?: boolean
+}
+
+/**
+ * Icon button that lets the user pick image files from disk.
+ * Renders a hidden multi-file `<input type="file">` bound to `fileInputRef` next to the
+ * visible button; the caller's `onClick` decides how the input is opened and
+ * `onFileSelect` receives the input's change event.
+ * Shared by ChatInput and NewAgentForm.
+ */
+export const AddImageButton: React.FC<AddImageButtonProps> = (props) => {
+	const { onClick, onFileSelect, fileInputRef, acceptedMimeTypes, disabled = false } = props
+	const { t } = useTranslation("agentManager")
+
+	// The same translated string serves as tooltip text and accessible name.
+	const label = t("chatInput.addImage")
+
+	const buttonClassName = cn(
+		"relative inline-flex items-center justify-center",
+		"bg-transparent border-none p-1.5",
+		"rounded-md min-w-[28px] min-h-[28px]",
+		"opacity-60 hover:opacity-100 text-vscode-descriptionForeground hover:text-vscode-foreground",
+		"transition-all duration-150",
+		"hover:bg-[rgba(255,255,255,0.03)] hover:border-[rgba(255,255,255,0.15)]",
+		"focus:outline-none focus-visible:ring-1 focus-visible:ring-vscode-focusBorder",
+		"active:bg-[rgba(255,255,255,0.1)]",
+		disabled
+			? "opacity-40 cursor-not-allowed grayscale-[30%] hover:bg-transparent hover:border-[rgba(255,255,255,0.08)] active:bg-transparent"
+			: "cursor-pointer",
+	)
+
+	return (
+		<>
+			<input
+				ref={fileInputRef}
+				type="file"
+				accept={acceptedMimeTypes}
+				multiple
+				onChange={onFileSelect}
+				className="hidden"
+				aria-hidden="true"
+			/>
+			<StandardTooltip content={label}>
+				<button
+					type="button"
+					aria-label={label}
+					disabled={disabled}
+					onClick={onClick}
+					className={buttonClassName}>
+					<ImageIcon size={14} />
+				</button>
+			</StandardTooltip>
+		</>
+	)
+}

+ 154 - 137
webview-ui/src/kilocode/agent-manager/components/ChatInput.tsx

@@ -8,10 +8,13 @@ import DynamicTextArea from "react-textarea-autosize"
 import { cn } from "../../../lib/utils"
 import { cn } from "../../../lib/utils"
 import { StandardTooltip } from "../../../components/ui"
 import { StandardTooltip } from "../../../components/ui"
 import { SelectDropdown, type DropdownOption } from "../../../components/ui/select-dropdown"
 import { SelectDropdown, type DropdownOption } from "../../../components/ui/select-dropdown"
-import { sessionInputAtomFamily } from "../state/atoms/sessions"
+import { sessionInputAtomFamily, sessionImagesAtomFamily } from "../state/atoms/sessions"
 import { sessionTodoStatsAtomFamily } from "../state/atoms/todos"
 import { sessionTodoStatsAtomFamily } from "../state/atoms/todos"
 import { AgentTodoList } from "./AgentTodoList"
 import { AgentTodoList } from "./AgentTodoList"
 import { addToQueueAtom } from "../state/atoms/messageQueue"
 import { addToQueueAtom } from "../state/atoms/messageQueue"
+import { useImagePaste } from "../hooks/useImagePaste"
+import { ImageThumbnail } from "./ImageThumbnail"
+import { AddImageButton } from "./AddImageButton"
 
 
 interface ChatInputProps {
 interface ChatInputProps {
 	sessionId: string
 	sessionId: string
@@ -40,6 +43,7 @@ export const ChatInput: React.FC<ChatInputProps> = ({
 }) => {
 }) => {
 	const { t } = useTranslation("agentManager")
 	const { t } = useTranslation("agentManager")
 	const [messageText, setMessageText] = useAtom(sessionInputAtomFamily(sessionId))
 	const [messageText, setMessageText] = useAtom(sessionInputAtomFamily(sessionId))
+	const [selectedImages, setSelectedImages] = useAtom(sessionImagesAtomFamily(sessionId))
 	const todoStats = useAtomValue(sessionTodoStatsAtomFamily(sessionId))
 	const todoStats = useAtomValue(sessionTodoStatsAtomFamily(sessionId))
 	const modelsConfig = useAtomValue(modelsConfigAtom)
 	const modelsConfig = useAtomValue(modelsConfigAtom)
 	const [isFocused, setIsFocused] = useState(false)
 	const [isFocused, setIsFocused] = useState(false)
@@ -62,29 +66,43 @@ export const ChatInput: React.FC<ChatInputProps> = ({
 	}, [sessionId])
 	}, [sessionId])
 
 
 	const trimmedMessage = messageText.trim()
 	const trimmedMessage = messageText.trim()
-	const isEmpty = trimmedMessage.length === 0
+	const hasText = trimmedMessage.length > 0
+	const hasImages = selectedImages.length > 0
+	const isEmpty = !hasText && !hasImages
 	const isSessionCompleted = sessionStatus === "done" || sessionStatus === "error" || sessionStatus === "stopped"
 	const isSessionCompleted = sessionStatus === "done" || sessionStatus === "error" || sessionStatus === "stopped"
 
 
-	// Send is disabled when empty
+	// Send is disabled when empty (no text AND no images)
 	// Note: Users CAN queue multiple messages while one is sending (for running sessions)
 	// Note: Users CAN queue multiple messages while one is sending (for running sessions)
 	// Note: Users CAN send messages to completed sessions (to resume them)
 	// Note: Users CAN send messages to completed sessions (to resume them)
 	const sendDisabled = isEmpty
 	const sendDisabled = isEmpty
 
 
+	// Use shared hook for image paste and file selection handling
+	const { handlePaste, handleFileSelect, openFileBrowser, removeImage, canAddMore, fileInputRef, acceptedMimeTypes } =
+		useImagePaste({
+			selectedImages,
+			setSelectedImages,
+		})
+
 	const handleSend = () => {
 	const handleSend = () => {
 		if (isEmpty) return
 		if (isEmpty) return
 
 
+		// Prepare images array (only include if there are images)
+		const images = hasImages ? selectedImages : undefined
+
 		if (isSessionCompleted) {
 		if (isSessionCompleted) {
 			// Resume a completed session with a new message (sent directly, not queued)
 			// Resume a completed session with a new message (sent directly, not queued)
 			vscode.postMessage({
 			vscode.postMessage({
 				type: "agentManager.resumeSession",
 				type: "agentManager.resumeSession",
 				sessionId,
 				sessionId,
 				sessionLabel,
 				sessionLabel,
-				content: trimmedMessage,
+				content: trimmedMessage || "", // Can be empty if only images
+				images,
 			})
 			})
 			setMessageText("")
 			setMessageText("")
+			setSelectedImages([])
 		} else {
 		} else {
 			// For running sessions, queue the message instead of sending directly
 			// For running sessions, queue the message instead of sending directly
-			const queuedMsg = addToQueue({ sessionId, content: trimmedMessage })
+			const queuedMsg = addToQueue({ sessionId, content: trimmedMessage || "", images })
 
 
 			if (queuedMsg) {
 			if (queuedMsg) {
 				// Notify the extension that a message has been queued
 				// Notify the extension that a message has been queued
@@ -93,10 +111,12 @@ export const ChatInput: React.FC<ChatInputProps> = ({
 					sessionId,
 					sessionId,
 					messageId: queuedMsg.id,
 					messageId: queuedMsg.id,
 					sessionLabel,
 					sessionLabel,
-					content: trimmedMessage,
+					content: trimmedMessage || "",
+					images,
 				})
 				})
 
 
 				setMessageText("")
 				setMessageText("")
+				setSelectedImages([])
 			}
 			}
 		}
 		}
 	}
 	}
@@ -192,25 +212,24 @@ If any step fails, ask the user for help.`
 
 
 	return (
 	return (
 		<div className="am-chat-input-container">
 		<div className="am-chat-input-container">
-			{/* Unified wrapper when todos present - handles border and focus state */}
+			{/* Unified wrapper - handles border and focus state for textarea + toolbar */}
 			<div
 			<div
 				className={cn(
 				className={cn(
 					"relative flex-1 flex flex-col min-h-0 overflow-hidden rounded",
 					"relative flex-1 flex flex-col min-h-0 overflow-hidden rounded",
-					hasTodos && [
-						"border bg-vscode-input-background",
-						isFocused
-							? "border-vscode-focusBorder outline outline-vscode-focusBorder"
-							: "border-vscode-input-border",
-					],
+					"border bg-vscode-input-background",
+					isFocused
+						? "border-vscode-focusBorder outline outline-vscode-focusBorder"
+						: "border-vscode-input-border",
 				)}>
 				)}>
 				{/* Todo list above input */}
 				{/* Todo list above input */}
 				{hasTodos && <AgentTodoList stats={todoStats} isIntegrated />}
 				{hasTodos && <AgentTodoList stats={todoStats} isIntegrated />}
-				<div className={cn("relative", "flex-1", "flex", "flex-col-reverse", "min-h-0", "overflow-hidden")}>
+				<div className={cn("relative", "flex-1", "flex", "flex-col", "min-h-0", "overflow-visible")}>
 					<DynamicTextArea
 					<DynamicTextArea
 						ref={textareaRef}
 						ref={textareaRef}
 						value={messageText}
 						value={messageText}
 						onChange={(e) => setMessageText(e.target.value)}
 						onChange={(e) => setMessageText(e.target.value)}
 						onKeyDown={handleKeyDown}
 						onKeyDown={handleKeyDown}
+						onPaste={handlePaste}
 						onFocus={() => setIsFocused(true)}
 						onFocus={() => setIsFocused(true)}
 						onBlur={() => setIsFocused(false)}
 						onBlur={() => setIsFocused(false)}
 						aria-label={t("chatInput.ariaLabel")}
 						aria-label={t("chatInput.ariaLabel")}
@@ -224,86 +243,139 @@ If any step fails, ask the user for help.`
 							"text-vscode-editor-font-size",
 							"text-vscode-editor-font-size",
 							"leading-vscode-editor-line-height",
 							"leading-vscode-editor-line-height",
 							"cursor-text",
 							"cursor-text",
-							"!pt-3 !pl-3 pr-9",
-							// Only show border when no todos (standalone mode)
-							!hasTodos && [
-								isFocused
-									? "border border-vscode-focusBorder outline outline-vscode-focusBorder"
-									: "border border-vscode-input-border",
-								"rounded",
-							],
+							"!pt-3 !pl-3 pr-3 !pb-2",
 							"bg-vscode-input-background",
 							"bg-vscode-input-background",
+							"!border-none !outline-none",
+							"focus:!border-none focus:!outline-none focus:ring-0",
+							"focus-visible:!border-none focus-visible:!outline-none focus-visible:ring-0",
 							"transition-background-color duration-150 ease-in-out",
 							"transition-background-color duration-150 ease-in-out",
 							"will-change-background-color",
 							"will-change-background-color",
-							"min-h-[90px]",
+							"min-h-[70px]",
 							"box-border",
 							"box-border",
 							"resize-none",
 							"resize-none",
 							"overflow-x-hidden",
 							"overflow-x-hidden",
 							"overflow-y-auto",
 							"overflow-y-auto",
-							"!pb-10",
 							"flex-none flex-grow",
 							"flex-none flex-grow",
-							"z-[2]",
 							"scrollbar-none",
 							"scrollbar-none",
 							"scrollbar-hide",
 							"scrollbar-hide",
 						)}
 						)}
 					/>
 					/>
 
 
-					{/* Transparent overlay at bottom */}
-					<div
-						className="absolute bottom-[1px] left-2 right-2 h-10 bg-gradient-to-t from-[var(--vscode-input-background)] via-[var(--vscode-input-background)] to-transparent pointer-events-none z-[2]"
-						aria-hidden="true"
-					/>
+					{/* Bottom toolbar - inside the bordered container */}
+					<div className="flex items-center justify-between px-3 pb-2 bg-vscode-input-background">
+						{/* Left side: Image thumbnails and hint */}
+						<div className="flex items-center gap-2 min-w-0 flex-1">
+							{selectedImages.length > 0 && (
+								<div className="flex items-center gap-1.5 flex-shrink-0">
+									{selectedImages.map((image, index) => (
+										<ImageThumbnail key={index} src={image} index={index} onRemove={removeImage} />
+									))}
+								</div>
+							)}
+							{!messageText && !hasImages && (
+								<div className="overflow-hidden text-ellipsis whitespace-nowrap text-[11px] text-vscode-descriptionForeground opacity-70 select-none">
+									{t("chatInput.hint")}
+								</div>
+							)}
+						</div>
 
 
-					{/* Floating Actions */}
-					<div className="absolute bottom-2 right-2 z-30 flex items-center gap-1">
-						{/* Model indicator (read-only) - disabled SelectDropdown for visual consistency */}
-						{modelId && modelOptions.length > 0 && (
-							<div className="am-model-selector mr-1">
-								<SelectDropdown
-									value={modelId}
-									options={modelOptions}
-									onChange={() => {}} // No-op since disabled
-									disabled={true}
-									triggerClassName="am-model-selector-trigger"
-									contentClassName="am-model-selector-content"
-									align="end"
-								/>
-							</div>
-						)}
-						{showFinishToBranch && (
-							<StandardTooltip
-								content={
-									worktreeBranchName
-										? t("chatInput.finishToBranchTitle", { branch: worktreeBranchName })
-										: t("chatInput.finishToBranchTitleNoBranch")
-								}>
-								<button
-									aria-label={
+						{/* Right side: Actions */}
+						<div className="flex items-center gap-1 flex-shrink-0">
+							<AddImageButton
+								onClick={openFileBrowser}
+								onFileSelect={handleFileSelect}
+								fileInputRef={fileInputRef}
+								acceptedMimeTypes={acceptedMimeTypes}
+								disabled={!canAddMore}
+							/>
+							{/* Model indicator (read-only) - disabled SelectDropdown for visual consistency */}
+							{modelId && modelOptions.length > 0 && (
+								<div className="am-model-selector mr-1">
+									<SelectDropdown
+										value={modelId}
+										options={modelOptions}
+										onChange={() => {}} // No-op since disabled
+										disabled={true}
+										triggerClassName="am-model-selector-trigger"
+										contentClassName="am-model-selector-content"
+										align="end"
+									/>
+								</div>
+							)}
+							{showFinishToBranch && (
+								<StandardTooltip
+									content={
 										worktreeBranchName
 										worktreeBranchName
 											? t("chatInput.finishToBranchTitle", { branch: worktreeBranchName })
 											? t("chatInput.finishToBranchTitle", { branch: worktreeBranchName })
 											: t("chatInput.finishToBranchTitleNoBranch")
 											: t("chatInput.finishToBranchTitleNoBranch")
-									}
-									onClick={handleFinishToBranch}
-									className={cn(
-										"relative inline-flex items-center justify-center",
-										"bg-transparent border-none p-1.5",
-										"rounded-md min-w-[28px] min-h-[28px]",
-										"opacity-60 hover:opacity-100 text-vscode-descriptionForeground hover:text-vscode-foreground",
-										"transition-all duration-150",
-										"hover:bg-[rgba(255,255,255,0.03)] hover:border-[rgba(255,255,255,0.15)]",
-										"focus:outline-none focus-visible:ring-1 focus-visible:ring-vscode-focusBorder",
-										"active:bg-[rgba(255,255,255,0.1)]",
-										"cursor-pointer",
-									)}>
-									<GitBranch size={14} />
-								</button>
-							</StandardTooltip>
-						)}
-						{showCreatePR && (
-							<StandardTooltip content={t("chatInput.createPRTitle")}>
+									}>
+									<button
+										aria-label={
+											worktreeBranchName
+												? t("chatInput.finishToBranchTitle", { branch: worktreeBranchName })
+												: t("chatInput.finishToBranchTitleNoBranch")
+										}
+										onClick={handleFinishToBranch}
+										className={cn(
+											"relative inline-flex items-center justify-center",
+											"bg-transparent border-none p-1.5",
+											"rounded-md min-w-[28px] min-h-[28px]",
+											"opacity-60 hover:opacity-100 text-vscode-descriptionForeground hover:text-vscode-foreground",
+											"transition-all duration-150",
+											"hover:bg-[rgba(255,255,255,0.03)] hover:border-[rgba(255,255,255,0.15)]",
+											"focus:outline-none focus-visible:ring-1 focus-visible:ring-vscode-focusBorder",
+											"active:bg-[rgba(255,255,255,0.1)]",
+											"cursor-pointer",
+										)}>
+										<GitBranch size={14} />
+									</button>
+								</StandardTooltip>
+							)}
+							{showCreatePR && (
+								<StandardTooltip content={t("chatInput.createPRTitle")}>
+									<button
+										aria-label={t("chatInput.createPRTitle")}
+										onClick={handleCreatePR}
+										className={cn(
+											"relative inline-flex items-center justify-center",
+											"bg-transparent border-none p-1.5",
+											"rounded-md min-w-[28px] min-h-[28px]",
+											"opacity-60 hover:opacity-100 text-vscode-descriptionForeground hover:text-vscode-foreground",
+											"transition-all duration-150",
+											"hover:bg-[rgba(255,255,255,0.03)] hover:border-[rgba(255,255,255,0.15)]",
+											"focus:outline-none focus-visible:ring-1 focus-visible:ring-vscode-focusBorder",
+											"active:bg-[rgba(255,255,255,0.1)]",
+											"cursor-pointer",
+										)}>
+										<GitPullRequest size={14} />
+									</button>
+								</StandardTooltip>
+							)}
+							{isActive && showCancel && (
+								<StandardTooltip content={t("chatInput.cancelTitle")}>
+									<button
+										aria-label={t("chatInput.cancelTitle")}
+										onClick={handleCancel}
+										className={cn(
+											"relative inline-flex items-center justify-center",
+											"bg-transparent border-none p-1.5",
+											"rounded-md min-w-[28px] min-h-[28px]",
+											"opacity-60 hover:opacity-100 text-vscode-errorForeground",
+											"transition-all duration-150",
+											"hover:bg-[rgba(255,255,255,0.03)] hover:border-[rgba(255,255,255,0.15)]",
+											"focus:outline-none focus-visible:ring-1 focus-visible:ring-vscode-focusBorder",
+											"active:bg-[rgba(255,255,255,0.1)]",
+											"cursor-pointer",
+										)}>
+										<Square size={14} fill="currentColor" />
+									</button>
+								</StandardTooltip>
+							)}
+							<StandardTooltip content={t("chatInput.sendTitle")}>
 								<button
 								<button
-									aria-label={t("chatInput.createPRTitle")}
-									onClick={handleCreatePR}
+									aria-label={t("chatInput.sendTitle")}
+									disabled={sendDisabled}
+									onClick={handleSend}
 									className={cn(
 									className={cn(
 										"relative inline-flex items-center justify-center",
 										"relative inline-flex items-center justify-center",
 										"bg-transparent border-none p-1.5",
 										"bg-transparent border-none p-1.5",
@@ -313,71 +385,16 @@ If any step fails, ask the user for help.`
 										"hover:bg-[rgba(255,255,255,0.03)] hover:border-[rgba(255,255,255,0.15)]",
 										"hover:bg-[rgba(255,255,255,0.03)] hover:border-[rgba(255,255,255,0.15)]",
 										"focus:outline-none focus-visible:ring-1 focus-visible:ring-vscode-focusBorder",
 										"focus:outline-none focus-visible:ring-1 focus-visible:ring-vscode-focusBorder",
 										"active:bg-[rgba(255,255,255,0.1)]",
 										"active:bg-[rgba(255,255,255,0.1)]",
-										"cursor-pointer",
-									)}>
-									<GitPullRequest size={14} />
-								</button>
-							</StandardTooltip>
-						)}
-						{isActive && showCancel && (
-							<StandardTooltip content={t("chatInput.cancelTitle")}>
-								<button
-									aria-label={t("chatInput.cancelTitle")}
-									onClick={handleCancel}
-									className={cn(
-										"relative inline-flex items-center justify-center",
-										"bg-transparent border-none p-1.5",
-										"rounded-md min-w-[28px] min-h-[28px]",
-										"opacity-60 hover:opacity-100 text-vscode-errorForeground",
-										"transition-all duration-150",
-										"hover:bg-[rgba(255,255,255,0.03)] hover:border-[rgba(255,255,255,0.15)]",
-										"focus:outline-none focus-visible:ring-1 focus-visible:ring-vscode-focusBorder",
-										"active:bg-[rgba(255,255,255,0.1)]",
-										"cursor-pointer",
+										!sendDisabled && "cursor-pointer",
+										sendDisabled &&
+											"opacity-40 cursor-not-allowed grayscale-[30%] hover:bg-transparent hover:border-[rgba(255,255,255,0.08)] active:bg-transparent",
 									)}>
 									)}>
-									<Square size={14} fill="currentColor" />
+									{/* rtl support */}
+									<SendHorizontal className="w-4 h-4 rtl:-scale-x-100" />
 								</button>
 								</button>
 							</StandardTooltip>
 							</StandardTooltip>
-						)}
-						<StandardTooltip content={t("chatInput.sendTitle")}>
-							<button
-								aria-label={t("chatInput.sendTitle")}
-								disabled={sendDisabled}
-								onClick={handleSend}
-								className={cn(
-									"relative inline-flex items-center justify-center",
-									"bg-transparent border-none p-1.5",
-									"rounded-md min-w-[28px] min-h-[28px]",
-									"opacity-60 hover:opacity-100 text-vscode-descriptionForeground hover:text-vscode-foreground",
-									"transition-all duration-150",
-									"hover:bg-[rgba(255,255,255,0.03)] hover:border-[rgba(255,255,255,0.15)]",
-									"focus:outline-none focus-visible:ring-1 focus-visible:ring-vscode-focusBorder",
-									"active:bg-[rgba(255,255,255,0.1)]",
-									!sendDisabled && "cursor-pointer",
-									sendDisabled &&
-										"opacity-40 cursor-not-allowed grayscale-[30%] hover:bg-transparent hover:border-[rgba(255,255,255,0.08)] active:bg-transparent",
-								)}>
-								{/* rtl support */}
-								<SendHorizontal className="w-4 h-4 rtl:-scale-x-100" />
-							</button>
-						</StandardTooltip>
-					</div>
-
-					{/* Hint Text inside input */}
-					{!messageText && (
-						<div
-							className="absolute left-3 right-[100px] z-30 flex items-center h-8 overflow-hidden text-ellipsis whitespace-nowrap"
-							style={{
-								bottom: "0.25rem",
-								color: "var(--vscode-descriptionForeground)",
-								opacity: 0.7,
-								fontSize: "11px",
-								userSelect: "none",
-								pointerEvents: "none",
-							}}>
-							{t("chatInput.hint")}
 						</div>
 						</div>
-					)}
+					</div>
 				</div>
 				</div>
 			</div>
 			</div>
 		</div>
 		</div>

+ 35 - 0
webview-ui/src/kilocode/agent-manager/components/ImageThumbnail.tsx

@@ -0,0 +1,35 @@
+import React from "react"
+import { useTranslation } from "react-i18next"
+import { X } from "lucide-react"
+import { vscode } from "../utils/vscode"
+
+interface ImageThumbnailProps { // Props accepted by ImageThumbnail
+	src: string // Used as the <img> source and sent as `text` in the "openImage" message
+	index: number // Position used in the alt text and passed back to onRemove
+	onRemove: (index: number) => void // Called with `index` when the remove button is clicked
+}
+
+/**
+ * Reusable image thumbnail component with a remove button.
+ * Used in ChatInput and NewAgentForm for displaying pasted images.
+ *
+ * Clicking the image posts an `openImage` message carrying `src` through
+ * `vscode.postMessage`. The remove button calls `onRemove(index)`; it is
+ * revealed on hover and also when it receives keyboard focus, so it is never
+ * focused while invisible.
+ */
+export const ImageThumbnail: React.FC<ImageThumbnailProps> = ({ src, index, onRemove }) => {
+	const { t } = useTranslation("agentManager")
+
+	return (
+		<div className="relative group">
+			<img
+				src={src}
+				alt={`Image ${index + 1}`}
+				className="w-6 h-6 object-cover rounded cursor-pointer border border-vscode-input-border"
+				onClick={() => vscode.postMessage({ type: "openImage", text: src })}
+			/>
+			{/* type="button" prevents an implicit form submit when rendered inside a <form> */}
+			<button
+				type="button"
+				onClick={() => onRemove(index)}
+				className="absolute -top-1 -right-1 w-3.5 h-3.5 rounded-full bg-vscode-badge-background flex items-center justify-center opacity-0 group-hover:opacity-100 focus-visible:opacity-100 transition-opacity cursor-pointer"
+				aria-label={t("chatInput.removeImage")}>
+				<X size={8} className="text-vscode-foreground" />
+			</button>
+		</div>
+	)
+}

+ 182 - 138
webview-ui/src/kilocode/agent-manager/components/SessionDetail.tsx

@@ -23,8 +23,11 @@ import { sessionMachineUiStateAtom, selectedSessionMachineStateAtom } from "../s
 import { MessageList } from "./MessageList"
 import { MessageList } from "./MessageList"
 import { ChatInput } from "./ChatInput"
 import { ChatInput } from "./ChatInput"
 import { BranchPicker } from "./BranchPicker"
 import { BranchPicker } from "./BranchPicker"
+import { ImageThumbnail } from "./ImageThumbnail"
+import { AddImageButton } from "./AddImageButton"
 import { vscode } from "../utils/vscode"
 import { vscode } from "../utils/vscode"
 import { formatRelativeTime, createRelativeTimeLabels } from "../utils/timeUtils"
 import { formatRelativeTime, createRelativeTimeLabels } from "../utils/timeUtils"
+import { useImagePaste } from "../hooks/useImagePaste"
 import {
 import {
 	Loader2,
 	Loader2,
 	SendHorizontal,
 	SendHorizontal,
@@ -242,6 +245,7 @@ function PendingSessionView({
 function NewAgentForm() {
 function NewAgentForm() {
 	const { t } = useTranslation("agentManager")
 	const { t } = useTranslation("agentManager")
 	const [promptText, setPromptText] = useState("")
 	const [promptText, setPromptText] = useState("")
+	const [selectedImages, setSelectedImages] = useState<string[]>([])
 	const [runMode, setRunMode] = useAtom(preferredRunModeAtom)
 	const [runMode, setRunMode] = useAtom(preferredRunModeAtom)
 	const [versionCount, setVersionCount] = useAtom(versionCountAtom)
 	const [versionCount, setVersionCount] = useAtom(versionCountAtom)
 	const setSelectedModelId = useSetAtom(setSelectedModelIdAtom)
 	const setSelectedModelId = useSetAtom(setSelectedModelIdAtom)
@@ -302,7 +306,17 @@ function NewAgentForm() {
 	}, [isDropdownOpen, isVersionDropdownOpen])
 	}, [isDropdownOpen, isVersionDropdownOpen])
 
 
 	const trimmedPrompt = promptText.trim()
 	const trimmedPrompt = promptText.trim()
-	const isEmpty = trimmedPrompt.length === 0
+	const hasText = trimmedPrompt.length > 0
+	const hasImages = selectedImages.length > 0
+	const isEmpty = !hasText && !hasImages
+
+	// Use shared hook for image paste handling
+	const { handlePaste, handleFileSelect, openFileBrowser, removeImage, canAddMore, fileInputRef, acceptedMimeTypes } =
+		useImagePaste({
+			selectedImages,
+			setSelectedImages,
+			disabled: isStarting,
+		})
 
 
 	const handleStart = () => {
 	const handleStart = () => {
 		if (isEmpty || isStarting) return
 		if (isEmpty || isStarting) return
@@ -312,14 +326,18 @@ function NewAgentForm() {
 		// Generate labels for multi-version mode
 		// Generate labels for multi-version mode
 		const labels = isMultiVersion ? generateVersionLabels(trimmedPrompt.slice(0, 50), versionCount) : undefined
 		const labels = isMultiVersion ? generateVersionLabels(trimmedPrompt.slice(0, 50), versionCount) : undefined
 
 
+		// Prepare images array (only include if there are images)
+		const images = hasImages ? selectedImages : undefined
+
 		vscode.postMessage({
 		vscode.postMessage({
 			type: "agentManager.startSession",
 			type: "agentManager.startSession",
-			prompt: trimmedPrompt,
+			prompt: trimmedPrompt || "", // Can be empty if only images
 			parallelMode: effectiveRunMode === "worktree",
 			parallelMode: effectiveRunMode === "worktree",
 			versions: versionCount,
 			versions: versionCount,
 			labels,
 			labels,
 			existingBranch: selectedBranch || undefined,
 			existingBranch: selectedBranch || undefined,
 			model: effectiveModelId || undefined,
 			model: effectiveModelId || undefined,
+			images,
 		})
 		})
 	}
 	}
 
 
@@ -375,6 +393,7 @@ function NewAgentForm() {
 						value={promptText}
 						value={promptText}
 						onChange={(e) => setPromptText(e.target.value)}
 						onChange={(e) => setPromptText(e.target.value)}
 						onKeyDown={handleKeyDown}
 						onKeyDown={handleKeyDown}
+						onPaste={handlePaste}
 						onFocus={() => setIsFocused(true)}
 						onFocus={() => setIsFocused(true)}
 						onBlur={() => setIsFocused(false)}
 						onBlur={() => setIsFocused(false)}
 						aria-label={t("sessionDetail.startNewAgent")}
 						aria-label={t("sessionDetail.startNewAgent")}
@@ -419,156 +438,181 @@ function NewAgentForm() {
 						aria-hidden="true"
 						aria-hidden="true"
 					/>
 					/>
 
 
-					<div className="absolute bottom-2 right-2 z-30 flex items-center gap-2">
-						<div ref={dropdownRef} className="am-run-mode-dropdown-inline relative">
-							<StandardTooltip
-								content={
-									isMultiVersion
-										? t("sessionDetail.versionsHelperText", { count: versionCount })
-										: effectiveRunMode === "local"
-											? t("sessionDetail.runModeLocal")
-											: t("sessionDetail.runModeWorktree")
-								}>
-								<button
-									className={cn("am-run-mode-trigger-inline", isMultiVersion && "am-locked")}
-									onClick={() => !isMultiVersion && setIsDropdownOpen(!isDropdownOpen)}
-									disabled={isStarting || isMultiVersion}
-									type="button">
-									{effectiveRunMode === "local" ? <Folder size={14} /> : <GitBranch size={14} />}
-									{!isMultiVersion && (
-										<ChevronDown
-											size={10}
-											className={cn("am-chevron", isDropdownOpen && "am-open")}
-										/>
-									)}
-								</button>
-							</StandardTooltip>
-							{isDropdownOpen && !isMultiVersion && (
-								<div className="am-run-mode-menu-inline">
+					{/* Bottom bar with images and actions */}
+					<div className="absolute bottom-2 left-3 right-2 z-30 flex items-center justify-between">
+						{/* Image Thumbnails on the left */}
+						<div className="flex items-center gap-1.5">
+							{selectedImages.map((image, index) => (
+								<ImageThumbnail key={index} src={image} index={index} onRemove={removeImage} />
+							))}
+						</div>
+						{/* Actions on the right */}
+						<div className="flex items-center gap-2">
+							<AddImageButton
+								onClick={openFileBrowser}
+								onFileSelect={handleFileSelect}
+								fileInputRef={fileInputRef}
+								acceptedMimeTypes={acceptedMimeTypes}
+								disabled={!canAddMore}
+							/>
+							<div ref={dropdownRef} className="am-run-mode-dropdown-inline relative">
+								<StandardTooltip
+									content={
+										isMultiVersion
+											? t("sessionDetail.versionsHelperText", { count: versionCount })
+											: effectiveRunMode === "local"
+												? t("sessionDetail.runModeLocal")
+												: t("sessionDetail.runModeWorktree")
+									}>
 									<button
 									<button
-										className={cn(
-											"am-run-mode-option-inline",
-											runMode === "local" && "am-selected",
-										)}
-										onClick={() => handleSelectMode("local")}
+										className={cn("am-run-mode-trigger-inline", isMultiVersion && "am-locked")}
+										onClick={() => !isMultiVersion && setIsDropdownOpen(!isDropdownOpen)}
+										disabled={isStarting || isMultiVersion}
 										type="button">
 										type="button">
-										<Folder size={12} />
-										<span>{t("sessionDetail.runModeLocal")}</span>
-										{runMode === "local" && <span className="am-checkmark">✓</span>}
-									</button>
-									<button
-										className={cn(
-											"am-run-mode-option-inline",
-											runMode === "worktree" && "am-selected",
+										{effectiveRunMode === "local" ? <Folder size={14} /> : <GitBranch size={14} />}
+										{!isMultiVersion && (
+											<ChevronDown
+												size={10}
+												className={cn("am-chevron", isDropdownOpen && "am-open")}
+											/>
 										)}
 										)}
-										onClick={() => handleSelectMode("worktree")}
-										type="button">
-										<GitBranch size={12} />
-										<span className="am-run-mode-label">{t("sessionDetail.runModeWorktree")}</span>
-										{runMode === "worktree" && <span className="am-checkmark">✓</span>}
 									</button>
 									</button>
-								</div>
-							)}
-						</div>
-
-						<div ref={versionDropdownRef} className="am-run-mode-dropdown-inline relative">
-							<StandardTooltip content={t("sessionDetail.versionsTooltip")}>
-								<button
-									className="am-run-mode-trigger-inline"
-									onClick={() => setIsVersionDropdownOpen(!isVersionDropdownOpen)}
-									disabled={isStarting}
-									type="button"
-									title={t("sessionDetail.versions")}>
-									<Layers size={14} />
-									<span className="am-version-count">{versionCount}</span>
-									<ChevronDown
-										size={10}
-										className={cn("am-chevron", isVersionDropdownOpen && "am-open")}
-									/>
-								</button>
-							</StandardTooltip>
-							{isVersionDropdownOpen && (
-								<div className="am-run-mode-menu-inline">
-									{VERSION_COUNT_OPTIONS.map((count) => (
+								</StandardTooltip>
+								{isDropdownOpen && !isMultiVersion && (
+									<div className="am-run-mode-menu-inline">
 										<button
 										<button
-											key={count}
 											className={cn(
 											className={cn(
 												"am-run-mode-option-inline",
 												"am-run-mode-option-inline",
-												versionCount === count && "am-selected",
+												runMode === "local" && "am-selected",
 											)}
 											)}
-											onClick={() => handleSelectVersionCount(count)}
+											onClick={() => handleSelectMode("local")}
 											type="button">
 											type="button">
-											<span>{t("sessionDetail.versionCount", { count })}</span>
-											{versionCount === count && <span className="am-checkmark">✓</span>}
+											<Folder size={12} />
+											<span>{t("sessionDetail.runModeLocal")}</span>
+											{runMode === "local" && <span className="am-checkmark">✓</span>}
 										</button>
 										</button>
-									))}
-								</div>
-							)}
-						</div>
-
-						{/* Model selector - show loading spinner while fetching, then SelectDropdown */}
-						{modelsLoading ? (
-							<div className="am-run-mode-trigger-inline opacity-70">
-								<Loader2 size={14} className="am-spinning" />
-								<span className="text-sm">{t("sessionDetail.loadingModels")}</span>
+										<button
+											className={cn(
+												"am-run-mode-option-inline",
+												runMode === "worktree" && "am-selected",
+											)}
+											onClick={() => handleSelectMode("worktree")}
+											type="button">
+											<GitBranch size={12} />
+											<span className="am-run-mode-label">
+												{t("sessionDetail.runModeWorktree")}
+											</span>
+											{runMode === "worktree" && <span className="am-checkmark">✓</span>}
+										</button>
+									</div>
+								)}
 							</div>
 							</div>
-						) : hasModels ? (
-							<div className="am-model-selector">
-								<SelectDropdown
-									value={effectiveModelId || ""}
-									options={modelOptions}
-									onChange={(value) => setSelectedModelId(value)}
-									disabled={isStarting}
-									placeholder={t("sessionDetail.selectModel")}
-									title={t("sessionDetail.modelTooltip")}
-									triggerClassName="am-model-selector-trigger"
-									contentClassName="am-model-selector-content"
-									align="end"
-								/>
+
+							<div ref={versionDropdownRef} className="am-run-mode-dropdown-inline relative">
+								<StandardTooltip content={t("sessionDetail.versionsTooltip")}>
+									<button
+										className="am-run-mode-trigger-inline"
+										onClick={() => setIsVersionDropdownOpen(!isVersionDropdownOpen)}
+										disabled={isStarting}
+										type="button"
+										title={t("sessionDetail.versions")}>
+										<Layers size={14} />
+										<span className="am-version-count">{versionCount}</span>
+										<ChevronDown
+											size={10}
+											className={cn("am-chevron", isVersionDropdownOpen && "am-open")}
+										/>
+									</button>
+								</StandardTooltip>
+								{isVersionDropdownOpen && (
+									<div className="am-run-mode-menu-inline">
+										{VERSION_COUNT_OPTIONS.map((count) => (
+											<button
+												key={count}
+												className={cn(
+													"am-run-mode-option-inline",
+													versionCount === count && "am-selected",
+												)}
+												onClick={() => handleSelectVersionCount(count)}
+												type="button">
+												<span>{t("sessionDetail.versionCount", { count })}</span>
+												{versionCount === count && <span className="am-checkmark">✓</span>}
+											</button>
+										))}
+									</div>
+								)}
 							</div>
 							</div>
-						) : null}
-
-						{effectiveRunMode === "worktree" && !isMultiVersion && (
-							<StandardTooltip content={t("sessionDetail.branchPickerTooltip")}>
-								<button
-									className="am-run-mode-trigger-inline"
-									onClick={() => setIsBranchPickerOpen(true)}
-									disabled={isStarting}
-									type="button"
-									title={t("sessionDetail.selectBranch")}>
-									<GitBranch size={14} />
-									<span className="truncate max-w-[80px] text-sm">
-										{selectedBranch || t("sessionDetail.selectBranch")}
-									</span>
-									<ChevronDown size={10} className="am-chevron" />
-								</button>
-							</StandardTooltip>
-						)}
 
 
-						<button
-							className={cn(
-								"relative inline-flex items-center justify-center",
-								"bg-transparent border-none p-1.5",
-								"rounded-md min-w-[28px] min-h-[28px]",
-								"opacity-60 hover:opacity-100 text-vscode-descriptionForeground hover:text-vscode-foreground",
-								"transition-all duration-150",
-								"hover:bg-[rgba(255,255,255,0.03)] hover:border-[rgba(255,255,255,0.15)]",
-								"focus:outline-none focus-visible:ring-1 focus-visible:ring-vscode-focusBorder",
-								"active:bg-[rgba(255,255,255,0.1)]",
-								!isEmpty && !isStarting && "cursor-pointer",
-								(isEmpty || isStarting) &&
-									"opacity-40 cursor-not-allowed grayscale-[30%] hover:bg-transparent hover:border-[rgba(255,255,255,0.08)] active:bg-transparent",
+							{/* Model selector - show loading spinner while fetching, then SelectDropdown */}
+							{modelsLoading ? (
+								<div className="am-run-mode-trigger-inline opacity-70">
+									<Loader2 size={14} className="am-spinning" />
+									<span className="text-sm">{t("sessionDetail.loadingModels")}</span>
+								</div>
+							) : hasModels ? (
+								<div className="am-model-selector">
+									<SelectDropdown
+										value={effectiveModelId || ""}
+										options={modelOptions}
+										onChange={(value) => setSelectedModelId(value)}
+										disabled={isStarting}
+										placeholder={t("sessionDetail.selectModel")}
+										title={t("sessionDetail.modelTooltip")}
+										triggerClassName="am-model-selector-trigger"
+										contentClassName="am-model-selector-content"
+										align="end"
+									/>
+								</div>
+							) : null}
+
+							{effectiveRunMode === "worktree" && !isMultiVersion && (
+								<StandardTooltip content={t("sessionDetail.branchPickerTooltip")}>
+									<button
+										className="am-run-mode-trigger-inline"
+										onClick={() => setIsBranchPickerOpen(true)}
+										disabled={isStarting}
+										type="button"
+										title={t("sessionDetail.selectBranch")}>
+										<GitBranch size={14} />
+										<span className="truncate max-w-[80px] text-sm">
+											{selectedBranch || t("sessionDetail.selectBranch")}
+										</span>
+										<ChevronDown size={10} className="am-chevron" />
+									</button>
+								</StandardTooltip>
 							)}
 							)}
-							onClick={handleStart}
-							disabled={isEmpty || isStarting}
-							aria-label={isStarting ? t("sessionDetail.starting") : t("sessionDetail.startAriaLabel")}
-							title={
-								isMultiVersion
-									? t("sessionDetail.launchVersions", { count: versionCount })
-									: t("sessionDetail.startAgent")
-							}>
-							{isStarting ? <Loader2 size={16} className="am-spinning" /> : <SendHorizontal size={16} />}
-						</button>
+
+							<button
+								className={cn(
+									"relative inline-flex items-center justify-center",
+									"bg-transparent border-none p-1.5",
+									"rounded-md min-w-[28px] min-h-[28px]",
+									"opacity-60 hover:opacity-100 text-vscode-descriptionForeground hover:text-vscode-foreground",
+									"transition-all duration-150",
+									"hover:bg-[rgba(255,255,255,0.03)] hover:border-[rgba(255,255,255,0.15)]",
+									"focus:outline-none focus-visible:ring-1 focus-visible:ring-vscode-focusBorder",
+									"active:bg-[rgba(255,255,255,0.1)]",
+									!isEmpty && !isStarting && "cursor-pointer",
+									(isEmpty || isStarting) &&
+										"opacity-40 cursor-not-allowed grayscale-[30%] hover:bg-transparent hover:border-[rgba(255,255,255,0.08)] active:bg-transparent",
+								)}
+								onClick={handleStart}
+								disabled={isEmpty || isStarting}
+								aria-label={
+									isStarting ? t("sessionDetail.starting") : t("sessionDetail.startAriaLabel")
+								}
+								title={
+									isMultiVersion
+										? t("sessionDetail.launchVersions", { count: versionCount })
+										: t("sessionDetail.startAgent")
+								}>
+								{isStarting ? (
+									<Loader2 size={16} className="am-spinning" />
+								) : (
+									<SendHorizontal size={16} />
+								)}
+							</button>
+						</div>
 					</div>
 					</div>
 				</div>
 				</div>
 			</div>
 			</div>

+ 127 - 0
webview-ui/src/kilocode/agent-manager/hooks/useImagePaste.ts

@@ -0,0 +1,127 @@
+import { useCallback, useRef } from "react"
+import { MAX_IMAGES_PER_MESSAGE } from "../state/atoms/sessions"
+
+const ACCEPTED_IMAGE_TYPES = ["png", "jpeg", "webp", "gif"]
+const ACCEPTED_MIME_TYPES = ACCEPTED_IMAGE_TYPES.map((t) => `image/${t}`).join(",")
+
+interface UseImagePasteOptions {
+	selectedImages: string[]
+	setSelectedImages: React.Dispatch<React.SetStateAction<string[]>>
+	disabled?: boolean
+}
+
+/**
+ * Hook for handling image selection from files and paste from clipboard.
+ * Returns handlers for file input, paste events, and image management.
+ */
+export function useImagePaste({ selectedImages, setSelectedImages, disabled = false }: UseImagePasteOptions) {
+	const fileInputRef = useRef<HTMLInputElement>(null)
+	const canAddMore = selectedImages.length < MAX_IMAGES_PER_MESSAGE && !disabled
+
+	const handlePaste = useCallback(
+		async (e: React.ClipboardEvent) => {
+			if (!canAddMore) return
+
+			const items = e.clipboardData.items
+
+			const imageItems = Array.from(items).filter((item) => {
+				const [type, subtype] = item.type.split("/")
+				return type === "image" && ACCEPTED_IMAGE_TYPES.includes(subtype)
+			})
+
+			if (imageItems.length === 0) return
+
+			e.preventDefault()
+
+			const imagePromises = imageItems.map((item) => {
+				return new Promise<string | null>((resolve) => {
+					const blob = item.getAsFile()
+					if (!blob) {
+						resolve(null)
+						return
+					}
+
+					const reader = new FileReader()
+					reader.onloadend = () => {
+						if (reader.error) {
+							console.error("Error reading image file:", reader.error)
+							resolve(null)
+						} else {
+							const result = reader.result
+							resolve(typeof result === "string" ? result : null)
+						}
+					}
+					reader.readAsDataURL(blob)
+				})
+			})
+
+			const imageDataArray = await Promise.all(imagePromises)
+			const dataUrls = imageDataArray.filter((dataUrl): dataUrl is string => dataUrl !== null)
+
+			if (dataUrls.length > 0) {
+				setSelectedImages((prevImages) => [...prevImages, ...dataUrls].slice(0, MAX_IMAGES_PER_MESSAGE))
+			}
+		},
+		[canAddMore, setSelectedImages],
+	)
+
+	/**
+	 * Handle file selection from the file input.
+	 * Reads selected files and converts them to data URLs.
+	 */
+	const handleFileSelect = useCallback(
+		(e: React.ChangeEvent<HTMLInputElement>) => {
+			if (!canAddMore) return
+
+			const files = e.target.files
+			if (!files || files.length === 0) return
+
+			const remainingSlots = MAX_IMAGES_PER_MESSAGE - selectedImages.length
+			const filesToProcess = Array.from(files).slice(0, remainingSlots)
+
+			filesToProcess.forEach((file) => {
+				const reader = new FileReader()
+				reader.onloadend = () => {
+					if (reader.error) {
+						console.error("Error reading image file:", reader.error)
+						return
+					}
+					const result = reader.result
+					if (typeof result === "string") {
+						setSelectedImages((prev) => [...prev, result].slice(0, MAX_IMAGES_PER_MESSAGE))
+					}
+				}
+				reader.readAsDataURL(file)
+			})
+
+			// Reset input value to allow selecting the same file again
+			e.target.value = ""
+		},
+		[canAddMore, selectedImages.length, setSelectedImages],
+	)
+
+	/**
+	 * Trigger the hidden file input to open file browser.
+	 */
+	const openFileBrowser = useCallback(() => {
+		if (!canAddMore) return
+		fileInputRef.current?.click()
+	}, [canAddMore])
+
+	const removeImage = useCallback(
+		(index: number) => {
+			setSelectedImages((prevImages) => prevImages.filter((_, i) => i !== index))
+		},
+		[setSelectedImages],
+	)
+
+	return {
+		handlePaste,
+		handleFileSelect,
+		openFileBrowser,
+		removeImage,
+		canAddMore,
+		fileInputRef,
+		acceptedMimeTypes: ACCEPTED_MIME_TYPES,
+	}
+}

+ 21 - 16
webview-ui/src/kilocode/agent-manager/state/atoms/messageQueue.ts

@@ -11,6 +11,7 @@ export interface QueuedMessage {
 	id: string
 	id: string
 	sessionId: string
 	sessionId: string
 	content: string
 	content: string
+	images?: string[] // Data URLs of pasted images
 	status: "queued" | "sending" | "sent" | "failed"
 	status: "queued" | "sending" | "sent" | "failed"
 	timestamp: number
 	timestamp: number
 	error?: string
 	error?: string
@@ -41,23 +42,27 @@ export const nextQueuedMessageAtomFamily = atomFamily((sessionId: string) =>
 )
 )
 
 
 // Action: Add message to queue
 // Action: Add message to queue
-export const addToQueueAtom = atom(null, (get, set, payload: { sessionId: string; content: string }) => {
-	const { sessionId, content } = payload
-	const queue = get(sessionMessageQueueAtomFamily(sessionId))
+export const addToQueueAtom = atom(
+	null,
+	(get, set, payload: { sessionId: string; content: string; images?: string[] }) => {
+		const { sessionId, content, images } = payload
+		const queue = get(sessionMessageQueueAtomFamily(sessionId))
 
 
-	const newMessage: QueuedMessage = {
-		id: generateId(),
-		sessionId,
-		content,
-		status: "queued",
-		timestamp: Date.now(),
-		retryCount: 0,
-		maxRetries: 3,
-	}
-
-	set(sessionMessageQueueAtomFamily(sessionId), [...queue, newMessage])
-	return newMessage
-})
+		const newMessage: QueuedMessage = {
+			id: generateId(),
+			sessionId,
+			content,
+			images,
+			status: "queued",
+			timestamp: Date.now(),
+			retryCount: 0,
+			maxRetries: 3,
+		}
+
+		set(sessionMessageQueueAtomFamily(sessionId), [...queue, newMessage])
+		return newMessage
+	},
+)
 
 
 // Action: Update message status
 // Action: Update message status
 export const updateMessageStatusAtom = atom(
 export const updateMessageStatusAtom = atom(

+ 6 - 0
webview-ui/src/kilocode/agent-manager/state/atoms/sessions.ts

@@ -62,6 +62,12 @@ export const startSessionFailedCounterAtom = atom(0)
 // Per-session input value for the chat input field
 // Per-session input value for the chat input field
 export const sessionInputAtomFamily = atomFamily((_sessionId: string) => atom(""))
 export const sessionInputAtomFamily = atomFamily((_sessionId: string) => atom(""))
 
 
+// Per-session images (data URLs) for the chat input field
+export const sessionImagesAtomFamily = atomFamily((_sessionId: string) => atom<string[]>([]))
+
+// Maximum images per message (same as Anthropic limit)
+export const MAX_IMAGES_PER_MESSAGE = 20
+
 // User preference for run mode (persisted across new agent forms)
 // User preference for run mode (persisted across new agent forms)
 export type RunMode = "local" | "worktree"
 export type RunMode = "local" | "worktree"
 // Default to local until worktree mode is ready to ship
 // Default to local until worktree mode is ready to ship

+ 4 - 3
webview-ui/src/kilocode/agent-manager/state/hooks/useMessageQueueProcessor.ts

@@ -25,9 +25,9 @@ export function useMessageQueueProcessor(sessionId: string | null) {
 	const removeFromQueue = useSetAtom(removeFromQueueAtom)
 	const removeFromQueue = useSetAtom(removeFromQueueAtom)
 	const setSendingMessage = useSetAtom(setSendingMessageAtom)
 	const setSendingMessage = useSetAtom(setSendingMessageAtom)
 
 
-	// Send the next queued message
+	// Send the next queued message (including images if present)
 	const sendNextMessage = useCallback(
 	const sendNextMessage = useCallback(
-		(messageId: string, content: string) => {
+		(messageId: string, content: string, images?: string[]) => {
 			if (!sessionId) return
 			if (!sessionId) return
 
 
 			setSendingMessage({ sessionId, messageId })
 			setSendingMessage({ sessionId, messageId })
@@ -36,6 +36,7 @@ export function useMessageQueueProcessor(sessionId: string | null) {
 				sessionId,
 				sessionId,
 				messageId,
 				messageId,
 				content,
 				content,
+				images,
 			})
 			})
 		},
 		},
 		[sessionId, setSendingMessage],
 		[sessionId, setSendingMessage],
@@ -78,7 +79,7 @@ export function useMessageQueueProcessor(sessionId: string | null) {
 
 
 		const queuedMsg = queue.find((msg) => msg.status === "queued")
 		const queuedMsg = queue.find((msg) => msg.status === "queued")
 		if (queuedMsg) {
 		if (queuedMsg) {
-			sendNextMessage(queuedMsg.id, queuedMsg.content)
+			sendNextMessage(queuedMsg.id, queuedMsg.content, queuedMsg.images)
 		}
 		}
 	}, [sessionId, queue, sendingMessageId, sendNextMessage])
 	}, [sessionId, queue, sendingMessageId, sendNextMessage])
 }
 }