
Migrate conversation continuity to plugin-side encrypted reasoning items (Responses API) (#9203)

* Migrate conversation continuity to plugin-side encrypted reasoning items (Responses API)

Summary
We moved continuity off OpenAI servers and now maintain conversation state locally by persisting and replaying encrypted reasoning items. Requests are stateless (store=false) while retaining the performance/caching benefits of the Responses API.

Why
This aligns with how Roo manages context and simplifies our Responses API implementation while retaining the continuity, caching, and latency benefits.

What changed
- All OpenAI models now use the Responses API; system instructions are passed via the top-level instructions field; requests include store=false and include=["reasoning.encrypted_content"] (see the sketch after this list).
- We persist encrypted reasoning items (type: "reasoning", encrypted_content, optional id) into API history and replay them on subsequent turns.
- Reasoning summaries default to summary: "auto" when supported; text.verbosity is sent only when supported.
- Atomic persistence via safeWriteJson.
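
For orientation, a single turn's request now looks roughly like this. This is a minimal sketch, not the exact body: field names mirror what buildRequestBody() assembles, and all literal values (model id, texts, encrypted payload, ids) are illustrative.

```ts
// Sketch of the stateless Responses API request sent each turn (values are illustrative).
const systemPrompt = "You are Roo, a coding assistant."

const requestBody = {
	model: "gpt-5-2025-08-07",
	stream: true,
	store: false, // stateless: OpenAI retains nothing between turns
	include: ["reasoning.encrypted_content"], // ask for encrypted reasoning items in the response
	instructions: systemPrompt, // system prompt goes in the top-level instructions field
	input: [
		{ role: "user", content: [{ type: "input_text", text: "What is 2+2?" }] },
		// Encrypted reasoning item replayed from api_conversation_history.json
		{ type: "reasoning", summary: [], encrypted_content: "gAAAAB...", id: "rs_abc123" },
		{ role: "assistant", content: [{ type: "output_text", text: "2+2 equals 4." }] },
		{ role: "user", content: [{ type: "input_text", text: "And 3+3?" }] },
	],
}
```

Because the full (condense-aware) history plus the replayed reasoning items travel with every request, the provider needs no server-side state to reconstruct context.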

Removed
- previous_response_id flows, suppressPreviousResponseId/skipPrevResponseIdOnce, persistGpt5Metadata(), and GPT‑5 response ID metadata in UI messages.

Kept
- taskId and mode metadata for cross-provider features.

Result
- ZDR-friendly, stateless continuity with equal or better performance and a simpler codepath.

* fix(webview): remove unused metadata prop from ReasoningBlock render

* Responses API: retain response id for troubleshooting (not continuity)

Continuity remains stateless via encrypted reasoning items that we persist and replay. We now capture the top-level response id in OpenAiNativeHandler and persist it as the assistant message's id in api_conversation_history.json, solely for debugging and correlation with provider logs; it is not used for continuity or control flow.

Also: silence request-body debug logging to avoid leaking prompts.
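
For illustration, one assistant turn in api_conversation_history.json now looks roughly like this (a sketch assuming the ApiMessage fields added in apiMessages.ts; the encrypted payload, ids, and timestamps are invented):

```ts
// Illustrative contents of api_conversation_history.json after one assistant turn.
const history = [
	{ role: "user", content: "What is 2+2?", ts: 1735000000000 },
	// Reasoning item persisted just before the assistant message and replayed on the next request.
	{ type: "reasoning", summary: [], encrypted_content: "gAAAAB...", id: "rs_abc123", ts: 1735000001500 },
	// Assistant message; `id` is the top-level response id, kept only for debugging/correlation.
	{ role: "assistant", content: "2+2 equals 4.", id: "resp_abc123", ts: 1735000001500 },
]
```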

* remove DEPRECATED tests

* chore: remove unused Task types file to satisfy knip CI

* fix(task): properly type cleanConversationHistory and createMessage args in Task to address Dan's review
Hannes Rudolph, 1 month ago · commit 0fdbd392e8

+ 0 - 9
packages/types/src/message.ts

@@ -226,15 +226,6 @@ export const clineMessageSchema = z.object({
 	isProtected: z.boolean().optional(),
 	apiProtocol: z.union([z.literal("openai"), z.literal("anthropic")]).optional(),
 	isAnswered: z.boolean().optional(),
-	metadata: z
-		.object({
-			gpt5: z
-				.object({
-					previous_response_id: z.string().optional(),
-				})
-				.optional(),
-		})
-		.optional(),
 })
 
 export type ClineMessage = z.infer<typeof clineMessageSchema>

+ 10 - 13
src/api/index.ts

@@ -49,23 +49,20 @@ export interface SingleCompletionHandler {
 }
 
 export interface ApiHandlerCreateMessageMetadata {
-	mode?: string
-	taskId: string
-	previousResponseId?: string
 	/**
-	 * When true, the provider must NOT fall back to internal continuity state
-	 * (e.g., lastResponseId) if previousResponseId is absent.
-	 * Used to enforce "skip once" after a condense operation.
+	 * Task ID used for tracking and provider-specific features:
+	 * - DeepInfra: Used as prompt_cache_key for caching
+	 * - Roo: Sent as X-Roo-Task-ID header
+	 * - Requesty: Sent as trace_id
+	 * - Unbound: Sent in unbound_metadata
 	 */
-	suppressPreviousResponseId?: boolean
+	taskId: string
 	/**
-	 * Controls whether the response should be stored for 30 days in OpenAI's Responses API.
-	 * When true (default), responses are stored and can be referenced in future requests
-	 * using the previous_response_id for efficient conversation continuity.
-	 * Set to false to opt out of response storage for privacy or compliance reasons.
-	 * @default true
+	 * Current mode slug for provider-specific tracking:
+	 * - Requesty: Sent in extra metadata
+	 * - Unbound: Sent in unbound_metadata
 	 */
-	store?: boolean
+	mode?: string
 }
 
 export interface ApiHandler {

+ 20 - 436
src/api/providers/__tests__/openai-native.spec.ts

@@ -686,69 +686,6 @@ describe("OpenAiNativeHandler", () => {
 			expect(contentChunks).toHaveLength(0)
 		})
 
-		it("should support previous_response_id for conversation continuity", async () => {
-			// Mock fetch for Responses API
-			const mockFetch = vitest.fn().mockResolvedValue({
-				ok: true,
-				body: new ReadableStream({
-					start(controller) {
-						// Include response ID in the response
-						controller.enqueue(
-							new TextEncoder().encode(
-								'data: {"type":"response.created","response":{"id":"resp_123","status":"in_progress"}}\n\n',
-							),
-						)
-						controller.enqueue(
-							new TextEncoder().encode(
-								'data: {"type":"response.output_item.added","item":{"type":"text","text":"Response with ID"}}\n\n',
-							),
-						)
-						controller.enqueue(
-							new TextEncoder().encode(
-								'data: {"type":"response.done","response":{"id":"resp_123","usage":{"prompt_tokens":10,"completion_tokens":3}}}\n\n',
-							),
-						)
-						controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
-						controller.close()
-					},
-				}),
-			})
-			global.fetch = mockFetch as any
-
-			// Mock SDK to fail
-			mockResponsesCreate.mockRejectedValue(new Error("SDK not available"))
-
-			handler = new OpenAiNativeHandler({
-				...mockOptions,
-				apiModelId: "gpt-5-2025-08-07",
-			})
-
-			// First request - should not have previous_response_id
-			const stream1 = handler.createMessage(systemPrompt, messages)
-			const chunks1: any[] = []
-			for await (const chunk of stream1) {
-				chunks1.push(chunk)
-			}
-
-			// Verify first request doesn't include previous_response_id
-			let firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body)
-			expect(firstCallBody.previous_response_id).toBeUndefined()
-
-			// Second request with metadata - should include previous_response_id
-			const stream2 = handler.createMessage(systemPrompt, messages, {
-				taskId: "test-task",
-				previousResponseId: "resp_456",
-			})
-			const chunks2: any[] = []
-			for await (const chunk of stream2) {
-				chunks2.push(chunk)
-			}
-
-			// Verify second request includes the provided previous_response_id
-			let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body)
-			expect(secondCallBody.previous_response_id).toBe("resp_456")
-		})
-
 		it("should handle unhandled stream events gracefully", async () => {
 			// Mock fetch for the fallback SSE path
 			const mockFetch = vitest.fn().mockResolvedValue({
@@ -798,397 +735,44 @@ describe("OpenAiNativeHandler", () => {
 			expect(textChunks[0].text).toBe("Hello")
 		})
 
-		it("should use stored response ID when metadata doesn't provide one", async () => {
-			// Mock fetch for Responses API
-			const mockFetch = vitest
-				.fn()
-				.mockResolvedValueOnce({
-					ok: true,
-					body: new ReadableStream({
-						start(controller) {
-							// First response with ID
-							controller.enqueue(
-								new TextEncoder().encode(
-									'data: {"type":"response.done","response":{"id":"resp_789","output":[{"type":"text","content":[{"type":"text","text":"First"}]}],"usage":{"prompt_tokens":10,"completion_tokens":1}}}\n\n',
-								),
-							)
-							controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
-							controller.close()
-						},
-					}),
-				})
-				.mockResolvedValueOnce({
-					ok: true,
-					body: new ReadableStream({
-						start(controller) {
-							// Second response
-							controller.enqueue(
-								new TextEncoder().encode(
-									'data: {"type":"response.output_item.added","item":{"type":"text","text":"Second"}}\n\n',
-								),
-							)
-							controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
-							controller.close()
-						},
-					}),
-				})
-			global.fetch = mockFetch as any
-
-			// Mock SDK to fail
-			mockResponsesCreate.mockRejectedValue(new Error("SDK not available"))
-
-			handler = new OpenAiNativeHandler({
-				...mockOptions,
-				apiModelId: "gpt-5-2025-08-07",
-			})
-
-			// First request - establishes response ID
-			const stream1 = handler.createMessage(systemPrompt, messages)
-			for await (const chunk of stream1) {
-				// consume stream
-			}
-
-			// Second request without metadata - should use stored response ID
-			const stream2 = handler.createMessage(systemPrompt, messages, { taskId: "test-task" })
-			for await (const chunk of stream2) {
-				// consume stream
-			}
-
-			// Verify second request uses the stored response ID from first request
-			let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body)
-			expect(secondCallBody.previous_response_id).toBe("resp_789")
-		})
-
-		it("should retry with full conversation when previous_response_id fails", async () => {
-			// This test verifies the fix for context loss bug when previous_response_id becomes invalid
-			const mockFetch = vitest
-				.fn()
-				// First call: fails with 400 error about invalid previous_response_id
-				.mockResolvedValueOnce({
-					ok: false,
-					status: 400,
-					text: async () => JSON.stringify({ error: { message: "Previous response not found" } }),
-				})
-				// Second call (retry): succeeds
-				.mockResolvedValueOnce({
-					ok: true,
-					body: new ReadableStream({
-						start(controller) {
-							controller.enqueue(
-								new TextEncoder().encode(
-									'data: {"type":"response.output_item.added","item":{"type":"text","text":"Retry successful"}}\n\n',
-								),
-							)
-							controller.enqueue(
-								new TextEncoder().encode(
-									'data: {"type":"response.done","response":{"id":"resp_new","usage":{"prompt_tokens":100,"completion_tokens":2}}}\n\n',
-								),
-							)
-							controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
-							controller.close()
-						},
-					}),
-				})
-			global.fetch = mockFetch as any
-
-			// Mock SDK to fail
-			mockResponsesCreate.mockRejectedValue(new Error("SDK not available"))
-
-			handler = new OpenAiNativeHandler({
-				...mockOptions,
-				apiModelId: "gpt-5-2025-08-07",
-			})
-
-			// Prepare a multi-turn conversation
-			const conversationMessages: Anthropic.Messages.MessageParam[] = [
-				{ role: "user", content: "What is 2+2?" },
-				{ role: "assistant", content: "2+2 equals 4." },
-				{ role: "user", content: "What about 3+3?" },
-				{ role: "assistant", content: "3+3 equals 6." },
-				{ role: "user", content: "And 4+4?" }, // Latest message
-			]
-
-			// Call with a previous_response_id that will fail
-			const stream = handler.createMessage(systemPrompt, conversationMessages, {
-				taskId: "test-task",
-				previousResponseId: "resp_invalid",
-			})
-
-			const chunks: any[] = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Verify we got the successful response
-			const textChunks = chunks.filter((c) => c.type === "text")
-			expect(textChunks).toHaveLength(1)
-			expect(textChunks[0].text).toBe("Retry successful")
-
-			// Verify two requests were made
-			expect(mockFetch).toHaveBeenCalledTimes(2)
-
-			// First request: includes previous_response_id and only latest message
-			const firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body)
-			expect(firstCallBody.previous_response_id).toBe("resp_invalid")
-			expect(firstCallBody.input).toEqual([
-				{
-					role: "user",
-					content: [{ type: "input_text", text: "And 4+4?" }],
-				},
-			])
-
-			// Second request (retry): NO previous_response_id, but FULL conversation history
-			const secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body)
-			expect(secondCallBody.previous_response_id).toBeUndefined()
-			expect(secondCallBody.instructions).toBe(systemPrompt)
-			// Should include the FULL conversation history
-			expect(secondCallBody.input).toEqual([
-				{
-					role: "user",
-					content: [{ type: "input_text", text: "What is 2+2?" }],
-				},
-				{
-					role: "assistant",
-					content: [{ type: "output_text", text: "2+2 equals 4." }],
-				},
-				{
-					role: "user",
-					content: [{ type: "input_text", text: "What about 3+3?" }],
-				},
-				{
-					role: "assistant",
-					content: [{ type: "output_text", text: "3+3 equals 6." }],
-				},
-				{
-					role: "user",
-					content: [{ type: "input_text", text: "And 4+4?" }],
-				},
-			])
-		})
-
-		it("should retry with full conversation when SDK returns 400 for invalid previous_response_id", async () => {
-			// Test the SDK path (executeRequest method) for handling invalid previous_response_id
-
-			// Mock SDK to return an async iterable that we can control
-			const createMockStream = (chunks: any[]) => {
-				return {
-					async *[Symbol.asyncIterator]() {
-						for (const chunk of chunks) {
-							yield chunk
-						}
+		it("should format full conversation correctly", async () => {
+			const mockFetch = vitest.fn().mockResolvedValue({
+				ok: true,
+				body: new ReadableStream({
+					start(controller) {
+						controller.enqueue(
+							new TextEncoder().encode(
+								'data: {"type":"response.output_item.added","item":{"type":"text","text":"Response"}}\n\n',
+							),
+						)
+						controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
+						controller.close()
 					},
-				}
-			}
-
-			// First call: SDK throws 400 error
-			mockResponsesCreate
-				.mockRejectedValueOnce({
-					status: 400,
-					message: "Previous response resp_invalid not found",
-				})
-				// Second call (retry): SDK succeeds with async iterable
-				.mockResolvedValueOnce(
-					createMockStream([
-						{ type: "response.text.delta", delta: "Context" },
-						{ type: "response.text.delta", delta: " preserved!" },
-						{
-							type: "response.done",
-							response: { id: "resp_new", usage: { prompt_tokens: 150, completion_tokens: 2 } },
-						},
-					]),
-				)
-
-			handler = new OpenAiNativeHandler({
-				...mockOptions,
-				apiModelId: "gpt-5-2025-08-07",
-			})
-
-			// Prepare a conversation with context
-			const conversationMessages: Anthropic.Messages.MessageParam[] = [
-				{ role: "user", content: "Remember the number 42" },
-				{ role: "assistant", content: "I'll remember 42." },
-				{ role: "user", content: "What number did I ask you to remember?" },
-			]
-
-			// Call with a previous_response_id that will fail
-			const stream = handler.createMessage(systemPrompt, conversationMessages, {
-				taskId: "test-task",
-				previousResponseId: "resp_invalid",
+				}),
 			})
-
-			const chunks: any[] = []
-			for await (const chunk of stream) {
-				chunks.push(chunk)
-			}
-
-			// Verify we got the successful response
-			const textChunks = chunks.filter((c) => c.type === "text")
-			expect(textChunks).toHaveLength(2)
-			expect(textChunks[0].text).toBe("Context")
-			expect(textChunks[1].text).toBe(" preserved!")
-
-			// Verify two SDK calls were made
-			expect(mockResponsesCreate).toHaveBeenCalledTimes(2)
-
-			// First SDK call: includes previous_response_id and only latest message
-			const firstCallBody = mockResponsesCreate.mock.calls[0][0]
-			expect(firstCallBody.previous_response_id).toBe("resp_invalid")
-			expect(firstCallBody.input).toEqual([
-				{
-					role: "user",
-					content: [{ type: "input_text", text: "What number did I ask you to remember?" }],
-				},
-			])
-
-			// Second SDK call (retry): NO previous_response_id, but FULL conversation history
-			const secondCallBody = mockResponsesCreate.mock.calls[1][0]
-			expect(secondCallBody.previous_response_id).toBeUndefined()
-			expect(secondCallBody.instructions).toBe(systemPrompt)
-			// Should include the FULL conversation history to preserve context
-			expect(secondCallBody.input).toEqual([
-				{
-					role: "user",
-					content: [{ type: "input_text", text: "Remember the number 42" }],
-				},
-				{
-					role: "assistant",
-					content: [{ type: "output_text", text: "I'll remember 42." }],
-				},
-				{
-					role: "user",
-					content: [{ type: "input_text", text: "What number did I ask you to remember?" }],
-				},
-			])
-		})
-
-		it("should only send latest message when using previous_response_id", async () => {
-			// Mock fetch for Responses API
-			const mockFetch = vitest
-				.fn()
-				.mockResolvedValueOnce({
-					ok: true,
-					body: new ReadableStream({
-						start(controller) {
-							// First response with ID
-							controller.enqueue(
-								new TextEncoder().encode(
-									'data: {"type":"response.done","response":{"id":"resp_001","output":[{"type":"text","content":[{"type":"text","text":"First"}]}],"usage":{"prompt_tokens":50,"completion_tokens":1}}}\n\n',
-								),
-							)
-							controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
-							controller.close()
-						},
-					}),
-				})
-				.mockResolvedValueOnce({
-					ok: true,
-					body: new ReadableStream({
-						start(controller) {
-							// Second response
-							controller.enqueue(
-								new TextEncoder().encode(
-									'data: {"type":"response.output_item.added","item":{"type":"text","text":"Second"}}\n\n',
-								),
-							)
-							controller.enqueue(
-								new TextEncoder().encode(
-									'data: {"type":"response.done","response":{"id":"resp_002","usage":{"prompt_tokens":10,"completion_tokens":1}}}\n\n',
-								),
-							)
-							controller.enqueue(new TextEncoder().encode("data: [DONE]\n\n"))
-							controller.close()
-						},
-					}),
-				})
 			global.fetch = mockFetch as any
-
-			// Mock SDK to fail
 			mockResponsesCreate.mockRejectedValue(new Error("SDK not available"))
 
-			handler = new OpenAiNativeHandler({
-				...mockOptions,
-				apiModelId: "gpt-5-2025-08-07",
-			})
-
-			// First request with full conversation
-			const firstMessages: Anthropic.Messages.MessageParam[] = [
-				{ role: "user", content: "Hello" },
-				{ role: "assistant", content: "Hi there!" },
-				{ role: "user", content: "How are you?" },
-			]
-
-			const stream1 = handler.createMessage(systemPrompt, firstMessages)
-			for await (const chunk of stream1) {
-				// consume stream
-			}
-
-			// Verify first request sends full conversation in structured format
-			let firstCallBody = JSON.parse(mockFetch.mock.calls[0][1].body)
-			expect(firstCallBody.instructions).toBe(systemPrompt)
-			expect(firstCallBody.input).toEqual([
-				{
-					role: "user",
-					content: [{ type: "input_text", text: "Hello" }],
-				},
-				{
-					role: "assistant",
-					content: [{ type: "output_text", text: "Hi there!" }],
-				},
-				{
-					role: "user",
-					content: [{ type: "input_text", text: "How are you?" }],
-				},
-			])
-			expect(firstCallBody.previous_response_id).toBeUndefined()
-
-			// Second request with previous_response_id - should only send latest message
-			const secondMessages: Anthropic.Messages.MessageParam[] = [
-				{ role: "user", content: "Hello" },
-				{ role: "assistant", content: "Hi there!" },
-				{ role: "user", content: "How are you?" },
-				{ role: "assistant", content: "I'm doing well!" },
-				{ role: "user", content: "What's the weather?" }, // Latest message
-			]
-
-			const stream2 = handler.createMessage(systemPrompt, secondMessages, {
-				taskId: "test-task",
-				previousResponseId: "resp_001",
-			})
-			for await (const chunk of stream2) {
-				// consume stream
-			}
-
-			// Verify second request only sends the latest user message in structured format
-			let secondCallBody = JSON.parse(mockFetch.mock.calls[1][1].body)
-			expect(secondCallBody.input).toEqual([
-				{
-					role: "user",
-					content: [{ type: "input_text", text: "What's the weather?" }],
-				},
-			])
-			expect(secondCallBody.previous_response_id).toBe("resp_001")
-		})
-
-		it("should correctly prepare structured input", () => {
 			const gpt5Handler = new OpenAiNativeHandler({
 				...mockOptions,
 				apiModelId: "gpt-5-2025-08-07",
 			})
 
-			// Test with metadata that has previousResponseId
-			// @ts-expect-error - private method
-			const { formattedInput, previousResponseId } = gpt5Handler.prepareStructuredInput(systemPrompt, messages, {
+			const stream = gpt5Handler.createMessage(systemPrompt, messages, {
 				taskId: "task1",
-				previousResponseId: "resp_123",
 			})
+			for await (const chunk of stream) {
+				// consume
+			}
 
-			expect(previousResponseId).toBe("resp_123")
-			expect(formattedInput).toEqual([
+			const callBody = JSON.parse(mockFetch.mock.calls[0][1].body)
+			expect(callBody.input).toEqual([
 				{
 					role: "user",
 					content: [{ type: "input_text", text: "Hello!" }],
 				},
 			])
+			expect(callBody.previous_response_id).toBeUndefined()
 		})
 
 		it("should provide helpful error messages for different error codes", async () => {

+ 82 - 270
src/api/providers/openai-native.ts

@@ -34,11 +34,12 @@ const GPT5_MODEL_PREFIX = "gpt-5"
 export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: OpenAI
-	private lastResponseId: string | undefined
-	private responseIdPromise: Promise<string | undefined> | undefined
-	private responseIdResolver: ((value: string | undefined) => void) | undefined
 	// Resolved service tier from Responses API (actual tier used by OpenAI)
 	private lastServiceTier: ServiceTier | undefined
+	// Complete response output array (includes reasoning items with encrypted_content)
+	private lastResponseOutput: any[] | undefined
+	// Last top-level response id from Responses API (for troubleshooting)
+	private lastResponseId: string | undefined
 
 	// Event types handled by the shared event processor to avoid duplication
 	private readonly coreHandledEventTypes = new Set<string>([
@@ -126,17 +127,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		return out
 	}
 
-	private resolveResponseId(responseId: string | undefined): void {
-		if (responseId) {
-			this.lastResponseId = responseId
-		}
-		// Resolve the promise so the next request can use this ID
-		if (this.responseIdResolver) {
-			this.responseIdResolver(responseId)
-			this.responseIdResolver = undefined
-		}
-	}
-
 	override async *createMessage(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
@@ -156,6 +146,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	): ApiStream {
 		// Reset resolved tier for this request; will be set from response if present
 		this.lastServiceTier = undefined
+		// Reset output array to capture current response output items
+		this.lastResponseOutput = undefined
+		// Reset last response id for this request
+		this.lastResponseId = undefined
 
 		// Use Responses API for ALL models
 		const { verbosity, reasoning } = this.getModel()
@@ -163,60 +157,22 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		// Resolve reasoning effort for models that support it
 		const reasoningEffort = this.getReasoningEffort(model)
 
-		// Wait for any pending response ID from a previous request to be available
-		// This handles the race condition with fast nano model responses
-		let effectivePreviousResponseId = metadata?.previousResponseId
-
-		// Check if we should suppress previous response ID (e.g., after condense or message edit)
-		if (metadata?.suppressPreviousResponseId) {
-			// Clear the stored lastResponseId to prevent it from being used in future requests
-			this.lastResponseId = undefined
-			effectivePreviousResponseId = undefined
-		} else {
-			// Only try to get fallback response IDs if not suppressing
-
-			// If we have a pending response ID promise, wait for it to resolve
-			if (!effectivePreviousResponseId && this.responseIdPromise) {
-				try {
-					const resolvedId = await Promise.race([
-						this.responseIdPromise,
-						// Timeout after 100ms to avoid blocking too long
-						new Promise<undefined>((resolve) => setTimeout(() => resolve(undefined), 100)),
-					])
-					if (resolvedId) {
-						effectivePreviousResponseId = resolvedId
-					}
-				} catch {
-					// Non-fatal if promise fails
-				}
-			}
-
-			// Fall back to the last known response ID if still not available
-			if (!effectivePreviousResponseId && this.lastResponseId) {
-				effectivePreviousResponseId = this.lastResponseId
-			}
-		}
-
-		// Format input and capture continuity id
-		const { formattedInput, previousResponseId } = this.prepareStructuredInput(systemPrompt, messages, metadata)
-		const requestPreviousResponseId = effectivePreviousResponseId || previousResponseId
-
-		// Create a new promise for this request's response ID
-		this.responseIdPromise = new Promise<string | undefined>((resolve) => {
-			this.responseIdResolver = resolve
-		})
+		// Format full conversation (messages already include reasoning items from API history)
+		const formattedInput = this.formatFullConversation(systemPrompt, messages)
 
 		// Build request body
 		const requestBody = this.buildRequestBody(
 			model,
 			formattedInput,
-			requestPreviousResponseId,
 			systemPrompt,
 			verbosity,
 			reasoningEffort,
 			metadata,
 		)
 
+		// Temporary debug logging
+		// console.log("[OpenAI Native] Request body:", requestBody)
+
 		// Make the request (pass systemPrompt and messages for potential retry)
 		yield* this.executeRequest(requestBody, model, metadata, systemPrompt, messages)
 	}
@@ -224,27 +180,26 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	private buildRequestBody(
 		model: OpenAiNativeModel,
 		formattedInput: any,
-		requestPreviousResponseId: string | undefined,
 		systemPrompt: string,
 		verbosity: any,
 		reasoningEffort: ReasoningEffortWithMinimal | undefined,
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): any {
-		// Build a request body (also used for fallback)
+		// Build a request body
 		// Ensure we explicitly pass max_output_tokens for GPT‑5 based on Roo's reserved model response calculation
 		// so requests do not default to very large limits (e.g., 120k).
 		interface Gpt5RequestBody {
 			model: string
-			input: Array<{ role: "user" | "assistant"; content: any[] }>
+			input: Array<{ role: "user" | "assistant"; content: any[] } | { type: string; content: string }>
 			stream: boolean
-			reasoning?: { effort: ReasoningEffortWithMinimal; summary?: "auto" }
+			reasoning?: { effort?: ReasoningEffortWithMinimal; summary?: "auto" }
 			text?: { verbosity: VerbosityLevel }
 			temperature?: number
 			max_output_tokens?: number
-			previous_response_id?: string
 			store?: boolean
 			instructions?: string
 			service_tier?: ServiceTier
+			include?: string[]
 		}
 
 		// Validate requested tier against model support; if not supported, omit.
@@ -255,17 +210,21 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			model: model.id,
 			input: formattedInput,
 			stream: true,
-			store: metadata?.store !== false, // Default to true unless explicitly set to false
+			// Always use stateless operation with encrypted reasoning
+			store: false,
 			// Always include instructions (system prompt) for Responses API.
 			// Unlike Chat Completions, system/developer roles in input have no special semantics here.
 			// The official way to set system behavior is the top-level `instructions` field.
 			instructions: systemPrompt,
-			...(reasoningEffort && {
-				reasoning: {
-					effort: reasoningEffort,
-					...(this.options.enableGpt5ReasoningSummary ? { summary: "auto" as const } : {}),
-				},
-			}),
+			include: ["reasoning.encrypted_content"],
+			...(reasoningEffort
+				? {
+						reasoning: {
+							...(reasoningEffort ? { effort: reasoningEffort } : {}),
+							...(this.options.enableGpt5ReasoningSummary ? { summary: "auto" as const } : {}),
+						},
+					}
+				: {}),
 			// Only include temperature if the model supports it
 			...(model.info.supportsTemperature !== false && {
 				temperature:
@@ -277,7 +236,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			// Explicitly include the calculated max output tokens.
 			// Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams).
 			...(model.maxTokens ? { max_output_tokens: model.maxTokens } : {}),
-			...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }),
 			// Include tier when selected and supported by the model, or when explicitly "default"
 			...(requestedTier &&
 				(requestedTier === "default" || allowedTierNames.has(requestedTier)) && {
@@ -316,60 +274,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 				}
 			}
 		} catch (sdkErr: any) {
-			// Check if this is a 400 error about previous_response_id not found
-			const errorMessage = sdkErr?.message || sdkErr?.error?.message || ""
-			const is400Error = sdkErr?.status === 400 || sdkErr?.response?.status === 400
-			const isPreviousResponseError =
-				errorMessage.includes("Previous response") || errorMessage.includes("not found")
-
-			if (is400Error && requestBody.previous_response_id && isPreviousResponseError) {
-				// Log the error and retry without the previous_response_id
-
-				// Clear the stored lastResponseId to prevent using it again
-				this.lastResponseId = undefined
-
-				// Re-prepare the full conversation without previous_response_id
-				let retryRequestBody = { ...requestBody }
-				delete retryRequestBody.previous_response_id
-
-				// If we have the original messages, re-prepare the full conversation
-				if (systemPrompt && messages) {
-					const { formattedInput } = this.prepareStructuredInput(systemPrompt, messages, undefined)
-					retryRequestBody.input = formattedInput
-				}
-
-				try {
-					// Retry with the SDK
-					const retryStream = (await (this.client as any).responses.create(
-						retryRequestBody,
-					)) as AsyncIterable<any>
-
-					if (typeof (retryStream as any)[Symbol.asyncIterator] !== "function") {
-						// If SDK fails, fall back to SSE
-						yield* this.makeGpt5ResponsesAPIRequest(
-							retryRequestBody,
-							model,
-							metadata,
-							systemPrompt,
-							messages,
-						)
-						return
-					}
-
-					for await (const event of retryStream) {
-						for await (const outChunk of this.processEvent(event, model)) {
-							yield outChunk
-						}
-					}
-					return
-				} catch (retryErr) {
-					// If retry also fails, fall back to SSE
-					yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata, systemPrompt, messages)
-					return
-				}
-			}
-
-			// For other errors, fallback to manual SSE via fetch
+			// For errors, fallback to manual SSE via fetch
 			yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata, systemPrompt, messages)
 		}
 	}
@@ -377,6 +282,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	private formatFullConversation(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): any {
 		// Format the entire conversation history for the Responses API using structured format
 		// This supports both text and images
+		// Messages already include reasoning items from API history, so we just need to format them
 		const formattedMessages: any[] = []
 
 		// Do NOT embed the system prompt as a developer message in the Responses API input.
@@ -384,6 +290,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
 		// Process each message
 		for (const message of messages) {
+			// Check if this is a reasoning item (already formatted in API history)
+			if ((message as any).type === "reasoning") {
+				// Pass through reasoning items as-is
+				formattedMessages.push(message)
+				continue
+			}
+
 			const role = message.role === "user" ? "user" : "assistant"
 			const content: any[] = []
 
@@ -421,40 +334,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		return formattedMessages
 	}
 
-	private formatSingleStructuredMessage(message: Anthropic.Messages.MessageParam): any {
-		// Format a single message for the Responses API when using previous_response_id
-		// When using previous_response_id, we only send the latest user message
-		const role = message.role === "user" ? "user" : "assistant"
-
-		if (typeof message.content === "string") {
-			// For simple string content, return structured format with proper type
-			return {
-				role,
-				content: [{ type: "input_text", text: message.content }],
-			}
-		} else if (Array.isArray(message.content)) {
-			// Extract text and image content from blocks
-			const content: any[] = []
-
-			for (const block of message.content) {
-				if (block.type === "text") {
-					// User messages use input_text
-					content.push({ type: "input_text", text: (block as any).text })
-				} else if (block.type === "image") {
-					const image = block as Anthropic.Messages.ImageBlockParam
-					const imageUrl = `data:${image.source.media_type};base64,${image.source.data}`
-					content.push({ type: "input_image", image_url: imageUrl })
-				}
-			}
-
-			if (content.length > 0) {
-				return { role, content }
-			}
-		}
-
-		return null
-	}
-
 	private async *makeGpt5ResponsesAPIRequest(
 		requestBody: any,
 		model: OpenAiNativeModel,
@@ -498,53 +377,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 					errorDetails = errorText
 				}
 
-				// Check if this is a 400 error about previous_response_id not found
-				const isPreviousResponseError =
-					errorDetails.includes("Previous response") || errorDetails.includes("not found")
-
-				if (response.status === 400 && requestBody.previous_response_id && isPreviousResponseError) {
-					// Log the error and retry without the previous_response_id
-
-					// Clear the stored lastResponseId to prevent using it again
-					this.lastResponseId = undefined
-					// Resolve the promise once to unblock any waiting requests
-					this.resolveResponseId(undefined)
-
-					// Re-prepare the full conversation without previous_response_id
-					let retryRequestBody = { ...requestBody }
-					delete retryRequestBody.previous_response_id
-
-					// If we have the original messages, re-prepare the full conversation
-					if (systemPrompt && messages) {
-						const { formattedInput } = this.prepareStructuredInput(systemPrompt, messages, undefined)
-						retryRequestBody.input = formattedInput
-					}
-
-					// Retry the request with full conversation context
-					const retryResponse = await fetch(url, {
-						method: "POST",
-						headers: {
-							"Content-Type": "application/json",
-							Authorization: `Bearer ${apiKey}`,
-							Accept: "text/event-stream",
-						},
-						body: JSON.stringify(retryRequestBody),
-					})
-
-					if (!retryResponse.ok) {
-						// If retry also fails, throw the original error
-						throw new Error(`Responses API retry failed (${retryResponse.status})`)
-					}
-
-					if (!retryResponse.body) {
-						throw new Error("Responses API error: No response body from retry request")
-					}
-
-					// Handle the successful retry response
-					yield* this.handleStreamResponse(retryResponse.body, model)
-					return
-				}
-
 				// Provide user-friendly error messages based on status code
 				switch (response.status) {
 					case 400:
@@ -600,47 +432,6 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		}
 	}
 
-	/**
-	 * Prepares the input and conversation continuity parameters for a Responses API call.
-	 * Decides whether to send full conversation or just the latest message based on previousResponseId.
-	 *
-	 * - If a `previousResponseId` is available (either from metadata or the handler's state),
-	 *   it formats only the most recent user message for the input and returns the response ID
-	 *   to maintain conversation context.
-	 * - Otherwise, it formats the entire conversation history (system prompt + messages) for the input.
-	 *
-	 * @returns An object containing the formatted input and the previous response ID (if used).
-	 */
-	private prepareStructuredInput(
-		systemPrompt: string,
-		messages: Anthropic.Messages.MessageParam[],
-		metadata?: ApiHandlerCreateMessageMetadata,
-	): { formattedInput: any; previousResponseId?: string } {
-		// Note: suppressPreviousResponseId is handled in handleResponsesApiMessage
-		// This method now only handles formatting based on whether we have a previous response ID
-
-		// Check for previous response ID from metadata or fallback to lastResponseId
-		const isFirstMessage = messages.length === 1 && messages[0].role === "user"
-		const previousResponseId = metadata?.previousResponseId ?? (!isFirstMessage ? this.lastResponseId : undefined)
-
-		if (previousResponseId) {
-			// When using previous_response_id, only send the latest user message
-			const lastUserMessage = [...messages].reverse().find((msg) => msg.role === "user")
-			if (lastUserMessage) {
-				const formattedMessage = this.formatSingleStructuredMessage(lastUserMessage)
-				// formatSingleStructuredMessage now always returns an object with role and content
-				if (formattedMessage) {
-					return { formattedInput: [formattedMessage], previousResponseId }
-				}
-			}
-			return { formattedInput: [], previousResponseId }
-		} else {
-			// Format full conversation history (returns an array of structured messages)
-			const formattedInput = this.formatFullConversation(systemPrompt, messages)
-			return { formattedInput }
-		}
-	}
-
 	/**
 	 * Handles the streaming response from the Responses API.
 	 *
@@ -675,14 +466,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 						try {
 							const parsed = JSON.parse(data)
 
-							// Store response ID for conversation continuity
-							if (parsed.response?.id) {
-								this.resolveResponseId(parsed.response.id)
-							}
 							// Capture resolved service tier if present
 							if (parsed.response?.service_tier) {
 								this.lastServiceTier = parsed.response.service_tier as ServiceTier
 							}
+							// Capture complete output array (includes reasoning items with encrypted_content)
+							if (parsed.response?.output && Array.isArray(parsed.response.output)) {
+								this.lastResponseOutput = parsed.response.output
+							}
+							// Capture top-level response id
+							if (parsed.response?.id) {
+								this.lastResponseId = parsed.response.id as string
+							}
 
 							// Delegate standard event types to the shared processor to avoid duplication
 							if (parsed?.type && this.coreHandledEventTypes.has(parsed.type)) {
@@ -970,14 +765,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 									)
 								}
 							} else if (parsed.type === "response.completed" || parsed.type === "response.done") {
-								// Store response ID for conversation continuity
-								if (parsed.response?.id) {
-									this.resolveResponseId(parsed.response.id)
-								}
 								// Capture resolved service tier if present
 								if (parsed.response?.service_tier) {
 									this.lastServiceTier = parsed.response.service_tier as ServiceTier
 								}
+								// Capture top-level response id
+								if (parsed.response?.id) {
+									this.lastResponseId = parsed.response.id as string
+								}
+								// Capture complete output array (includes reasoning items with encrypted_content)
+								if (parsed.response?.output && Array.isArray(parsed.response.output)) {
+									this.lastResponseOutput = parsed.response.output
+								}
 
 								// Check if the done event contains the complete output (as a fallback)
 								if (
@@ -1098,14 +897,18 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	 * Shared processor for Responses API events.
 	 */
 	private async *processEvent(event: any, model: OpenAiNativeModel): ApiStream {
-		// Persist response id for conversation continuity when available
-		if (event?.response?.id) {
-			this.resolveResponseId(event.response.id)
-		}
 		// Capture resolved service tier when available
 		if (event?.response?.service_tier) {
 			this.lastServiceTier = event.response.service_tier as ServiceTier
 		}
+		// Capture complete output array (includes reasoning items with encrypted_content)
+		if (event?.response?.output && Array.isArray(event.response.output)) {
+			this.lastResponseOutput = event.response.output
+		}
+		// Capture top-level response id
+		if (event?.response?.id) {
+			this.lastResponseId = event.response.id as string
+		}
 
 		// Handle known streaming text deltas
 		if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") {
@@ -1251,21 +1054,29 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	}
 
 	/**
-	 * Gets the last response ID captured from the Responses API stream.
-	 * Used for maintaining conversation continuity across requests.
-	 * @returns The response ID, or undefined if not available yet
+	 * Extracts encrypted_content and id from the first reasoning item in the output array.
+	 * This is the minimal data needed for stateless API continuity.
+	 *
+	 * @returns Object with encrypted_content and id, or undefined if not available
 	 */
-	getLastResponseId(): string | undefined {
-		return this.lastResponseId
+	getEncryptedContent(): { encrypted_content: string; id?: string } | undefined {
+		if (!this.lastResponseOutput) return undefined
+
+		// Find the first reasoning item with encrypted_content
+		const reasoningItem = this.lastResponseOutput.find(
+			(item) => item.type === "reasoning" && item.encrypted_content,
+		)
+
+		if (!reasoningItem?.encrypted_content) return undefined
+
+		return {
+			encrypted_content: reasoningItem.encrypted_content,
+			...(reasoningItem.id ? { id: reasoningItem.id } : {}),
+		}
 	}
 
-	/**
-	 * Sets the last response ID for conversation continuity.
-	 * Typically only used in tests or special flows.
-	 * @param responseId The response ID to store
-	 */
-	setResponseId(responseId: string): void {
-		this.lastResponseId = responseId
+	getResponseId(): string | undefined {
+		return this.lastResponseId
 	}
 
 	async completePrompt(prompt: string): Promise<string> {
@@ -1287,6 +1098,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 				],
 				stream: false, // Non-streaming for completePrompt
 				store: false, // Don't store prompt completions
+				include: ["reasoning.encrypted_content"],
 			}
 
 			// Include service tier if selected and supported

+ 0 - 3
src/core/task-persistence/__tests__/taskMessages.spec.ts

@@ -33,9 +33,6 @@ describe("taskMessages.saveTaskMessages", () => {
 				role: "assistant",
 				content: "Hello",
 				metadata: {
-					gpt5: {
-						previous_response_id: "resp_123",
-					},
 					other: "keep",
 				},
 			},

+ 9 - 1
src/core/task-persistence/apiMessages.ts

@@ -9,7 +9,15 @@ import { fileExistsAtPath } from "../../utils/fs"
 import { GlobalFileNames } from "../../shared/globalFileNames"
 import { getTaskDirectoryPath } from "../../utils/storage"
 
-export type ApiMessage = Anthropic.MessageParam & { ts?: number; isSummary?: boolean }
+export type ApiMessage = Anthropic.MessageParam & {
+	ts?: number
+	isSummary?: boolean
+	id?: string
+	// For reasoning items stored in API history
+	type?: "reasoning"
+	summary?: any[]
+	encrypted_content?: string
+}
 
 export async function readApiMessages({
 	taskId,

+ 65 - 130
src/core/task/Task.ts

@@ -114,7 +114,6 @@ import {
 } from "../checkpoints"
 import { processUserContentMentions } from "../mentions/processUserContentMentions"
 import { getMessagesSinceLastSummary, summarizeConversation } from "../condense"
-import { Gpt5Metadata, ClineMessageWithMetadata } from "./types"
 import { MessageQueueService } from "../message-queue/MessageQueueService"
 import { AutoApprovalHandler, checkAutoApproval } from "../auto-approval"
 
@@ -296,8 +295,6 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 	didAlreadyUseTool = false
 	didCompleteReadingStream = false
 	assistantMessageParser: AssistantMessageParser
-	private lastUsedInstructions?: string
-	private skipPrevResponseIdOnce: boolean = false
 
 	// Token Usage Cache
 	private tokenUsageSnapshot?: TokenUsage
@@ -599,8 +596,39 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 	}
 
 	private async addToApiConversationHistory(message: Anthropic.MessageParam) {
-		const messageWithTs = { ...message, ts: Date.now() }
-		this.apiConversationHistory.push(messageWithTs)
+		// Capture the encrypted_content from the provider (e.g., OpenAI Responses API) if present.
+		// We only persist data reported by the current response body.
+		const handler = this.api as ApiHandler & {
+			getResponseId?: () => string | undefined
+			getEncryptedContent?: () => { encrypted_content: string; id?: string } | undefined
+		}
+
+		if (message.role === "assistant") {
+			const responseId = handler.getResponseId?.()
+			const reasoningData = handler.getEncryptedContent?.()
+
+			// If we have encrypted_content, add it as a reasoning item before the assistant message
+			if (reasoningData?.encrypted_content) {
+				this.apiConversationHistory.push({
+					type: "reasoning",
+					summary: [],
+					encrypted_content: reasoningData.encrypted_content,
+					...(reasoningData.id ? { id: reasoningData.id } : {}),
+					ts: Date.now(),
+				} as any)
+			}
+
+			const messageWithTs = {
+				...message,
+				...(responseId ? { id: responseId } : {}),
+				ts: Date.now(),
+			}
+			this.apiConversationHistory.push(messageWithTs)
+		} else {
+			const messageWithTs = { ...message, ts: Date.now() }
+			this.apiConversationHistory.push(messageWithTs)
+		}
+
 		await this.saveApiConversationHistory()
 	}
 
@@ -647,18 +675,6 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 
 	public async overwriteClineMessages(newMessages: ClineMessage[]) {
 		this.clineMessages = newMessages
-
-		// If deletion or history truncation leaves a condense_context as the last message,
-		// ensure the next API call suppresses previous_response_id so the condensed context is respected.
-		try {
-			const last = this.clineMessages.at(-1)
-			if (last && last.type === "say" && last.say === "condense_context") {
-				this.skipPrevResponseIdOnce = true
-			}
-		} catch {
-			// non-fatal
-		}
-
 		restoreTodoListForTask(this)
 		await this.saveClineMessages()
 	}
@@ -1089,9 +1105,6 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		}
 		await this.overwriteApiConversationHistory(messages)
 
-		// Set flag to skip previous_response_id on the next API call after manual condense
-		this.skipPrevResponseIdOnce = true
-
 		const contextCondense: ContextCondense = { summary, cost, newContextTokens, prevContextTokens }
 		await this.say(
 			"condense_context",
@@ -1117,7 +1130,6 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		progressStatus?: ToolProgressStatus,
 		options: {
 			isNonInteractive?: boolean
-			metadata?: Record<string, unknown>
 		} = {},
 		contextCondense?: ContextCondense,
 	): Promise<undefined> {
@@ -1155,7 +1167,6 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 						images,
 						partial,
 						contextCondense,
-						metadata: options.metadata,
 					})
 				}
 			} else {
@@ -1171,14 +1182,6 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 					lastMessage.images = images
 					lastMessage.partial = false
 					lastMessage.progressStatus = progressStatus
-					if (options.metadata) {
-						// Add metadata to the message
-						const messageWithMetadata = lastMessage as ClineMessage & ClineMessageWithMetadata
-						if (!messageWithMetadata.metadata) {
-							messageWithMetadata.metadata = {}
-						}
-						Object.assign(messageWithMetadata.metadata, options.metadata)
-					}
 
 					// Instead of streaming partialMessage events, we do a save
 					// and post like normal to persist to disk.
@@ -1201,7 +1204,6 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 						text,
 						images,
 						contextCondense,
-						metadata: options.metadata,
 					})
 				}
 			}
@@ -1296,20 +1298,6 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 
 		const modifiedClineMessages = await this.getSavedClineMessages()
 
-		// Check for any stored GPT-5 response IDs in the message history.
-		const gpt5Messages = modifiedClineMessages.filter(
-			(m): m is ClineMessage & ClineMessageWithMetadata =>
-				m.type === "say" &&
-				m.say === "text" &&
-				!!(m as ClineMessageWithMetadata).metadata?.gpt5?.previous_response_id,
-		)
-
-		if (gpt5Messages.length > 0) {
-			const lastGpt5Message = gpt5Messages[gpt5Messages.length - 1]
-			// The lastGpt5Message contains the previous_response_id that can be
-			// used for continuity.
-		}
-
 		// Remove any resume messages that may have been added before.
 		const lastRelevantMessageIndex = findLastIndex(
 			modifiedClineMessages,
@@ -1720,10 +1708,6 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				role: "user",
 				content: [{ type: "text", text: `[new_task completed] Result: ${lastMessage}` }],
 			})
-
-			// Set skipPrevResponseIdOnce to ensure the next API call sends the full conversation
-			// including the subtask result, not just from before the subtask was created
-			this.skipPrevResponseIdOnce = true
 		} catch (error) {
 			this.providerRef
 				.deref()
@@ -2377,7 +2361,6 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 					}
 				}
 
-				await this.persistGpt5Metadata()
 				await this.saveClineMessages()
 				await this.providerRef.deref()?.postStateToWebview()
 
@@ -2762,7 +2745,6 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		Task.lastGlobalApiRequestTime = performance.now()
 
 		const systemPrompt = await this.getSystemPrompt()
-		this.lastUsedInstructions = systemPrompt
 		const { contextTokens } = this.getTokenUsage()
 
 		if (contextTokens) {
@@ -2800,10 +2782,6 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			if (truncateResult.error) {
 				await this.say("condense_context_error", truncateResult.error)
 			} else if (truncateResult.summary) {
-				// A condense operation occurred; for the next GPT‑5 API call we should NOT
-				// send previous_response_id so the request reflects the fresh condensed context.
-				this.skipPrevResponseIdOnce = true
-
 				const { summary, cost, prevContextTokens, newContextTokens = 0 } = truncateResult
 				const contextCondense: ContextCondense = { summary, cost, newContextTokens, prevContextTokens }
 				await this.say(
@@ -2819,10 +2797,33 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			}
 		}
 
+		// Properly type cleaned conversation history to include either standard Anthropic messages
+		// or provider-specific reasoning items (for encrypted continuity).
+		type ReasoningItemForRequest = {
+			type: "reasoning"
+			encrypted_content: string
+			id?: string
+			summary?: any[]
+		}
+		type CleanConversationMessage = Anthropic.Messages.MessageParam | ReasoningItemForRequest
+
 		const messagesSinceLastSummary = getMessagesSinceLastSummary(this.apiConversationHistory)
-		let cleanConversationHistory = maybeRemoveImageBlocks(messagesSinceLastSummary, this.api).map(
-			({ role, content }) => ({ role, content }),
-		)
+		const cleanConversationHistory: CleanConversationMessage[] = maybeRemoveImageBlocks(
+			messagesSinceLastSummary,
+			this.api,
+		).map((msg: ApiMessage): CleanConversationMessage => {
+			// Pass through reasoning items as-is (including id if present)
+			if (msg.type === "reasoning") {
+				return {
+					type: "reasoning",
+					summary: msg.summary,
+					encrypted_content: msg.encrypted_content!,
+					...(msg.id ? { id: msg.id } : {}),
+				}
+			}
+			// For regular messages, just return role and content
+			return { role: msg.role!, content: msg.content as Anthropic.Messages.ContentBlockParam[] | string }
+		})
 
 		// Check auto-approval limits
 		const approvalResult = await this.autoApprovalHandler.checkAutoApprovalLimits(
@@ -2836,48 +2837,17 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			throw new Error("Auto-approval limit reached and user did not approve continuation")
 		}
 
-		// Determine GPT‑5 previous_response_id from last persisted assistant turn (if available),
-		// unless a condense just occurred (skip once after condense).
-		let previousResponseId: string | undefined = undefined
-		try {
-			const modelId = this.api.getModel().id
-			if (modelId && modelId.startsWith("gpt-5") && !this.skipPrevResponseIdOnce) {
-				// Find the last assistant message that has a previous_response_id stored
-				const idx = findLastIndex(
-					this.clineMessages,
-					(m): m is ClineMessage & ClineMessageWithMetadata =>
-						m.type === "say" &&
-						m.say === "text" &&
-						!!(m as ClineMessageWithMetadata).metadata?.gpt5?.previous_response_id,
-				)
-				if (idx !== -1) {
-					// Use the previous_response_id from the last assistant message for this request
-					const message = this.clineMessages[idx] as ClineMessage & ClineMessageWithMetadata
-					previousResponseId = message.metadata?.gpt5?.previous_response_id
-				}
-			} else if (this.skipPrevResponseIdOnce) {
-				// Skipping previous_response_id due to recent condense operation - will send full conversation context
-			}
-		} catch (error) {
-			console.error(`[Task#${this.taskId}] Error retrieving GPT-5 response ID:`, error)
-			// non-fatal
-		}
-
 		const metadata: ApiHandlerCreateMessageMetadata = {
 			mode: mode,
 			taskId: this.taskId,
-			// Only include previousResponseId if we're NOT suppressing it
-			...(previousResponseId && !this.skipPrevResponseIdOnce ? { previousResponseId } : {}),
-			// If a condense just occurred, explicitly suppress continuity fallback for the next call
-			...(this.skipPrevResponseIdOnce ? { suppressPreviousResponseId: true } : {}),
-		}
-
-		// Reset skip flag after applying (it only affects the immediate next call)
-		if (this.skipPrevResponseIdOnce) {
-			this.skipPrevResponseIdOnce = false
 		}
 
-		const stream = this.api.createMessage(systemPrompt, cleanConversationHistory, metadata)
+		// The provider accepts reasoning items alongside standard messages; cast to the expected parameter type.
+		const stream = this.api.createMessage(
+			systemPrompt,
+			cleanConversationHistory as unknown as Anthropic.Messages.MessageParam[],
+			metadata,
+		)
 		const iterator = stream[Symbol.asyncIterator]()
 
 		try {
@@ -3080,41 +3050,6 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		}
 	}
 
-	/**
-	 * Persist GPT-5 per-turn metadata (previous_response_id only)
-	 * onto the last complete assistant say("text") message.
-	 *
-	 * Note: We do not persist system instructions or reasoning summaries.
-	 */
-	private async persistGpt5Metadata(): Promise<void> {
-		try {
-			const modelId = this.api.getModel().id
-			if (!modelId || !modelId.startsWith("gpt-5")) return
-
-			// Check if the API handler has a getLastResponseId method (OpenAiNativeHandler specific)
-			const handler = this.api as ApiHandler & { getLastResponseId?: () => string | undefined }
-			const lastResponseId = handler.getLastResponseId?.()
-			const idx = findLastIndex(
-				this.clineMessages,
-				(m) => m.type === "say" && m.say === "text" && m.partial !== true,
-			)
-			if (idx !== -1) {
-				const msg = this.clineMessages[idx] as ClineMessage & ClineMessageWithMetadata
-				if (!msg.metadata) {
-					msg.metadata = {}
-				}
-				const gpt5Metadata: Gpt5Metadata = {
-					...(msg.metadata.gpt5 ?? {}),
-					...(lastResponseId ? { previous_response_id: lastResponseId } : {}),
-				}
-				msg.metadata.gpt5 = gpt5Metadata
-			}
-		} catch (error) {
-			console.error(`[Task#${this.taskId}] Error persisting GPT-5 metadata:`, error)
-			// Non-fatal error in metadata persistence
-		}
-	}
-
 	// Getters
 
 	public get taskStatus(): TaskStatus {

+ 0 - 63
src/core/task/__tests__/Task.spec.ts

@@ -1623,69 +1623,6 @@ describe("Cline", () => {
 		})
 	})
 
-	describe("Conversation continuity after condense and deletion", () => {
-		it("should set suppressPreviousResponseId when last message is condense_context", async () => {
-			// Arrange: create task
-			const task = new Task({
-				provider: mockProvider,
-				apiConfiguration: mockApiConfig,
-				task: "initial task",
-				startTask: false,
-			})
-
-			// Ensure provider state returns required fields for attemptApiRequest
-			mockProvider.getState = vi.fn().mockResolvedValue({
-				apiConfiguration: mockApiConfig,
-			})
-
-			// Simulate deletion that leaves a condense_context as the last message
-			const condenseMsg = {
-				ts: Date.now(),
-				type: "say" as const,
-				say: "condense_context" as const,
-				contextCondense: {
-					summary: "summarized",
-					cost: 0.001,
-					prevContextTokens: 1200,
-					newContextTokens: 400,
-				},
-			}
-			await task.overwriteClineMessages([condenseMsg])
-
-			// Spy and return a minimal successful stream to exercise attemptApiRequest
-			const mockStream = {
-				async *[Symbol.asyncIterator]() {
-					yield { type: "text", text: "ok" }
-				},
-				async next() {
-					return { done: true, value: { type: "text", text: "ok" } }
-				},
-				async return() {
-					return { done: true, value: undefined }
-				},
-				async throw(e: any) {
-					throw e
-				},
-				[Symbol.asyncDispose]: async () => {},
-			} as AsyncGenerator<ApiStreamChunk>
-
-			const createMessageSpy = vi.spyOn(task.api, "createMessage").mockReturnValue(mockStream)
-
-			// Act: initiate an API request
-			const iterator = task.attemptApiRequest(0)
-			await iterator.next() // read first chunk to ensure call happened
-
-			// Assert: metadata includes suppressPreviousResponseId set to true
-			expect(createMessageSpy).toHaveBeenCalled()
-			const callArgs = createMessageSpy.mock.calls[0]
-			// Args: [systemPrompt, cleanConversationHistory, metadata]
-			const metadata = callArgs?.[2]
-			expect(metadata?.suppressPreviousResponseId).toBe(true)
-
-			// The skip flag should be reset after the call
-			expect((task as any).skipPrevResponseIdOnce).toBe(false)
-		})
-	})
 	describe("abortTask", () => {
 		it("should set abort flag and emit TaskAborted event", async () => {
 			const task = new Task({

+ 0 - 25
src/core/task/types.ts

@@ -1,25 +0,0 @@
-/**
- * Type definitions for Task-related metadata
- */
-
-/**
- * GPT-5 specific metadata stored with assistant messages
- * for maintaining conversation continuity across requests
- */
-export interface Gpt5Metadata {
-	/**
-	 * The response ID from the previous GPT-5 API response
-	 * Used to maintain conversation continuity in subsequent requests
-	 */
-	previous_response_id?: string
-}
-
-/**
- * Extended ClineMessage type with GPT-5 metadata
- */
-export interface ClineMessageWithMetadata {
-	metadata?: {
-		gpt5?: Gpt5Metadata
-		[key: string]: any
-	}
-}

+ 0 - 1
webview-ui/src/components/chat/ChatRow.tsx

@@ -1062,7 +1062,6 @@ export const ChatRowContent = ({
 							ts={message.ts}
 							isStreaming={isStreaming}
 							isLast={isLast}
-							metadata={message.metadata as any}
 						/>
 					)
 				case "api_req_started":