
fix: surface actual API error messages instead of generic NoOutputGeneratedError (#11359)

Hannes Rudolph 4 days ago
Parent
Commit
2f9849071d
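
Before this change, a stream that failed mid-response (for example with "Insufficient balance") surfaced only the AI SDK's generic NoOutputGeneratedError, because awaiting result.usage after a failed stream rejects with that wrapper instead of the provider's message. The per-provider pattern this commit introduces is sketched below with assumed stream-part shapes, not the repository's actual types:

	type StreamPart = { type: "text-delta"; text: string } | { type: "error"; error: Error }

	interface StreamResult {
		fullStream: AsyncIterable<StreamPart>
		usage: Promise<{ inputTokens: number; outputTokens: number }>
	}

	async function* streamWithErrorCapture(result: StreamResult) {
		let lastStreamError: string | undefined
		for await (const part of result.fullStream) {
			if (part.type === "error") {
				// Remember the specific provider message (e.g. "Insufficient balance").
				lastStreamError = part.error.message
			}
			yield part
		}
		try {
			yield { type: "usage" as const, ...(await result.usage) }
		} catch (usageError) {
			// Surface the captured stream error instead of the generic rejection.
			throw lastStreamError ? new Error(lastStreamError) : usageError
		}
	}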

+ 15 - 4
src/api/providers/anthropic-vertex.ts

@@ -175,6 +175,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
 		try {
 			const result = streamText(requestOptions)
 
+			let lastStreamError: string | undefined
 			for await (const part of result.fullStream) {
 				// Capture thinking signature from stream events
 				// The AI SDK's @ai-sdk/anthropic emits the signature as a reasoning-delta
@@ -193,15 +194,25 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
 				}
 
 				for (const chunk of processAiSdkStreamPart(part)) {
+					if (chunk.type === "error") {
+						lastStreamError = chunk.message
+					}
 					yield chunk
 				}
 			}
 
 			// Yield usage metrics at the end, including cache metrics from providerMetadata
-			const usage = await result.usage
-			const providerMetadata = await result.providerMetadata
-			if (usage) {
-				yield this.processUsageMetrics(usage, modelConfig.info, providerMetadata)
+			try {
+				const usage = await result.usage
+				const providerMetadata = await result.providerMetadata
+				if (usage) {
+					yield this.processUsageMetrics(usage, modelConfig.info, providerMetadata)
+				}
+			} catch (usageError) {
+				if (lastStreamError) {
+					throw new Error(lastStreamError)
+				}
+				throw usageError
 			}
 		} catch (error) {
 			const errorMessage = error instanceof Error ? error.message : String(error)

+ 15 - 4
src/api/providers/anthropic.ts

@@ -151,6 +151,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 		try {
 			const result = streamText(requestOptions)
 
+			let lastStreamError: string | undefined
 			for await (const part of result.fullStream) {
 				// Capture thinking signature from stream events
 				// The AI SDK's @ai-sdk/anthropic emits the signature as a reasoning-delta
@@ -169,15 +170,25 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
 				}
 
 				for (const chunk of processAiSdkStreamPart(part)) {
+					if (chunk.type === "error") {
+						lastStreamError = chunk.message
+					}
 					yield chunk
 				}
 			}
 
 			// Yield usage metrics at the end, including cache metrics from providerMetadata
-			const usage = await result.usage
-			const providerMetadata = await result.providerMetadata
-			if (usage) {
-				yield this.processUsageMetrics(usage, modelConfig.info, providerMetadata)
+			try {
+				const usage = await result.usage
+				const providerMetadata = await result.providerMetadata
+				if (usage) {
+					yield this.processUsageMetrics(usage, modelConfig.info, providerMetadata)
+				}
+			} catch (usageError) {
+				if (lastStreamError) {
+					throw new Error(lastStreamError)
+				}
+				throw usageError
 			}
 		} catch (error) {
 			const errorMessage = error instanceof Error ? error.message : String(error)

+ 6 - 14
src/api/providers/azure.ts

@@ -9,7 +9,7 @@ import type { ApiHandlerOptions } from "../../shared/api"
 import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
-	processAiSdkStreamPart,
+	consumeAiSdkStream,
 	mapToolChoice,
 	handleAiSdkError,
 } from "../transform/ai-sdk"
@@ -159,19 +159,11 @@ export class AzureHandler extends BaseProvider implements SingleCompletionHandle
 		const result = streamText(requestOptions)
 
 		try {
-			// Process the full stream to get all events including reasoning
-			for await (const part of result.fullStream) {
-				for (const chunk of processAiSdkStreamPart(part)) {
-					yield chunk
-				}
-			}
-
-			// Yield usage metrics at the end, including cache metrics from providerMetadata
-			const usage = await result.usage
-			const providerMetadata = await result.providerMetadata
-			if (usage) {
-				yield this.processUsageMetrics(usage, providerMetadata as any)
-			}
+			const processUsage = this.processUsageMetrics.bind(this)
+			yield* consumeAiSdkStream(result, async function* () {
+				const [usage, providerMetadata] = await Promise.all([result.usage, result.providerMetadata])
+				yield processUsage(usage, providerMetadata as Parameters<typeof processUsage>[1])
+			})
 		} catch (error) {
 			// Handle AI SDK errors (AI_RetryError, AI_APICallError, etc.)
 			throw handleAiSdkError(error, "Azure AI Foundry")

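A note on the repeated "const processUsage = this.processUsageMetrics.bind(this)" line in these hunks: the usage handler has to be a generator, and generator functions cannot be written as arrow functions, so they cannot capture "this" lexically; binding the method to a local first is the workaround. A standalone sketch of the constraint, using a hypothetical class rather than the repository's handlers:

	class Handler {
		private label = "usage"

		async *stream(): AsyncGenerator<string> {
			// `async *() => {}` is invalid syntax, so the inner generator is a
			// plain function with its own `this`; capture the method explicitly.
			const format = this.format.bind(this)
			yield* (async function* () {
				yield format(42) // calling this.format(42) here would not see Handler
			})()
		}

		private format(tokens: number): string {
			return `${this.label}: ${tokens}`
		}
	}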
+ 6 - 11
src/api/providers/baseten.ts

@@ -9,7 +9,7 @@ import type { ApiHandlerOptions } from "../../shared/api"
 import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
-	processAiSdkStreamPart,
+	consumeAiSdkStream,
 	mapToolChoice,
 	handleAiSdkError,
 } from "../transform/ai-sdk"
@@ -118,16 +118,11 @@ export class BasetenHandler extends BaseProvider implements SingleCompletionHand
 		const result = streamText(requestOptions)
 
 		try {
-			for await (const part of result.fullStream) {
-				for (const chunk of processAiSdkStreamPart(part)) {
-					yield chunk
-				}
-			}
-
-			const usage = await result.usage
-			if (usage) {
-				yield this.processUsageMetrics(usage)
-			}
+			const processUsage = this.processUsageMetrics.bind(this)
+			yield* consumeAiSdkStream(result, async function* () {
+				const usage = await result.usage
+				yield processUsage(usage)
+			})
 		} catch (error) {
 			throw handleAiSdkError(error, "Baseten")
 		}

+ 16 - 4
src/api/providers/bedrock.ts

@@ -343,6 +343,8 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 		try {
 			const result = streamText(requestOptions)
 
+			let lastStreamError: string | undefined
+
 			// Process the full stream
 			for await (const part of result.fullStream) {
 				// Capture thinking signature from stream events.
@@ -371,15 +373,25 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 				}
 
 				for (const chunk of processAiSdkStreamPart(part)) {
+					if (chunk.type === "error") {
+						lastStreamError = chunk.message
+					}
 					yield chunk
 				}
 			}
 
 			// Yield usage metrics at the end
-			const usage = await result.usage
-			const providerMetadata = await result.providerMetadata
-			if (usage) {
-				yield this.processUsageMetrics(usage, modelConfig.info, providerMetadata)
+			try {
+				const usage = await result.usage
+				const providerMetadata = await result.providerMetadata
+				if (usage) {
+					yield this.processUsageMetrics(usage, modelConfig.info, providerMetadata)
+				}
+			} catch (usageError) {
+				if (lastStreamError) {
+					throw new Error(lastStreamError)
+				}
+				throw usageError
 			}
 		} catch (error) {
 			const errorMessage = error instanceof Error ? error.message : String(error)

+ 6 - 15
src/api/providers/deepseek.ts

@@ -9,7 +9,7 @@ import type { ApiHandlerOptions } from "../../shared/api"
 import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
-	processAiSdkStreamPart,
+	consumeAiSdkStream,
 	mapToolChoice,
 	handleAiSdkError,
 } from "../transform/ai-sdk"
@@ -137,21 +137,12 @@ export class DeepSeekHandler extends BaseProvider implements SingleCompletionHan
 		const result = streamText(requestOptions)
 
 		try {
-			// Process the full stream to get all events including reasoning
-			for await (const part of result.fullStream) {
-				for (const chunk of processAiSdkStreamPart(part)) {
-					yield chunk
-				}
-			}
-
-			// Yield usage metrics at the end, including cache metrics from providerMetadata
-			const usage = await result.usage
-			const providerMetadata = await result.providerMetadata
-			if (usage) {
-				yield this.processUsageMetrics(usage, providerMetadata as any)
-			}
+			const processUsage = this.processUsageMetrics.bind(this)
+			yield* consumeAiSdkStream(result, async function* () {
+				const [usage, providerMetadata] = await Promise.all([result.usage, result.providerMetadata])
+				yield processUsage(usage, providerMetadata as Parameters<typeof processUsage>[1])
+			})
 		} catch (error) {
-			// Handle AI SDK errors (AI_RetryError, AI_APICallError, etc.)
 			throw handleAiSdkError(error, "DeepSeek")
 		}
 	}

+ 6 - 15
src/api/providers/fireworks.ts

@@ -9,7 +9,7 @@ import type { ApiHandlerOptions } from "../../shared/api"
 import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
-	processAiSdkStreamPart,
+	consumeAiSdkStream,
 	mapToolChoice,
 	handleAiSdkError,
 } from "../transform/ai-sdk"
@@ -137,21 +137,12 @@ export class FireworksHandler extends BaseProvider implements SingleCompletionHa
 		const result = streamText(requestOptions)
 
 		try {
-			// Process the full stream to get all events including reasoning
-			for await (const part of result.fullStream) {
-				for (const chunk of processAiSdkStreamPart(part)) {
-					yield chunk
-				}
-			}
-
-			// Yield usage metrics at the end, including cache metrics from providerMetadata
-			const usage = await result.usage
-			const providerMetadata = await result.providerMetadata
-			if (usage) {
-				yield this.processUsageMetrics(usage, providerMetadata as any)
-			}
+			const processUsage = this.processUsageMetrics.bind(this)
+			yield* consumeAiSdkStream(result, async function* () {
+				const [usage, providerMetadata] = await Promise.all([result.usage, result.providerMetadata])
+				yield processUsage(usage, providerMetadata as Parameters<typeof processUsage>[1])
+			})
 		} catch (error) {
-			// Handle AI SDK errors (AI_RetryError, AI_APICallError, etc.)
 			throw handleAiSdkError(error, "Fireworks")
 		}
 	}

+ 16 - 1
src/api/providers/gemini.ts

@@ -133,6 +133,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 
 			// Track whether any text content was yielded (not just reasoning/thinking)
 			let hasContent = false
+			let lastStreamError: string | undefined
 
 			// Process the full stream to get all events including reasoning
 			for await (const part of result.fullStream) {
@@ -146,6 +147,9 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 				}
 
 				for (const chunk of processAiSdkStreamPart(part)) {
+					if (chunk.type === "error") {
+						lastStreamError = chunk.message
+					}
 					if (chunk.type === "text" || chunk.type === "tool_call_start") {
 						hasContent = true
 					}
@@ -163,7 +167,15 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			}
 
 			// Extract grounding sources from providerMetadata if available
-			const providerMetadata = await result.providerMetadata
+			let providerMetadata: Awaited<typeof result.providerMetadata>
+			try {
+				providerMetadata = await result.providerMetadata
+			} catch (metaError) {
+				if (lastStreamError) {
+					throw new Error(lastStreamError)
+				}
+				throw metaError
+			}
 			const groundingMetadata = providerMetadata?.google as
 				| {
 						groundingMetadata?: {
@@ -190,6 +202,9 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 					yield this.processUsageMetrics(usage, info, providerMetadata)
 				}
 			} catch (usageError) {
+				if (lastStreamError) {
+					throw new Error(lastStreamError)
+				}
 				if (usageError instanceof NoOutputGeneratedError) {
 					// If we already yielded the empty-stream message, suppress this error
 					if (hasContent) {

+ 6 - 11
src/api/providers/lm-studio.ts

@@ -8,7 +8,7 @@ import type { ApiHandlerOptions } from "../../shared/api"
 import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
-	processAiSdkStreamPart,
+	consumeAiSdkStream,
 	mapToolChoice,
 	handleAiSdkError,
 } from "../transform/ai-sdk"
@@ -79,16 +79,11 @@ export class LmStudioHandler extends OpenAICompatibleHandler implements SingleCo
 		const result = streamText(requestOptions)
 
 		try {
-			for await (const part of result.fullStream) {
-				for (const chunk of processAiSdkStreamPart(part)) {
-					yield chunk
-				}
-			}
-
-			const usage = await result.usage
-			if (usage) {
-				yield this.processUsageMetrics(usage)
-			}
+			const processUsage = this.processUsageMetrics.bind(this)
+			yield* consumeAiSdkStream(result, async function* () {
+				const usage = await result.usage
+				yield processUsage(usage)
+			})
 		} catch (error) {
 			throw handleAiSdkError(error, "LM Studio")
 		}

+ 16 - 4
src/api/providers/minimax.ts

@@ -128,6 +128,8 @@ export class MiniMaxHandler extends BaseProvider implements SingleCompletionHand
 
 		try {
 			const result = streamText(requestOptions as Parameters<typeof streamText>[0])
+	
+			let lastStreamError: string | undefined
 
 			for await (const part of result.fullStream) {
 				const anthropicMetadata = (
@@ -153,14 +155,24 @@ export class MiniMaxHandler extends BaseProvider implements SingleCompletionHand
 				}
 
 				for (const chunk of processAiSdkStreamPart(part)) {
+					if (chunk.type === "error") {
+						lastStreamError = chunk.message
+					}
 					yield chunk
 				}
 			}
 
-			const usage = await result.usage
-			const providerMetadata = await result.providerMetadata
-			if (usage) {
-				yield this.processUsageMetrics(usage, modelConfig.info, providerMetadata)
+			try {
+				const usage = await result.usage
+				const providerMetadata = await result.providerMetadata
+				if (usage) {
+					yield this.processUsageMetrics(usage, modelConfig.info, providerMetadata)
+				}
+			} catch (usageError) {
+				if (lastStreamError) {
+					throw new Error(lastStreamError)
+				}
+				throw usageError
 			}
 		} catch (error) {
 			throw handleAiSdkError(error, this.providerName)

+ 6 - 19
src/api/providers/mistral.ts

@@ -12,12 +12,7 @@ import {
 
 import type { ApiHandlerOptions } from "../../shared/api"
 
-import {
-	convertToAiSdkMessages,
-	convertToolsForAiSdk,
-	processAiSdkStreamPart,
-	handleAiSdkError,
-} from "../transform/ai-sdk"
+import { convertToAiSdkMessages, convertToolsForAiSdk, consumeAiSdkStream, handleAiSdkError } from "../transform/ai-sdk"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
 
@@ -170,20 +165,12 @@ export class MistralHandler extends BaseProvider implements SingleCompletionHand
 		const result = streamText(requestOptions)
 
 		try {
-			// Process the full stream to get all events including reasoning
-			for await (const part of result.fullStream) {
-				for (const chunk of processAiSdkStreamPart(part)) {
-					yield chunk
-				}
-			}
-
-			// Yield usage metrics at the end
-			const usage = await result.usage
-			if (usage) {
-				yield this.processUsageMetrics(usage)
-			}
+			const processUsage = this.processUsageMetrics.bind(this)
+			yield* consumeAiSdkStream(result, async function* () {
+				const usage = await result.usage
+				yield processUsage(usage)
+			})
 		} catch (error) {
-			// Handle AI SDK errors (AI_RetryError, AI_APICallError, etc.)
 			throw handleAiSdkError(error, "Mistral")
 		}
 	}

+ 56 - 43
src/api/providers/openai-codex.ts

@@ -203,64 +203,77 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion
 				})
 
 				// Stream parts
+				let lastStreamError: string | undefined
+
 				for await (const part of result.fullStream) {
 					for (const chunk of processAiSdkStreamPart(part)) {
+						if (chunk.type === "error") {
+							lastStreamError = chunk.message
+						}
 						yield chunk
 					}
 				}
 
-				// Extract metadata from completed response
-				const providerMeta = await result.providerMetadata
-				const openaiMeta = (providerMeta as any)?.openai
+				// Extract metadata and usage — wrap in try/catch for stream error fallback
+				try {
+					// Extract metadata from completed response
+					const providerMeta = await result.providerMetadata
+					const openaiMeta = (providerMeta as any)?.openai
 
-				if (openaiMeta?.responseId) {
-					this.lastResponseId = openaiMeta.responseId
-				}
+					if (openaiMeta?.responseId) {
+						this.lastResponseId = openaiMeta.responseId
+					}
 
-				// Capture encrypted content from reasoning parts in the response
-				try {
-					const content = await (result as any).content
-					if (Array.isArray(content)) {
-						for (const part of content) {
-							if (part.type === "reasoning" && part.providerMetadata) {
-								const partMeta = (part.providerMetadata as any)?.openai
-								if (partMeta?.reasoningEncryptedContent) {
-									this.lastEncryptedContent = {
-										encrypted_content: partMeta.reasoningEncryptedContent,
-										...(partMeta.itemId ? { id: partMeta.itemId } : {}),
+					// Capture encrypted content from reasoning parts in the response
+					try {
+						const content = await (result as any).content
+						if (Array.isArray(content)) {
+							for (const part of content) {
+								if (part.type === "reasoning" && part.providerMetadata) {
+									const partMeta = (part.providerMetadata as any)?.openai
+									if (partMeta?.reasoningEncryptedContent) {
+										this.lastEncryptedContent = {
+											encrypted_content: partMeta.reasoningEncryptedContent,
+											...(partMeta.itemId ? { id: partMeta.itemId } : {}),
+										}
+										break
 									}
-									break
 								}
 							}
 						}
+					} catch {
+						// Content parts with encrypted reasoning may not always be available
 					}
-				} catch {
-					// Content parts with encrypted reasoning may not always be available
-				}
 
-				// Yield usage — subscription pricing means totalCost is always 0
-				const usage = await result.usage
-				if (usage) {
-					const inputTokens = usage.inputTokens || 0
-					const outputTokens = usage.outputTokens || 0
-					const details = (usage as any).details as
-						| { cachedInputTokens?: number; reasoningTokens?: number }
-						| undefined
-					const cacheReadTokens = details?.cachedInputTokens ?? 0
-					// The OpenAI Responses API does not report cache write tokens separately;
-					// only cached (read) tokens are available via usage.details.cachedInputTokens.
-					const cacheWriteTokens = 0
-					const reasoningTokens = details?.reasoningTokens
-
-					yield {
-						type: "usage",
-						inputTokens,
-						outputTokens,
-						cacheWriteTokens: cacheWriteTokens || undefined,
-						cacheReadTokens: cacheReadTokens || undefined,
-						...(typeof reasoningTokens === "number" ? { reasoningTokens } : {}),
-						totalCost: 0, // Subscription-based pricing
+					// Yield usage — subscription pricing means totalCost is always 0
+					const usage = await result.usage
+					if (usage) {
+						const inputTokens = usage.inputTokens || 0
+						const outputTokens = usage.outputTokens || 0
+						const details = (usage as any).details as
+							| { cachedInputTokens?: number; reasoningTokens?: number }
+							| undefined
+						const cacheReadTokens = details?.cachedInputTokens ?? 0
+						// The OpenAI Responses API does not report cache write tokens separately;
+						// only cached (read) tokens are available via usage.details.cachedInputTokens.
+						const cacheWriteTokens = 0
+						const reasoningTokens = details?.reasoningTokens
+
+						yield {
+							type: "usage",
+							inputTokens,
+							outputTokens,
+							cacheWriteTokens: cacheWriteTokens || undefined,
+							cacheReadTokens: cacheReadTokens || undefined,
+							...(typeof reasoningTokens === "number" ? { reasoningTokens } : {}),
+							totalCost: 0, // Subscription-based pricing
+						}
+					}
+				} catch (usageError) {
+					if (lastStreamError) {
+						throw new Error(lastStreamError)
 					}
+					throw usageError
 				}
 
 				// Success — exit the retry loop

+ 6 - 14
src/api/providers/openai-compatible.ts

@@ -15,7 +15,7 @@ import type { ApiHandlerOptions } from "../../shared/api"
 import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
-	processAiSdkStreamPart,
+	consumeAiSdkStream,
 	mapToolChoice,
 	handleAiSdkError,
 } from "../transform/ai-sdk"
@@ -152,19 +152,11 @@ export abstract class OpenAICompatibleHandler extends BaseProvider implements Si
 		const result = streamText(requestOptions)
 
 		try {
-			// Process the full stream to get all events
-			for await (const part of result.fullStream) {
-				// Use the processAiSdkStreamPart utility to convert stream parts
-				for (const chunk of processAiSdkStreamPart(part)) {
-					yield chunk
-				}
-			}
-
-			// Yield usage metrics at the end
-			const usage = await result.usage
-			if (usage) {
-				yield this.processUsageMetrics(usage)
-			}
+			const processUsage = this.processUsageMetrics.bind(this)
+			yield* consumeAiSdkStream(result, async function* () {
+				const usage = await result.usage
+				yield processUsage(usage)
+			})
 		} catch (error) {
 			// Handle AI SDK errors (AI_RetryError, AI_APICallError, etc.)
 			throw handleAiSdkError(error, this.config.providerName)

+ 41 - 34
src/api/providers/openai-native.ts

@@ -22,7 +22,7 @@ import { calculateApiCostOpenAI } from "../../shared/cost"
 import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
-	processAiSdkStreamPart,
+	consumeAiSdkStream,
 	mapToolChoice,
 	handleAiSdkError,
 } from "../transform/ai-sdk"
@@ -463,48 +463,55 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 
 		const result = streamText(requestOptions)
 
+		const processUsage = this.processUsageMetrics.bind(this)
+		const setResponseId = (id: string) => {
+			this.lastResponseId = id
+		}
+		const setServiceTier = (tier: ServiceTier) => {
+			this.lastServiceTier = tier
+		}
+		const setEncryptedContent = (content: { encrypted_content: string; id?: string }) => {
+			this.lastEncryptedContent = content
+		}
 		try {
-			for await (const part of result.fullStream) {
-				for (const chunk of processAiSdkStreamPart(part)) {
-					yield chunk
-				}
-			}
-
-			const providerMeta = await result.providerMetadata
-			const openaiMeta = (providerMeta as any)?.openai
+			yield* consumeAiSdkStream(result, async function* () {
+				const providerMeta = await result.providerMetadata
+				const openaiMeta = providerMeta?.openai as Record<string, unknown> | undefined
 
-			if (openaiMeta?.responseId) {
-				this.lastResponseId = openaiMeta.responseId
-			}
-			if (openaiMeta?.serviceTier) {
-				this.lastServiceTier = openaiMeta.serviceTier as ServiceTier
-			}
+				if (typeof openaiMeta?.responseId === "string") {
+					setResponseId(openaiMeta.responseId)
+				}
+				if (typeof openaiMeta?.serviceTier === "string") {
+					setServiceTier(openaiMeta.serviceTier as ServiceTier)
+				}
 
-			// Capture encrypted content from reasoning parts in the response
-			try {
-				const content = await (result as any).content
-				if (Array.isArray(content)) {
-					for (const part of content) {
-						if (part.type === "reasoning" && part.providerMetadata) {
-							const partMeta = (part.providerMetadata as any)?.openai
-							if (partMeta?.reasoningEncryptedContent) {
-								this.lastEncryptedContent = {
-									encrypted_content: partMeta.reasoningEncryptedContent,
-									...(partMeta.itemId ? { id: partMeta.itemId } : {}),
+				// Capture encrypted content from reasoning parts in the response
+				try {
+					const content = await (result as unknown as { content?: Promise<unknown[]> }).content
+					if (Array.isArray(content)) {
+						for (const part of content) {
+							const p = part as Record<string, unknown>
+							if (p.type === "reasoning" && p.providerMetadata) {
+								const partMeta = (p.providerMetadata as Record<string, Record<string, unknown>>)?.openai
+								if (typeof partMeta?.reasoningEncryptedContent === "string") {
+									setEncryptedContent({
+										encrypted_content: partMeta.reasoningEncryptedContent,
+										...(typeof partMeta.itemId === "string" ? { id: partMeta.itemId } : {}),
+									})
+									break
 								}
-								break
 							}
 						}
 					}
+				} catch {
+					// Content parts with encrypted reasoning may not always be available
 				}
-			} catch {
-				// Content parts with encrypted reasoning may not always be available
-			}
 
-			const usage = await result.usage
-			if (usage) {
-				yield this.processUsageMetrics(usage, model, providerMeta as any)
-			}
+				const usage = await result.usage
+				if (usage) {
+					yield processUsage(usage, model, providerMeta as Parameters<typeof processUsage>[2])
+				}
+			})
 		} catch (error) {
 			throw handleAiSdkError(error, this.providerName)
 		}

+ 16 - 4
src/api/providers/openai.ts

@@ -198,8 +198,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		)
 
 		try {
+			let lastStreamError: string | undefined
+
 			for await (const part of result.fullStream) {
 				for (const chunk of processAiSdkStreamPart(part)) {
+					if (chunk.type === "error") {
+						lastStreamError = chunk.message
+					}
 					if (chunk.type === "text") {
 						for (const matchedChunk of matcher.update(chunk.text)) {
 							yield matchedChunk
@@ -214,10 +219,17 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				yield chunk
 			}
 
-			const usage = await result.usage
-			const providerMetadata = await result.providerMetadata
-			if (usage) {
-				yield this.processUsageMetrics(usage, modelInfo, providerMetadata as any)
+			try {
+				const usage = await result.usage
+				const providerMetadata = await result.providerMetadata
+				if (usage) {
+					yield this.processUsageMetrics(usage, modelInfo, providerMetadata as any)
+				}
+			} catch (usageError) {
+				if (lastStreamError) {
+					throw new Error(lastStreamError)
+				}
+				throw usageError
 			}
 		} catch (error) {
 			throw handleAiSdkError(error, this.providerName)

+ 6 - 12
src/api/providers/requesty.ts

@@ -10,7 +10,7 @@ import { calculateApiCostOpenAI } from "../../shared/cost"
 import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
-	processAiSdkStreamPart,
+	consumeAiSdkStream,
 	mapToolChoice,
 	handleAiSdkError,
 } from "../transform/ai-sdk"
@@ -199,17 +199,11 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 		const result = streamText(requestOptions)
 
 		try {
-			for await (const part of result.fullStream) {
-				for (const chunk of processAiSdkStreamPart(part)) {
-					yield chunk
-				}
-			}
-
-			const usage = await result.usage
-			const providerMetadata = await result.providerMetadata
-			if (usage) {
-				yield this.processUsageMetrics(usage, info, providerMetadata as RequestyProviderMetadata)
-			}
+			const processUsage = this.processUsageMetrics.bind(this)
+			yield* consumeAiSdkStream(result, async function* () {
+				const [usage, providerMetadata] = await Promise.all([result.usage, result.providerMetadata])
+				yield processUsage(usage, info, providerMetadata as RequestyProviderMetadata)
+			})
 		} catch (error) {
 			throw handleAiSdkError(error, "Requesty")
 		}

+ 6 - 15
src/api/providers/sambanova.ts

@@ -9,7 +9,7 @@ import type { ApiHandlerOptions } from "../../shared/api"
 import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
-	processAiSdkStreamPart,
+	consumeAiSdkStream,
 	mapToolChoice,
 	handleAiSdkError,
 	flattenAiSdkMessagesToStringContent,
@@ -142,21 +142,12 @@ export class SambaNovaHandler extends BaseProvider implements SingleCompletionHa
 		const result = streamText(requestOptions)
 
 		try {
-			// Process the full stream to get all events including reasoning
-			for await (const part of result.fullStream) {
-				for (const chunk of processAiSdkStreamPart(part)) {
-					yield chunk
-				}
-			}
-
-			// Yield usage metrics at the end, including cache metrics from providerMetadata
-			const usage = await result.usage
-			const providerMetadata = await result.providerMetadata
-			if (usage) {
-				yield this.processUsageMetrics(usage, providerMetadata as any)
-			}
+			const processUsage = this.processUsageMetrics.bind(this)
+			yield* consumeAiSdkStream(result, async function* () {
+				const [usage, providerMetadata] = await Promise.all([result.usage, result.providerMetadata])
+				yield processUsage(usage, providerMetadata as Parameters<typeof processUsage>[1])
+			})
 		} catch (error) {
-			// Handle AI SDK errors (AI_RetryError, AI_APICallError, etc.)
 			throw handleAiSdkError(error, "SambaNova")
 		}
 	}

+ 16 - 4
src/api/providers/vercel-ai-gateway.ts

@@ -134,16 +134,28 @@ export class VercelAiGatewayHandler extends BaseProvider implements SingleComple
 		})
 
 		try {
+			let lastStreamError: string | undefined
+
 			for await (const part of result.fullStream) {
 				for (const chunk of processAiSdkStreamPart(part)) {
+					if (chunk.type === "error") {
+						lastStreamError = chunk.message
+					}
 					yield chunk
 				}
 			}
 
-			const usage = await result.usage
-			const providerMetadata = await result.providerMetadata
-			if (usage) {
-				yield this.processUsageMetrics(usage, providerMetadata as any)
+			try {
+				const usage = await result.usage
+				const providerMetadata = await result.providerMetadata
+				if (usage) {
+					yield this.processUsageMetrics(usage, providerMetadata as any)
+				}
+			} catch (usageError) {
+				if (lastStreamError) {
+					throw new Error(lastStreamError)
+				}
+				throw usageError
 			}
 		} catch (error) {
 			throw handleAiSdkError(error, "Vercel AI Gateway")

+ 32 - 21
src/api/providers/vertex.ts

@@ -146,6 +146,7 @@ export class VertexHandler extends BaseProvider implements SingleCompletionHandl
 			const result = streamText(requestOptions)
 
 			// Process the full stream to get all events including reasoning
+			let lastStreamError: string | undefined
 			for await (const part of result.fullStream) {
 				// Capture thoughtSignature from tool-call events (Gemini 3 thought signatures)
 				// The AI SDK's tool-call event includes providerMetadata with the signature
@@ -160,33 +161,43 @@ export class VertexHandler extends BaseProvider implements SingleCompletionHandl
 				}
 
 				for (const chunk of processAiSdkStreamPart(part)) {
+					if (chunk.type === "error") {
+						lastStreamError = chunk.message
+					}
 					yield chunk
 				}
 			}
 
-			// Extract grounding sources from providerMetadata if available
-			const providerMetadata = await result.providerMetadata
-			const groundingMetadata = (providerMetadata?.vertex ?? providerMetadata?.google) as
-				| {
-						groundingMetadata?: {
-							groundingChunks?: Array<{
-								web?: { uri?: string; title?: string }
-							}>
-						}
-				  }
-				| undefined
-
-			if (groundingMetadata?.groundingMetadata) {
-				const sources = this.extractGroundingSources(groundingMetadata.groundingMetadata)
-				if (sources.length > 0) {
-					yield { type: "grounding", sources }
+			// Extract grounding sources and usage from providerMetadata
+			try {
+				const providerMetadata = await result.providerMetadata
+				const groundingMetadata = (providerMetadata?.vertex ?? providerMetadata?.google) as
+					| {
+							groundingMetadata?: {
+								groundingChunks?: Array<{
+									web?: { uri?: string; title?: string }
+								}>
+							}
+					  }
+					| undefined
+
+				if (groundingMetadata?.groundingMetadata) {
+					const sources = this.extractGroundingSources(groundingMetadata.groundingMetadata)
+					if (sources.length > 0) {
+						yield { type: "grounding", sources }
+					}
 				}
-			}
 
-			// Yield usage metrics at the end
-			const usage = await result.usage
-			if (usage) {
-				yield this.processUsageMetrics(usage, info, providerMetadata)
+				// Yield usage metrics at the end
+				const usage = await result.usage
+				if (usage) {
+					yield this.processUsageMetrics(usage, info, providerMetadata)
+				}
+			} catch (usageError) {
+				if (lastStreamError) {
+					throw new Error(lastStreamError)
+				}
+				throw usageError
 			}
 		} catch (error) {
 			const errorMessage = error instanceof Error ? error.message : String(error)

+ 6 - 15
src/api/providers/xai.ts

@@ -9,7 +9,7 @@ import type { ApiHandlerOptions } from "../../shared/api"
 import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
-	processAiSdkStreamPart,
+	consumeAiSdkStream,
 	mapToolChoice,
 	handleAiSdkError,
 } from "../transform/ai-sdk"
@@ -147,21 +147,12 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
 		const result = streamText(requestOptions)
 
 		try {
-			// Process the full stream to get all events including reasoning
-			for await (const part of result.fullStream) {
-				for (const chunk of processAiSdkStreamPart(part)) {
-					yield chunk
-				}
-			}
-
-			// Yield usage metrics at the end, including cache metrics from providerMetadata
-			const usage = await result.usage
-			const providerMetadata = await result.providerMetadata
-			if (usage) {
-				yield this.processUsageMetrics(usage, providerMetadata as any)
-			}
+			const processUsage = this.processUsageMetrics.bind(this)
+			yield* consumeAiSdkStream(result, async function* () {
+				const [usage, providerMetadata] = await Promise.all([result.usage, result.providerMetadata])
+				yield processUsage(usage, providerMetadata as Parameters<typeof processUsage>[1])
+			})
 		} catch (error) {
-			// Handle AI SDK errors (AI_RetryError, AI_APICallError, etc.)
 			throw handleAiSdkError(error, "xAI")
 		}
 	}

+ 2 - 15
src/api/providers/zai.ts

@@ -17,7 +17,7 @@ import { type ApiHandlerOptions, shouldUseReasoningEffort } from "../../shared/a
 import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
-	processAiSdkStreamPart,
+	consumeAiSdkStream,
 	mapToolChoice,
 	handleAiSdkError,
 } from "../transform/ai-sdk"
@@ -127,20 +127,7 @@ export class ZAiHandler extends BaseProvider implements SingleCompletionHandler
 		const result = streamText(requestOptions)
 
 		try {
-			for await (const part of result.fullStream) {
-				for (const chunk of processAiSdkStreamPart(part)) {
-					yield chunk
-				}
-			}
-
-			const usage = await result.usage
-			if (usage) {
-				yield {
-					type: "usage" as const,
-					inputTokens: usage.inputTokens || 0,
-					outputTokens: usage.outputTokens || 0,
-				}
-			}
+			yield* consumeAiSdkStream(result)
 		} catch (error) {
 			throw handleAiSdkError(error, "Z.ai")
 		}

+ 288 - 0
src/api/transform/__tests__/ai-sdk.spec.ts

@@ -4,8 +4,10 @@ import {
 	convertToAiSdkMessages,
 	convertToolsForAiSdk,
 	processAiSdkStreamPart,
+	consumeAiSdkStream,
 	mapToolChoice,
 	extractAiSdkErrorMessage,
+	extractMessageFromResponseBody,
 	handleAiSdkError,
 	flattenAiSdkMessagesToStringContent,
 } from "../ai-sdk"
@@ -793,6 +795,75 @@ describe("AI SDK conversion utilities", () => {
 			expect(extractAiSdkErrorMessage("string error")).toBe("string error")
 			expect(extractAiSdkErrorMessage({ custom: "object" })).toBe("[object Object]")
 		})
+
+		it("should extract message from AI_APICallError responseBody with JSON error", () => {
+			const apiError = {
+				name: "AI_APICallError",
+				message: "API call failed",
+				responseBody: '{"error":{"message":"Insufficient balance or no resource package.","code":"1113"}}',
+				statusCode: 402,
+			}
+
+			const result = extractAiSdkErrorMessage(apiError)
+			expect(result).toContain("Insufficient balance")
+			expect(result).not.toBe("API call failed")
+		})
+
+		it("should fall back to message when AI_APICallError responseBody is non-JSON", () => {
+			const apiError = {
+				name: "AI_APICallError",
+				message: "Server error",
+				responseBody: "Internal Server Error",
+				statusCode: 500,
+			}
+
+			const result = extractAiSdkErrorMessage(apiError)
+			expect(result).toContain("Server error")
+		})
+
+		it("should extract message from AI_RetryError lastError responseBody", () => {
+			const retryError = {
+				name: "AI_RetryError",
+				message: "Failed after retries",
+				lastError: {
+					name: "AI_APICallError",
+					message: "API call failed",
+					responseBody: '{"error":{"message":"Rate limit exceeded"}}',
+					statusCode: 429,
+				},
+				errors: [{}],
+			}
+
+			const result = extractAiSdkErrorMessage(retryError)
+			expect(result).toContain("Rate limit exceeded")
+		})
+
+		it("should extract message from NoOutputGeneratedError with APICallError cause", () => {
+			const error = {
+				name: "AI_NoOutputGeneratedError",
+				message: "No output generated",
+				cause: {
+					name: "AI_APICallError",
+					message: "Forbidden",
+					responseBody: '{"error":{"message":"Insufficient balance"}}',
+					statusCode: 403,
+				},
+			}
+
+			const result = extractAiSdkErrorMessage(error)
+			expect(result).toContain("Insufficient balance")
+			expect(result).not.toBe("No output generated")
+		})
+
+		it("should return own message from NoOutputGeneratedError without useful cause", () => {
+			const error = {
+				name: "AI_NoOutputGeneratedError",
+				message: "No output generated",
+			}
+
+			const result = extractAiSdkErrorMessage(error)
+			expect(result).toBe("No output generated")
+		})
 	})
 
 	describe("handleAiSdkError", () => {
@@ -839,6 +910,41 @@ describe("AI SDK conversion utilities", () => {
 		})
 	})
 
+	describe("extractMessageFromResponseBody", () => {
+		it("should extract message with code from error object", () => {
+			const body = '{"error": {"message": "Insufficient balance", "code": "1113"}}'
+			expect(extractMessageFromResponseBody(body)).toBe("[1113] Insufficient balance")
+		})
+
+		it("should extract message from error object without code", () => {
+			const body = '{"error": {"message": "Rate limit exceeded"}}'
+			expect(extractMessageFromResponseBody(body)).toBe("Rate limit exceeded")
+		})
+
+		it("should extract message from error string field", () => {
+			const body = '{"error": "Something went wrong"}'
+			expect(extractMessageFromResponseBody(body)).toBe("Something went wrong")
+		})
+
+		it("should extract message from top-level message field", () => {
+			const body = '{"message": "Bad request"}'
+			expect(extractMessageFromResponseBody(body)).toBe("Bad request")
+		})
+
+		it("should return undefined for non-JSON string", () => {
+			expect(extractMessageFromResponseBody("Not Found")).toBeUndefined()
+		})
+
+		it("should return undefined for empty string", () => {
+			expect(extractMessageFromResponseBody("")).toBeUndefined()
+		})
+
+		it("should return undefined for JSON without error fields", () => {
+			const body = '{"status": "ok"}'
+			expect(extractMessageFromResponseBody(body)).toBeUndefined()
+		})
+	})
+
 	describe("flattenAiSdkMessagesToStringContent", () => {
 		it("should return messages unchanged if content is already a string", () => {
 			const messages = [
@@ -1061,3 +1167,185 @@ describe("AI SDK conversion utilities", () => {
 		})
 	})
 })
+
+describe("consumeAiSdkStream", () => {
+	/**
+	 * Helper to create an AsyncIterable from an array of stream parts.
+	 */
+	async function* createAsyncIterable<T>(items: T[]): AsyncGenerator<T> {
+		for (const item of items) {
+			yield item
+		}
+	}
+
+	/**
+	 * Helper to collect all chunks from an async generator.
+	 * Returns { chunks, error } to support both success and error paths.
+	 */
+	async function collectStream(stream: AsyncGenerator<unknown>): Promise<{ chunks: unknown[]; error: Error | null }> {
+		const chunks: unknown[] = []
+		let error: Error | null = null
+		try {
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+		} catch (e) {
+			error = e instanceof Error ? e : new Error(String(e))
+		}
+		return { chunks, error }
+	}
+
+	it("yields stream chunks from fullStream", async () => {
+		const result = {
+			fullStream: createAsyncIterable([
+				{ type: "text-delta" as const, id: "1", text: "hello" },
+				{ type: "text" as const, text: " world" },
+			]),
+			usage: Promise.resolve({ inputTokens: 5, outputTokens: 10 }),
+		}
+
+		const { chunks, error } = await collectStream(consumeAiSdkStream(result as any))
+
+		expect(error).toBeNull()
+		// Two text chunks + one usage chunk
+		expect(chunks).toHaveLength(3)
+		expect(chunks[0]).toEqual({ type: "text", text: "hello" })
+		expect(chunks[1]).toEqual({ type: "text", text: " world" })
+	})
+
+	it("yields default usage chunk when no usageHandler provided", async () => {
+		const result = {
+			fullStream: createAsyncIterable([{ type: "text-delta" as const, id: "1", text: "hi" }]),
+			usage: Promise.resolve({ inputTokens: 10, outputTokens: 20 }),
+		}
+
+		const { chunks, error } = await collectStream(consumeAiSdkStream(result as any))
+
+		expect(error).toBeNull()
+		const usageChunk = chunks.find((c: any) => c.type === "usage")
+		expect(usageChunk).toEqual({
+			type: "usage",
+			inputTokens: 10,
+			outputTokens: 20,
+		})
+	})
+
+	it("uses usageHandler when provided", async () => {
+		const result = {
+			fullStream: createAsyncIterable([{ type: "text-delta" as const, id: "1", text: "hi" }]),
+			usage: Promise.resolve({ inputTokens: 10, outputTokens: 20 }),
+		}
+
+		async function* customUsageHandler() {
+			yield {
+				type: "usage" as const,
+				inputTokens: 42,
+				outputTokens: 84,
+				cacheWriteTokens: 5,
+				cacheReadTokens: 3,
+			}
+		}
+
+		const { chunks, error } = await collectStream(consumeAiSdkStream(result as any, customUsageHandler))
+
+		expect(error).toBeNull()
+		const usageChunk = chunks.find((c: any) => c.type === "usage")
+		expect(usageChunk).toEqual({
+			type: "usage",
+			inputTokens: 42,
+			outputTokens: 84,
+			cacheWriteTokens: 5,
+			cacheReadTokens: 3,
+		})
+	})
+
+	/**
+	 * THE KEY TEST: Verifies that when the stream contains an error chunk (e.g. "Insufficient balance")
+	 * and result.usage rejects with a generic error (AI SDK's NoOutputGeneratedError), the thrown
+	 * error preserves the specific stream error message rather than the generic one.
+	 */
+	it("captures stream error and throws it when usage fails", async () => {
+		const usageRejection = Promise.reject(new Error("No output generated. Check the stream for errors."))
+		// Prevent unhandled rejection warning — the rejection is intentionally caught inside consumeAiSdkStream
+		usageRejection.catch(() => {})
+
+		const result = {
+			fullStream: createAsyncIterable([
+				{ type: "text-delta" as const, id: "1", text: "partial" },
+				{
+					type: "error" as const,
+					error: new Error("Insufficient balance to complete this request"),
+				},
+			]),
+			usage: usageRejection,
+		}
+
+		const { chunks, error } = await collectStream(consumeAiSdkStream(result as any))
+
+		// The error chunk IS still yielded during stream iteration
+		const errorChunk = chunks.find((c: any) => c.type === "error")
+		expect(errorChunk).toEqual({
+			type: "error",
+			error: "StreamError",
+			message: "Insufficient balance to complete this request",
+		})
+
+		// The thrown error uses the captured stream error, NOT the generic usage error
+		expect(error).not.toBeNull()
+		expect(error!.message).toBe("Insufficient balance to complete this request")
+		expect(error!.message).not.toContain("No output generated")
+	})
+
+	it("re-throws usage error when no stream error captured", async () => {
+		const usageRejection = Promise.reject(new Error("Rate limit exceeded"))
+		usageRejection.catch(() => {})
+
+		const result = {
+			fullStream: createAsyncIterable([{ type: "text-delta" as const, id: "1", text: "hello" }]),
+			usage: usageRejection,
+		}
+
+		const { chunks, error } = await collectStream(consumeAiSdkStream(result as any))
+
+		// Text chunk should still be yielded
+		expect(chunks).toHaveLength(1)
+		expect(chunks[0]).toEqual({ type: "text", text: "hello" })
+
+		// The original usage error is re-thrown since no stream error was captured
+		expect(error).not.toBeNull()
+		expect(error!.message).toBe("Rate limit exceeded")
+	})
+
+	it("captures stream error and throws it when usageHandler fails", async () => {
+		const result = {
+			fullStream: createAsyncIterable([
+				{ type: "text-delta" as const, id: "1", text: "partial" },
+				{
+					type: "error" as const,
+					error: new Error("Insufficient balance to complete this request"),
+				},
+			]),
+			usage: Promise.resolve({ inputTokens: 0, outputTokens: 0 }),
+		}
+
+		// eslint-disable-next-line require-yield
+		async function* failingUsageHandler(): AsyncGenerator<never> {
+			throw new Error("No output generated. Check the stream for errors.")
+		}
+
+		const { chunks, error } = await collectStream(consumeAiSdkStream(result as any, failingUsageHandler))
+
+		// Error chunk was yielded during streaming
+		const errorChunk = chunks.find((c: any) => c.type === "error")
+		expect(errorChunk).toEqual({
+			type: "error",
+			error: "StreamError",
+			message: "Insufficient balance to complete this request",
+		})
+
+		// The thrown error uses the captured stream error, not the usageHandler error
+		expect(error).not.toBeNull()
+		expect(error!.message).toBe("Insufficient balance to complete this request")
+		expect(error!.message).not.toContain("No output generated")
+	})
+})

+ 198 - 14
src/api/transform/ai-sdk.ts

@@ -6,7 +6,7 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
 import { tool as createTool, jsonSchema, type ModelMessage, type TextStreamPart } from "ai"
-import type { ApiStreamChunk } from "./stream"
+import type { ApiStreamChunk, ApiStream } from "./stream"
 
 /**
  * Options for converting Anthropic messages to AI SDK format.
@@ -460,6 +460,59 @@ export function* processAiSdkStreamPart(part: ExtendedStreamPart): Generator<Api
 	}
 }
 
+/**
+ * Consume an AI SDK stream result, processing stream parts and handling usage.
+ * Centralizes the common stream consumption pattern shared across all AI SDK
+ * providers, with built-in error recovery that preserves stream error messages
+ * when usage resolution throws (e.g. AI SDK's NoOutputGeneratedError).
+ *
+ * @param result - The stream result object from AI SDK's `streamText()`.
+ *   Must have `fullStream` and `usage` properties.
+ * @param usageHandler - Optional async generator that handles usage processing.
+ *   When provided, the handler is responsible for awaiting usage/providerMetadata
+ *   and yielding usage chunks. When omitted, a default handler awaits
+ *   `result.usage` and yields a basic usage chunk with inputTokens/outputTokens.
+ * @yields ApiStreamChunk objects from the stream and usage processing
+ */
+export async function* consumeAiSdkStream(
+	result: {
+		fullStream: AsyncIterable<ExtendedStreamPart>
+		usage: PromiseLike<{ inputTokens?: number; outputTokens?: number }>
+	},
+	usageHandler?: () => AsyncGenerator<ApiStreamChunk>,
+): ApiStream {
+	let lastStreamError: string | undefined
+
+	for await (const part of result.fullStream) {
+		for (const chunk of processAiSdkStreamPart(part)) {
+			if (chunk.type === "error") {
+				lastStreamError = chunk.message
+			}
+			yield chunk
+		}
+	}
+
+	try {
+		if (usageHandler) {
+			yield* usageHandler()
+		} else {
+			const usage = await result.usage
+			if (usage) {
+				yield {
+					type: "usage" as const,
+					inputTokens: usage.inputTokens || 0,
+					outputTokens: usage.outputTokens || 0,
+				}
+			}
+		}
+	} catch (usageError) {
+		if (lastStreamError) {
+			throw new Error(lastStreamError)
+		}
+		throw usageError
+	}
+}
+
 /**
  * Type for AI SDK tool choice format.
  */
@@ -501,6 +554,58 @@ export function mapToolChoice(toolChoice: any): AiSdkToolChoice {
 	return undefined
 }
 
+/**
+ * Extract a human-readable error message from an API response body string.
+ * Handles common JSON error formats returned by AI providers.
+ *
+ * @param responseBody - The raw HTTP response body string
+ * @returns The extracted error message, or undefined if none found
+ */
+export function extractMessageFromResponseBody(responseBody: string): string | undefined {
+	if (!responseBody || typeof responseBody !== "string") {
+		return undefined
+	}
+
+	try {
+		const parsed: unknown = JSON.parse(responseBody)
+
+		if (typeof parsed !== "object" || parsed === null) {
+			return undefined
+		}
+
+		const obj = parsed as Record<string, unknown>
+
+		// Format: {"error": {"message": "...", "code": "..."}} or {"error": {"message": "..."}}
+		if (typeof obj.error === "object" && obj.error !== null) {
+			const errorObj = obj.error as Record<string, unknown>
+			if (typeof errorObj.message === "string" && errorObj.message) {
+				if (typeof errorObj.code === "string" && errorObj.code) {
+					return `[${errorObj.code}] ${errorObj.message}`
+				}
+				if (typeof errorObj.code === "number") {
+					return `[${errorObj.code}] ${errorObj.message}`
+				}
+				return errorObj.message
+			}
+		}
+
+		// Format: {"error": "string message"}
+		if (typeof obj.error === "string" && obj.error) {
+			return obj.error
+		}
+
+		// Format: {"message": "..."}
+		if (typeof obj.message === "string" && obj.message) {
+			return obj.message
+		}
+
+		return undefined
+	} catch {
+		// JSON parse failed — responseBody is not valid JSON
+		return undefined
+	}
+}
+
 /**
  * Extract a user-friendly error message from AI SDK errors.
  * The AI SDK wraps errors in types like AI_RetryError and AI_APICallError
@@ -514,18 +619,41 @@ export function extractAiSdkErrorMessage(error: unknown): string {
 		return "Unknown error"
 	}
 
-	// Cast to access AI SDK error properties
-	const anyError = error as any
+	if (typeof error !== "object") {
+		return String(error)
+	}
+
+	const errorObj = error as Record<string, unknown>
 
 	// AI_RetryError has a lastError property with the actual error
-	if (anyError.name === "AI_RetryError") {
-		const retryCount = anyError.errors?.length || 0
-		const lastError = anyError.lastError
-		const lastErrorMessage = lastError?.message || lastError?.toString() || "Unknown error"
+	if (errorObj.name === "AI_RetryError") {
+		const retryCount = Array.isArray(errorObj.errors) ? errorObj.errors.length : 0
+		const lastError = errorObj.lastError
+
+		// Try to extract message from lastError's responseBody first
+		let lastErrorMessage: string | undefined
+		if (
+			typeof lastError === "object" &&
+			lastError !== null &&
+			"responseBody" in lastError &&
+			typeof (lastError as Record<string, unknown>).responseBody === "string"
+		) {
+			lastErrorMessage = extractMessageFromResponseBody(
+				(lastError as Record<string, unknown>).responseBody as string,
+			)
+		}
+
+		if (!lastErrorMessage) {
+			lastErrorMessage =
+				typeof lastError === "object" && lastError !== null && "message" in lastError
+					? String((lastError as Record<string, unknown>).message)
+					: lastError
+						? String(lastError)
+						: "Unknown error"
+		}
 
 		// Extract status code if available
-		const statusCode =
-			lastError?.status || lastError?.statusCode || anyError.status || anyError.statusCode || undefined
+		const statusCode = getStatusCode(lastError) ?? getStatusCode(error)
 
 		if (statusCode) {
 			return `Failed after ${retryCount} attempts (${statusCode}): ${lastErrorMessage}`
@@ -533,13 +661,52 @@ export function extractAiSdkErrorMessage(error: unknown): string {
 		return `Failed after ${retryCount} attempts: ${lastErrorMessage}`
 	}
 
-	// AI_APICallError has message and optional status
-	if (anyError.name === "AI_APICallError") {
-		const statusCode = anyError.status || anyError.statusCode
+	// AI_APICallError has message, optional status, and responseBody
+	if (errorObj.name === "AI_APICallError") {
+		const statusCode = getStatusCode(error)
+
+		// Try to extract a richer message from responseBody
+		let message: string | undefined
+		if ("responseBody" in errorObj && typeof errorObj.responseBody === "string") {
+			message = extractMessageFromResponseBody(errorObj.responseBody)
+		}
+
+		if (!message) {
+			message = typeof errorObj.message === "string" ? errorObj.message : "API call failed"
+		}
+
 		if (statusCode) {
-			return `API Error (${statusCode}): ${anyError.message}`
+			return `API Error (${statusCode}): ${message}`
+		}
+		return message
+	}
+
+	// AI_NoOutputGeneratedError wraps a cause that may be an APICallError
+	if (errorObj.name === "AI_NoOutputGeneratedError" || errorObj.name === "NoOutputGeneratedError") {
+		const cause = errorObj.cause
+		if (typeof cause === "object" && cause !== null) {
+			const causeObj = cause as Record<string, unknown>
+			// If cause is an AI_APICallError, recursively extract its message
+			if (causeObj.name === "AI_APICallError") {
+				return extractAiSdkErrorMessage(cause)
+			}
+			// Try responseBody on the cause directly
+			if ("responseBody" in causeObj && typeof causeObj.responseBody === "string") {
+				const bodyMessage = extractMessageFromResponseBody(causeObj.responseBody)
+				if (bodyMessage) {
+					return bodyMessage
+				}
+			}
+			// Fall through to cause's message
+			if ("message" in causeObj && typeof causeObj.message === "string") {
+				return causeObj.message
+			}
 		}
-		return anyError.message || "API call failed"
+		// Fall back to the error's own message
+		if (typeof errorObj.message === "string" && errorObj.message) {
+			return errorObj.message
+		}
+		return "No output generated"
 	}
 
 	// Standard Error
@@ -551,6 +718,23 @@ export function extractAiSdkErrorMessage(error: unknown): string {
 	return String(error)
 }
 
+/**
+ * Extract a numeric status code from an error-like object.
+ */
+function getStatusCode(obj: unknown): number | undefined {
+	if (typeof obj !== "object" || obj === null) {
+		return undefined
+	}
+	const record = obj as Record<string, unknown>
+	if (typeof record.status === "number") {
+		return record.status
+	}
+	if (typeof record.statusCode === "number") {
+		return record.statusCode
+	}
+	return undefined
+}
+
 /**
  * Handle AI SDK errors by extracting the message and preserving status codes.
  * Returns an Error object with proper status preserved for retry logic.

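Tracing the balance-failure case through the two new helpers (inputs copied from the tests above; the expected strings follow from the code in this hunk):

	// Worked example, not new behavior:
	const apiError = {
		name: "AI_APICallError",
		message: "API call failed",
		responseBody: '{"error":{"message":"Insufficient balance or no resource package.","code":"1113"}}',
		statusCode: 402,
	}
	// extractMessageFromResponseBody(apiError.responseBody)
	//   -> "[1113] Insufficient balance or no resource package."
	// extractAiSdkErrorMessage(apiError)
	//   -> "API Error (402): [1113] Insufficient balance or no resource package."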
+ 10 - 3
src/core/task/Task.ts

@@ -3340,9 +3340,17 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 						const cancelReason: ClineApiReqCancelReason = this.abort ? "user_cancelled" : "streaming_failed"
 
 						const rawErrorMessage = error.message ?? JSON.stringify(serializeError(error), null, 2)
+
+						// Check auto-retry state BEFORE abortStream so we can suppress the error
+						// message on the api_req_started row when backoffAndAnnounce will display it instead.
+						const stateForBackoff = await this.providerRef.deref()?.getState()
+						const willAutoRetry = !this.abort && stateForBackoff?.autoApprovalEnabled
+
 						const streamingFailedMessage = this.abort
 							? undefined
-							: `${t("common:interruption.streamTerminatedByProvider")}: ${rawErrorMessage}`
+							: willAutoRetry
+								? undefined // backoffAndAnnounce will display the error with retry countdown
+								: `${t("common:interruption.streamTerminatedByProvider")}: ${rawErrorMessage}`
 
 						// Clean up partial state
 						await abortStream(cancelReason, streamingFailedMessage)
@@ -3355,11 +3363,10 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 							// Stream failed - log the error and retry with the same content
 							// The existing rate limiting will prevent rapid retries
 							console.error(
-								`[Task#${this.taskId}.${this.instanceId}] Stream failed, will retry: ${streamingFailedMessage}`,
+								`[Task#${this.taskId}.${this.instanceId}] Stream failed, will retry: ${rawErrorMessage}`,
 							)
 
 							// Apply exponential backoff similar to first-chunk errors when auto-resubmit is enabled
-							const stateForBackoff = await this.providerRef.deref()?.getState()
 							if (stateForBackoff?.autoApprovalEnabled) {
 								await this.backoffAndAnnounce(currentItem.retryAttempt ?? 0, error)