Browse Source

feat: Add supportsReasoning property for Z.ai GLM binary thinking mode (#8872)

* feat: Add supportsReasoning property for Z.ai GLM binary thinking mode

- Add supportsReasoning to ModelInfo schema for binary reasoning models
- Update GLM-4.5 and GLM-4.6 models to use supportsReasoning: true
- Implement thinking parameter support in ZAiHandler for Deep Thinking API
- Update ThinkingBudget component to show simple toggle for supportsReasoning models
- Add comprehensive tests for binary reasoning functionality

Closes #8465

* refactor: rename supportsReasoning to supportsReasoningBinary for clarity

- Rename supportsReasoning -> supportsReasoningBinary in model schema
- Update Z.AI GLM model configurations to use supportsReasoningBinary
- Update Z.AI provider logic in createStream and completePrompt methods
- Update ThinkingBudget UI component and tests
- Update all test comments and expectations

This change improves naming clarity by distinguishing between:
- supportsReasoningBinary: Simple on/off reasoning toggle
- supportsReasoningBudget: Advanced reasoning with token budget controls
- supportsReasoningEffort: Advanced reasoning with effort levels
Daniel 4 months ago
parent
commit
86190d8e2d

+ 2 - 0
packages/types/src/model.ts

@@ -61,6 +61,8 @@ export const modelInfoSchema = z.object({
 	// Capability flag to indicate whether the model supports an output verbosity parameter
 	supportsVerbosity: z.boolean().optional(),
 	supportsReasoningBudget: z.boolean().optional(),
+	// Capability flag to indicate whether the model supports simple on/off binary reasoning
+	supportsReasoningBinary: z.boolean().optional(),
 	// Capability flag to indicate whether the model supports temperature parameter
 	supportsTemperature: z.boolean().optional(),
 	requiredReasoningBudget: z.boolean().optional(),

+ 4 - 0
packages/types/src/providers/zai.ts

@@ -16,6 +16,7 @@ export const internationalZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsReasoningBinary: true,
 		inputPrice: 0.6,
 		outputPrice: 2.2,
 		cacheWritesPrice: 0,
@@ -86,6 +87,7 @@ export const internationalZAiModels = {
 		contextWindow: 200_000,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsReasoningBinary: true,
 		inputPrice: 0.6,
 		outputPrice: 2.2,
 		cacheWritesPrice: 0,
@@ -114,6 +116,7 @@ export const mainlandZAiModels = {
 		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsReasoningBinary: true,
 		inputPrice: 0.29,
 		outputPrice: 1.14,
 		cacheWritesPrice: 0,
@@ -184,6 +187,7 @@ export const mainlandZAiModels = {
 		contextWindow: 204_800,
 		supportsImages: false,
 		supportsPromptCache: true,
+		supportsReasoningBinary: true,
 		inputPrice: 0.29,
 		outputPrice: 1.14,
 		cacheWritesPrice: 0,

+ 138 - 0
src/api/providers/__tests__/zai.spec.ts

@@ -295,5 +295,143 @@ describe("ZAiHandler", () => {
 				undefined,
 			)
 		})
+
+		describe("Reasoning functionality", () => {
+			// Streaming path: the non-standard `thinking` flag must be sent only when
+			// the user opted in AND the model advertises supportsReasoningBinary.
+			it("should include thinking parameter when enableReasoningEffort is true and model supports reasoning in createMessage", async () => {
+				const handlerWithReasoning = new ZAiHandler({
+					apiModelId: "glm-4.6", // GLM-4.6 has supportsReasoningBinary: true
+					zaiApiKey: "test-zai-api-key",
+					zaiApiLine: "international_coding",
+					enableReasoningEffort: true,
+				})
+
+				// Minimal async iterable: the stream yields nothing and ends immediately.
+				mockCreate.mockImplementationOnce(() => {
+					return {
+						[Symbol.asyncIterator]: () => ({
+							async next() {
+								return { done: true }
+							},
+						}),
+					}
+				})
+
+				const systemPrompt = "Test system prompt"
+				const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+				const messageGenerator = handlerWithReasoning.createMessage(systemPrompt, messages)
+				await messageGenerator.next()
+
+				expect(mockCreate).toHaveBeenCalledWith(
+					expect.objectContaining({
+						thinking: { type: "enabled" },
+					}),
+					undefined,
+				)
+			})
+
+			// User opted out: no thinking flag even though the model supports it.
+			it("should not include thinking parameter when enableReasoningEffort is false in createMessage", async () => {
+				const handlerWithoutReasoning = new ZAiHandler({
+					apiModelId: "glm-4.6", // GLM-4.6 has supportsReasoningBinary: true
+					zaiApiKey: "test-zai-api-key",
+					zaiApiLine: "international_coding",
+					enableReasoningEffort: false,
+				})
+
+				mockCreate.mockImplementationOnce(() => {
+					return {
+						[Symbol.asyncIterator]: () => ({
+							async next() {
+								return { done: true }
+							},
+						}),
+					}
+				})
+
+				const systemPrompt = "Test system prompt"
+				const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+				const messageGenerator = handlerWithoutReasoning.createMessage(systemPrompt, messages)
+				await messageGenerator.next()
+
+				expect(mockCreate).toHaveBeenCalledWith(
+					expect.not.objectContaining({
+						thinking: expect.anything(),
+					}),
+					undefined,
+				)
+			})
+
+			// Capability gate: opting in has no effect on a non-reasoning model.
+			it("should not include thinking parameter when model does not support reasoning in createMessage", async () => {
+				const handlerWithNonReasoningModel = new ZAiHandler({
+					apiModelId: "glm-4-32b-0414-128k", // This model doesn't have supportsReasoningBinary: true
+					zaiApiKey: "test-zai-api-key",
+					zaiApiLine: "international_coding",
+					enableReasoningEffort: true,
+				})
+
+				mockCreate.mockImplementationOnce(() => {
+					return {
+						[Symbol.asyncIterator]: () => ({
+							async next() {
+								return { done: true }
+							},
+						}),
+					}
+				})
+
+				const systemPrompt = "Test system prompt"
+				const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+				const messageGenerator = handlerWithNonReasoningModel.createMessage(systemPrompt, messages)
+				await messageGenerator.next()
+
+				expect(mockCreate).toHaveBeenCalledWith(
+					expect.not.objectContaining({
+						thinking: expect.anything(),
+					}),
+					undefined,
+				)
+			})
+
+			// Non-streaming path (completePrompt) must apply the same gating.
+			it("should include thinking parameter when enableReasoningEffort is true and model supports reasoning in completePrompt", async () => {
+				const handlerWithReasoning = new ZAiHandler({
+					apiModelId: "glm-4.5", // GLM-4.5 has supportsReasoningBinary: true
+					zaiApiKey: "test-zai-api-key",
+					zaiApiLine: "international_coding",
+					enableReasoningEffort: true,
+				})
+
+				const expectedResponse = "This is a test response"
+				mockCreate.mockResolvedValueOnce({ choices: [{ message: { content: expectedResponse } }] })
+
+				await handlerWithReasoning.completePrompt("test prompt")
+
+				expect(mockCreate).toHaveBeenCalledWith(
+					expect.objectContaining({
+						thinking: { type: "enabled" },
+					}),
+				)
+			})
+
+			it("should not include thinking parameter when enableReasoningEffort is false in completePrompt", async () => {
+				const handlerWithoutReasoning = new ZAiHandler({
+					apiModelId: "glm-4.5", // GLM-4.5 has supportsReasoningBinary: true
+					zaiApiKey: "test-zai-api-key",
+					zaiApiLine: "international_coding",
+					enableReasoningEffort: false,
+				})
+
+				const expectedResponse = "This is a test response"
+				mockCreate.mockResolvedValueOnce({ choices: [{ message: { content: expectedResponse } }] })
+
+				await handlerWithoutReasoning.completePrompt("test prompt")
+
+				expect(mockCreate).toHaveBeenCalledWith(
+					expect.not.objectContaining({
+						thinking: expect.anything(),
+					}),
+				)
+			})
+		})
 	})
 })

+ 70 - 0
src/api/providers/zai.ts

@@ -10,7 +10,14 @@ import {
 	zaiApiLineConfigs,
 } from "@roo-code/types"
 
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
+
 import type { ApiHandlerOptions } from "../../shared/api"
+import { getModelMaxOutputTokens } from "../../shared/api"
+import { convertToOpenAiMessages } from "../transform/openai-format"
+import type { ApiHandlerCreateMessageMetadata } from "../index"
+import { handleOpenAIError } from "./utils/openai-error-handler"
 
 import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider"
 
@@ -30,4 +37,67 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
 			defaultTemperature: ZAI_DEFAULT_TEMPERATURE,
 		})
 	}
+
+	protected override createStream(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+		requestOptions?: OpenAI.RequestOptions,
+	) {
+		const { id: model, info } = this.getModel()
+
+		// Centralized cap: clamp to 20% of the context window (unless provider-specific exceptions apply)
+		const max_tokens =
+			getModelMaxOutputTokens({
+				modelId: model,
+				model: info,
+				settings: this.options,
+				format: "openai",
+			}) ?? undefined
+
+		const temperature = this.options.modelTemperature ?? this.defaultTemperature
+
+		const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
+			model,
+			max_tokens,
+			temperature,
+			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+			stream: true,
+			stream_options: { include_usage: true },
+		}
+
+		// Add thinking parameter if reasoning is enabled and model supports it
+		const { id: modelId, info: modelInfo } = this.getModel()
+		if (this.options.enableReasoningEffort && modelInfo.supportsReasoningBinary) {
+			;(params as any).thinking = { type: "enabled" }
+		}
+
+		try {
+			return this.client.chat.completions.create(params, requestOptions)
+		} catch (error) {
+			throw handleOpenAIError(error, this.providerName)
+		}
+	}
+
+	override async completePrompt(prompt: string): Promise<string> {
+		const { id: modelId } = this.getModel()
+
+		const params: OpenAI.Chat.Completions.ChatCompletionCreateParams = {
+			model: modelId,
+			messages: [{ role: "user", content: prompt }],
+		}
+
+		// Add thinking parameter if reasoning is enabled and model supports it
+		const { info: modelInfo } = this.getModel()
+		if (this.options.enableReasoningEffort && modelInfo.supportsReasoningBinary) {
+			;(params as any).thinking = { type: "enabled" }
+		}
+
+		try {
+			const response = await this.client.chat.completions.create(params)
+			return response.choices[0]?.message.content || ""
+		} catch (error) {
+			throw handleOpenAIError(error, this.providerName)
+		}
+	}
 }

+ 16 - 0
webview-ui/src/components/settings/ThinkingBudget.tsx

@@ -48,6 +48,7 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
 	const minThinkingTokens = isGemini25Pro ? GEMINI_25_PRO_MIN_THINKING_TOKENS : 1024
 
 	// Check model capabilities
+	const isReasoningSupported = !!modelInfo && modelInfo.supportsReasoningBinary
 	const isReasoningBudgetSupported = !!modelInfo && modelInfo.supportsReasoningBudget
 	const isReasoningBudgetRequired = !!modelInfo && modelInfo.requiredReasoningBudget
 	const isReasoningEffortSupported = !!modelInfo && modelInfo.supportsReasoningEffort
@@ -103,6 +104,21 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
 		return null
 	}
 
+	// Models with supportsReasoningBinary (binary reasoning) show a simple on/off toggle
+	if (isReasoningSupported) {
+		return (
+			<div className="flex flex-col gap-1">
+				<Checkbox
+					checked={enableReasoningEffort}
+					onChange={(checked: boolean) =>
+						setApiConfigurationField("enableReasoningEffort", checked === true)
+					}>
+					{t("settings:providers.useReasoning")}
+				</Checkbox>
+			</div>
+		)
+	}
+
 	return isReasoningBudgetSupported && !!modelInfo.maxTokens ? (
 		<>
 			{!isReasoningBudgetRequired && (

+ 21 - 0
webview-ui/src/components/settings/__tests__/ThinkingBudget.spec.tsx

@@ -77,6 +77,27 @@ describe("ThinkingBudget", () => {
 		expect(container.firstChild).toBeNull()
 	})
 
+	it("should render simple reasoning toggle when model has supportsReasoningBinary (binary reasoning)", () => {
+		render(
+			<ThinkingBudget
+				{...defaultProps}
+				modelInfo={{
+					...mockModelInfo,
+					supportsReasoningBinary: true,
+					supportsReasoningBudget: false,
+					supportsReasoningEffort: false,
+				}}
+			/>,
+		)
+
+		// Should show the reasoning checkbox (translation key)
+		expect(screen.getByText("settings:providers.useReasoning")).toBeInTheDocument()
+
+		// Should NOT show sliders or other complex reasoning controls
+		expect(screen.queryByTestId("reasoning-budget")).not.toBeInTheDocument()
+		expect(screen.queryByTestId("reasoning-effort")).not.toBeInTheDocument()
+	})
+
 	it("should render sliders when model supports thinking", () => {
 		render(<ThinkingBudget {...defaultProps} />)