
Add max tokens checkbox option for OpenAI compatible provider (#4467)

Co-authored-by: AlexandruSmirnov <[email protected]>
Co-authored-by: Matt Rubens <[email protected]>
SmirnovDev 7 months ago
parent
commit bfe2274813
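
Adds an "Include max output tokens" checkbox to the OpenAI-compatible provider settings. When enabled, requests carry max_completion_tokens (the modern replacement for the deprecated max_tokens), preferring the user-configured modelMaxTokens over the model's default maxTokens. A hypothetical options object exercising the new fields (values illustrative; field names taken from the diff below):

	// Illustrative only, not part of this commit. Field names follow ApiHandlerOptions as used in the tests.
	const options: Partial<ApiHandlerOptions> = {
		includeMaxTokens: true, // the new checkbox
		modelMaxTokens: 32000, // optional user override
		openAiCustomModelInfo: { contextWindow: 128_000, maxTokens: 4096, supportsPromptCache: false },
	}
	// With these options a request includes max_completion_tokens: 32000;
	// without modelMaxTokens it falls back to 4096; with includeMaxTokens false, no cap is sent.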

+ 341 - 0
src/api/providers/__tests__/openai.spec.ts

@@ -5,6 +5,7 @@ import { OpenAiHandler } from "../openai"
 import { ApiHandlerOptions } from "../../../shared/api"
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
+import { openAiModelInfoSaneDefaults } from "@roo-code/types"
 
 const mockCreate = vitest.fn()
 
@@ -197,6 +198,113 @@ describe("OpenAiHandler", () => {
 			const callArgs = mockCreate.mock.calls[0][0]
 			expect(callArgs.reasoning_effort).toBeUndefined()
 		})
+
+		it("should include max_tokens when includeMaxTokens is true", async () => {
+			const optionsWithMaxTokens: ApiHandlerOptions = {
+				...mockOptions,
+				includeMaxTokens: true,
+				openAiCustomModelInfo: {
+					contextWindow: 128_000,
+					maxTokens: 4096,
+					supportsPromptCache: false,
+				},
+			}
+			const handlerWithMaxTokens = new OpenAiHandler(optionsWithMaxTokens)
+			const stream = handlerWithMaxTokens.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called with max_tokens
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.max_completion_tokens).toBe(4096)
+		})
+
+		it("should not include max_tokens when includeMaxTokens is false", async () => {
+			const optionsWithoutMaxTokens: ApiHandlerOptions = {
+				...mockOptions,
+				includeMaxTokens: false,
+				openAiCustomModelInfo: {
+					contextWindow: 128_000,
+					maxTokens: 4096,
+					supportsPromptCache: false,
+				},
+			}
+			const handlerWithoutMaxTokens = new OpenAiHandler(optionsWithoutMaxTokens)
+			const stream = handlerWithoutMaxTokens.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called without max_tokens
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.max_completion_tokens).toBeUndefined()
+		})
+
+		it("should not include max_tokens when includeMaxTokens is undefined", async () => {
+			const optionsWithUndefinedMaxTokens: ApiHandlerOptions = {
+				...mockOptions,
+				// includeMaxTokens is not set, should not include max_tokens
+				openAiCustomModelInfo: {
+					contextWindow: 128_000,
+					maxTokens: 4096,
+					supportsPromptCache: false,
+				},
+			}
+			const handlerWithDefaultMaxTokens = new OpenAiHandler(optionsWithUndefinedMaxTokens)
+			const stream = handlerWithDefaultMaxTokens.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called without max_tokens
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.max_completion_tokens).toBeUndefined()
+		})
+
+		it("should use user-configured modelMaxTokens instead of model default maxTokens", async () => {
+			const optionsWithUserMaxTokens: ApiHandlerOptions = {
+				...mockOptions,
+				includeMaxTokens: true,
+				modelMaxTokens: 32000, // User-configured value
+				openAiCustomModelInfo: {
+					contextWindow: 128_000,
+					maxTokens: 4096, // Model's default value (should not be used)
+					supportsPromptCache: false,
+				},
+			}
+			const handlerWithUserMaxTokens = new OpenAiHandler(optionsWithUserMaxTokens)
+			const stream = handlerWithUserMaxTokens.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called with user-configured modelMaxTokens (32000), not model default maxTokens (4096)
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.max_completion_tokens).toBe(32000)
+		})
+
+		it("should fallback to model default maxTokens when user modelMaxTokens is not set", async () => {
+			const optionsWithoutUserMaxTokens: ApiHandlerOptions = {
+				...mockOptions,
+				includeMaxTokens: true,
+				// modelMaxTokens is not set
+				openAiCustomModelInfo: {
+					contextWindow: 128_000,
+					maxTokens: 4096, // Model's default value (should be used as fallback)
+					supportsPromptCache: false,
+				},
+			}
+			const handlerWithoutUserMaxTokens = new OpenAiHandler(optionsWithoutUserMaxTokens)
+			const stream = handlerWithoutUserMaxTokens.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called with model default maxTokens (4096) as fallback
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.max_completion_tokens).toBe(4096)
+		})
 	})
 
 	describe("error handling", () => {
@@ -336,6 +444,10 @@ describe("OpenAiHandler", () => {
 				},
 				{ path: "/models/chat/completions" },
 			)
+
+			// Verify max_tokens is NOT included when includeMaxTokens is not set
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("max_completion_tokens")
 		})
 
 		it("should handle non-streaming responses with Azure AI Inference Service", async () => {
@@ -378,6 +490,10 @@ describe("OpenAiHandler", () => {
 				},
 				{ path: "/models/chat/completions" },
 			)
+
+			// Verify max_tokens is NOT included when includeMaxTokens is not set
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("max_completion_tokens")
 		})
 
 		it("should handle completePrompt with Azure AI Inference Service", async () => {
@@ -391,6 +507,10 @@ describe("OpenAiHandler", () => {
 				},
 				{ path: "/models/chat/completions" },
 			)
+
+			// Verify max_tokens is NOT included when includeMaxTokens is not set
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("max_completion_tokens")
 		})
 	})
 
@@ -433,4 +553,225 @@ describe("OpenAiHandler", () => {
 			expect(lastCall[0]).not.toHaveProperty("stream_options")
 		})
 	})
+
+	describe("O3 Family Models", () => {
+		const o3Options = {
+			...mockOptions,
+			openAiModelId: "o3-mini",
+			openAiCustomModelInfo: {
+				contextWindow: 128_000,
+				maxTokens: 65536,
+				supportsPromptCache: false,
+				reasoningEffort: "medium" as "low" | "medium" | "high",
+			},
+		}
+
+		it("should handle O3 model with streaming and include max_completion_tokens when includeMaxTokens is true", async () => {
+			const o3Handler = new OpenAiHandler({
+				...o3Options,
+				includeMaxTokens: true,
+				modelMaxTokens: 32000,
+				modelTemperature: 0.5,
+			})
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Hello!",
+				},
+			]
+
+			const stream = o3Handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "o3-mini",
+					messages: [
+						{
+							role: "developer",
+							content: "Formatting re-enabled\nYou are a helpful assistant.",
+						},
+						{ role: "user", content: "Hello!" },
+					],
+					stream: true,
+					stream_options: { include_usage: true },
+					reasoning_effort: "medium",
+					temperature: 0.5,
+					// O3 models do not support deprecated max_tokens but do support max_completion_tokens
+					max_completion_tokens: 32000,
+				}),
+				{},
+			)
+		})
+
+		it("should handle O3 model with streaming and exclude max_tokens when includeMaxTokens is false", async () => {
+			const o3Handler = new OpenAiHandler({
+				...o3Options,
+				includeMaxTokens: false,
+				modelTemperature: 0.7,
+			})
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Hello!",
+				},
+			]
+
+			const stream = o3Handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "o3-mini",
+					messages: [
+						{
+							role: "developer",
+							content: "Formatting re-enabled\nYou are a helpful assistant.",
+						},
+						{ role: "user", content: "Hello!" },
+					],
+					stream: true,
+					stream_options: { include_usage: true },
+					reasoning_effort: "medium",
+					temperature: 0.7,
+				}),
+				{},
+			)
+
+			// Verify max_tokens is NOT included
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("max_completion_tokens")
+		})
+
+		it("should handle O3 model non-streaming with reasoning_effort and max_completion_tokens when includeMaxTokens is true", async () => {
+			const o3Handler = new OpenAiHandler({
+				...o3Options,
+				openAiStreamingEnabled: false,
+				includeMaxTokens: true,
+				modelTemperature: 0.3,
+			})
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Hello!",
+				},
+			]
+
+			const stream = o3Handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "o3-mini",
+					messages: [
+						{
+							role: "developer",
+							content: "Formatting re-enabled\nYou are a helpful assistant.",
+						},
+						{ role: "user", content: "Hello!" },
+					],
+					reasoning_effort: "medium",
+					temperature: 0.3,
+					// O3 models do not support deprecated max_tokens but do support max_completion_tokens
+					max_completion_tokens: 65536, // Using default maxTokens from o3Options
+				}),
+				{},
+			)
+
+			// Verify stream is not set
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("stream")
+		})
+
+		it("should use default temperature of 0 when not specified for O3 models", async () => {
+			const o3Handler = new OpenAiHandler({
+				...o3Options,
+				// No modelTemperature specified
+			})
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Hello!",
+				},
+			]
+
+			const stream = o3Handler.createMessage(systemPrompt, messages)
+			await stream.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					temperature: 0, // Default temperature
+				}),
+				{},
+			)
+		})
+
+		it("should handle O3 model with Azure AI Inference Service respecting includeMaxTokens", async () => {
+			const o3AzureHandler = new OpenAiHandler({
+				...o3Options,
+				openAiBaseUrl: "https://test.services.ai.azure.com",
+				includeMaxTokens: false, // Should NOT include max_tokens
+			})
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Hello!",
+				},
+			]
+
+			const stream = o3AzureHandler.createMessage(systemPrompt, messages)
+			await stream.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "o3-mini",
+				}),
+				{ path: "/models/chat/completions" },
+			)
+
+			// Verify max_tokens is NOT included when includeMaxTokens is false
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("max_completion_tokens")
+		})
+
+		it("should NOT include max_tokens for O3 model with Azure AI Inference Service even when includeMaxTokens is true", async () => {
+			const o3AzureHandler = new OpenAiHandler({
+				...o3Options,
+				openAiBaseUrl: "https://test.services.ai.azure.com",
+				includeMaxTokens: true, // Explicitly enabled; max_tokens should still be omitted for this endpoint
+			})
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Hello!",
+				},
+			]
+
+			const stream = o3AzureHandler.createMessage(systemPrompt, messages)
+			await stream.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "o3-mini",
+					// O3 models do not support max_tokens
+				}),
+				{ path: "/models/chat/completions" },
+			)
+		})
+	})
 })

+ 60 - 21
src/api/providers/openai.ts

@@ -158,10 +158,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				...(reasoning && reasoning),
 			}
 
-			// @TODO: Move this to the `getModelParams` function.
-			if (this.options.includeMaxTokens) {
-				requestOptions.max_tokens = modelInfo.maxTokens
-			}
+			// Add max_tokens if needed
+			this.addMaxTokensIfNeeded(requestOptions, modelInfo)
 
 			const stream = await this.client.chat.completions.create(
 				requestOptions,
@@ -222,6 +220,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 						: [systemMessage, ...convertToOpenAiMessages(messages)],
 			}
 
+			// Add max_tokens if needed
+			this.addMaxTokensIfNeeded(requestOptions, modelInfo)
+
 			const response = await this.client.chat.completions.create(
 				requestOptions,
 				this._isAzureAiInference(modelUrl) ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {},
@@ -256,12 +257,17 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 	async completePrompt(prompt: string): Promise<string> {
 		try {
 			const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl)
+			const model = this.getModel()
+			const modelInfo = model.info
 
 			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
-				model: this.getModel().id,
+				model: model.id,
 				messages: [{ role: "user", content: prompt }],
 			}
 
+			// Add max_tokens if needed
+			this.addMaxTokensIfNeeded(requestOptions, modelInfo)
+
 			const response = await this.client.chat.completions.create(
 				requestOptions,
 				isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {},
@@ -282,25 +288,34 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): ApiStream {
-		if (this.options.openAiStreamingEnabled ?? true) {
-			const methodIsAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl)
+		const modelInfo = this.getModel().info
+		const methodIsAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl)
 
+		if (this.options.openAiStreamingEnabled ?? true) {
 			const isGrokXAI = this._isGrokXAI(this.options.openAiBaseUrl)
 
+			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
+				model: modelId,
+				messages: [
+					{
+						role: "developer",
+						content: `Formatting re-enabled\n${systemPrompt}`,
+					},
+					...convertToOpenAiMessages(messages),
+				],
+				stream: true,
+				...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
+				reasoning_effort: modelInfo.reasoningEffort,
+				temperature: this.options.modelTemperature ?? 0,
+			}
+
+			// O3 family models do not support the deprecated max_tokens parameter
+			// but they do support max_completion_tokens (the modern OpenAI parameter)
+			// This allows O3 models to limit response length when includeMaxTokens is enabled
+			this.addMaxTokensIfNeeded(requestOptions, modelInfo)
+
 			const stream = await this.client.chat.completions.create(
-				{
-					model: modelId,
-					messages: [
-						{
-							role: "developer",
-							content: `Formatting re-enabled\n${systemPrompt}`,
-						},
-						...convertToOpenAiMessages(messages),
-					],
-					stream: true,
-					...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
-					reasoning_effort: this.getModel().info.reasoningEffort,
-				},
+				requestOptions,
 				methodIsAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {},
 			)
 
@@ -315,9 +330,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 					},
 					...convertToOpenAiMessages(messages),
 				],
+				reasoning_effort: modelInfo.reasoningEffort,
+				temperature: this.options.modelTemperature ?? 0,
 			}
 
-			const methodIsAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl)
+			// O3 family models do not support the deprecated max_tokens parameter
+			// but they do support max_completion_tokens (the modern OpenAI parameter)
+			// This allows O3 models to limit response length when includeMaxTokens is enabled
+			this.addMaxTokensIfNeeded(requestOptions, modelInfo)
 
 			const response = await this.client.chat.completions.create(
 				requestOptions,
@@ -369,6 +389,25 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		const urlHost = this._getUrlHost(baseUrl)
 		return urlHost.endsWith(".services.ai.azure.com")
 	}
+
+	/**
+	 * Adds max_completion_tokens to the request body if needed based on provider configuration
+	 * Note: max_tokens is deprecated in favor of max_completion_tokens as per OpenAI documentation
+	 * O3 family models handle max_tokens separately in handleO3FamilyMessage
+	 */
+	private addMaxTokensIfNeeded(
+		requestOptions:
+			| OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming
+			| OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming,
+		modelInfo: ModelInfo,
+	): void {
+		// Only add max_completion_tokens if includeMaxTokens is true
+		if (this.options.includeMaxTokens === true) {
+			// Use user-configured modelMaxTokens if available, otherwise fall back to model's default maxTokens
+			// Using max_completion_tokens as max_tokens is deprecated
+			requestOptions.max_completion_tokens = this.options.modelMaxTokens || modelInfo.maxTokens
+		}
+	}
 }
 
 export async function getOpenAiModels(baseUrl?: string, apiKey?: string, openAiHeaders?: Record<string, string>) {
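
For reference, a standalone sketch of the precedence addMaxTokensIfNeeded applies (the helper name and option fields are real; the free function below is illustrative only):

	// Sketch only: mirrors addMaxTokensIfNeeded outside the class.
	function resolveMaxCompletionTokens(
		options: { includeMaxTokens?: boolean; modelMaxTokens?: number },
		modelDefaultMaxTokens?: number,
	): number | undefined {
		// Only send a cap when the option is explicitly enabled
		if (options.includeMaxTokens !== true) {
			return undefined
		}
		// The user-configured value wins; otherwise fall back to the model's default
		return options.modelMaxTokens || modelDefaultMaxTokens
	}

	// resolveMaxCompletionTokens({ includeMaxTokens: true, modelMaxTokens: 32000 }, 4096) -> 32000
	// resolveMaxCompletionTokens({ includeMaxTokens: true }, 4096)                        -> 4096
	// resolveMaxCompletionTokens({ includeMaxTokens: false }, 4096)                       -> undefined

Note that the backend adds the cap only when includeMaxTokens is explicitly true, while the webview checkbox below renders as checked when the value is undefined; the stored setting becomes an explicit boolean once the user toggles it.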

+ 10 - 0
webview-ui/src/components/settings/providers/OpenAICompatible.tsx

@@ -164,6 +164,16 @@ export const OpenAICompatible = ({
 				onChange={handleInputChange("openAiStreamingEnabled", noTransform)}>
 				{t("settings:modelInfo.enableStreaming")}
 			</Checkbox>
+			<div>
+				<Checkbox
+					checked={apiConfiguration?.includeMaxTokens ?? true}
+					onChange={handleInputChange("includeMaxTokens", noTransform)}>
+					{t("settings:includeMaxOutputTokens")}
+				</Checkbox>
+				<div className="text-sm text-vscode-descriptionForeground ml-6">
+					{t("settings:includeMaxOutputTokensDescription")}
+				</div>
+			</div>
 			<Checkbox
 				checked={apiConfiguration?.openAiUseAzure ?? false}
 				onChange={handleInputChange("openAiUseAzure", noTransform)}>
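
The checkbox defaults to checked in the UI (includeMaxTokens ?? true), and toggling it writes an explicit boolean back into the provider settings. A minimal sketch of that mapping, with component internals assumed:

	// Sketch only: how the rendered state relates to the stored setting.
	const rendersChecked = apiConfiguration?.includeMaxTokens ?? true // undefined/null render as checked
	// Toggling the checkbox calls setApiConfigurationField("includeMaxTokens", <next boolean>),
	// after which the backend's strict includeMaxTokens === true check sees an explicit value.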

+ 314 - 0
webview-ui/src/components/settings/providers/__tests__/OpenAICompatible.spec.tsx

@@ -0,0 +1,314 @@
+import React from "react"
+import { render, screen, fireEvent } from "@testing-library/react"
+import { OpenAICompatible } from "../OpenAICompatible"
+import { ProviderSettings } from "@roo-code/types"
+
+// Mock the vscrui Checkbox component
+jest.mock("vscrui", () => ({
+	Checkbox: ({ children, checked, onChange }: any) => (
+		<label data-testid={`checkbox-${children?.toString().replace(/\s+/g, "-").toLowerCase()}`}>
+			<input
+				type="checkbox"
+				checked={checked}
+				onChange={() => onChange(!checked)} // Toggle the checked state
+				data-testid={`checkbox-input-${children?.toString().replace(/\s+/g, "-").toLowerCase()}`}
+			/>
+			{children}
+		</label>
+	),
+}))
+
+// Mock the VSCodeTextField and VSCodeButton components
+jest.mock("@vscode/webview-ui-toolkit/react", () => ({
+	VSCodeTextField: ({
+		children,
+		value,
+		onInput,
+		placeholder,
+		className,
+		style,
+		"data-testid": dataTestId,
+		...rest
+	}: any) => {
+		return (
+			<div
+				data-testid={dataTestId ? `${dataTestId}-text-field` : "vscode-text-field"}
+				className={className}
+				style={style}>
+				{children}
+				<input
+					type="text"
+					value={value}
+					onChange={(e) => onInput && onInput(e)}
+					placeholder={placeholder}
+					data-testid={dataTestId}
+					{...rest}
+				/>
+			</div>
+		)
+	},
+	VSCodeButton: ({ children, onClick, appearance, title }: any) => (
+		<button onClick={onClick} title={title} data-testid={`vscode-button-${appearance}`}>
+			{children}
+		</button>
+	),
+}))
+
+// Mock the translation hook
+jest.mock("@src/i18n/TranslationContext", () => ({
+	useAppTranslation: () => ({
+		t: (key: string) => key,
+	}),
+}))
+
+// Mock the UI components
+jest.mock("@src/components/ui", () => ({
+	Button: ({ children, onClick }: any) => <button onClick={onClick}>{children}</button>,
+}))
+
+// Mock other components
+jest.mock("../../ModelPicker", () => ({
+	ModelPicker: () => <div data-testid="model-picker">Model Picker</div>,
+}))
+
+jest.mock("../../R1FormatSetting", () => ({
+	R1FormatSetting: () => <div data-testid="r1-format-setting">R1 Format Setting</div>,
+}))
+
+jest.mock("../../ThinkingBudget", () => ({
+	ThinkingBudget: () => <div data-testid="thinking-budget">Thinking Budget</div>,
+}))
+
+// Mock react-use
+jest.mock("react-use", () => ({
+	useEvent: jest.fn(),
+}))
+
+describe("OpenAICompatible Component - includeMaxTokens checkbox", () => {
+	const mockSetApiConfigurationField = jest.fn()
+	const mockOrganizationAllowList = {
+		allowAll: true,
+		providers: {},
+	}
+
+	beforeEach(() => {
+		jest.clearAllMocks()
+	})
+
+	describe("Checkbox Rendering", () => {
+		it("should render the includeMaxTokens checkbox", () => {
+			const apiConfiguration: Partial<ProviderSettings> = {
+				includeMaxTokens: true,
+			}
+
+			render(
+				<OpenAICompatible
+					apiConfiguration={apiConfiguration as ProviderSettings}
+					setApiConfigurationField={mockSetApiConfigurationField}
+					organizationAllowList={mockOrganizationAllowList}
+				/>,
+			)
+
+			// Check that the checkbox is rendered
+			const checkbox = screen.getByTestId("checkbox-settings:includemaxoutputtokens")
+			expect(checkbox).toBeInTheDocument()
+
+			// Check that the description text is rendered
+			expect(screen.getByText("settings:includeMaxOutputTokensDescription")).toBeInTheDocument()
+		})
+
+		it("should render the checkbox with correct translation keys", () => {
+			const apiConfiguration: Partial<ProviderSettings> = {
+				includeMaxTokens: true,
+			}
+
+			render(
+				<OpenAICompatible
+					apiConfiguration={apiConfiguration as ProviderSettings}
+					setApiConfigurationField={mockSetApiConfigurationField}
+					organizationAllowList={mockOrganizationAllowList}
+				/>,
+			)
+
+			// Check that the correct translation key is used for the label
+			expect(screen.getByText("settings:includeMaxOutputTokens")).toBeInTheDocument()
+
+			// Check that the correct translation key is used for the description
+			expect(screen.getByText("settings:includeMaxOutputTokensDescription")).toBeInTheDocument()
+		})
+	})
+
+	describe("Initial State", () => {
+		it("should show checkbox as checked when includeMaxTokens is true", () => {
+			const apiConfiguration: Partial<ProviderSettings> = {
+				includeMaxTokens: true,
+			}
+
+			render(
+				<OpenAICompatible
+					apiConfiguration={apiConfiguration as ProviderSettings}
+					setApiConfigurationField={mockSetApiConfigurationField}
+					organizationAllowList={mockOrganizationAllowList}
+				/>,
+			)
+
+			const checkboxInput = screen.getByTestId("checkbox-input-settings:includemaxoutputtokens")
+			expect(checkboxInput).toBeChecked()
+		})
+
+		it("should show checkbox as unchecked when includeMaxTokens is false", () => {
+			const apiConfiguration: Partial<ProviderSettings> = {
+				includeMaxTokens: false,
+			}
+
+			render(
+				<OpenAICompatible
+					apiConfiguration={apiConfiguration as ProviderSettings}
+					setApiConfigurationField={mockSetApiConfigurationField}
+					organizationAllowList={mockOrganizationAllowList}
+				/>,
+			)
+
+			const checkboxInput = screen.getByTestId("checkbox-input-settings:includemaxoutputtokens")
+			expect(checkboxInput).not.toBeChecked()
+		})
+
+		it("should default to checked when includeMaxTokens is undefined", () => {
+			const apiConfiguration: Partial<ProviderSettings> = {
+				// includeMaxTokens is not defined
+			}
+
+			render(
+				<OpenAICompatible
+					apiConfiguration={apiConfiguration as ProviderSettings}
+					setApiConfigurationField={mockSetApiConfigurationField}
+					organizationAllowList={mockOrganizationAllowList}
+				/>,
+			)
+
+			const checkboxInput = screen.getByTestId("checkbox-input-settings:includemaxoutputtokens")
+			expect(checkboxInput).toBeChecked()
+		})
+
+		it("should default to checked when includeMaxTokens is null", () => {
+			const apiConfiguration: Partial<ProviderSettings> = {
+				includeMaxTokens: null as any,
+			}
+
+			render(
+				<OpenAICompatible
+					apiConfiguration={apiConfiguration as ProviderSettings}
+					setApiConfigurationField={mockSetApiConfigurationField}
+					organizationAllowList={mockOrganizationAllowList}
+				/>,
+			)
+
+			const checkboxInput = screen.getByTestId("checkbox-input-settings:includemaxoutputtokens")
+			expect(checkboxInput).toBeChecked()
+		})
+	})
+
+	describe("User Interaction", () => {
+		it("should call handleInputChange with correct parameters when checkbox is clicked from checked to unchecked", () => {
+			const apiConfiguration: Partial<ProviderSettings> = {
+				includeMaxTokens: true,
+			}
+
+			render(
+				<OpenAICompatible
+					apiConfiguration={apiConfiguration as ProviderSettings}
+					setApiConfigurationField={mockSetApiConfigurationField}
+					organizationAllowList={mockOrganizationAllowList}
+				/>,
+			)
+
+			const checkboxInput = screen.getByTestId("checkbox-input-settings:includemaxoutputtokens")
+			fireEvent.click(checkboxInput)
+
+			// Verify setApiConfigurationField was called with correct parameters
+			expect(mockSetApiConfigurationField).toHaveBeenCalledWith("includeMaxTokens", false)
+		})
+
+		it("should call handleInputChange with correct parameters when checkbox is clicked from unchecked to checked", () => {
+			const apiConfiguration: Partial<ProviderSettings> = {
+				includeMaxTokens: false,
+			}
+
+			render(
+				<OpenAICompatible
+					apiConfiguration={apiConfiguration as ProviderSettings}
+					setApiConfigurationField={mockSetApiConfigurationField}
+					organizationAllowList={mockOrganizationAllowList}
+				/>,
+			)
+
+			const checkboxInput = screen.getByTestId("checkbox-input-settings:includemaxoutputtokens")
+			fireEvent.click(checkboxInput)
+
+			// Verify setApiConfigurationField was called with correct parameters
+			expect(mockSetApiConfigurationField).toHaveBeenCalledWith("includeMaxTokens", true)
+		})
+	})
+
+	describe("Component Updates", () => {
+		it("should update checkbox state when apiConfiguration changes", () => {
+			const apiConfigurationInitial: Partial<ProviderSettings> = {
+				includeMaxTokens: true,
+			}
+
+			const { rerender } = render(
+				<OpenAICompatible
+					apiConfiguration={apiConfigurationInitial as ProviderSettings}
+					setApiConfigurationField={mockSetApiConfigurationField}
+					organizationAllowList={mockOrganizationAllowList}
+				/>,
+			)
+
+			// Verify initial state
+			let checkboxInput = screen.getByTestId("checkbox-input-settings:includemaxoutputtokens")
+			expect(checkboxInput).toBeChecked()
+
+			// Update with new configuration
+			const apiConfigurationUpdated: Partial<ProviderSettings> = {
+				includeMaxTokens: false,
+			}
+
+			rerender(
+				<OpenAICompatible
+					apiConfiguration={apiConfigurationUpdated as ProviderSettings}
+					setApiConfigurationField={mockSetApiConfigurationField}
+					organizationAllowList={mockOrganizationAllowList}
+				/>,
+			)
+
+			// Verify updated state
+			checkboxInput = screen.getByTestId("checkbox-input-settings:includemaxoutputtokens")
+			expect(checkboxInput).not.toBeChecked()
+		})
+	})
+
+	describe("UI Structure", () => {
+		it("should render the checkbox with description in correct structure", () => {
+			const apiConfiguration: Partial<ProviderSettings> = {
+				includeMaxTokens: true,
+			}
+
+			render(
+				<OpenAICompatible
+					apiConfiguration={apiConfiguration as ProviderSettings}
+					setApiConfigurationField={mockSetApiConfigurationField}
+					organizationAllowList={mockOrganizationAllowList}
+				/>,
+			)
+
+			// Check that the checkbox and description are in a div container
+			const checkbox = screen.getByTestId("checkbox-settings:includemaxoutputtokens")
+			const parentDiv = checkbox.closest("div")
+			expect(parentDiv).toBeInTheDocument()
+
+			// Check that the description has the correct styling classes
+			const description = screen.getByText("settings:includeMaxOutputTokensDescription")
+			expect(description).toHaveClass("text-sm", "text-vscode-descriptionForeground", "ml-6")
+		})
+	})
+})

+ 3 - 1
webview-ui/src/i18n/locales/ca/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "ARN personalitzat",
 		"useCustomArn": "Utilitza ARN personalitzat..."
-	}
+	},
+	"includeMaxOutputTokens": "Incloure tokens màxims de sortida",
+	"includeMaxOutputTokensDescription": "Enviar el paràmetre de tokens màxims de sortida a les sol·licituds API. Alguns proveïdors poden no admetre això."
 }

+ 3 - 1
webview-ui/src/i18n/locales/de/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "Benutzerdefinierte ARN",
 		"useCustomArn": "Benutzerdefinierte ARN verwenden..."
-	}
+	},
+	"includeMaxOutputTokens": "Maximale Ausgabe-Tokens einbeziehen",
+	"includeMaxOutputTokensDescription": "Sende den Parameter für maximale Ausgabe-Tokens in API-Anfragen. Einige Anbieter unterstützen dies möglicherweise nicht."
 }

+ 3 - 1
webview-ui/src/i18n/locales/en/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "Custom ARN",
 		"useCustomArn": "Use custom ARN..."
-	}
+	},
+	"includeMaxOutputTokens": "Include max output tokens",
+	"includeMaxOutputTokensDescription": "Send max output tokens parameter in API requests. Some providers may not support this."
 }

+ 3 - 1
webview-ui/src/i18n/locales/es/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "ARN personalizado",
 		"useCustomArn": "Usar ARN personalizado..."
-	}
+	},
+	"includeMaxOutputTokens": "Incluir tokens máximos de salida",
+	"includeMaxOutputTokensDescription": "Enviar parámetro de tokens máximos de salida en solicitudes API. Algunos proveedores pueden no soportar esto."
 }

+ 3 - 1
webview-ui/src/i18n/locales/fr/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "ARN personnalisé",
 		"useCustomArn": "Utiliser un ARN personnalisé..."
-	}
+	},
+	"includeMaxOutputTokens": "Inclure les tokens de sortie maximum",
+	"includeMaxOutputTokensDescription": "Envoyer le paramètre de tokens de sortie maximum dans les requêtes API. Certains fournisseurs peuvent ne pas supporter cela."
 }

+ 3 - 1
webview-ui/src/i18n/locales/hi/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "कस्टम ARN",
 		"useCustomArn": "कस्टम ARN का उपयोग करें..."
-	}
+	},
+	"includeMaxOutputTokens": "अधिकतम आउटपुट टोकन शामिल करें",
+	"includeMaxOutputTokensDescription": "API अनुरोधों में अधिकतम आउटपुट टोकन पैरामीटर भेजें। कुछ प्रदाता इसका समर्थन नहीं कर सकते हैं।"
 }

+ 3 - 1
webview-ui/src/i18n/locales/id/settings.json

@@ -642,5 +642,7 @@
 	"labels": {
 		"customArn": "ARN Kustom",
 		"useCustomArn": "Gunakan ARN kustom..."
-	}
+	},
+	"includeMaxOutputTokens": "Sertakan token output maksimum",
+	"includeMaxOutputTokensDescription": "Kirim parameter token output maksimum dalam permintaan API. Beberapa provider mungkin tidak mendukung ini."
 }

+ 3 - 1
webview-ui/src/i18n/locales/it/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "ARN personalizzato",
 		"useCustomArn": "Usa ARN personalizzato..."
-	}
+	},
+	"includeMaxOutputTokens": "Includi token di output massimi",
+	"includeMaxOutputTokensDescription": "Invia il parametro dei token di output massimi nelle richieste API. Alcuni provider potrebbero non supportarlo."
 }

+ 3 - 1
webview-ui/src/i18n/locales/ja/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "カスタム ARN",
 		"useCustomArn": "カスタム ARN を使用..."
-	}
+	},
+	"includeMaxOutputTokens": "最大出力トークンを含める",
+	"includeMaxOutputTokensDescription": "APIリクエストで最大出力トークンパラメータを送信します。一部のプロバイダーはこれをサポートしていない場合があります。"
 }

+ 3 - 1
webview-ui/src/i18n/locales/ko/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "사용자 지정 ARN",
 		"useCustomArn": "사용자 지정 ARN 사용..."
-	}
+	},
+	"includeMaxOutputTokens": "최대 출력 토큰 포함",
+	"includeMaxOutputTokensDescription": "API 요청에서 최대 출력 토큰 매개변수를 전송합니다. 일부 제공업체는 이를 지원하지 않을 수 있습니다."
 }

+ 3 - 1
webview-ui/src/i18n/locales/nl/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "Aangepaste ARN",
 		"useCustomArn": "Aangepaste ARN gebruiken..."
-	}
+	},
+	"includeMaxOutputTokens": "Maximale output tokens opnemen",
+	"includeMaxOutputTokensDescription": "Stuur maximale output tokens parameter in API-verzoeken. Sommige providers ondersteunen dit mogelijk niet."
 }

+ 3 - 1
webview-ui/src/i18n/locales/pl/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "Niestandardowy ARN",
 		"useCustomArn": "Użyj niestandardowego ARN..."
-	}
+	},
+	"includeMaxOutputTokens": "Uwzględnij maksymalne tokeny wyjściowe",
+	"includeMaxOutputTokensDescription": "Wyślij parametr maksymalnych tokenów wyjściowych w żądaniach API. Niektórzy dostawcy mogą tego nie obsługiwać."
 }

+ 3 - 1
webview-ui/src/i18n/locales/pt-BR/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "ARN personalizado",
 		"useCustomArn": "Usar ARN personalizado..."
-	}
+	},
+	"includeMaxOutputTokens": "Incluir tokens máximos de saída",
+	"includeMaxOutputTokensDescription": "Enviar parâmetro de tokens máximos de saída nas solicitações de API. Alguns provedores podem não suportar isso."
 }

+ 3 - 1
webview-ui/src/i18n/locales/ru/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "Пользовательский ARN",
 		"useCustomArn": "Использовать пользовательский ARN..."
-	}
+	},
+	"includeMaxOutputTokens": "Включить максимальные выходные токены",
+	"includeMaxOutputTokensDescription": "Отправлять параметр максимальных выходных токенов в API-запросах. Некоторые провайдеры могут не поддерживать это."
 }

+ 3 - 1
webview-ui/src/i18n/locales/tr/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "Özel ARN",
 		"useCustomArn": "Özel ARN kullan..."
-	}
+	},
+	"includeMaxOutputTokens": "Maksimum çıktı tokenlerini dahil et",
+	"includeMaxOutputTokensDescription": "API isteklerinde maksimum çıktı token parametresini gönder. Bazı sağlayıcılar bunu desteklemeyebilir."
 }

+ 3 - 1
webview-ui/src/i18n/locales/vi/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "ARN tùy chỉnh",
 		"useCustomArn": "Sử dụng ARN tùy chỉnh..."
-	}
+	},
+	"includeMaxOutputTokens": "Bao gồm token đầu ra tối đa",
+	"includeMaxOutputTokensDescription": "Gửi tham số token đầu ra tối đa trong các yêu cầu API. Một số nhà cung cấp có thể không hỗ trợ điều này."
 }

+ 3 - 1
webview-ui/src/i18n/locales/zh-CN/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "自定义 ARN",
 		"useCustomArn": "使用自定义 ARN..."
-	}
+	},
+	"includeMaxOutputTokens": "包含最大输出 Token 数",
+	"includeMaxOutputTokensDescription": "在 API 请求中发送最大输出 Token 参数。某些提供商可能不支持此功能。"
 }

+ 3 - 1
webview-ui/src/i18n/locales/zh-TW/settings.json

@@ -613,5 +613,7 @@
 	"labels": {
 		"customArn": "自訂 ARN",
 		"useCustomArn": "使用自訂 ARN..."
-	}
+	},
+	"includeMaxOutputTokens": "包含最大輸出 Token 數",
+	"includeMaxOutputTokensDescription": "在 API 請求中傳送最大輸出 Token 參數。某些提供商可能不支援此功能。"
 }