
Show the reserved output tokens in the context window bar
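
For example: with a 4,000-token window, 1,000 tokens used, and no explicit max output setting, 20% of the window (800 tokens) is reserved for the response, leaving 4,000 - 1,000 - 800 = 2,200 tokens available, so the bar renders as 25% used / 20% reserved / 55% free. When the model or API profile supplies a max output token count, that value is reserved instead. (Illustrative numbers, mirroring the unit tests added below.)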

Matt Rubens 10 months ago
parent
commit
671064a45f

+ 15 - 0
webview-ui/src/__mocks__/components/chat/TaskHeader.tsx

@@ -0,0 +1,15 @@
+import React from "react"
+// Import the actual utility instead of reimplementing it
+import { getMaxTokensForModel } from "@/utils/model-utils"
+
+// Re-export the utility function to maintain the same interface
+export { getMaxTokensForModel }
+
+/**
+ * Mock version of the TaskHeader component
+ */
+const TaskHeader: React.FC<any> = () => {
+	return <div data-testid="mocked-task-header">Mocked TaskHeader</div>
+}
+
+export default TaskHeader

+ 121 - 0
webview-ui/src/__tests__/ContextWindowProgress.test.tsx

@@ -0,0 +1,121 @@
+import React from "react"
+import { render, screen } from "@testing-library/react"
+import "@testing-library/jest-dom"
+import TaskHeader from "../components/chat/TaskHeader"
+
+// Mock formatLargeNumber function
+jest.mock("@/utils/format", () => ({
+	formatLargeNumber: jest.fn((num: number) => num.toString()),
+}))
+
+// Mock ExtensionStateContext since we use useExtensionState
+jest.mock("../context/ExtensionStateContext", () => ({
+	useExtensionState: jest.fn(() => ({
+		apiConfiguration: {
+			apiProvider: "openai",
+			// Add other needed properties
+		},
+		currentTaskItem: {
+			id: "test-id",
+			number: 1,
+			size: 1024,
+		},
+	})),
+}))
+
+// Mock highlighting function to avoid JSX parsing issues in tests
+jest.mock("../components/chat/TaskHeader", () => {
+	const originalModule = jest.requireActual("../components/chat/TaskHeader")
+	return {
+		__esModule: true,
+		...originalModule,
+		highlightMentions: jest.fn((text) => text),
+	}
+})
+
+describe("ContextWindowProgress", () => {
+	// Helper function to render just the ContextWindowProgress part through TaskHeader
+	const renderComponent = (props: Record<string, any>) => {
+		// Create a simple mock of the task that avoids importing the actual types
+		const defaultTask = {
+			ts: Date.now(),
+			type: "say" as const,
+			say: "task" as const,
+			text: "Test task",
+		}
+
+		const defaultProps = {
+			task: defaultTask,
+			tokensIn: 100,
+			tokensOut: 50,
+			doesModelSupportPromptCache: true,
+			totalCost: 0.001,
+			contextTokens: 1000,
+			onClose: jest.fn(),
+		}
+
+		return render(<TaskHeader {...defaultProps} {...props} />)
+	}
+
+	beforeEach(() => {
+		jest.clearAllMocks()
+	})
+
+	test("renders correctly with valid inputs", () => {
+		renderComponent({
+			contextTokens: 1000,
+			contextWindow: 4000,
+		})
+
+		// Check for basic elements
+		expect(screen.getByText("Context Window:")).toBeInTheDocument()
+		expect(screen.getByText("1000")).toBeInTheDocument() // contextTokens
+		// The actual context window might be different than what we pass in
+		// due to the mock returning a default value from the API config
+		expect(screen.getByText(/(4000|128000)/)).toBeInTheDocument() // contextWindow
+	})
+
+	test("handles zero context window gracefully", () => {
+		renderComponent({
+			contextTokens: 0,
+			contextWindow: 0,
+		})
+
+		// In the current implementation, the component is still displayed with zero values
+		// rather than being hidden completely
+		expect(screen.getByText("Context Window:")).toBeInTheDocument()
+		expect(screen.getByText("0")).toBeInTheDocument()
+	})
+
+	test("handles edge cases with negative values", () => {
+		renderComponent({
+			contextTokens: -100, // Should be treated as 0
+			contextWindow: 4000,
+		})
+
+		// Should show 0 instead of -100
+		expect(screen.getByText("0")).toBeInTheDocument()
+		// The actual context window might be different than what we pass in
+		expect(screen.getByText(/(4000|128000)/)).toBeInTheDocument()
+	})
+
+	test("calculates percentages correctly", () => {
+		const contextTokens = 1000
+		const contextWindow = 4000
+
+		renderComponent({
+			contextTokens,
+			contextWindow,
+		})
+
+		// Instead of checking exact styles (unreliable in JSDOM), verify the title
+		// attribute, which reports how many tokens are used out of the context window
+		const tokenUsageDiv = screen.getByTitle(/Tokens used:/)
+		expect(tokenUsageDiv).toBeInTheDocument()
+
+		// JSDOM doesn't compute layout styles reliably, so just confirm that the
+		// expected elements render with the values passed in
+		expect(screen.getByText("Context Window:")).toBeInTheDocument()
+		expect(screen.getByText("1000")).toBeInTheDocument()
+	})
+})

+ 121 - 0
webview-ui/src/__tests__/ContextWindowProgressLogic.test.ts

@@ -0,0 +1,121 @@
+// This test directly tests the logic of the ContextWindowProgress component calculations
+// without needing to render the full component
+import { describe, test, expect } from "@jest/globals"
+import { calculateTokenDistribution } from "../utils/model-utils"
+
+export {} // This makes the file a proper TypeScript module
+
+describe("ContextWindowProgress Logic", () => {
+	// Using the shared utility function from model-utils.ts instead of reimplementing it
+
+	test("calculates correct token distribution with default 20% reservation", () => {
+		const contextWindow = 4000
+		const contextTokens = 1000
+
+		const result = calculateTokenDistribution(contextWindow, contextTokens)
+
+		// Expected calculations:
+		// reservedForOutput = 0.2 * 4000 = 800
+		// availableSize = 4000 - 1000 - 800 = 2200
+		// total = 1000 + 800 + 2200 = 4000
+		expect(result.reservedForOutput).toBe(800)
+		expect(result.availableSize).toBe(2200)
+
+		// Check percentages
+		expect(result.currentPercent).toBeCloseTo(25) // 1000/4000 * 100 = 25%
+		expect(result.reservedPercent).toBeCloseTo(20) // 800/4000 * 100 = 20%
+		expect(result.availablePercent).toBeCloseTo(55) // 2200/4000 * 100 = 55%
+
+		// Verify percentages sum to 100%
+		expect(result.currentPercent + result.reservedPercent + result.availablePercent).toBeCloseTo(100)
+	})
+
+	test("uses provided maxTokens when available instead of default calculation", () => {
+		const contextWindow = 4000
+		const contextTokens = 1000
+
+		// First calculate with default 20% reservation (no maxTokens provided)
+		const defaultResult = calculateTokenDistribution(contextWindow, contextTokens)
+
+		// Then calculate with custom maxTokens value
+		const customMaxTokens = 1500 // Custom maxTokens instead of default 20%
+		const customResult = calculateTokenDistribution(contextWindow, contextTokens, customMaxTokens)
+
+		// Verify the maxTokens effect: a custom value is used directly instead of the 20% default
+		const defaultReserved = Math.ceil(contextWindow * 0.2) // 800 tokens (20% of 4000)
+		expect(defaultResult.reservedForOutput).toBe(defaultReserved)
+		expect(customResult.reservedForOutput).toBe(customMaxTokens) // Should use exact provided value
+
+		// Explicitly confirm the tooltip content would be different
+		const defaultTooltip = `Reserved for model response: ${defaultReserved} tokens`
+		const customTooltip = `Reserved for model response: ${customMaxTokens} tokens`
+		expect(defaultTooltip).not.toBe(customTooltip)
+
+		// Verify the effect on available space
+		expect(customResult.availableSize).toBe(4000 - 1000 - 1500) // 1500 tokens available
+		expect(defaultResult.availableSize).toBe(4000 - 1000 - 800) // 2200 tokens available
+
+		// Verify the effect on percentages
+		// With custom maxTokens (1500), the reserved percentage should be higher
+		expect(defaultResult.reservedPercent).toBeCloseTo(20) // 800/4000 * 100 = 20%
+		expect(customResult.reservedPercent).toBeCloseTo(37.5) // 1500/4000 * 100 = 37.5%
+
+		// Verify percentages still sum to 100%
+		expect(customResult.currentPercent + customResult.reservedPercent + customResult.availablePercent).toBeCloseTo(
+			100,
+		)
+	})
+
+	test("handles negative input values", () => {
+		const contextWindow = 4000
+		const contextTokens = -500 // Negative tokens should be handled gracefully
+
+		const result = calculateTokenDistribution(contextWindow, contextTokens)
+
+		// Expected calculations:
+		// safeContextTokens = Math.max(0, -500) = 0
+		// reservedForOutput = 0.2 * 4000 = 800
+		// availableSize = 4000 - 0 - 800 = 3200
+		// total = 0 + 800 + 3200 = 4000
+		expect(result.currentPercent).toBeCloseTo(0) // 0/4000 * 100 = 0%
+		expect(result.reservedPercent).toBeCloseTo(20) // 800/4000 * 100 = 20%
+		expect(result.availablePercent).toBeCloseTo(80) // 3200/4000 * 100 = 80%
+	})
+
+	test("handles zero context window gracefully", () => {
+		const contextWindow = 0
+		const contextTokens = 1000
+
+		const result = calculateTokenDistribution(contextWindow, contextTokens)
+
+		// With zero context window, everything should be zero
+		expect(result.reservedForOutput).toBe(0)
+		expect(result.availableSize).toBe(0)
+
+		// The percentages still sum to 100%: with a zero window the denominator falls
+		// back to the used tokens, so the current-usage segment alone accounts for 100%
+		const totalPercentage = result.currentPercent + result.reservedPercent + result.availablePercent
+		expect(totalPercentage).toBeCloseTo(100)
+	})
+
+	test("handles case where tokens exceed context window", () => {
+		const contextWindow = 4000
+		const contextTokens = 5000 // More tokens than the window size
+
+		const result = calculateTokenDistribution(contextWindow, contextTokens)
+
+		// Expected calculations:
+		// reservedForOutput = 0.2 * 4000 = 800
+		// availableSize = Math.max(0, 4000 - 5000 - 800) = 0
+		expect(result.reservedForOutput).toBe(800)
+		expect(result.availableSize).toBe(0)
+
+		// Percentages should be calculated based on total (5000 + 800 + 0 = 5800)
+		expect(result.currentPercent).toBeCloseTo((5000 / 5800) * 100)
+		expect(result.reservedPercent).toBeCloseTo((800 / 5800) * 100)
+		expect(result.availablePercent).toBeCloseTo(0)
+
+		// Verify percentages sum to 100%
+		expect(result.currentPercent + result.reservedPercent + result.availablePercent).toBeCloseTo(100)
+	})
+})

+ 81 - 0
webview-ui/src/__tests__/getMaxTokensForModel.test.tsx

@@ -0,0 +1,81 @@
+import { getMaxTokensForModel } from "@/utils/model-utils"
+
+describe("getMaxTokensForModel utility from model-utils", () => {
+	test("should return maxTokens from modelInfo when thinking is false", () => {
+		const modelInfo = {
+			maxTokens: 2048,
+			thinking: false,
+		}
+
+		const apiConfig = {
+			modelMaxTokens: 4096,
+		}
+
+		const result = getMaxTokensForModel(modelInfo, apiConfig)
+		expect(result).toBe(2048)
+	})
+
+	test("should return modelMaxTokens from apiConfig when thinking is true", () => {
+		const modelInfo = {
+			maxTokens: 2048,
+			thinking: true,
+		}
+
+		const apiConfig = {
+			modelMaxTokens: 4096,
+		}
+
+		const result = getMaxTokensForModel(modelInfo, apiConfig)
+		expect(result).toBe(4096)
+	})
+
+	test("should fallback to modelInfo.maxTokens when thinking is true but apiConfig.modelMaxTokens is not defined", () => {
+		const modelInfo = {
+			maxTokens: 2048,
+			thinking: true,
+		}
+
+		const apiConfig = {}
+
+		const result = getMaxTokensForModel(modelInfo, apiConfig)
+		expect(result).toBe(2048)
+	})
+
+	test("should handle undefined inputs gracefully", () => {
+		// Both undefined
+		expect(getMaxTokensForModel(undefined, undefined)).toBeUndefined()
+
+		// Only modelInfo defined
+		const modelInfoOnly = {
+			maxTokens: 2048,
+			thinking: false,
+		}
+		expect(getMaxTokensForModel(modelInfoOnly, undefined)).toBe(2048)
+
+		// Only apiConfig defined
+		const apiConfigOnly = {
+			modelMaxTokens: 4096,
+		}
+		expect(getMaxTokensForModel(undefined, apiConfigOnly)).toBeUndefined()
+	})
+
+	test("should handle missing properties gracefully", () => {
+		// modelInfo without maxTokens
+		const modelInfoWithoutMaxTokens = {
+			thinking: true,
+		}
+
+		const apiConfig = {
+			modelMaxTokens: 4096,
+		}
+
+		expect(getMaxTokensForModel(modelInfoWithoutMaxTokens, apiConfig)).toBe(4096)
+
+		// modelInfo without thinking flag
+		const modelInfoWithoutThinking = {
+			maxTokens: 2048,
+		}
+
+		expect(getMaxTokensForModel(modelInfoWithoutThinking, apiConfig)).toBe(2048)
+	})
+})

+ 107 - 18
webview-ui/src/components/chat/TaskHeader.tsx

@@ -5,6 +5,7 @@ import prettyBytes from "pretty-bytes"
 
 import { vscode } from "@/utils/vscode"
 import { formatLargeNumber } from "@/utils/format"
+import { calculateTokenDistribution, getMaxTokensForModel } from "@/utils/model-utils"
 import { Button } from "@/components/ui"
 
 import { ClineMessage } from "../../../../src/shared/ExtensionMessage"
@@ -300,11 +301,13 @@ const TaskHeader: React.FC<TaskHeaderProps> = ({
 								{!isCostAvailable && <TaskActions item={currentTaskItem} />}
 							</div>
 
-							{isTaskExpanded && contextWindow && (
-								<div className={`flex ${windowWidth < 270 ? "flex-col" : "flex-row"} gap-1 h-[20px]`}>
+							{isTaskExpanded && contextWindow > 0 && (
+								<div
+									className={`w-full flex ${windowWidth < 400 ? "flex-col" : "flex-row"} gap-1 h-auto`}>
 									<ContextWindowProgress
 										contextWindow={contextWindow}
 										contextTokens={contextTokens || 0}
+										maxTokens={getMaxTokensForModel(selectedModelInfo, apiConfiguration)}
 									/>
 								</div>
 							)}
@@ -411,27 +414,113 @@ const TaskActions = ({ item }: { item: HistoryItem | undefined }) => {
 	)
 }
 
-const ContextWindowProgress = ({ contextWindow, contextTokens }: { contextWindow: number; contextTokens: number }) => (
-	<>
-		<div className="flex items-center gap-1 flex-shrink-0">
-			<span className="font-bold">Context Window:</span>
-		</div>
-		<div className="flex items-center gap-2 flex-1 whitespace-nowrap px-2">
-			<div>{formatLargeNumber(contextTokens)}</div>
-			<div className="flex items-center gap-[3px] flex-1">
-				<div className="flex-1 h-1 rounded-[2px] overflow-hidden bg-[color-mix(in_srgb,var(--vscode-badge-foreground)_20%,transparent)]">
+interface ContextWindowProgressProps {
+	contextWindow: number
+	contextTokens: number
+	maxTokens?: number
+}
+
+const ContextWindowProgress = ({ contextWindow, contextTokens, maxTokens }: ContextWindowProgressProps) => {
+	// Use the shared utility function to calculate all token distribution values
+	const tokenDistribution = useMemo(
+		() => calculateTokenDistribution(contextWindow, contextTokens, maxTokens),
+		[contextWindow, contextTokens, maxTokens],
+	)
+
+	// Destructure the values we need
+	const { currentPercent, reservedPercent, availableSize, reservedForOutput, availablePercent } = tokenDistribution
+
+	// For display purposes
+	const safeContextWindow = Math.max(0, contextWindow)
+	const safeContextTokens = Math.max(0, contextTokens)
+
+	return (
+		<>
+			<div className="flex items-center gap-1 flex-shrink-0">
+				<span className="font-bold">Context Window:</span>
+			</div>
+			<div className="flex items-center gap-2 flex-1 whitespace-nowrap px-2">
+				<div>{formatLargeNumber(safeContextTokens)}</div>
+				<div className="flex-1 relative">
+					{/* Invisible overlay for hover area */}
 					<div
-						className="h-full rounded-[2px] bg-[var(--vscode-badge-foreground)]"
+						className="absolute w-full cursor-pointer"
 						style={{
-							width: `${(contextTokens / contextWindow) * 100}%`,
-							transition: "width 0.3s ease-out",
+							height: "16px",
+							top: "-7px",
+							zIndex: 5,
 						}}
+						title={`Available space: ${formatLargeNumber(availableSize)} tokens`}
 					/>
+
+					{/* Main progress bar container */}
+					<div className="flex items-center h-1 rounded-[2px] overflow-hidden w-full bg-[color-mix(in_srgb,var(--vscode-badge-foreground)_20%,transparent)]">
+						{/* Current tokens container */}
+						<div className="relative h-full" style={{ width: `${currentPercent}%` }}>
+							{/* Invisible overlay for current tokens section */}
+							<div
+								className="absolute cursor-pointer"
+								style={{
+									height: "16px",
+									top: "-7px",
+									width: "100%",
+									zIndex: 6,
+								}}
+								title={`Tokens used: ${formatLargeNumber(safeContextTokens)} of ${formatLargeNumber(safeContextWindow)}`}
+							/>
+							{/* Current tokens used - darkest */}
+							<div
+								className="h-full w-full bg-[var(--vscode-badge-foreground)]"
+								style={{
+									transition: "width 0.3s ease-out",
+								}}
+							/>
+						</div>
+
+						{/* Container for reserved tokens */}
+						<div className="relative h-full" style={{ width: `${reservedPercent}%` }}>
+							{/* Invisible overlay for reserved section */}
+							<div
+								className="absolute cursor-pointer"
+								style={{
+									height: "16px",
+									top: "-7px",
+									width: "100%",
+									zIndex: 6,
+								}}
+								title={`Reserved for model response: ${formatLargeNumber(reservedForOutput)} tokens`}
+							/>
+							{/* Reserved for output section - medium gray */}
+							<div
+								className="h-full w-full bg-[color-mix(in_srgb,var(--vscode-badge-foreground)_30%,transparent)]"
+								style={{
+									transition: "width 0.3s ease-out",
+								}}
+							/>
+						</div>
+
+						{/* Empty section (if any) */}
+						{availablePercent > 0 && (
+							<div className="relative h-full" style={{ width: `${availablePercent}%` }}>
+								{/* Invisible overlay for available space */}
+								<div
+									className="absolute cursor-pointer"
+									style={{
+										height: "16px",
+										top: "-7px",
+										width: "100%",
+										zIndex: 6,
+									}}
+									title={`Available space: ${formatLargeNumber(availableSize)} tokens`}
+								/>
+							</div>
+						)}
+					</div>
 				</div>
+				<div>{formatLargeNumber(safeContextWindow)}</div>
 			</div>
-			<div>{formatLargeNumber(contextWindow)}</div>
-		</div>
-	</>
-)
+		</>
+	)
+}
 
 export default memo(TaskHeader)

+ 125 - 0
webview-ui/src/utils/model-utils.ts

@@ -0,0 +1,125 @@
+/**
+ * Utility functions for working with language models and tokens
+ */
+
+/**
+ * Model information interface with properties used in token calculations
+ */
+export interface ModelInfo {
+	/**
+	 * Maximum number of tokens the model can generate in a single response
+	 */
+	maxTokens?: number
+
+	/**
+	 * Whether the model supports thinking/reasoning capabilities
+	 */
+	thinking?: boolean
+}
+
+/**
+ * API configuration interface with token-related settings
+ */
+export interface ApiConfig {
+	/**
+	 * Maximum tokens to use for model responses
+	 */
+	modelMaxTokens?: number
+}
+/**
+ * Result of token distribution calculation
+ */
+export interface TokenDistributionResult {
+	/**
+	 * Percentage of context window used by current tokens (0-100)
+	 */
+	currentPercent: number
+
+	/**
+	 * Percentage of context window reserved for model output (0-100)
+	 */
+	reservedPercent: number
+
+	/**
+	 * Percentage of context window still available (0-100)
+	 */
+	availablePercent: number
+
+	/**
+	 * Number of tokens reserved for model output
+	 */
+	reservedForOutput: number
+
+	/**
+	 * Number of tokens still available in the context window
+	 */
+	availableSize: number
+}
+
+/**
+ * Determines the maximum tokens based on model configuration
+ * If the model supports thinking, prioritize the API configuration's modelMaxTokens,
+ * falling back to the model's own maxTokens. Otherwise, just use the model's maxTokens.
+ *
+ * @param modelInfo The model information object with properties like maxTokens and thinking
+ * @param apiConfig The API configuration object with properties like modelMaxTokens
+ * @returns The maximum tokens value or undefined if no valid value is available
+ */
+export const getMaxTokensForModel = (
+	modelInfo: ModelInfo | undefined,
+	apiConfig: ApiConfig | undefined,
+): number | undefined => {
+	if (modelInfo?.thinking) {
+		return apiConfig?.modelMaxTokens || modelInfo?.maxTokens
+	}
+	return modelInfo?.maxTokens
+}
+
+/**
+ * Calculates distribution of tokens within the context window
+ * This is used for visualizing the token distribution in the UI
+ *
+ * @param contextWindow The total size of the context window
+ * @param contextTokens The number of tokens currently used
+ * @param maxTokens Optional override for tokens reserved for model output (otherwise uses 20% of window)
+ * @returns Distribution of tokens with percentages and raw numbers
+ */
+export const calculateTokenDistribution = (
+	contextWindow: number,
+	contextTokens: number,
+	maxTokens?: number,
+): TokenDistributionResult => {
+	// Clamp negative inputs to zero so invalid values can't break the math
+	const safeContextWindow = Math.max(0, contextWindow)
+	const safeContextTokens = Math.max(0, contextTokens)
+
+	// Determine how many tokens to reserve for the model's output:
+	// use maxTokens when provided and positive, otherwise default to 20% of the window
+	const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : Math.ceil(safeContextWindow * 0.2)
+
+	// Space left after subtracting used tokens and the output reservation (never negative)
+	const availableSize = Math.max(0, safeContextWindow - safeContextTokens - reservedForOutput)
+
+	// Calculate percentages against the sum of the three segments (not the raw window),
+	// so they always total 100% even when contextTokens overflow the context window
+	const total = safeContextTokens + reservedForOutput + availableSize
+
+	// Safeguard against division by zero
+	if (total <= 0) {
+		return {
+			currentPercent: 0,
+			reservedPercent: 0,
+			availablePercent: 0,
+			reservedForOutput,
+			availableSize,
+		}
+	}
+
+	return {
+		currentPercent: (safeContextTokens / total) * 100,
+		reservedPercent: (reservedForOutput / total) * 100,
+		availablePercent: (availableSize / total) * 100,
+		reservedForOutput,
+		availableSize,
+	}
+}
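
A minimal usage sketch of the two new utilities together (illustrative values only; the "@/utils/model-utils" alias matches the imports above):

import { calculateTokenDistribution, getMaxTokensForModel } from "@/utils/model-utils"

// A thinking-capable model where the user raised the response budget in the API profile
const modelInfo = { maxTokens: 8192, thinking: true }
const apiConfig = { modelMaxTokens: 16384 }

// Thinking models prefer the profile's modelMaxTokens, so this resolves to 16384
const maxTokens = getMaxTokensForModel(modelInfo, apiConfig)

// Split a 200,000-token window with 50,000 tokens already used
const distribution = calculateTokenDistribution(200_000, 50_000, maxTokens)

// currentPercent = 25, reservedPercent ≈ 8.2, availablePercent ≈ 66.8:
// the widths of the three segments rendered by ContextWindowProgress
console.log(distribution.currentPercent, distribution.reservedPercent, distribution.availablePercent)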