Browse Source

update xai models and pricing (#4315)

* update xai models and pricing

* cache accounting for xAI

* change log
Edwin P Jacques 8 months ago
parent
commit
8b6f5f8baa

+ 5 - 0
.changeset/cruel-roses-stick.md

@@ -0,0 +1,5 @@
+---
+"@roo-code/types": patch
+---
+
+Update the supported xAI models and their metadata (pricing, context windows, prompt caching), and ensure accurate cost accounting for cached prompt tokens.

+ 26 - 110
packages/types/src/providers/xai.ts

@@ -6,152 +6,68 @@ export type XAIModelId = keyof typeof xaiModels
 export const xaiDefaultModelId: XAIModelId = "grok-3"
 
 export const xaiModels = {
-	"grok-3-beta": {
-		maxTokens: 8192,
-		contextWindow: 131072,
-		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 3.0,
-		outputPrice: 15.0,
-		description: "xAI's Grok-3 beta model with 131K context window",
-	},
-	"grok-3-fast-beta": {
-		maxTokens: 8192,
-		contextWindow: 131072,
-		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 5.0,
-		outputPrice: 25.0,
-		description: "xAI's Grok-3 fast beta model with 131K context window",
-	},
-	"grok-3-mini-beta": {
+	"grok-2-1212": {
 		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
 		supportsPromptCache: false,
-		inputPrice: 0.3,
-		outputPrice: 0.5,
-		description: "xAI's Grok-3 mini beta model with 131K context window",
-		supportsReasoningEffort: true,
+		inputPrice: 2.0,
+		outputPrice: 10.0,
+		description: "xAI's Grok-2 model (version 1212) with 128K context window",
 	},
-	"grok-3-mini-fast-beta": {
+	"grok-2-vision-1212": {
 		maxTokens: 8192,
-		contextWindow: 131072,
-		supportsImages: false,
+		contextWindow: 32768,
+		supportsImages: true,
 		supportsPromptCache: false,
-		inputPrice: 0.6,
-		outputPrice: 4.0,
-		description: "xAI's Grok-3 mini fast beta model with 131K context window",
-		supportsReasoningEffort: true,
+		inputPrice: 2.0,
+		outputPrice: 10.0,
+		description: "xAI's Grok-2 Vision model (version 1212) with image support and 32K context window",
 	},
 	"grok-3": {
 		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 3.0,
 		outputPrice: 15.0,
-		description: "xAI's Grok-3 model with 131K context window",
+		cacheWritesPrice: 0.75,
+		cacheReadsPrice: 0.75,
+		description: "xAI's Grok-3 model with 128K context window",
 	},
 	"grok-3-fast": {
 		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 5.0,
 		outputPrice: 25.0,
-		description: "xAI's Grok-3 fast model with 131K context window",
+		cacheWritesPrice: 1.25,
+		cacheReadsPrice: 1.25,
+		description: "xAI's Grok-3 fast model with 128K context window",
 	},
 	"grok-3-mini": {
 		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.3,
 		outputPrice: 0.5,
-		description: "xAI's Grok-3 mini model with 131K context window",
+		cacheWritesPrice: 0.07,
+		cacheReadsPrice: 0.07,
+		description: "xAI's Grok-3 mini model with 128K context window",
 		supportsReasoningEffort: true,
 	},
 	"grok-3-mini-fast": {
 		maxTokens: 8192,
 		contextWindow: 131072,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		inputPrice: 0.6,
 		outputPrice: 4.0,
-		description: "xAI's Grok-3 mini fast model with 131K context window",
+		cacheWritesPrice: 0.15,
+		cacheReadsPrice: 0.15,
+		description: "xAI's Grok-3 mini fast model with 128K context window",
 		supportsReasoningEffort: true,
 	},
-	"grok-2-latest": {
-		maxTokens: 8192,
-		contextWindow: 131072,
-		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 2.0,
-		outputPrice: 10.0,
-		description: "xAI's Grok-2 model - latest version with 131K context window",
-	},
-	"grok-2": {
-		maxTokens: 8192,
-		contextWindow: 131072,
-		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 2.0,
-		outputPrice: 10.0,
-		description: "xAI's Grok-2 model with 131K context window",
-	},
-	"grok-2-1212": {
-		maxTokens: 8192,
-		contextWindow: 131072,
-		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 2.0,
-		outputPrice: 10.0,
-		description: "xAI's Grok-2 model (version 1212) with 131K context window",
-	},
-	"grok-2-vision-latest": {
-		maxTokens: 8192,
-		contextWindow: 32768,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 2.0,
-		outputPrice: 10.0,
-		description: "xAI's Grok-2 Vision model - latest version with image support and 32K context window",
-	},
-	"grok-2-vision": {
-		maxTokens: 8192,
-		contextWindow: 32768,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 2.0,
-		outputPrice: 10.0,
-		description: "xAI's Grok-2 Vision model with image support and 32K context window",
-	},
-	"grok-2-vision-1212": {
-		maxTokens: 8192,
-		contextWindow: 32768,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 2.0,
-		outputPrice: 10.0,
-		description: "xAI's Grok-2 Vision model (version 1212) with image support and 32K context window",
-	},
-	"grok-vision-beta": {
-		maxTokens: 8192,
-		contextWindow: 8192,
-		supportsImages: true,
-		supportsPromptCache: false,
-		inputPrice: 5.0,
-		outputPrice: 15.0,
-		description: "xAI's Grok Vision Beta model with image support and 8K context window",
-	},
-	"grok-beta": {
-		maxTokens: 8192,
-		contextWindow: 131072,
-		supportsImages: false,
-		supportsPromptCache: false,
-		inputPrice: 5.0,
-		outputPrice: 15.0,
-		description: "xAI's Grok Beta model (legacy) with 131K context window",
-	},
 } as const satisfies Record<string, ModelInfo>

+ 4 - 4
src/api/providers/__tests__/xai.test.ts

@@ -62,7 +62,7 @@ describe("XAIHandler", () => {
 	})
 
 	test("should return specified model when valid model is provided", () => {
-		const testModelId = "grok-2-latest"
+		const testModelId = "grok-3"
 		const handlerWithModel = new XAIHandler({ apiModelId: testModelId })
 		const model = handlerWithModel.getModel()
 
@@ -72,7 +72,7 @@ describe("XAIHandler", () => {
 
 	test("should include reasoning_effort parameter for mini models", async () => {
 		const miniModelHandler = new XAIHandler({
-			apiModelId: "grok-3-mini-beta",
+			apiModelId: "grok-3-mini",
 			reasoningEffort: "high",
 		})
 
@@ -101,7 +101,7 @@ describe("XAIHandler", () => {
 
 	test("should not include reasoning_effort parameter for non-mini models", async () => {
 		const regularModelHandler = new XAIHandler({
-			apiModelId: "grok-2-latest",
+			apiModelId: "grok-3",
 			reasoningEffort: "high",
 		})
 
@@ -255,7 +255,7 @@ describe("XAIHandler", () => {
 
 	test("createMessage should pass correct parameters to OpenAI client", async () => {
 		// Setup a handler with specific model
-		const modelId = "grok-2-latest"
+		const modelId = "grok-3"
 		const modelInfo = xaiModels[modelId]
 		const handlerWithModel = new XAIHandler({ apiModelId: modelId })
 

+ 11 - 7
src/api/providers/xai.ts

@@ -76,17 +76,21 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
 			}
 
 			if (chunk.usage) {
+				// Extract detailed token information if available
+				// First check for prompt_tokens_details structure (real API response)
+				const promptDetails = "prompt_tokens_details" in chunk.usage ? chunk.usage.prompt_tokens_details : null;
+				const cachedTokens = promptDetails && "cached_tokens" in promptDetails ? promptDetails.cached_tokens : 0;
+
+				// Fall back to direct fields in usage (used in test mocks)
+				const readTokens = cachedTokens || ("cache_read_input_tokens" in chunk.usage ? (chunk.usage as any).cache_read_input_tokens : 0);
+				const writeTokens = "cache_creation_input_tokens" in chunk.usage ? (chunk.usage as any).cache_creation_input_tokens : 0;
+
 				yield {
 					type: "usage",
 					inputTokens: chunk.usage.prompt_tokens || 0,
 					outputTokens: chunk.usage.completion_tokens || 0,
-					// X.AI might include these fields in the future, handle them if present.
-					cacheReadTokens:
-						"cache_read_input_tokens" in chunk.usage ? (chunk.usage as any).cache_read_input_tokens : 0,
-					cacheWriteTokens:
-						"cache_creation_input_tokens" in chunk.usage
-							? (chunk.usage as any).cache_creation_input_tokens
-							: 0,
+					cacheReadTokens: readTokens,
+					cacheWriteTokens: writeTokens,
 				}
 			}
 		}