Selaa lähdekoodia

fix: update Cerebras maxTokens to 16384 (#10387)

Seb Duerr 1 päivä sitten
vanhempi
sitoutus
19b7dac719
1 muutettu tiedosto, jossa 5 lisäystä ja 5 poistoa
  1. 5 5
      packages/types/src/providers/cerebras.ts

+ 5 - 5
packages/types/src/providers/cerebras.ts

@@ -7,7 +7,7 @@ export const cerebrasDefaultModelId: CerebrasModelId = "gpt-oss-120b"
 
 export const cerebrasModels = {
 	"zai-glm-4.6": {
-		maxTokens: 8192, // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront)
+		maxTokens: 16384, // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront)
 		contextWindow: 131072,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -18,7 +18,7 @@ export const cerebrasModels = {
 		description: "Highly intelligent general purpose model with up to 1,000 tokens/s",
 	},
 	"qwen-3-235b-a22b-instruct-2507": {
-		maxTokens: 8192, // Conservative default to avoid premature rate limiting
+		maxTokens: 16384, // Conservative default to avoid premature rate limiting
 		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -29,7 +29,7 @@ export const cerebrasModels = {
 		description: "Intelligent model with ~1400 tokens/s",
 	},
 	"llama-3.3-70b": {
-		maxTokens: 8192, // Conservative default to avoid premature rate limiting
+		maxTokens: 16384, // Conservative default to avoid premature rate limiting
 		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -40,7 +40,7 @@ export const cerebrasModels = {
 		description: "Powerful model with ~2600 tokens/s",
 	},
 	"qwen-3-32b": {
-		maxTokens: 8192, // Conservative default to avoid premature rate limiting
+		maxTokens: 16384, // Conservative default to avoid premature rate limiting
 		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -51,7 +51,7 @@ export const cerebrasModels = {
 		description: "SOTA coding performance with ~2500 tokens/s",
 	},
 	"gpt-oss-120b": {
-		maxTokens: 8192, // Conservative default to avoid premature rate limiting
+		maxTokens: 16384, // Conservative default to avoid premature rate limiting
 		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,