|
|
@@ -7,7 +7,7 @@ export const cerebrasDefaultModelId: CerebrasModelId = "gpt-oss-120b"
|
|
|
|
|
|
export const cerebrasModels = {
|
|
|
"zai-glm-4.6": {
|
|
|
- maxTokens: 8192, // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront)
|
|
|
+ maxTokens: 16384, // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront)
|
|
|
contextWindow: 131072,
|
|
|
supportsImages: false,
|
|
|
supportsPromptCache: false,
|
|
|
@@ -18,7 +18,7 @@ export const cerebrasModels = {
|
|
|
description: "Highly intelligent general purpose model with up to 1,000 tokens/s",
|
|
|
},
|
|
|
"qwen-3-235b-a22b-instruct-2507": {
|
|
|
- maxTokens: 8192, // Conservative default to avoid premature rate limiting
|
|
|
+ maxTokens: 16384, // Conservative default to avoid premature rate limiting
|
|
|
contextWindow: 64000,
|
|
|
supportsImages: false,
|
|
|
supportsPromptCache: false,
|
|
|
@@ -29,7 +29,7 @@ export const cerebrasModels = {
|
|
|
description: "Intelligent model with ~1400 tokens/s",
|
|
|
},
|
|
|
"llama-3.3-70b": {
|
|
|
- maxTokens: 8192, // Conservative default to avoid premature rate limiting
|
|
|
+ maxTokens: 16384, // Conservative default to avoid premature rate limiting
|
|
|
contextWindow: 64000,
|
|
|
supportsImages: false,
|
|
|
supportsPromptCache: false,
|
|
|
@@ -40,7 +40,7 @@ export const cerebrasModels = {
|
|
|
description: "Powerful model with ~2600 tokens/s",
|
|
|
},
|
|
|
"qwen-3-32b": {
|
|
|
- maxTokens: 8192, // Conservative default to avoid premature rate limiting
|
|
|
+ maxTokens: 16384, // Conservative default to avoid premature rate limiting
|
|
|
contextWindow: 64000,
|
|
|
supportsImages: false,
|
|
|
supportsPromptCache: false,
|
|
|
@@ -51,7 +51,7 @@ export const cerebrasModels = {
|
|
|
description: "SOTA coding performance with ~2500 tokens/s",
|
|
|
},
|
|
|
"gpt-oss-120b": {
|
|
|
- maxTokens: 8192, // Conservative default to avoid premature rate limiting
|
|
|
+ maxTokens: 16384, // Conservative default to avoid premature rate limiting
|
|
|
contextWindow: 64000,
|
|
|
supportsImages: false,
|
|
|
supportsPromptCache: false,
|