File overview

Merge branch 'main' into fix/4485-file-deletion-auto-approve

Thomas Brugman 6 days ago
parent
commit
da81e2b623
100 changed files with 2476 additions and 861 deletions
  1. +0 -5 .changeset/add-slovak-translation.md
  2. +5 -0 .changeset/fair-clocks-lick.md
  3. +0 -5 .changeset/fix-agent-manager-double-scrollbar.md
  4. +5 -0 .changeset/fix-disable-zsh-history-expansion.md
  5. +11 -0 .changeset/fix-edit-message-images.md
  6. +0 -5 .changeset/fix-jetbrains-build.md
  7. +0 -5 .changeset/fix-review-scope-loading.md
  8. +0 -5 .changeset/fix-session-title-readability.md
  9. +5 -0 .changeset/friendly-yaks-float.md
  10. +5 -0 .changeset/green-sheep-mate.md
  11. +0 -5 .changeset/new-ghosts-press.md
  12. +0 -5 .changeset/old-eels-yawn.md
  13. +0 -5 .changeset/remove-dup-title.md
  14. +0 -5 .changeset/spotty-turtles-retire.md
  15. +5 -0 .changeset/thirty-singers-refuse.md
  16. +28 -0 CHANGELOG.md
  17. +197 -0 apps/kilocode-docs/pages/ai-providers/zenmux.md
  18. +2 -0 cli/src/config/mapper.ts
  19. +1 -0 cli/src/constants/providers/labels.ts
  20. +7 -0 cli/src/constants/providers/models.ts
  21. +27 -0 cli/src/constants/providers/settings.ts
  22. +1 -0 cli/src/constants/providers/validation.ts
  23. +17 -0 packages/core-schemas/src/config/provider.ts
  24. +24 -0 packages/types/src/provider-settings.ts
  25. +120 -46 packages/types/src/providers/fireworks.ts
  26. +4 -0 packages/types/src/providers/index.ts
  27. +18 -6 packages/types/src/providers/moonshot.ts
  28. +17 -0 packages/types/src/providers/zenmux.ts
  29. +3 -0 src/api/index.ts
  30. +184 -5 src/api/providers/__tests__/chutes.spec.ts
  31. +20 -111 src/api/providers/__tests__/fireworks.spec.ts
  32. +2 -2 src/api/providers/__tests__/kilocode-openrouter.spec.ts
  33. +2 -2 src/api/providers/__tests__/mistral-fim.spec.ts
  34. +299 -13 src/api/providers/__tests__/moonshot.spec.ts
  35. +47 -0 src/api/providers/__tests__/zenmux.spec.ts
  36. +106 -9 src/api/providers/chutes.ts
  37. +7 -1 src/api/providers/fetchers/modelCache.ts
  38. +72 -0 src/api/providers/fetchers/zenmux.ts
  39. +1 -0 src/api/providers/index.ts
  40. +1 -1 src/api/providers/kilocode-openrouter.ts
  41. +1 -1 src/api/providers/mistral.ts
  42. +85 -0 src/api/providers/moonshot.ts
  43. +103 -3 src/api/providers/openai-compatible.ts
  44. +501 -0 src/api/providers/zenmux.ts
  45. +133 -0 src/api/transform/__tests__/ai-sdk.spec.ts
  46. +36 -0 src/api/transform/__tests__/model-params.spec.ts
  47. +59 -0 src/api/transform/__tests__/reasoning.spec.ts
  48. +80 -37 src/api/transform/ai-sdk.ts
  49. +15 -3 src/api/transform/model-params.ts
  50. +6 -0 src/api/transform/reasoning.ts
  51. +29 -0 src/core/prompts/responses.ts
  52. +38 -0 src/core/task/Task.ts
  53. +76 -0 src/core/task/__tests__/Task.spec.ts
  54. +10 -1 src/core/webview/__tests__/ClineProvider.spec.ts
  55. +10 -0 src/core/webview/__tests__/webviewMessageHandler.spec.ts
  56. +9 -2 src/core/webview/webviewMessageHandler.ts
  57. +1 -1 src/esbuild.mjs
  58. +1 -1 src/eslint.config.mjs
  59. +4 -0 src/integrations/terminal/ShellIntegrationManager.ts
  60. +0 -1 src/integrations/theme/getTheme.ts
  61. +1 -1 src/package.json
  62. +0 -201 src/services/continuedev/LICENSE
  63. +0 -245 src/services/continuedev/README.md
  64. +0 -35 src/services/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/typescript/classMethods.ts
  65. +0 -23 src/services/continuedev/core/diff/test-examples/README.md
  66. +0 -30 src/services/continuedev/core/indexing/README.md
  67. +1 -1 src/services/ghost/GhostJetbrainsBridge.ts
  68. +1 -1 src/services/ghost/chat-autocomplete/ChatTextAreaAutocomplete.ts
  69. +1 -1 src/services/ghost/classic-auto-complete/FillInTheMiddle.ts
  70. +4 -4 src/services/ghost/classic-auto-complete/GhostInlineCompletionProvider.ts
  71. +1 -1 src/services/ghost/classic-auto-complete/HoleFiller.ts
  72. +15 -15 src/services/ghost/classic-auto-complete/__tests__/GhostContextProvider.test.ts
  73. +1 -1 src/services/ghost/classic-auto-complete/__tests__/HoleFiller.test.ts
  74. +8 -8 src/services/ghost/classic-auto-complete/getProcessedSnippets.ts
  75. +1 -1 src/services/ghost/classic-auto-complete/uselessSuggestionFilter.ts
  76. +1 -1 src/services/ghost/context/VisibleCodeTracker.ts
  77. +1 -1 src/services/ghost/context/__tests__/VisibleCodeTracker.spec.ts
  78. +0 -0 src/services/ghost/continuedev/.gitignore
  79. +0 -0 src/services/ghost/continuedev/API_REFERENCE.md
  80. +0 -0 src/services/ghost/continuedev/ARCHITECTURE.md
  81. +0 -0 src/services/ghost/continuedev/EXAMPLES.md
  82. +0 -0 src/services/ghost/continuedev/INTEGRATION.md
  83. +0 -0 src/services/ghost/continuedev/core/autocomplete/CompletionProvider.ts
  84. +0 -0 src/services/ghost/continuedev/core/autocomplete/MinimalConfig.ts
  85. +0 -0 src/services/ghost/continuedev/core/autocomplete/classification/shouldCompleteMultiline.ts
  86. +0 -0 src/services/ghost/continuedev/core/autocomplete/constants/AutocompleteLanguageInfo.test.ts
  87. +0 -0 src/services/ghost/continuedev/core/autocomplete/constants/AutocompleteLanguageInfo.ts
  88. +0 -0 src/services/ghost/continuedev/core/autocomplete/context/ContextRetrievalService.ts
  89. +0 -0 src/services/ghost/continuedev/core/autocomplete/context/ImportDefinitionsService.test.ts
  90. +0 -0 src/services/ghost/continuedev/core/autocomplete/context/ImportDefinitionsService.ts
  91. +0 -0 src/services/ghost/continuedev/core/autocomplete/context/ranking/index.test.ts
  92. +0 -0 src/services/ghost/continuedev/core/autocomplete/context/ranking/index.ts
  93. +0 -0 src/services/ghost/continuedev/core/autocomplete/context/root-path-context/RootPathContextService.test.ts
  94. +0 -0 src/services/ghost/continuedev/core/autocomplete/context/root-path-context/RootPathContextService.ts
  95. +0 -0 src/services/ghost/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/__init__.py
  96. +0 -0 src/services/ghost/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/base_module.py
  97. +0 -0 src/services/ghost/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/file1.go
  98. +0 -0 src/services/ghost/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/file1.php
  99. +0 -0 src/services/ghost/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/python/classes.py
  100. +0 -0 src/services/ghost/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/python/functions.py

+ 0 - 5
.changeset/add-slovak-translation.md

@@ -1,5 +0,0 @@
----
-"kilo-code": patch
----
-
-Add Slovak (sk) language translation for Kilo Code extension and UI

+ 5 - 0
.changeset/fair-clocks-lick.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+feat (fireworks.ai): add minimax 2.1, glm 4.7, updated other models

+ 0 - 5
.changeset/fix-agent-manager-double-scrollbar.md

@@ -1,5 +0,0 @@
----
-"kilo-code": patch
----
-
-fix(agent-manager): Fix double scrollbar in mode selector dropdowns

+ 5 - 0
.changeset/fix-disable-zsh-history-expansion.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+fix: disable zsh history expansion (#4926)

+ 11 - 0
.changeset/fix-edit-message-images.md

@@ -0,0 +1,11 @@
+---
+"kilo-code": patch
+---
+
+Fix attached images being lost when editing a message with checkpoint
+
+When editing a message that has a checkpoint, the images attached to the edited message were not being included in the `editMessageConfirm` webview message. This caused images to be silently dropped and not sent to the backend.
+
+The fix adds the `images` field to the message payload in both the checkpoint and non-checkpoint edit confirmation paths.
+
+Fixes #3489

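The changeset above says the fix adds an `images` field to the `editMessageConfirm` webview message in both the checkpoint and non-checkpoint edit paths. A minimal sketch of what that payload could look like — only `images` and the message type come from the changeset text; the other property names are illustrative assumptions, since the handler code is not part of this excerpt:

```typescript
// Hypothetical shape of the edit-confirmation webview message described in the
// changeset above. `type` and `images` follow the changeset text; `messageTs`
// and `text` are assumed names used purely for illustration.
interface EditMessageConfirmPayload {
	type: "editMessageConfirm"
	messageTs: number // identifies the message being edited (assumed field name)
	text: string // the edited message text (assumed field name)
	images?: string[] // previously dropped; now forwarded in both confirmation paths
}

// Both the checkpoint and non-checkpoint paths would build the payload the same
// way, so attached images are no longer silently lost on the way to the backend.
function buildEditMessageConfirm(messageTs: number, text: string, images?: string[]): EditMessageConfirmPayload {
	return { type: "editMessageConfirm", messageTs, text, images }
}
```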
+ 0 - 5
.changeset/fix-jetbrains-build.md

@@ -1,5 +0,0 @@
----
-"kilo-code": patch
----
-
-Fix JetBrains build failure by adding missing vsix dependency for build pipeline

+ 0 - 5
.changeset/fix-review-scope-loading.md

@@ -1,5 +0,0 @@
----
-"kilo-code": patch
----
-
-Show loading spinner immediately when opening review scope dialog while scope information is being computed, improving perceived performance for repositories with many changes

+ 0 - 5
.changeset/fix-session-title-readability.md

@@ -1,5 +0,0 @@
----
-"kilo-code": patch
----
-
-Fix unreadable text and poor contrast issues in Agent Manager

+ 5 - 0
.changeset/friendly-yaks-float.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Prevent sending thinkingLevel to Gemini models that do not support it

+ 5 - 0
.changeset/green-sheep-mate.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": minor
+---
+
+feat: add Zenmux provider

+ 0 - 5
.changeset/new-ghosts-press.md

@@ -1,5 +0,0 @@
----
-"kilo-code": patch
----
-
-Fix 'Delete' toggle button in Auto Approve settings

+ 0 - 5
.changeset/old-eels-yawn.md

@@ -1,5 +0,0 @@
----
-"kilo-code": minor
----
-
-Added Corethink as a new AI provider

+ 0 - 5
.changeset/remove-dup-title.md

@@ -1,5 +0,0 @@
----
-"kilo-code": patch
----
-
-Remove duplicate "Kilo Code Marketplace" title in toolbar (thanks @bernaferrari!)

+ 0 - 5
.changeset/spotty-turtles-retire.md

@@ -1,5 +0,0 @@
----
-"kilo-code": patch
----
-
-Hook embedding timeout into settings for ollama

+ 5 - 0
.changeset/thirty-singers-refuse.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+feat: add chars count to ListFilesTool

+ 28 - 0
CHANGELOG.md

@@ -1,5 +1,33 @@
 # kilo-code
 
+## 5.6.0
+
+### Minor Changes
+
+- [#5040](https://github.com/Kilo-Org/kilocode/pull/5040) [`abe3047`](https://github.com/Kilo-Org/kilocode/commit/abe30473feffb84e885fc8abd5595033fe8b5431) Thanks [@luthraansh](https://github.com/luthraansh)! - Added Corethink as a new AI provider
+
+### Patch Changes
+
+- [#5749](https://github.com/Kilo-Org/kilocode/pull/5749) [`b2fa0a9`](https://github.com/Kilo-Org/kilocode/commit/b2fa0a9b239a396feee39d14eb60eafb088c0ed4) Thanks [@skaldamramra](https://github.com/skaldamramra)! - Add Slovak (sk) language translation for Kilo Code extension and UI
+
+- [#5681](https://github.com/Kilo-Org/kilocode/pull/5681) [`b5ef707`](https://github.com/Kilo-Org/kilocode/commit/b5ef70717068a791da5c3b3068eadb8e189ff484) Thanks [@Drilmo](https://github.com/Drilmo)! - fix(agent-manager): Fix double scrollbar in mode selector dropdowns
+
+- [#5722](https://github.com/Kilo-Org/kilocode/pull/5722) [`f7cf4fd`](https://github.com/Kilo-Org/kilocode/commit/f7cf4fd5002b697f1e41e744b01f096e57666acf) Thanks [@Neonsy](https://github.com/Neonsy)! - Improve Chutes Kimi reliability by preventing terminated-stream retry loops and handling tool/reasoning chunks more safely.
+
+- [#5747](https://github.com/Kilo-Org/kilocode/pull/5747) [`95be119`](https://github.com/Kilo-Org/kilocode/commit/95be1193449184869e49d44b7fe9f09e1620b3ce) Thanks [@Githubguy132010](https://github.com/Githubguy132010)! - Fix JetBrains build failure by adding missing vsix dependency for build pipeline
+
+- [#5733](https://github.com/Kilo-Org/kilocode/pull/5733) [`1b5c4f4`](https://github.com/Kilo-Org/kilocode/commit/1b5c4f4fab28f03b81a9bdf3cd789b1425108765) Thanks [@krisztian-gajdar](https://github.com/krisztian-gajdar)! - Show loading spinner immediately when opening review scope dialog while scope information is being computed, improving perceived performance for repositories with many changes
+
+- [#5699](https://github.com/Kilo-Org/kilocode/pull/5699) [`e560e47`](https://github.com/Kilo-Org/kilocode/commit/e560e47e39f605f78a6d18fdbfc0dd680ceb5557) Thanks [@Patel230](https://github.com/Patel230)! - Fix unreadable text and poor contrast issues in Agent Manager
+
+- [#5722](https://github.com/Kilo-Org/kilocode/pull/5722) [`a834092`](https://github.com/Kilo-Org/kilocode/commit/a8340925c72e9ee0494e1bffd47dbc1aaddc1c8e) Thanks [@Neonsy](https://github.com/Neonsy)! - Fixed Moonshot Kimi tool-calling and thinking-mode behavior for `kimi-k2.5` and `kimi-for-coding`.
+
+- [#4749](https://github.com/Kilo-Org/kilocode/pull/4749) [`ed70dad`](https://github.com/Kilo-Org/kilocode/commit/ed70dad320a80160dc793bf34f52b87d995285ff) Thanks [@lgrgic](https://github.com/lgrgic)! - Fix 'Delete' toggle button in Auto Approve settings
+
+- [#5756](https://github.com/Kilo-Org/kilocode/pull/5756) [`5d9d4d1`](https://github.com/Kilo-Org/kilocode/commit/5d9d4d1c4a6236fccf7082ea9e8d83d95bbd207a) Thanks [@bernaferrari](https://github.com/bernaferrari)! - Remove duplicate "Kilo Code Marketplace" title in toolbar (thanks @bernaferrari!)
+
+- [#3807](https://github.com/Kilo-Org/kilocode/pull/3807) [`e37717e`](https://github.com/Kilo-Org/kilocode/commit/e37717ee2fad8efb53bea92752dd9ea25f79bbed) Thanks [@davidraedev](https://github.com/davidraedev)! - Hook embedding timeout into settings for ollama
+
 ## 5.5.0
 
 ### Minor Changes

+ 197 - 0
apps/kilocode-docs/pages/ai-providers/zenmux.md

@@ -0,0 +1,197 @@
+---
+title: ZenMux
+---
+
+import Codicon from "@site/src/components/Codicon";
+
+# Using ZenMux With Kilo Code
+
+[ZenMux](https://zenmux.ai) provides a unified API gateway to access multiple AI models from different providers through a single endpoint. It supports OpenAI, Anthropic, Google, and other major AI providers, automatically handling routing, fallbacks, and cost optimization.
+
+## Getting Started
+
+1. **Sign up for ZenMux:** Visit [zenmux.ai](https://zenmux.ai) to create an account.
+2. **Get your API key:** After signing up, navigate to your dashboard to generate an API key.
+3. **Configure in Kilo Code:** Add your API key to Kilo Code settings.
+
+## Configuration in Kilo Code
+
+1. **Open Kilo Code Settings:** Click the gear icon (<Codicon name="gear" />) in the Kilo Code panel.
+2. **Select Provider:** Choose "ZenMux" from the "API Provider" dropdown.
+3. **Enter API Key:** Paste your ZenMux API key into the "ZenMux API Key" field.
+4. **Select Model:** Choose your desired model from the "Model" dropdown.
+5. **(Optional) Custom Base URL:** If you need to use a custom base URL for the ZenMux API, check "Use custom base URL" and enter the URL. Leave this blank for most users.
+
+## Supported Models
+
+ZenMux supports a wide range of models from various providers:
+
+Visit [zenmux.ai/models](https://zenmux.ai/models) to see the complete list of available models.
+
+### Other Providers
+
+ZenMux also supports models from Meta, Mistral, and many other providers. Check your ZenMux dashboard for the complete list of available models.
+
+## API Compatibility
+
+ZenMux provides multiple API endpoints for different protocols:
+
+### OpenAI Compatible API
+
+Use the standard OpenAI SDK with ZenMux's base URL:
+
+```javascript
+import OpenAI from "openai"
+
+const openai = new OpenAI({
+	baseURL: "https://zenmux.ai/api/v1",
+	apiKey: "<ZENMUX_API_KEY>",
+})
+
+async function main() {
+	const completion = await openai.chat.completions.create({
+		model: "openai/gpt-5",
+		messages: [
+			{
+				role: "user",
+				content: "What is the meaning of life?",
+			},
+		],
+	})
+
+	console.log(completion.choices[0].message)
+}
+
+main()
+```
+
+### Anthropic API
+
+For Anthropic models, use the dedicated endpoint:
+
+```typescript
+import Anthropic from "@anthropic-ai/sdk"
+
+// 1. Initialize the Anthropic client
+const anthropic = new Anthropic({
+	// 2. Replace with the API key from your ZenMux console
+	apiKey: "<YOUR ZENMUX_API_KEY>",
+	// 3. Point the base URL to the ZenMux endpoint
+	baseURL: "https://zenmux.ai/api/anthropic",
+})
+
+async function main() {
+	const msg = await anthropic.messages.create({
+		model: "anthropic/claude-sonnet-4.5",
+		max_tokens: 1024,
+		messages: [{ role: "user", content: "Hello, Claude" }],
+	})
+	console.log(msg)
+}
+
+main()
+```
+
+### Platform API
+
+The Get Generation endpoint is used to query information about a generation, such as usage and costs.
+
+```bash
+curl https://zenmux.ai/api/v1/generation?id=<generation_id> \
+  -H "Authorization: Bearer $ZENMUX_API_KEY"
+```
+
+### Google Vertex AI API
+
+For Google models:
+
+```typescript
+const genai = require("@google/genai")
+
+const client = new genai.GoogleGenAI({
+	apiKey: "$ZENMUX_API_KEY",
+	vertexai: true,
+	httpOptions: {
+		baseUrl: "https://zenmux.ai/api/vertex-ai",
+		apiVersion: "v1",
+	},
+})
+
+const response = await client.models.generateContent({
+	model: "google/gemini-2.5-pro",
+	contents: "How does AI work?",
+})
+console.log(response)
+```
+
+## Features
+
+### Automatic Routing
+
+ZenMux automatically routes your requests to the best available provider based on:
+
+- Model availability
+- Response time
+- Cost optimization
+- Provider health status
+
+### Fallback Support
+
+If a provider is unavailable, ZenMux automatically falls back to alternative providers that support the same model capabilities.
+
+### Cost Optimization
+
+ZenMux can be configured to optimize for cost, routing requests to the most cost-effective provider while maintaining quality.
+
+### Zero Data Retention (ZDR)
+
+Enable ZDR mode to ensure that no request or response data is stored by ZenMux, providing maximum privacy for sensitive applications.
+
+## Advanced Configuration
+
+### Provider Routing
+
+You can specify routing preferences:
+
+- **Price**: Route to the lowest cost provider
+- **Throughput**: Route to the provider with highest tokens/second
+- **Latency**: Route to the provider with fastest response time
+
+### Data Collection Settings
+
+Control how ZenMux handles your data:
+
+- **Allow**: Allow data collection for service improvement
+- **Deny**: Disable all data collection
+
+### Middle-Out Transform
+
+Enable the middle-out transform feature to optimize prompts that exceed model context limits.
+
+## Troubleshooting
+
+### API Key Issues
+
+- Ensure your API key is correctly copied without any extra spaces
+- Check that your ZenMux account is active and has available credits
+- Verify the API key has the necessary permissions
+
+### Model Availability
+
+- Some models may have regional restrictions
+- Check the ZenMux dashboard for current model availability
+- Ensure your account tier has access to the desired models
+
+### Connection Issues
+
+- Verify your internet connection
+- Check if you're behind a firewall that might block API requests
+- Try using a custom base URL if the default endpoint is blocked
+
+## Support
+
+For additional support:
+
+- Visit the [ZenMux documentation](https://zenmux.ai/docs)
+- Contact ZenMux support through their dashboard
+- Check the [Kilo Code GitHub repository](https://github.com/kilocode/kilocode) for integration-specific issues

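The "Provider Routing", "Data Collection Settings", and "Zero Data Retention" sections in the documentation above correspond to the optional `zenmuxProviderSort`, `zenmuxProviderDataCollection`, and `zenmuxZdr` fields introduced later in this diff. A hedged sketch of a provider settings object using those fields — the field names mirror the schemas in this PR, while the values are examples only, not a documented ZenMux request format:

```typescript
// Illustrative ZenMux provider settings. Field names mirror the optional fields
// added to zenmuxSchema / zenmuxProviderSchema in this PR; values are examples.
const zenmuxSettings = {
	zenmuxApiKey: "<ZENMUX_API_KEY>",
	zenmuxModelId: "openai/gpt-5",
	zenmuxBaseUrl: "https://zenmux.ai/api/v1", // leave unset to use the default endpoint
	zenmuxProviderSort: "price" as const, // routing preference: "price" | "throughput" | "latency"
	zenmuxProviderDataCollection: "deny" as const, // data collection: "allow" | "deny"
	zenmuxZdr: true, // Zero Data Retention mode
	zenmuxUseMiddleOutTransform: true, // middle-out transform for prompts over the context limit
}
```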
+ 2 - 0
cli/src/config/mapper.ts

@@ -106,6 +106,8 @@ export function getModelIdForProvider(provider: ProviderConfig): string {
 			return provider.apiModelId || ""
 		case "openrouter":
 			return provider.openRouterModelId || ""
+		case "zenmux":
+			return provider.zenmuxModelId || ""
 		case "ollama":
 			return provider.ollamaModelId || ""
 		case "lmstudio":

+ 1 - 0
cli/src/constants/providers/labels.ts

@@ -11,6 +11,7 @@ export const PROVIDER_LABELS: Record<ProviderName, string> = {
 	"openai-codex": "OpenAI - ChatGPT Plus/Pro",
 	"openai-responses": "OpenAI Compatible (Responses)",
 	openrouter: "OpenRouter",
+	zenmux: "ZenMux",
 	bedrock: "Amazon Bedrock",
 	gemini: "Google Gemini",
 	vertex: "GCP Vertex AI",

+ 7 - 0
cli/src/constants/providers/models.ts

@@ -46,6 +46,7 @@ import {
 	minimaxModels,
 	minimaxDefaultModelId,
 	ovhCloudAiEndpointsDefaultModelId,
+	zenmuxDefaultModelId,
 } from "@roo-code/types"
 
 /**
@@ -64,6 +65,7 @@ export type RouterName =
 	| "deepinfra"
 	| "vercel-ai-gateway"
 	| "ovhcloud"
+	| "zenmux"
 	| "nano-gpt"
 
 /**
@@ -120,6 +122,7 @@ export type RouterModels = Record<RouterName, ModelRecord>
 export const PROVIDER_TO_ROUTER_NAME: Record<ProviderName, RouterName | null> = {
 	kilocode: "kilocode",
 	openrouter: "openrouter",
+	zenmux: "zenmux", // kilocode_change
 	ollama: "ollama",
 	lmstudio: "lmstudio",
 	litellm: "litellm",
@@ -174,6 +177,7 @@ export const PROVIDER_TO_ROUTER_NAME: Record<ProviderName, RouterName | null> =
 export const PROVIDER_MODEL_FIELD: Record<ProviderName, string | null> = {
 	kilocode: "kilocodeModel",
 	openrouter: "openRouterModelId",
+	zenmux: "zenmuxModelId", // kilocode_change
 	ollama: "ollamaModelId",
 	lmstudio: "lmStudioModelId",
 	litellm: "litellmModelId",
@@ -285,6 +289,7 @@ export const DEFAULT_MODEL_IDS: Partial<Record<ProviderName, string>> = {
 	zai: internationalZAiDefaultModelId,
 	roo: rooDefaultModelId,
 	ovhcloud: ovhCloudAiEndpointsDefaultModelId,
+	zenmux: zenmuxDefaultModelId,
 }
 
 /**
@@ -460,6 +465,8 @@ export function getModelIdKey(provider: ProviderName): string {
 			return "vercelAiGatewayModelId"
 		case "ovhcloud":
 			return "ovhCloudAiEndpointsModelId"
+		case "zenmux":
+			return "zenmuxModelId"
 		case "nano-gpt":
 			return "nanoGptModelId"
 		default:

+ 27 - 0
cli/src/constants/providers/settings.ts

@@ -91,6 +91,25 @@ export const FIELD_REGISTRY: Record<string, FieldMetadata> = {
 		placeholder: "Enter base URL (or leave empty for default)...",
 		isOptional: true,
 	},
+
+	// kilocode_change start - ZenMux fields
+	zenmuxApiKey: {
+		label: "API Key",
+		type: "password",
+		placeholder: "Enter ZenMux API key...",
+	},
+	zenmuxModelId: {
+		label: "Model",
+		type: "text",
+		placeholder: "Enter model name...",
+	},
+	zenmuxBaseUrl: {
+		label: "Base URL",
+		type: "text",
+		placeholder: "Enter base URL (or leave empty for default)...",
+		isOptional: true,
+	},
+	// kilocode_change end
 	openRouterProviderDataCollection: {
 		label: "Provider Data Collection",
 		type: "select",
@@ -791,6 +810,13 @@ export const getProviderSettings = (provider: ProviderName, config: ProviderSett
 				createFieldConfig("openRouterBaseUrl", config, "Default"),
 			]
 
+		case "zenmux": // kilocode_change
+			return [
+				createFieldConfig("zenmuxApiKey", config),
+				createFieldConfig("zenmuxModelId", config, "openai/gpt-5"),
+				createFieldConfig("zenmuxBaseUrl", config, "Default"),
+			]
+
 		case "openai-native":
 			return [
 				createFieldConfig("openAiNativeApiKey", config),
@@ -1043,6 +1069,7 @@ export const PROVIDER_DEFAULT_MODELS: Record<ProviderName, string> = {
 	"openai-codex": "gpt-4o",
 	"openai-responses": "gpt-4o",
 	openrouter: "anthropic/claude-3-5-sonnet",
+	zenmux: "openai/gpt-5", // kilocode_change
 	bedrock: "anthropic.claude-3-5-sonnet-20241022-v2:0",
 	gemini: "gemini-1.5-pro-latest",
 	vertex: "claude-3-5-sonnet@20241022",

+ 1 - 0
cli/src/constants/providers/validation.ts

@@ -10,6 +10,7 @@ export const PROVIDER_REQUIRED_FIELDS: Record<ProviderName, string[]> = {
 	"openai-native": ["openAiNativeApiKey", "apiModelId"],
 	"openai-codex": ["apiModelId"],
 	openrouter: ["openRouterApiKey", "openRouterModelId"],
+	zenmux: ["zenmuxApiKey", "zenmuxModelId"], // kilocode_change
 	ollama: ["ollamaBaseUrl", "ollamaModelId"],
 	lmstudio: ["lmStudioBaseUrl", "lmStudioModelId"],
 	bedrock: ["awsRegion", "apiModelId"], // Auth fields handled in handleSpecialValidations (supports API key, profile, or direct credentials)

+ 17 - 0
packages/core-schemas/src/config/provider.ts

@@ -88,6 +88,21 @@ export const openRouterProviderSchema = baseProviderSchema.extend({
 	openRouterZdr: z.boolean().optional(),
 })
 
+// kilocode_change start
+// ZenMux provider
+export const zenmuxProviderSchema = baseProviderSchema.extend({
+	provider: z.literal("zenmux"),
+	zenmuxModelId: z.string().optional(),
+	zenmuxApiKey: z.string().optional(),
+	zenmuxBaseUrl: z.string().optional(),
+	zenmuxSpecificProvider: z.string().optional(),
+	zenmuxUseMiddleOutTransform: z.boolean().optional(),
+	zenmuxProviderDataCollection: z.enum(["allow", "deny"]).optional(),
+	zenmuxProviderSort: z.enum(["price", "throughput", "latency"]).optional(),
+	zenmuxZdr: z.boolean().optional(),
+})
+// kilocode_change end
+
 // Ollama provider
 export const ollamaProviderSchema = baseProviderSchema.extend({
 	provider: z.literal("ollama"),
@@ -407,6 +422,7 @@ export const providerConfigSchema = z.discriminatedUnion("provider", [
 	openAIProviderSchema,
 	openAIResponsesProviderSchema, // kilocode_change
 	openRouterProviderSchema,
+	zenmuxProviderSchema, // kilocode_change
 	ollamaProviderSchema,
 	lmStudioProviderSchema,
 	glamaProviderSchema,
@@ -453,6 +469,7 @@ export type OpenAICodexProviderConfig = z.infer<typeof openAICodexProviderSchema
 export type OpenAIProviderConfig = z.infer<typeof openAIProviderSchema>
 export type OpenAIResponsesProviderConfig = z.infer<typeof openAIResponsesProviderSchema> // kilocode_change
 export type OpenRouterProviderConfig = z.infer<typeof openRouterProviderSchema>
+export type ZenmuxProviderConfig = z.infer<typeof zenmuxProviderSchema> // kilocode_change
 export type OllamaProviderConfig = z.infer<typeof ollamaProviderSchema>
 export type LMStudioProviderConfig = z.infer<typeof lmStudioProviderSchema>
 export type GlamaProviderConfig = z.infer<typeof glamaProviderSchema>

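Because `zenmuxProviderSchema` joins the discriminated union above, a CLI provider config entry is accepted only when its `provider` field is the literal `"zenmux"`, and all `zenmux*` fields stay optional. A small sketch of validating such an entry — the import path is an assumption for illustration; only the schema and field names come from the diff above:

```typescript
import { providerConfigSchema } from "@roo-code/core-schemas" // assumed import path

// Example config entry; every zenmux* field is optional per zenmuxProviderSchema.
const candidate = {
	provider: "zenmux",
	zenmuxApiKey: "<ZENMUX_API_KEY>",
	zenmuxModelId: "anthropic/claude-sonnet-4.5",
	zenmuxProviderSort: "latency",
}

// The discriminated union dispatches on `provider`, so this parses against the
// newly added zenmuxProviderSchema rather than any other provider schema.
const result = providerConfigSchema.safeParse(candidate)
if (!result.success) {
	console.error(result.error.issues)
}
```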
+ 24 - 0
packages/types/src/provider-settings.ts

@@ -57,6 +57,7 @@ export const dynamicProviders = [
 	"inception",
 	"synthetic",
 	"sap-ai-core",
+	"zenmux",
 	// kilocode_change end
 	"deepinfra",
 	"io-intelligence",
@@ -163,6 +164,7 @@ export const providerNames = [
 	"virtual-quota-fallback",
 	"synthetic",
 	"inception",
+	"zenmux",
 	// kilocode_change end
 	"sambanova",
 	"vertex",
@@ -250,6 +252,10 @@ const nanoGptSchema = baseProviderSettingsSchema.extend({
 
 export const openRouterProviderDataCollectionSchema = z.enum(["allow", "deny"])
 export const openRouterProviderSortSchema = z.enum(["price", "throughput", "latency"])
+
+// ZenMux provider schemas - kilocode_change
+export const zenmuxProviderDataCollectionSchema = z.enum(["allow", "deny"])
+export const zenmuxProviderSortSchema = z.enum(["price", "throughput", "latency"])
 // kilocode_change end
 
 const openRouterSchema = baseProviderSettingsSchema.extend({
@@ -264,6 +270,19 @@ const openRouterSchema = baseProviderSettingsSchema.extend({
 	// kilocode_change end
 })
 
+// kilocode_change start
+const zenmuxSchema = baseProviderSettingsSchema.extend({
+	zenmuxApiKey: z.string().optional(),
+	zenmuxModelId: z.string().optional(),
+	zenmuxBaseUrl: z.string().optional(),
+	zenmuxSpecificProvider: z.string().optional(),
+	zenmuxUseMiddleOutTransform: z.boolean().optional(),
+	zenmuxProviderDataCollection: zenmuxProviderDataCollectionSchema.optional(),
+	zenmuxProviderSort: zenmuxProviderSortSchema.optional(),
+	zenmuxZdr: z.boolean().optional(),
+})
+// kilocode_change end
+
 const bedrockSchema = apiModelIdProviderModelSchema.extend({
 	awsAccessKey: z.string().optional(),
 	awsSecretKey: z.string().optional(),
@@ -571,6 +590,7 @@ export const providerSettingsSchemaDiscriminated = z.discriminatedUnion("apiProv
 	glamaSchema.merge(z.object({ apiProvider: z.literal("glama") })), // kilocode_change
 	nanoGptSchema.merge(z.object({ apiProvider: z.literal("nano-gpt") })), // kilocode_change
 	openRouterSchema.merge(z.object({ apiProvider: z.literal("openrouter") })),
+	zenmuxSchema.merge(z.object({ apiProvider: z.literal("zenmux") })), // kilocode_change
 	bedrockSchema.merge(z.object({ apiProvider: z.literal("bedrock") })),
 	vertexSchema.merge(z.object({ apiProvider: z.literal("vertex") })),
 	openAiSchema.merge(z.object({ apiProvider: z.literal("openai") })),
@@ -625,6 +645,7 @@ export const providerSettingsSchema = z.object({
 	...glamaSchema.shape, // kilocode_change
 	...nanoGptSchema.shape, // kilocode_change
 	...openRouterSchema.shape,
+	...zenmuxSchema.shape, // kilocode_change
 	...bedrockSchema.shape,
 	...vertexSchema.shape,
 	...openAiSchema.shape,
@@ -694,6 +715,7 @@ export const modelIdKeys = [
 	"glamaModelId", // kilocode_change
 	"nanoGptModelId", // kilocode_change
 	"openRouterModelId",
+	"zenmuxModelId", // kilocode_change
 	"openAiModelId",
 	"ollamaModelId",
 	"lmStudioModelId",
@@ -756,6 +778,7 @@ export const modelIdKeysByProvider: Record<TypicalProvider, ModelIdKey> = {
 	ovhcloud: "ovhCloudAiEndpointsModelId",
 	inception: "inceptionLabsModelId",
 	"sap-ai-core": "sapAiCoreModelId",
+	zenmux: "zenmuxModelId", // kilocode_change
 	// kilocode_change end
 	groq: "apiModelId",
 	baseten: "apiModelId",
@@ -930,6 +953,7 @@ export const MODELS_BY_PROVIDER: Record<
 	inception: { id: "inception", label: "Inception", models: [] },
 	kilocode: { id: "kilocode", label: "Kilocode", models: [] },
 	"virtual-quota-fallback": { id: "virtual-quota-fallback", label: "Virtual Quota Fallback", models: [] },
+	zenmux: { id: "zenmux", label: "ZenMux", models: [] }, // kilocode_change
 	// kilocode_change end
 	deepinfra: { id: "deepinfra", label: "DeepInfra", models: [] },
 	"vercel-ai-gateway": { id: "vercel-ai-gateway", label: "Vercel AI Gateway", models: [] },

+ 120 - 46
packages/types/src/providers/fireworks.ts

@@ -1,18 +1,24 @@
 import type { ModelInfo } from "../model.js"
 
 export type FireworksModelId =
+	| "accounts/fireworks/models/kimi-k2p5"
 	| "accounts/fireworks/models/kimi-k2-instruct"
 	| "accounts/fireworks/models/kimi-k2-instruct-0905"
 	| "accounts/fireworks/models/kimi-k2-thinking"
 	| "accounts/fireworks/models/minimax-m2"
+	| "accounts/fireworks/models/minimax-m2p1"
+	| "accounts/fireworks/models/qwen3-235b-a22b"
 	| "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"
 	| "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct"
 	| "accounts/fireworks/models/deepseek-r1-0528"
 	| "accounts/fireworks/models/deepseek-v3"
+	| "accounts/fireworks/models/deepseek-v3-0324"
 	| "accounts/fireworks/models/deepseek-v3p1"
+	| "accounts/fireworks/models/deepseek-v3p2"
 	| "accounts/fireworks/models/glm-4p5"
 	| "accounts/fireworks/models/glm-4p5-air"
 	| "accounts/fireworks/models/glm-4p6"
+	| "accounts/fireworks/models/glm-4p7"
 	| "accounts/fireworks/models/gpt-oss-20b"
 	| "accounts/fireworks/models/gpt-oss-120b"
 
@@ -28,7 +34,8 @@ export const fireworksModels = {
 		defaultToolProtocol: "native",
 		inputPrice: 0.6,
 		outputPrice: 2.5,
-		cacheReadsPrice: 0.15,
+		cacheReadsPrice: 0.3,
+		displayName: "Kimi K2 Instruct 0905",
 		description:
 			"Kimi K2 model gets a new version update: Agentic coding: more accurate, better generalization across scaffolds. Frontend coding: improved aesthetics and functionalities on web, 3d, and other tasks. Context length: extended from 128k to 256k, providing better long-horizon support.",
 	},
@@ -41,8 +48,18 @@ export const fireworksModels = {
 		defaultToolProtocol: "native",
 		inputPrice: 0.6,
 		outputPrice: 2.5,
-		description:
-			"Kimi K2 is a state-of-the-art mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters. Trained with the Muon optimizer, Kimi K2 achieves exceptional performance across frontier knowledge, reasoning, and coding tasks while being meticulously optimized for agentic capabilities.",
+		deprecated: true,
+	},
+	"accounts/fireworks/models/kimi-k2p5": {
+		maxTokens: 256000,
+		contextWindow: 256000,
+		supportsImages: true,
+		supportsPromptCache: true,
+		supportsNativeTools: true,
+		inputPrice: 0.6,
+		outputPrice: 3,
+		cacheReadsPrice: 0.1,
+		displayName: "Kimi K2.5",
 	},
 	"accounts/fireworks/models/kimi-k2-thinking": {
 		maxTokens: 16000,
@@ -60,19 +77,41 @@ export const fireworksModels = {
 			"The kimi-k2-thinking model is a general-purpose agentic reasoning model developed by Moonshot AI. Thanks to its strength in deep reasoning and multi-turn tool use, it can solve even the hardest problems.",
 	},
 	"accounts/fireworks/models/minimax-m2": {
-		maxTokens: 4096,
-		contextWindow: 204800,
+		maxTokens: 192000,
+		contextWindow: 192000,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		supportsNativeTools: true,
 		defaultToolProtocol: "native",
 		inputPrice: 0.3,
 		outputPrice: 1.2,
-		description:
-			"MiniMax M2 is a high-performance language model with 204.8K context window, optimized for long-context understanding and generation tasks.",
+		cacheReadsPrice: 0.15,
+		displayName: "MiniMax-M2",
+	},
+	"accounts/fireworks/models/minimax-m2p1": {
+		maxTokens: 200000,
+		contextWindow: 200000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsNativeTools: true,
+		inputPrice: 0.3,
+		outputPrice: 1.2,
+		cacheReadsPrice: 0.15,
+		displayName: "MiniMax-M2.1",
+	},
+	"accounts/fireworks/models/qwen3-235b-a22b": {
+		maxTokens: 16384,
+		contextWindow: 128000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsNativeTools: true,
+		inputPrice: 0.22,
+		outputPrice: 0.88,
+		cacheReadsPrice: 0.11,
+		displayName: "Qwen3 235B A22B",
 	},
 	"accounts/fireworks/models/qwen3-235b-a22b-instruct-2507": {
-		maxTokens: 32768,
+		maxTokens: 256000,
 		contextWindow: 256000,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -81,20 +120,22 @@ export const fireworksModels = {
 		inputPrice: 0.22,
 		outputPrice: 0.88,
 		description: "Latest Qwen3 thinking model, competitive against the best closed source models in Jul 2025.",
+		displayName: "Qwen3 235B A22B Instruct 2507",
 	},
 	"accounts/fireworks/models/qwen3-coder-480b-a35b-instruct": {
-		maxTokens: 32768,
-		contextWindow: 256000,
+		maxTokens: 256_000,
+		contextWindow: 256_000,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		supportsNativeTools: true,
 		defaultToolProtocol: "native",
 		inputPrice: 0.45,
 		outputPrice: 1.8,
-		description: "Qwen3's most agentic code model to date.",
+		cacheReadsPrice: 0.23,
+		displayName: "Qwen3 Coder 480B A35B Instruct",
 	},
 	"accounts/fireworks/models/deepseek-r1-0528": {
-		maxTokens: 20480,
+		maxTokens: 160000,
 		contextWindow: 160000,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -102,8 +143,7 @@ export const fireworksModels = {
 		defaultToolProtocol: "native",
 		inputPrice: 3,
 		outputPrice: 8,
-		description:
-			"05/28 updated checkpoint of Deepseek R1. Its overall performance is now approaching that of leading models, such as O3 and Gemini 2.5 Pro. Compared to the previous version, the upgraded model shows significant improvements in handling complex reasoning tasks, and this version also offers a reduced hallucination rate, enhanced support for function calling, and better experience for vibe coding. Note that fine-tuning for this model is only available through contacting fireworks at https://fireworks.ai/company/contact-us.",
+		displayName: "DeepSeek R1 0528",
 	},
 	"accounts/fireworks/models/deepseek-v3": {
 		maxTokens: 16384,
@@ -114,79 +154,113 @@ export const fireworksModels = {
 		defaultToolProtocol: "native",
 		inputPrice: 0.9,
 		outputPrice: 0.9,
+		deprecated: true,
 		description:
-			"A strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token from Deepseek. Note that fine-tuning for this model is only available through contacting fireworks at https://fireworks.ai/company/contact-us.",
+			"A strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token from Deepseek.",
+	},
+	"accounts/fireworks/models/deepseek-v3-0324": {
+		maxTokens: 160000,
+		contextWindow: 160000,
+		supportsImages: false,
+		supportsPromptCache: false,
+		supportsNativeTools: true,
+		inputPrice: 0.9,
+		outputPrice: 0.9,
+		displayName: "DeepSeek V3 0324",
 	},
 	"accounts/fireworks/models/deepseek-v3p1": {
-		maxTokens: 16384,
-		contextWindow: 163840,
+		maxTokens: 160_000,
+		contextWindow: 160_000,
 		supportsImages: false,
 		supportsPromptCache: false,
 		supportsNativeTools: true,
 		defaultToolProtocol: "native",
 		inputPrice: 0.56,
 		outputPrice: 1.68,
-		description:
-			"DeepSeek v3.1 is an improved version of the v3 model with enhanced performance, better reasoning capabilities, and improved code generation. This Mixture-of-Experts (MoE) model maintains the same 671B total parameters with 37B activated per token.",
+		displayName: "DeepSeek V3.1",
+	},
+	"accounts/fireworks/models/deepseek-v3p2": {
+		maxTokens: 160_000,
+		contextWindow: 160_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsNativeTools: true,
+		inputPrice: 0.56,
+		outputPrice: 1.68,
+		cacheReadsPrice: 0.28,
+		displayName: "Deepseek v3.2",
 	},
 	"accounts/fireworks/models/glm-4p5": {
-		maxTokens: 16384,
-		contextWindow: 128000,
+		maxTokens: 131_072,
+		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: false,
 		supportsNativeTools: true,
 		defaultToolProtocol: "native",
 		inputPrice: 0.55,
 		outputPrice: 2.19,
-		description:
-			"Z.ai GLM-4.5 with 355B total parameters and 32B active parameters. Features unified reasoning, coding, and intelligent agent capabilities.",
+		displayName: "GLM-4.5",
 	},
 	"accounts/fireworks/models/glm-4p5-air": {
-		maxTokens: 16384,
-		contextWindow: 128000,
+		maxTokens: 131_072,
+		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: false,
 		supportsNativeTools: true,
-		defaultToolProtocol: "native",
-		inputPrice: 0.55,
-		outputPrice: 2.19,
+    defaultToolProtocol: "native",
+		inputPrice: 0.22,
+		outputPrice: 0.88,
+		displayName: "GLM-4.5 Air",
 		description:
 			"Z.ai GLM-4.5-Air with 106B total parameters and 12B active parameters. Features unified reasoning, coding, and intelligent agent capabilities.",
 	},
 	"accounts/fireworks/models/glm-4p6": {
-		maxTokens: 25344,
-		contextWindow: 198000,
+		maxTokens: 198_000,
+		contextWindow: 198_000,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		supportsNativeTools: true,
 		defaultToolProtocol: "native",
 		inputPrice: 0.55,
 		outputPrice: 2.19,
-		description:
-			"Z.ai GLM-4.6 is an advanced coding model with exceptional performance on complex programming tasks. Features improved reasoning capabilities and enhanced code generation quality, making it ideal for software development workflows.",
+		cacheReadsPrice: 0.28,
+		displayName: "GLM-4.6",
+	},
+	"accounts/fireworks/models/glm-4p7": {
+		maxTokens: 198_000,
+		contextWindow: 198_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsNativeTools: true,
+		inputPrice: 0.6,
+		outputPrice: 2.2,
+		cacheReadsPrice: 0.3,
+		displayName: "GLM-4.7",
 	},
 	"accounts/fireworks/models/gpt-oss-20b": {
-		maxTokens: 16384,
-		contextWindow: 128000,
+		maxTokens: 128_000,
+		contextWindow: 128_000,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		supportsNativeTools: true,
-		defaultToolProtocol: "native",
-		inputPrice: 0.07,
-		outputPrice: 0.3,
+    defaultToolProtocol: "native",
+		inputPrice: 0.05,
+		outputPrice: 0.2,
+		cacheReadsPrice: 0.04,
+		displayName: "GPT-OSS 20B",
 		description:
 			"OpenAI gpt-oss-20b: Compact model for local/edge deployments. Optimized for low-latency and resource-constrained environments with chain-of-thought output, adjustable reasoning, and agentic workflows.",
 	},
 	"accounts/fireworks/models/gpt-oss-120b": {
-		maxTokens: 16384,
-		contextWindow: 128000,
+		maxTokens: 128_000,
+		contextWindow: 128_000,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		supportsNativeTools: true,
 		defaultToolProtocol: "native",
 		inputPrice: 0.15,
 		outputPrice: 0.6,
-		description:
-			"OpenAI gpt-oss-120b: Production-grade, general-purpose model that fits on a single H100 GPU. Features complex reasoning, configurable effort, full chain-of-thought transparency, and supports function calling, tool use, and structured outputs.",
+		cacheReadsPrice: 0.08,
+		displayName: "GPT-OSS 120B",
 	},
 } as const satisfies Record<string, ModelInfo>

+ 4 - 0
packages/types/src/providers/index.ts

@@ -16,6 +16,7 @@ export * from "./synthetic.js"
 export * from "./inception.js"
 export * from "./minimax.js"
 export * from "./glama.js"
+export * from "./zenmux.js"
 // kilocode_change end
 export * from "./groq.js"
 export * from "./huggingface.js"
@@ -56,6 +57,7 @@ import { featherlessDefaultModelId } from "./featherless.js"
 import { fireworksDefaultModelId } from "./fireworks.js"
 import { geminiDefaultModelId } from "./gemini.js"
 import { glamaDefaultModelId } from "./glama.js" // kilocode_change
+import { zenmuxDefaultModelId } from "./zenmux.js" // kilocode_change
 import { groqDefaultModelId } from "./groq.js"
 import { ioIntelligenceDefaultModelId } from "./io-intelligence.js"
 import { litellmDefaultModelId } from "./lite-llm.js"
@@ -91,6 +93,8 @@ export function getProviderDefaultModelId(
 	switch (provider) {
 		case "openrouter":
 			return openRouterDefaultModelId
+		case "zenmux": // kilocode_change
+			return zenmuxDefaultModelId // kilocode_change
 		case "requesty":
 			return requestyDefaultModelId
 		// kilocode_change start

+ 18 - 6
packages/types/src/providers/moonshot.ts

@@ -6,21 +6,26 @@ export type MoonshotModelId = keyof typeof moonshotModels
 export const moonshotDefaultModelId: MoonshotModelId = "kimi-k2-thinking"
 
 export const moonshotModels = {
+	// kilocode_change start
 	"kimi-for-coding": {
 		maxTokens: 32_000,
 		contextWindow: 131_072,
-		supportsImages: false,
+		supportsImages: true,
 		supportsPromptCache: true,
-		supportsReasoningBudget: true,
-		supportsReasoningEffort: true,
+		supportsReasoningBinary: true,
+		supportsAdaptiveThinking: true,
 		inputPrice: 0.6, // $0.60 per million tokens (cache miss)
 		outputPrice: 2.5, // $2.50 per million tokens
 		cacheWritesPrice: 0, // $0 per million tokens (cache miss)
 		cacheReadsPrice: 0.15, // $0.15 per million tokens (cache hit)
 		preserveReasoning: true,
 		supportsNativeTools: true,
+		defaultToolProtocol: "native",
+		supportsTemperature: false,
+		defaultTemperature: 0.6,
 		description: `Kimi for coding`,
 	},
+	// kilocode_change end
 	"kimi-k2-0711-preview": {
 		maxTokens: 32_000,
 		contextWindow: 131_072,
@@ -91,19 +96,26 @@ export const moonshotModels = {
 		defaultTemperature: 1.0,
 		description: `The kimi-k2-thinking model is a general-purpose agentic reasoning model developed by Moonshot AI. Thanks to its strength in deep reasoning and multi-turn tool use, it can solve even the hardest problems.`,
 	},
+	// kilocode_change start
 	"kimi-k2.5": {
 		maxTokens: 16_384,
 		contextWindow: 262_144,
-		supportsImages: false,
+		supportsImages: true,
 		supportsPromptCache: true,
+		supportsReasoningBinary: true,
+		supportsAdaptiveThinking: true,
+		preserveReasoning: true,
+		supportsNativeTools: true,
+		defaultToolProtocol: "native",
 		inputPrice: 0.6, // $0.60 per million tokens (cache miss)
 		outputPrice: 3.0, // $3.00 per million tokens
 		cacheReadsPrice: 0.1, // $0.10 per million tokens (cache hit)
-		supportsTemperature: true,
-		defaultTemperature: 1.0,
+		supportsTemperature: false,
+		defaultTemperature: 0.6,
 		description:
 			"Kimi K2.5 is the latest generation of Moonshot AI's Kimi series, featuring improved reasoning capabilities and enhanced performance across diverse tasks.",
 	},
+	// kilocode_change end
 } as const satisfies Record<string, ModelInfo>
 
 export const MOONSHOT_DEFAULT_TEMPERATURE = 0.6

+ 17 - 0
packages/types/src/providers/zenmux.ts

@@ -0,0 +1,17 @@
+// kilocode_change - new file
+import type { ModelInfo } from "../model.js"
+
+// Default model for ZenMux - using Anthropic Claude Opus 4 as default
+export const zenmuxDefaultModelId = "anthropic/claude-opus-4"
+
+export const zenmuxDefaultModelInfo: ModelInfo = {
+	maxTokens: 8192,
+	contextWindow: 200_000,
+	supportsImages: true,
+	supportsPromptCache: true,
+	inputPrice: 15.0,
+	outputPrice: 75.0,
+	cacheWritesPrice: 18.75,
+	cacheReadsPrice: 1.5,
+	description: "Claude Opus 4 via ZenMux",
+}

+ 3 - 0
src/api/index.ts

@@ -11,6 +11,7 @@ import {
 	AwsBedrockHandler,
 	CerebrasHandler,
 	OpenRouterHandler,
+	ZenMuxHandler, // kilocode_change
 	VertexHandler,
 	AnthropicVertexHandler,
 	OpenAiHandler,
@@ -181,6 +182,8 @@ export function buildApiHandler(configuration: ProviderSettings): ApiHandler {
 		// kilocode_change end
 		case "openrouter":
 			return new OpenRouterHandler(options)
+		case "zenmux": // kilocode_change
+			return new ZenMuxHandler(options) // kilocode_change
 		case "bedrock":
 			return new AwsBedrockHandler(options)
 		case "vertex":

+ 184 - 5
src/api/providers/__tests__/chutes.spec.ts

@@ -153,6 +153,39 @@ describe("ChutesHandler", () => {
 		])
 	})
 
+	// kilocode_change start
+	it("should handle non-DeepSeek reasoning field", async () => {
+		mockCreate.mockImplementationOnce(async () => ({
+			[Symbol.asyncIterator]: async function* () {
+				yield {
+					choices: [
+						{
+							delta: { reasoning: "Thinking through it..." },
+							index: 0,
+						},
+					],
+					usage: null,
+				}
+			},
+		}))
+
+		const systemPrompt = "You are a helpful assistant."
+		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+		mockFetchModel.mockResolvedValueOnce({
+			id: "some-other-model",
+			info: { maxTokens: 1024, temperature: 0.7 },
+		})
+
+		const stream = handler.createMessage(systemPrompt, messages)
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		expect(chunks).toEqual([{ type: "reasoning", text: "Thinking through it..." }])
+	})
+	// kilocode_change end
+
 	it("should return default model when no model is specified", async () => {
 		const model = await handler.fetchModel()
 		expect(model.id).toBe(chutesDefaultModelId)
@@ -275,6 +308,131 @@ describe("ChutesHandler", () => {
 		})
 	})
 
+	// kilocode_change start
+	it("createMessage should yield tool_call_end on finish_reason tool_calls", async () => {
+		mockCreate.mockImplementationOnce(() => {
+			return {
+				[Symbol.asyncIterator]: () => ({
+					next: vi
+						.fn()
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								choices: [
+									{
+										delta: {
+											tool_calls: [
+												{
+													index: 0,
+													id: "call_finish",
+													function: { name: "test_tool", arguments: '{"arg":"value"}' },
+												},
+											],
+										},
+										finish_reason: null,
+									},
+								],
+							},
+						})
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								choices: [
+									{
+										delta: {},
+										finish_reason: "tool_calls",
+									},
+								],
+							},
+						})
+						.mockResolvedValueOnce({ done: true }),
+				}),
+			}
+		})
+
+		const stream = handler.createMessage("system prompt", [])
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		expect(chunks).toEqual([
+			{
+				type: "tool_call_partial",
+				index: 0,
+				id: "call_finish",
+				name: "test_tool",
+				arguments: '{"arg":"value"}',
+			},
+			{
+				type: "tool_call_end",
+				id: "call_finish",
+			},
+		])
+	})
+
+	it("createMessage should synthesize tool call ids when provider omits them", async () => {
+		mockCreate.mockImplementationOnce(() => {
+			return {
+				[Symbol.asyncIterator]: () => ({
+					next: vi
+						.fn()
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								choices: [
+									{
+										delta: {
+											tool_calls: [
+												{
+													index: 0,
+													function: { name: "test_tool", arguments: '{"arg":"value"}' },
+												},
+											],
+										},
+										finish_reason: null,
+									},
+								],
+							},
+						})
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								choices: [
+									{
+										delta: {},
+										finish_reason: "tool_calls",
+									},
+								],
+							},
+						})
+						.mockResolvedValueOnce({ done: true }),
+				}),
+			}
+		})
+
+		const stream = handler.createMessage("system prompt", [])
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		expect(chunks).toEqual([
+			{
+				type: "tool_call_partial",
+				index: 0,
+				id: "chutes_tool_call_0",
+				name: "test_tool",
+				arguments: '{"arg":"value"}',
+			},
+			{
+				type: "tool_call_end",
+				id: "chutes_tool_call_0",
+			},
+		])
+	})
+	// kilocode_change end
+
 	it("createMessage should pass tools and tool_choice to API", async () => {
 		const tools = [
 			{
@@ -307,6 +465,9 @@ describe("ChutesHandler", () => {
 				tools,
 				tool_choice,
 			}),
+			expect.objectContaining({
+				timeout: expect.any(Number),
+			}),
 		)
 	})
 
@@ -326,11 +487,29 @@ describe("ChutesHandler", () => {
 			apiModelId: testModelId,
 			chutesApiKey: "test-chutes-api-key",
 		})
-		// Note: getModel() returns fallback default without calling fetchModel
-		// Since we haven't called fetchModel, it returns the default chutesDefaultModelId
-		// which is DeepSeek-R1-0528, therefore temperature will be DEEP_SEEK_DEFAULT_TEMPERATURE
+		;(handlerWithModel as any).models = {
+			[testModelId]: {
+				...chutesDefaultModelInfo,
+				temperature: 0.7,
+			},
+		}
 		const model = handlerWithModel.getModel()
-		// The default model is DeepSeek-R1, so it returns DEEP_SEEK_DEFAULT_TEMPERATURE
-		expect(model.info.temperature).toBe(DEEP_SEEK_DEFAULT_TEMPERATURE)
+		expect(model.id).toBe(testModelId)
+		expect(model.info.temperature).toBe(0.5)
+	})
+
+	// kilocode_change start
+	it("should preserve explicit Chutes model id when it is unavailable in cached model list", () => {
+		const unsupportedModelId = "moonshotai/Kimi-K2.5-TEE"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: unsupportedModelId,
+			chutesApiKey: "test-chutes-api-key",
+		})
+
+		const model = handlerWithModel.getModel()
+
+		expect(model.id).toBe(unsupportedModelId)
+		expect(model.info.temperature).toBe(0.5)
 	})
+	// kilocode_change end
 })

+ 20 - 111
src/api/providers/__tests__/fireworks.spec.ts

@@ -102,17 +102,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 16384,
-				contextWindow: 128000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.6,
-				outputPrice: 2.5,
-				description: expect.stringContaining("Kimi K2 is a state-of-the-art mixture-of-experts"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return Kimi K2 Thinking model with correct configuration", () => {
@@ -148,17 +139,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 4096,
-				contextWindow: 204800,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.3,
-				outputPrice: 1.2,
-				description: expect.stringContaining("MiniMax M2 is a high-performance language model"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return Qwen3 235B model with correct configuration", () => {
@@ -169,18 +151,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 32768,
-				contextWindow: 256000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.22,
-				outputPrice: 0.88,
-				description:
-					"Latest Qwen3 thinking model, competitive against the best closed source models in Jul 2025.",
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return DeepSeek R1 model with correct configuration", () => {
@@ -191,17 +163,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 20480,
-				contextWindow: 160000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 3,
-				outputPrice: 8,
-				description: expect.stringContaining("05/28 updated checkpoint of Deepseek R1"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return DeepSeek V3 model with correct configuration", () => {
@@ -233,17 +196,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 16384,
-				contextWindow: 163840,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.56,
-				outputPrice: 1.68,
-				description: expect.stringContaining("DeepSeek v3.1 is an improved version"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return GLM-4.5 model with correct configuration", () => {
@@ -254,17 +208,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 16384,
-				contextWindow: 128000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.55,
-				outputPrice: 2.19,
-				description: expect.stringContaining("Z.ai GLM-4.5 with 355B total parameters"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return GLM-4.5-Air model with correct configuration", () => {
@@ -275,17 +220,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 16384,
-				contextWindow: 128000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.55,
-				outputPrice: 2.19,
-				description: expect.stringContaining("Z.ai GLM-4.5-Air with 106B total parameters"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return GLM-4.6 model with correct configuration", () => {
@@ -296,17 +232,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 25344,
-				contextWindow: 198000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.55,
-				outputPrice: 2.19,
-				description: expect.stringContaining("Z.ai GLM-4.6 is an advanced coding model"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return gpt-oss-20b model with correct configuration", () => {
@@ -317,17 +244,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 16384,
-				contextWindow: 128000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.07,
-				outputPrice: 0.3,
-				description: expect.stringContaining("OpenAI gpt-oss-20b: Compact model for local/edge deployments"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return gpt-oss-120b model with correct configuration", () => {
@@ -338,17 +256,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 16384,
-				contextWindow: 128000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.15,
-				outputPrice: 0.6,
-				description: expect.stringContaining("OpenAI gpt-oss-120b: Production-grade, general-purpose model"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("completePrompt method should return text from Fireworks API", async () => {

+ 2 - 2
src/api/providers/__tests__/kilocode-openrouter.spec.ts

@@ -27,10 +27,10 @@ import {
 	X_KILOCODE_MACHINEID,
 	X_KILOCODE_MODE,
 } from "../../../shared/kilocode/headers"
-import { streamSse } from "../../../services/continuedev/core/fetch/stream"
+import { streamSse } from "../../../services/ghost/continuedev/core/fetch/stream"
 
 // Mock the stream module
-vitest.mock("../../../services/continuedev/core/fetch/stream", () => ({
+vitest.mock("../../../services/ghost/continuedev/core/fetch/stream", () => ({
 	streamSse: vitest.fn(),
 }))
 

+ 2 - 2
src/api/providers/__tests__/mistral-fim.spec.ts

@@ -6,10 +6,10 @@ vitest.mock("vscode", () => ({}))
 
 import { MistralHandler } from "../mistral"
 import { ApiHandlerOptions } from "../../../shared/api"
-import { streamSse } from "../../../services/continuedev/core/fetch/stream"
+import { streamSse } from "../../../services/ghost/continuedev/core/fetch/stream"
 
 // Mock the stream module
-vitest.mock("../../../services/continuedev/core/fetch/stream", () => ({
+vitest.mock("../../../services/ghost/continuedev/core/fetch/stream", () => ({
 	streamSse: vitest.fn(),
 }))
 

+ 299 - 13
src/api/providers/__tests__/moonshot.spec.ts

@@ -117,6 +117,31 @@ describe("MoonshotHandler", () => {
 			expect(model).toHaveProperty("temperature")
 			expect(model).toHaveProperty("maxTokens")
 		})
+
+		// kilocode_change start
+		it("should expose native tools for kimi-k2.5", () => {
+			const strictHandler = new MoonshotHandler({
+				...mockOptions,
+				apiModelId: "kimi-k2.5",
+			})
+			const model = strictHandler.getModel()
+			const strictModelInfo = model.info as { supportsNativeTools?: boolean; defaultToolProtocol?: string }
+
+			expect(strictModelInfo.supportsNativeTools).toBe(true)
+			expect(strictModelInfo.defaultToolProtocol).toBe("native")
+			expect(model.info.supportsImages).toBe(true)
+		})
+
+		it("should expose image capability for kimi-for-coding", () => {
+			const strictHandler = new MoonshotHandler({
+				...mockOptions,
+				apiModelId: "kimi-for-coding",
+			})
+			const model = strictHandler.getModel()
+
+			expect(model.info.supportsImages).toBe(true)
+		})
+		// kilocode_change end
 	})
 
 	describe("createMessage", () => {
@@ -221,6 +246,187 @@ describe("MoonshotHandler", () => {
 			expect(usageChunks[0].cacheWriteTokens).toBe(0)
 			expect(usageChunks[0].cacheReadTokens).toBe(2)
 		})
+
+		// kilocode_change start
+		it("should include prompt_cache_key for moonshot requests when taskId is provided", async () => {
+			async function* mockFullStream() {
+				yield { type: "text-delta", text: "Test response" }
+			}
+
+			mockStreamText.mockReturnValue({
+				fullStream: mockFullStream(),
+				usage: Promise.resolve({
+					inputTokens: 1,
+					outputTokens: 1,
+					details: {},
+					raw: {},
+				}),
+			})
+
+			for await (const _chunk of handler.createMessage(systemPrompt, messages, { taskId: "task-cache-1" })) {
+				// Drain stream
+			}
+
+			expect(mockStreamText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					providerOptions: {
+						moonshot: {
+							prompt_cache_key: "task-cache-1",
+						},
+					},
+				}),
+			)
+		})
+
+		it("should enforce strict thinking temperature/provider options for kimi-k2.5 by default", async () => {
+			const strictHandler = new MoonshotHandler({
+				...mockOptions,
+				apiModelId: "kimi-k2.5",
+				modelTemperature: 0.1,
+			})
+
+			async function* mockFullStream() {
+				yield { type: "text-delta", text: "Test response" }
+			}
+
+			mockStreamText.mockReturnValue({
+				fullStream: mockFullStream(),
+				usage: Promise.resolve({
+					inputTokens: 1,
+					outputTokens: 1,
+					details: {},
+					raw: {},
+				}),
+			})
+
+			for await (const _chunk of strictHandler.createMessage(systemPrompt, messages)) {
+				// Drain stream
+			}
+
+			expect(mockStreamText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					temperature: 1.0,
+					providerOptions: {
+						moonshot: {
+							thinking: { type: "enabled" },
+						},
+					},
+				}),
+			)
+		})
+
+		it("should include prompt_cache_key alongside strict thinking controls when taskId is provided", async () => {
+			const strictHandler = new MoonshotHandler({
+				...mockOptions,
+				apiModelId: "kimi-for-coding",
+			})
+
+			async function* mockFullStream() {
+				yield { type: "text-delta", text: "Test response" }
+			}
+
+			mockStreamText.mockReturnValue({
+				fullStream: mockFullStream(),
+				usage: Promise.resolve({
+					inputTokens: 1,
+					outputTokens: 1,
+					details: {},
+					raw: {},
+				}),
+			})
+
+			for await (const _chunk of strictHandler.createMessage(systemPrompt, messages, { taskId: "task-cache-2" })) {
+				// Drain stream
+			}
+
+			expect(mockStreamText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					temperature: 1.0,
+					providerOptions: {
+						moonshot: {
+							prompt_cache_key: "task-cache-2",
+							thinking: { type: "enabled" },
+						},
+					},
+				}),
+			)
+		})
+
+		it("should enforce strict thinking temperature/provider options for kimi-for-coding by default", async () => {
+			const strictHandler = new MoonshotHandler({
+				...mockOptions,
+				apiModelId: "kimi-for-coding",
+				modelTemperature: 0.1,
+			})
+
+			async function* mockFullStream() {
+				yield { type: "text-delta", text: "Test response" }
+			}
+
+			mockStreamText.mockReturnValue({
+				fullStream: mockFullStream(),
+				usage: Promise.resolve({
+					inputTokens: 1,
+					outputTokens: 1,
+					details: {},
+					raw: {},
+				}),
+			})
+
+			for await (const _chunk of strictHandler.createMessage(systemPrompt, messages)) {
+				// Drain stream
+			}
+
+			expect(mockStreamText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					temperature: 1.0,
+					providerOptions: {
+						moonshot: {
+							thinking: { type: "enabled" },
+						},
+					},
+				}),
+			)
+		})
+
+		it("should enforce strict non-thinking temperature/provider options when reasoning is disabled", async () => {
+			const strictHandler = new MoonshotHandler({
+				...mockOptions,
+				apiModelId: "kimi-for-coding",
+				enableReasoningEffort: false,
+				modelTemperature: 1.9,
+			})
+
+			async function* mockFullStream() {
+				yield { type: "text-delta", text: "Test response" }
+			}
+
+			mockStreamText.mockReturnValue({
+				fullStream: mockFullStream(),
+				usage: Promise.resolve({
+					inputTokens: 1,
+					outputTokens: 1,
+					details: {},
+					raw: {},
+				}),
+			})
+
+			for await (const _chunk of strictHandler.createMessage(systemPrompt, messages)) {
+				// Drain stream
+			}
+
+			expect(mockStreamText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					temperature: 0.6,
+					providerOptions: {
+						moonshot: {
+							thinking: { type: "disabled" },
+						},
+					},
+				}),
+			)
+		})
+		// kilocode_change end
 	})
 
 	describe("completePrompt", () => {
@@ -238,6 +444,34 @@ describe("MoonshotHandler", () => {
 				}),
 			)
 		})
+
+		// kilocode_change start
+		it("should enforce strict thinking controls for completePrompt on strict Kimi models", async () => {
+			const strictHandler = new MoonshotHandler({
+				...mockOptions,
+				apiModelId: "kimi-k2.5",
+				enableReasoningEffort: false,
+				modelTemperature: 1.8,
+			})
+
+			mockGenerateText.mockResolvedValue({
+				text: "Test completion",
+			})
+
+			await strictHandler.completePrompt("Test prompt")
+
+			expect(mockGenerateText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					temperature: 0.6,
+					providerOptions: {
+						moonshot: {
+							thinking: { type: "disabled" },
+						},
+					},
+				}),
+			)
+		})
+		// kilocode_change end
 	})
 
 	describe("processUsageMetrics", () => {
@@ -404,19 +638,13 @@ describe("MoonshotHandler", () => {
 				chunks.push(chunk)
 			}
 
-			const toolCallStartChunks = chunks.filter((c) => c.type === "tool_call_start")
-			const toolCallDeltaChunks = chunks.filter((c) => c.type === "tool_call_delta")
-			const toolCallEndChunks = chunks.filter((c) => c.type === "tool_call_end")
-
-			expect(toolCallStartChunks.length).toBe(1)
-			expect(toolCallStartChunks[0].id).toBe("tool-call-1")
-			expect(toolCallStartChunks[0].name).toBe("read_file")
-
-			expect(toolCallDeltaChunks.length).toBe(1)
-			expect(toolCallDeltaChunks[0].delta).toBe('{"path":"test.ts"}')
-
-			expect(toolCallEndChunks.length).toBe(1)
-			expect(toolCallEndChunks[0].id).toBe("tool-call-1")
+			// kilocode_change start
+			const toolCallChunks = chunks.filter((c) => c.type === "tool_call")
+			expect(toolCallChunks.length).toBe(1)
+			expect(toolCallChunks[0].id).toBe("tool-call-1")
+			expect(toolCallChunks[0].name).toBe("read_file")
+			expect(toolCallChunks[0].arguments).toBe('{"path":"test.ts"}')
+			// kilocode_change end
 		})
 
 		it("should handle complete tool calls", async () => {
@@ -470,5 +698,63 @@ describe("MoonshotHandler", () => {
 			expect(toolCallChunks[0].name).toBe("read_file")
 			expect(toolCallChunks[0].arguments).toBe('{"path":"test.ts"}')
 		})
+
+		// kilocode_change start
+		it("should flush pending tool-input stream as tool_call when tool-input-end is missing", async () => {
+			async function* mockFullStream() {
+				yield {
+					type: "tool-input-start",
+					id: "tool-call-2",
+					toolName: "read_file",
+				}
+				yield {
+					type: "tool-input-delta",
+					id: "tool-call-2",
+					delta: '{"path":"missing-end.ts"}',
+				}
+			}
+
+			mockStreamText.mockReturnValue({
+				fullStream: mockFullStream(),
+				usage: Promise.resolve({
+					inputTokens: 10,
+					outputTokens: 5,
+					details: {},
+					raw: {},
+				}),
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages, {
+				taskId: "test-task",
+				tools: [
+					{
+						type: "function",
+						function: {
+							name: "read_file",
+							description: "Read a file",
+							parameters: {
+								type: "object",
+								properties: { path: { type: "string" } },
+								required: ["path"],
+							},
+						},
+					},
+				],
+			})
+
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const toolCallChunks = chunks.filter((c) => c.type === "tool_call")
+			expect(toolCallChunks).toHaveLength(1)
+			expect(toolCallChunks[0]).toMatchObject({
+				id: "tool-call-2",
+				name: "read_file",
+				arguments: '{"path":"missing-end.ts"}',
+			})
+		})
+		// kilocode_change end
 	})
 })

+ 47 - 0
src/api/providers/__tests__/zenmux.spec.ts

@@ -0,0 +1,47 @@
+// kilocode_change - new test file for ZenMux provider
+import { ZenMuxHandler } from "../zenmux"
+import { ApiHandlerOptions } from "../../../shared/api"
+
+describe("ZenMuxHandler", () => {
+	let mockOptions: ApiHandlerOptions
+
+	beforeEach(() => {
+		mockOptions = {
+			zenmuxApiKey: "test-api-key",
+			zenmuxModelId: "openai/gpt-4",
+			zenmuxBaseUrl: "https://test.zenmux.ai/api/v1",
+		}
+	})
+
+	test("should use default base URL when not provided", () => {
+		const optionsWithoutBaseUrl = {
+			...mockOptions,
+			zenmuxBaseUrl: undefined,
+		}
+		const handler = new ZenMuxHandler(optionsWithoutBaseUrl)
+		// The handler should initialize without errors
+		expect(handler).toBeDefined()
+	})
+
+	test("should use provided base URL", () => {
+		const handler = new ZenMuxHandler(mockOptions)
+		expect(handler).toBeDefined()
+		// Construction should succeed when a custom base URL is supplied
+	})
+
+	test("should handle missing API key gracefully", () => {
+		const optionsWithoutKey = {
+			...mockOptions,
+			zenmuxApiKey: undefined,
+		}
+		const handler = new ZenMuxHandler(optionsWithoutKey)
+		expect(handler).toBeDefined()
+	})
+
+	test("should return correct model info", () => {
+		const handler = new ZenMuxHandler(mockOptions)
+		const model = handler.getModel()
+		expect(model.id).toBe("openai/gpt-4")
+		expect(model.info).toBeDefined()
+	})
+})

+ 106 - 9
src/api/providers/chutes.ts

@@ -11,6 +11,7 @@ import { ApiStream } from "../transform/stream"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 
 import { RouterProvider } from "./router-provider"
+import { getApiRequestTimeout } from "./utils/timeout-config"
 
 export class ChutesHandler extends RouterProvider implements SingleCompletionHandler {
 	constructor(options: ApiHandlerOptions) {
@@ -25,6 +26,14 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 		})
 	}
 
+	// kilocode_change start
+	private getRequestOptions() {
+		return {
+			timeout: getApiRequestTimeout(),
+		}
+	}
+	// kilocode_change end
+
 	private getCompletionParams(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
@@ -59,6 +68,32 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 		return params
 	}
 
+	// kilocode_change start
+	private getToolCallId(
+		toolCall: {
+			id?: string
+			index?: number
+		},
+		toolCallIdsByIndex: Map<number, string>,
+	): string {
+		const toolCallIndex = toolCall.index ?? 0
+
+		if (toolCall.id) {
+			toolCallIdsByIndex.set(toolCallIndex, toolCall.id)
+			return toolCall.id
+		}
+
+		const existingId = toolCallIdsByIndex.get(toolCallIndex)
+		if (existingId) {
+			return existingId
+		}
+
+		const syntheticId = `chutes_tool_call_${toolCallIndex}`
+		toolCallIdsByIndex.set(toolCallIndex, syntheticId)
+		return syntheticId
+	}
+	// kilocode_change end
+
 	override async *createMessage(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
@@ -70,7 +105,7 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 			const stream = await this.client.chat.completions.create({
 				...this.getCompletionParams(systemPrompt, messages, metadata),
 				messages: convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]),
-			})
+			}, this.getRequestOptions())
 
 			const matcher = new XmlMatcher(
 				"think",
@@ -80,9 +115,16 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 						text: chunk.data,
 					}) as const,
 			)
+			// kilocode_change start
+			const activeToolCallIds = new Set<string>()
+			const toolCallIdsByIndex = new Map<number, string>()
+			// kilocode_change end
 
 			for await (const chunk of stream) {
 				const delta = chunk.choices[0]?.delta
+				// kilocode_change start
+				const finishReason = chunk.choices[0]?.finish_reason
+				// kilocode_change end
 
 				if (delta?.content) {
 					for (const processedChunk of matcher.update(delta.content)) {
@@ -93,15 +135,27 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 				// Emit raw tool call chunks - NativeToolCallParser handles state management
 				if (delta && "tool_calls" in delta && Array.isArray(delta.tool_calls)) {
 					for (const toolCall of delta.tool_calls) {
+						// kilocode_change start
+						const toolCallId = this.getToolCallId(toolCall, toolCallIdsByIndex)
+						activeToolCallIds.add(toolCallId)
+						// kilocode_change end
 						yield {
 							type: "tool_call_partial",
 							index: toolCall.index,
-							id: toolCall.id,
+							id: toolCallId,
 							name: toolCall.function?.name,
 							arguments: toolCall.function?.arguments,
 						}
 					}
 				}
+				// kilocode_change start
+				if (finishReason === "tool_calls" && activeToolCallIds.size > 0) {
+					for (const id of activeToolCallIds) {
+						yield { type: "tool_call_end", id }
+					}
+					activeToolCallIds.clear()
+				}
+				// kilocode_change end
 
 				if (chunk.usage) {
 					yield {
@@ -120,31 +174,61 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 			// For non-DeepSeek-R1 models, use standard OpenAI streaming
 			const stream = await this.client.chat.completions.create(
 				this.getCompletionParams(systemPrompt, messages, metadata),
+				this.getRequestOptions(),
 			)
+			// kilocode_change start
+			const activeToolCallIds = new Set<string>()
+			const toolCallIdsByIndex = new Map<number, string>()
+			// kilocode_change end
 
 			for await (const chunk of stream) {
 				const delta = chunk.choices[0]?.delta
+				// kilocode_change start
+				const finishReason = chunk.choices[0]?.finish_reason
+				// kilocode_change end
 
 				if (delta?.content) {
 					yield { type: "text", text: delta.content }
 				}
 
-				if (delta && "reasoning_content" in delta && delta.reasoning_content) {
-					yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" }
+				// kilocode_change start
+				if (delta) {
+					for (const key of ["reasoning_content", "reasoning"] as const) {
+						if (key in delta) {
+							const reasoningContent = ((delta as any)[key] as string | undefined) || ""
+							if (reasoningContent.trim()) {
+								yield { type: "reasoning", text: reasoningContent }
+							}
+							break
+						}
+					}
 				}
+				// kilocode_change end
 
 				// Emit raw tool call chunks - NativeToolCallParser handles state management
 				if (delta && "tool_calls" in delta && Array.isArray(delta.tool_calls)) {
 					for (const toolCall of delta.tool_calls) {
+						// kilocode_change start
+						const toolCallId = this.getToolCallId(toolCall, toolCallIdsByIndex)
+						activeToolCallIds.add(toolCallId)
+						// kilocode_change end
 						yield {
 							type: "tool_call_partial",
 							index: toolCall.index,
-							id: toolCall.id,
+							id: toolCallId,
 							name: toolCall.function?.name,
 							arguments: toolCall.function?.arguments,
 						}
 					}
 				}
+				// kilocode_change start
+				if (finishReason === "tool_calls" && activeToolCallIds.size > 0) {
+					for (const id of activeToolCallIds) {
+						yield { type: "tool_call_end", id }
+					}
+					activeToolCallIds.clear()
+				}
+				// kilocode_change end
 
 				if (chunk.usage) {
 					yield {
@@ -184,7 +268,7 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 				requestParams.temperature = this.options.modelTemperature ?? defaultTemperature
 			}
 
-			const response = await this.client.chat.completions.create(requestParams)
+			const response = await this.client.chat.completions.create(requestParams, this.getRequestOptions())
 			return response.choices[0]?.message.content || ""
 		} catch (error) {
 			if (error instanceof Error) {
@@ -196,12 +280,25 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 
 	override getModel() {
 		const model = super.getModel()
-		const isDeepSeekR1 = model.id.includes("DeepSeek-R1")
+		const configuredModelId = this.options.apiModelId
+		// kilocode_change start
+		// Keep explicit Chutes model IDs instead of silently switching to the provider default.
+		// This prevents hidden model substitution when model lists are stale/unavailable.
+		const shouldPreserveExplicitModelId =
+			!!configuredModelId &&
+			configuredModelId !== this.defaultModelId &&
+			model.id === this.defaultModelId &&
+			!this.models[configuredModelId]
+
+		const effectiveModelId = shouldPreserveExplicitModelId ? configuredModelId : model.id
+		const baseInfo = shouldPreserveExplicitModelId ? this.defaultModelInfo : model.info
+		// kilocode_change end
+		const isDeepSeekR1 = effectiveModelId.includes("DeepSeek-R1")
 
 		return {
-			...model,
+			id: effectiveModelId,
 			info: {
-				...model.info,
+				...baseInfo,
 				temperature: isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0.5,
 			},
 		}
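Note on the getToolCallId change above: some providers stream tool-call deltas without repeating the call id, so fragments are keyed by their index and a synthetic id is minted when none ever arrives. A minimal standalone sketch of that recovery logic (the helper name and driver lines below are illustrative, not part of the diff):

	// Sketch of the per-index tool-call id recovery used by ChutesHandler.getToolCallId.
	type ToolCallDelta = { id?: string; index?: number }

	function resolveToolCallId(delta: ToolCallDelta, idsByIndex: Map<number, string>): string {
		const index = delta.index ?? 0
		if (delta.id) {
			idsByIndex.set(index, delta.id) // remember the real id for later fragments at this index
			return delta.id
		}
		const known = idsByIndex.get(index)
		if (known) {
			return known // later fragment without an id: reuse the id seen earlier
		}
		const synthetic = `chutes_tool_call_${index}` // no id was ever sent: mint a stable synthetic one
		idsByIndex.set(index, synthetic)
		return synthetic
	}

	const ids = new Map<number, string>()
	resolveToolCallId({ id: "call_abc", index: 0 }, ids) // "call_abc"
	resolveToolCallId({ index: 0 }, ids) // "call_abc" (recovered from the map)
	resolveToolCallId({ index: 1 }, ids) // "chutes_tool_call_1"

The finish_reason === "tool_calls" branch then emits one tool_call_end per id collected this way.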

+ 7 - 1
src/api/providers/fetchers/modelCache.ts

@@ -40,6 +40,7 @@ import { getHuggingFaceModels } from "./huggingface"
 import { getRooModels } from "./roo"
 import { getChutesModels } from "./chutes"
 import { getNanoGptModels } from "./nano-gpt" //kilocode_change
+import { getZenmuxModels } from "./zenmux"
 
 const memoryCache = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 })
 
@@ -75,7 +76,6 @@ async function fetchModelsFromProvider(options: GetModelsOptions): Promise<Model
 	const { provider } = options
 
 	let models: ModelRecord
-
 	switch (provider) {
 		case "openrouter":
 			// kilocode_change start: base url and bearer token
@@ -85,6 +85,12 @@ async function fetchModelsFromProvider(options: GetModelsOptions): Promise<Model
 			})
 			// kilocode_change end
 			break
+		case "zenmux":
+			models = await getZenmuxModels({
+				openRouterBaseUrl: options.baseUrl || "https://zenmux.ai/api/v1",
+				headers: options.apiKey ? { Authorization: `Bearer ${options.apiKey}` } : undefined,
+			})
+			break
 		case "requesty":
 			// Requesty models endpoint requires an API key for per-user custom policies.
 			models = await getRequestyModels(options.baseUrl, options.apiKey)

+ 72 - 0
src/api/providers/fetchers/zenmux.ts

@@ -0,0 +1,72 @@
+import { z } from "zod"
+
+import { type ModelInfo } from "@roo-code/types"
+import type { ApiHandlerOptions } from "../../../shared/api"
+import { DEFAULT_HEADERS } from "../constants"
+import { parseApiPrice } from "../../../shared/cost"
+
+/**
+ * Schema for a single model entry returned by the ZenMux /models endpoint.
+ */
+const zenMuxModelSchema = z.object({
+	id: z.string(),
+	object: z.string(),
+	created: z.number(),
+	owned_by: z.string(),
+})
+
+export type ZenMuxModel = z.infer<typeof zenMuxModelSchema>
+
+/**
+ * Schema for the ZenMux /models list response.
+ */
+const zenMuxModelsResponseSchema = z.object({
+	data: z.array(zenMuxModelSchema),
+	object: z.string(),
+})
+
+/**
+ * Fetch the available ZenMux models and map each one to a placeholder ModelInfo record.
+ */
+export async function getZenmuxModels(
+	options?: ApiHandlerOptions & { headers?: Record<string, string> },
+): Promise<Record<string, ModelInfo>> {
+	const models: Record<string, ModelInfo> = {}
+	const baseURL = "https://zenmux.ai/api/v1"
+	try {
+		const response = await fetch(`${baseURL}/models`, {
+			headers: { ...DEFAULT_HEADERS, ...(options?.headers ?? {}) },
+		})
+		const json = await response.json()
+		const result = zenMuxModelsResponseSchema.safeParse(json)
+
+		if (!result.success) {
+			throw new Error("ZenMux models response is invalid: " + JSON.stringify(result.error.format(), undefined, 2))
+		}
+
+		const data = result.data.data
+
+		for (const model of data) {
+			const { id, owned_by } = model
+
+			const modelInfo: ModelInfo = {
+				maxTokens: 0,
+				contextWindow: 0,
+				supportsPromptCache: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: `${owned_by || "ZenMux"} model`,
+				displayName: id,
+			}
+
+			models[id] = modelInfo
+		}
+
+		console.log(`Successfully fetched ${Object.keys(models).length} ZenMux models`)
+	} catch (error) {
+		console.error(`Error fetching ZenMux models: ${JSON.stringify(error, Object.getOwnPropertyNames(error), 2)}`)
+		throw error
+	}
+
+	return models
+}
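For reference, a response shaped like the following (values invented for illustration) satisfies zenMuxModelsResponseSchema; anything that does not parse makes getZenmuxModels throw with the formatted zod error:

	// Hypothetical /models payload that parses against zenMuxModelsResponseSchema.
	const exampleZenMuxResponse = {
		object: "list",
		data: [
			{ id: "openai/gpt-4", object: "model", created: 1715000000, owned_by: "openai" },
			{ id: "anthropic/claude-sonnet-4", object: "model", created: 1716000000, owned_by: "anthropic" },
		],
	}
	// Each entry becomes a placeholder ModelInfo keyed by id, e.g.
	// models["openai/gpt-4"].description === "openai model" (pricing/context fields default to 0).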

+ 1 - 0
src/api/providers/index.ts

@@ -25,6 +25,7 @@ export { OpenAiCompatibleResponsesHandler } from "./openai-responses" // kilocod
 export { OpenAICompatibleHandler } from "./openai-compatible"
 export type { OpenAICompatibleConfig } from "./openai-compatible"
 export { OpenRouterHandler } from "./openrouter"
+export { ZenMuxHandler } from "./zenmux" // kilocode_change
 export { QwenCodeHandler } from "./qwen-code"
 export { RequestyHandler } from "./requesty"
 export { SambaNovaHandler } from "./sambanova"

+ 1 - 1
src/api/providers/kilocode-openrouter.ts

@@ -19,7 +19,7 @@ import {
 	X_KILOCODE_MACHINEID,
 } from "../../shared/kilocode/headers"
 import { DEFAULT_HEADERS } from "./constants"
-import { streamSse } from "../../services/continuedev/core/fetch/stream"
+import { streamSse } from "../../services/ghost/continuedev/core/fetch/stream"
 import { getEditorNameHeader } from "../../core/kilocode/wrapper"
 import type { FimHandler } from "./kilocode/FimHandler"
 import * as vscode from "vscode"

+ 1 - 1
src/api/providers/mistral.ts

@@ -20,7 +20,7 @@ import { handleProviderError } from "./utils/error-handler"
 import { BaseProvider } from "./base-provider"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 import { DEFAULT_HEADERS } from "./constants" // kilocode_change
-import { streamSse } from "../../services/continuedev/core/fetch/stream" // kilocode_change
+import { streamSse } from "../../services/ghost/continuedev/core/fetch/stream" // kilocode_change
 import type { CompletionUsage } from "./openrouter" // kilocode_change
 import type { FimHandler } from "./kilocode/FimHandler" // kilocode_change
 

+ 85 - 0
src/api/providers/moonshot.ts

@@ -7,6 +7,22 @@ import { getModelParams } from "../transform/model-params"
 
 import { OpenAICompatibleHandler, OpenAICompatibleConfig } from "./openai-compatible"
 
+// kilocode_change start
+const STRICT_KIMI_TEMPERATURES = {
+	"kimi-k2.5": {
+		thinkingEnabled: 1.0,
+		thinkingDisabled: moonshotModels["kimi-k2.5"].defaultTemperature ?? 0.6,
+	},
+	"kimi-for-coding": {
+		thinkingEnabled: 1.0,
+		thinkingDisabled: moonshotModels["kimi-for-coding"].defaultTemperature ?? 0.6,
+	},
+} as const
+
+type StrictKimiModelId = keyof typeof STRICT_KIMI_TEMPERATURES
+const STRICT_KIMI_MODELS = new Set(Object.keys(STRICT_KIMI_TEMPERATURES))
+// kilocode_change end
+
 export class MoonshotHandler extends OpenAICompatibleHandler {
 	constructor(options: ApiHandlerOptions) {
 		const modelId = options.apiModelId ?? moonshotDefaultModelId
@@ -67,4 +83,73 @@ export class MoonshotHandler extends OpenAICompatibleHandler {
 		// Moonshot always requires max_tokens
 		return this.options.modelMaxTokens || modelInfo.maxTokens || undefined
 	}
+
+	// kilocode_change start
+	private isStrictKimiModel(modelId: string): boolean {
+		return STRICT_KIMI_MODELS.has(modelId)
+	}
+
+	private getStrictKimiTemperatureConfig(modelId: string) {
+		if (!this.isStrictKimiModel(modelId)) {
+			return undefined
+		}
+
+		return STRICT_KIMI_TEMPERATURES[modelId as StrictKimiModelId]
+	}
+
+	private isStrictKimiThinkingEnabled(): boolean {
+		return this.options.enableReasoningEffort !== false
+	}
+
+	protected override getRequestTemperature(model: { id: string; temperature?: number }): number | undefined {
+		const strictTemperatureConfig = this.getStrictKimiTemperatureConfig(model.id)
+		if (strictTemperatureConfig) {
+			return this.isStrictKimiThinkingEnabled()
+				? strictTemperatureConfig.thinkingEnabled
+				: strictTemperatureConfig.thinkingDisabled
+		}
+
+		return super.getRequestTemperature(model)
+	}
+
+	protected override getProviderOptions(
+		model: { id: string; info: ModelInfo },
+		metadata?: Parameters<OpenAICompatibleHandler["getProviderOptions"]>[1],
+	): ReturnType<OpenAICompatibleHandler["getProviderOptions"]> {
+		const inheritedProviderOptions = super.getProviderOptions(model, metadata)
+		const existingMoonshotOptions =
+			inheritedProviderOptions?.moonshot &&
+			typeof inheritedProviderOptions.moonshot === "object" &&
+			!Array.isArray(inheritedProviderOptions.moonshot)
+				? inheritedProviderOptions.moonshot
+				: {}
+		const moonshotOptions = {
+			...existingMoonshotOptions,
+			...(metadata?.taskId ? { prompt_cache_key: metadata.taskId } : {}),
+		}
+
+		if (!this.isStrictKimiModel(model.id)) {
+			if (Object.keys(moonshotOptions).length === 0) {
+				return inheritedProviderOptions
+			}
+
+			return {
+				...inheritedProviderOptions,
+				moonshot: moonshotOptions,
+			}
+		}
+
+		const thinking = {
+			type: (this.isStrictKimiThinkingEnabled() ? "enabled" : "disabled") as "enabled" | "disabled",
+		}
+
+		return {
+			...inheritedProviderOptions,
+			moonshot: {
+				...moonshotOptions,
+				thinking,
+			},
+		}
+	}
+	// kilocode_change end
 }
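The strict Kimi handling above boils down to a small mapping from enableReasoningEffort to a forced temperature plus a moonshot.thinking provider option: 1.0 when thinking is on, the model's defaultTemperature (falling back to 0.6) when it is off, with prompt_cache_key added whenever a task id is available. A condensed sketch of that mapping with a hypothetical helper name:

	// Illustrative summary of the request overrides applied for kimi-k2.5 / kimi-for-coding.
	function strictKimiRequestOverrides(enableReasoningEffort: boolean | undefined, taskId?: string) {
		const thinkingEnabled = enableReasoningEffort !== false // thinking is on unless explicitly disabled
		return {
			temperature: thinkingEnabled ? 1.0 : 0.6, // 0.6 is the fallback when the registry has no defaultTemperature
			providerOptions: {
				moonshot: {
					...(taskId ? { prompt_cache_key: taskId } : {}),
					thinking: { type: thinkingEnabled ? "enabled" : "disabled" },
				},
			},
		}
	}

	// strictKimiRequestOverrides(undefined, "task-cache-1")
	// -> { temperature: 1, providerOptions: { moonshot: { prompt_cache_key: "task-cache-1", thinking: { type: "enabled" } } } }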

+ 103 - 3
src/api/providers/openai-compatible.ts

@@ -13,12 +13,14 @@ import type { ModelInfo } from "@roo-code/types"
 import type { ApiHandlerOptions } from "../../shared/api"
 
 import { convertToAiSdkMessages, convertToolsForAiSdk, processAiSdkStreamPart } from "../transform/ai-sdk"
-import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
+import { ApiStream, ApiStreamChunk, ApiStreamUsageChunk } from "../transform/stream"
 
 import { DEFAULT_HEADERS } from "./constants"
 import { BaseProvider } from "./base-provider"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 
+type StreamTextProviderOptions = Parameters<typeof streamText>[0]["providerOptions"]
+
 /**
  * Configuration options for creating an OpenAI-compatible provider.
  */
@@ -147,6 +149,27 @@ export abstract class OpenAICompatibleHandler extends BaseProvider implements Si
 		return maxTokens ?? undefined
 	}
 
+	// kilocode_change start
+	/**
+	 * Get the temperature to use for a request.
+	 * Subclasses can override this to enforce provider/model-specific behavior.
+	 */
+	protected getRequestTemperature(model: { temperature?: number }): number | undefined {
+		return model.temperature ?? this.config.temperature ?? 0
+	}
+
+	/**
+	 * Get provider-specific AI SDK options.
+	 * Subclasses can override this to pass provider-specific request fields.
+	 */
+	protected getProviderOptions(
+		_model: { id: string; info: ModelInfo },
+		_metadata?: ApiHandlerCreateMessageMetadata,
+	): StreamTextProviderOptions {
+		return undefined
+	}
+	// kilocode_change end
+
 	/**
 	 * Create a message stream using the AI SDK.
 	 */
@@ -170,23 +193,97 @@ export abstract class OpenAICompatibleHandler extends BaseProvider implements Si
 			model: languageModel,
 			system: systemPrompt,
 			messages: aiSdkMessages,
-			temperature: model.temperature ?? this.config.temperature ?? 0,
+			temperature: this.getRequestTemperature(model),
 			maxOutputTokens: this.getMaxOutputTokens(),
 			tools: aiSdkTools,
 			toolChoice: this.mapToolChoice(metadata?.tool_choice),
+			// kilocode_change
+			providerOptions: this.getProviderOptions(model, metadata),
 		}
 
 		// Use streamText for streaming responses
 		const result = streamText(requestOptions)
 
+		// kilocode_change start
+		// Moonshot/Kimi can stream tool calls as tool-input-* events without a final tool-call event.
+		// Accumulate these events and emit a complete tool_call chunk so Task can execute tools reliably.
+		const pendingToolInputs = new Map<string, { toolName: string; input: string }>()
+		const emittedToolCallIds = new Set<string>()
+
+		const emitToolCallFromPendingInput = (toolCallId: string): ApiStreamChunk | undefined => {
+			if (emittedToolCallIds.has(toolCallId)) {
+				pendingToolInputs.delete(toolCallId)
+				return undefined
+			}
+
+			const pending = pendingToolInputs.get(toolCallId)
+			pendingToolInputs.delete(toolCallId)
+
+			emittedToolCallIds.add(toolCallId)
+
+			return {
+				type: "tool_call",
+				id: toolCallId,
+				name: pending?.toolName || "unknown_tool",
+				arguments: pending?.input || "{}",
+			}
+		}
+		// kilocode_change end
+
 		// Process the full stream to get all events
 		for await (const part of result.fullStream) {
+			// kilocode_change start
+			if (part.type === "tool-input-start") {
+				const existing = pendingToolInputs.get(part.id)
+				pendingToolInputs.set(part.id, {
+					toolName: part.toolName || existing?.toolName || "unknown_tool",
+					input: existing?.input || "",
+				})
+				continue
+			}
+
+			if (part.type === "tool-input-delta") {
+				const existing = pendingToolInputs.get(part.id)
+				pendingToolInputs.set(part.id, {
+					toolName: existing?.toolName || "unknown_tool",
+					input: (existing?.input || "") + part.delta,
+				})
+				continue
+			}
+
+			if (part.type === "tool-input-end") {
+				const toolCallChunk = emitToolCallFromPendingInput(part.id)
+				if (toolCallChunk) {
+					yield toolCallChunk
+				}
+				continue
+			}
+
+			if (part.type === "tool-call") {
+				if (emittedToolCallIds.has(part.toolCallId)) {
+					continue
+				}
+				emittedToolCallIds.add(part.toolCallId)
+				pendingToolInputs.delete(part.toolCallId)
+			}
+			// kilocode_change end
+
 			// Use the processAiSdkStreamPart utility to convert stream parts
 			for (const chunk of processAiSdkStreamPart(part)) {
 				yield chunk
 			}
 		}
 
+		// kilocode_change start
+		// Flush any unfinished tool-input streams at end-of-stream.
+		for (const toolCallId of pendingToolInputs.keys()) {
+			const toolCallChunk = emitToolCallFromPendingInput(toolCallId)
+			if (toolCallChunk) {
+				yield toolCallChunk
+			}
+		}
+		// kilocode_change end
+
 		// Yield usage metrics at the end
 		const usage = await result.usage
 		if (usage) {
@@ -199,12 +296,15 @@ export abstract class OpenAICompatibleHandler extends BaseProvider implements Si
 	 */
 	async completePrompt(prompt: string): Promise<string> {
 		const languageModel = this.getLanguageModel()
+		const model = this.getModel()
 
 		const { text } = await generateText({
 			model: languageModel,
 			prompt,
 			maxOutputTokens: this.getMaxOutputTokens(),
-			temperature: this.config.temperature ?? 0,
+			temperature: this.getRequestTemperature(model),
+			// kilocode_change
+			providerOptions: this.getProviderOptions(model),
 		})
 
 		return text
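The tool-input handling added to createMessage above is essentially a small accumulator keyed by tool-call id: start/delta parts build up the argument string, and an end event (or end-of-stream) flushes a complete tool_call. A reduced, self-contained sketch of that state machine (types simplified, function name hypothetical):

	// Simplified accumulator for tool-input-* stream parts, mirroring the handler's flush logic.
	type ToolInputPart =
		| { type: "tool-input-start"; id: string; toolName?: string }
		| { type: "tool-input-delta"; id: string; delta: string }
		| { type: "tool-input-end"; id: string }

	function collectToolCalls(parts: ToolInputPart[]) {
		const pending = new Map<string, { toolName: string; input: string }>()
		const calls: Array<{ id: string; name: string; arguments: string }> = []

		const flush = (id: string) => {
			const entry = pending.get(id)
			pending.delete(id)
			calls.push({ id, name: entry?.toolName || "unknown_tool", arguments: entry?.input || "{}" })
		}

		for (const part of parts) {
			if (part.type === "tool-input-start") {
				const existing = pending.get(part.id)
				pending.set(part.id, { toolName: part.toolName || existing?.toolName || "unknown_tool", input: existing?.input || "" })
			} else if (part.type === "tool-input-delta") {
				const existing = pending.get(part.id)
				pending.set(part.id, { toolName: existing?.toolName || "unknown_tool", input: (existing?.input || "") + part.delta })
			} else {
				flush(part.id)
			}
		}

		// Anything still pending at end-of-stream is flushed too, so a provider that never
		// emits tool-input-end still yields a complete tool call.
		for (const id of [...pending.keys()]) flush(id)
		return calls
	}

	// collectToolCalls([
	// 	{ type: "tool-input-start", id: "t1", toolName: "read_file" },
	// 	{ type: "tool-input-delta", id: "t1", delta: '{"path":"a.ts"}' },
	// ])
	// -> [{ id: "t1", name: "read_file", arguments: '{"path":"a.ts"}' }]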

+ 501 - 0
src/api/providers/zenmux.ts

@@ -0,0 +1,501 @@
+// kilocode_change - new file
+import OpenAI from "openai"
+import type Anthropic from "@anthropic-ai/sdk"
+import type { ModelInfo } from "@roo-code/types"
+import { zenmuxDefaultModelId, zenmuxDefaultModelInfo } from "@roo-code/types"
+import { ApiProviderError } from "@roo-code/types"
+import { TelemetryService } from "@roo-code/telemetry"
+
+import { ApiHandlerOptions, ModelRecord } from "../../shared/api"
+
+import { addCacheBreakpoints as addAnthropicCacheBreakpoints } from "../transform/caching/anthropic"
+import { addCacheBreakpoints as addGeminiCacheBreakpoints } from "../transform/caching/gemini"
+import type { OpenRouterReasoningParams } from "../transform/reasoning"
+import { getModelParams } from "../transform/model-params"
+
+import { getModels } from "./fetchers/modelCache"
+
+import { DEFAULT_HEADERS } from "./constants"
+import { BaseProvider } from "./base-provider"
+import { verifyFinishReason } from "./kilocode/verifyFinishReason"
+
+import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from "../index"
+import { ChatCompletionTool } from "openai/resources"
+import { convertToOpenAiMessages } from "../transform/openai-format"
+import { convertToR1Format } from "../transform/r1-format"
+import { resolveToolProtocol } from "../../utils/resolveToolProtocol"
+import { TOOL_PROTOCOL } from "@roo-code/types"
+import { ApiStreamChunk } from "../transform/stream"
+import { NativeToolCallParser } from "../../core/assistant-message/NativeToolCallParser"
+import { KiloCodeChunkSchema } from "./kilocode/chunk-schema"
+
+// ZenMux provider parameters
+type ZenMuxProviderParams = {
+	order?: string[]
+	only?: string[]
+	allow_fallbacks?: boolean
+	data_collection?: "allow" | "deny"
+	sort?: "price" | "throughput" | "latency"
+	zdr?: boolean
+}
+
+// ZenMux-specific response types
+type ZenMuxChatCompletionParams = Omit<OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming, "model"> & {
+	model: string
+	provider?: ZenMuxProviderParams
+	reasoning?: OpenRouterReasoningParams
+}
+
+// ZenMux error structure
+interface ZenMuxErrorResponse {
+	message?: string
+	code?: number
+	metadata?: { raw?: string }
+}
+
+// Usage interface for cost calculation
+interface CompletionUsage {
+	completion_tokens?: number
+	completion_tokens_details?: {
+		reasoning_tokens?: number
+	}
+	prompt_tokens?: number
+	prompt_tokens_details?: {
+		cached_tokens?: number
+	}
+	total_tokens?: number
+	cost?: number
+	cost_details?: {
+		upstream_inference_cost?: number
+	}
+}
+
+const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3
+
+export class ZenMuxHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
+	private client: OpenAI
+	protected models: ModelRecord = {}
+	protected endpoints: ModelRecord = {}
+	lastGenerationId?: string
+
+	protected get providerName(): "ZenMux" {
+		return "ZenMux" as const
+	}
+
+	private currentReasoningDetails: any[] = []
+
+	constructor(options: ApiHandlerOptions) {
+		super()
+		this.options = options
+
+		const baseURL = this.options.zenmuxBaseUrl || "https://zenmux.ai/api/v1"
+		const apiKey = this.options.zenmuxApiKey ?? "not-provided"
+
+		this.client = new OpenAI({
+			baseURL: baseURL,
+			apiKey: apiKey,
+			defaultHeaders: DEFAULT_HEADERS,
+		})
+
+		// Load models asynchronously to populate cache before getModel() is called
+		this.loadDynamicModels().catch((error) => {
+			console.error("[ZenMuxHandler] Failed to load dynamic models:", error)
+		})
+	}
+
+	private async loadDynamicModels(): Promise<void> {
+		try {
+			const models = await getModels({ provider: "zenmux" })
+			this.models = models
+		} catch (error) {
+			console.error("[ZenMuxHandler] Error loading dynamic models:", {
+				error: error instanceof Error ? error.message : String(error),
+				stack: error instanceof Error ? error.stack : undefined,
+			})
+		}
+	}
+	async createZenMuxStream(
+		client: OpenAI,
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		model: { id: string; info: ModelInfo },
+		_reasoningEffort?: string,
+		thinkingBudgetTokens?: number,
+		zenMuxProviderSorting?: string,
+		tools?: Array<ChatCompletionTool>,
+		_geminiThinkingLevel?: string,
+	) {
+		// Convert Anthropic messages to OpenAI format
+		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]
+
+		// Build reasoning config if thinking budget is set
+		let reasoning: { max_tokens: number } | undefined
+		if (thinkingBudgetTokens && thinkingBudgetTokens > 0) {
+			reasoning = { max_tokens: thinkingBudgetTokens }
+		}
+
+		// @ts-ignore
+		const stream = await client.chat.completions.create({
+			model: model.id,
+			messages: openAiMessages,
+			stream: true,
+			stream_options: { include_usage: true },
+			...(reasoning ? { reasoning } : {}),
+			...(zenMuxProviderSorting && zenMuxProviderSorting !== ""
+				? {
+						provider: {
+							routing: {
+								type: "priority",
+								primary_factor: zenMuxProviderSorting,
+							},
+						},
+					}
+				: {}),
+			...this.getOpenAIToolParams(tools),
+		})
+
+		return stream
+	}
+	getOpenAIToolParams(tools?: ChatCompletionTool[], enableParallelToolCalls: boolean = false) {
+		return tools?.length
+			? {
+					tools,
+					tool_choice: tools ? "auto" : undefined,
+					parallel_tool_calls: enableParallelToolCalls ? true : false,
+				}
+			: {
+					tools: undefined,
+				}
+	}
+
+	getTotalCost(lastUsage: CompletionUsage): number {
+		return (lastUsage.cost_details?.upstream_inference_cost || 0) + (lastUsage.cost || 0)
+	}
+
+	private handleStreamingError(error: ZenMuxErrorResponse, modelId: string, operation: string): never {
+		const rawErrorMessage = error?.metadata?.raw || error?.message
+
+		const apiError = Object.assign(
+			new ApiProviderError(
+				rawErrorMessage ?? "Unknown error",
+				this.providerName,
+				modelId,
+				operation,
+				error?.code,
+			),
+			{ status: error?.code, error: { message: error?.message, metadata: error?.metadata } },
+		)
+
+		TelemetryService.instance.captureException(apiError)
+
+		throw new Error(`ZenMux API Error ${error?.code}: ${rawErrorMessage}`)
+	}
+	async *createMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): AsyncGenerator<ApiStreamChunk> {
+		this.lastGenerationId = undefined
+		const model = await this.fetchModel()
+
+		let { id: modelId } = model
+
+		// Reset reasoning_details accumulator for this request
+		this.currentReasoningDetails = []
+
+		// Convert Anthropic messages to OpenAI format.
+		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]
+
+		// DeepSeek highly recommends using user instead of system role.
+		// DeepSeek strongly recommends using the user role instead of the system role.
+			openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+		}
+
+		// Process reasoning_details when switching models to Gemini for native tool call compatibility
+		const toolProtocol = resolveToolProtocol(this.options, model.info)
+		const isNativeProtocol = toolProtocol === TOOL_PROTOCOL.NATIVE
+		const isGemini = modelId.startsWith("google/gemini")
+
+		// For Gemini with native protocol: inject fake reasoning.encrypted blocks for tool calls
+		// This is required when switching from other models to Gemini to satisfy API validation
+		if (isNativeProtocol && isGemini) {
+			openAiMessages = openAiMessages.map((msg) => {
+				if (msg.role === "assistant") {
+					const toolCalls = (msg as any).tool_calls as any[] | undefined
+					const existingDetails = (msg as any).reasoning_details as any[] | undefined
+
+					// Only inject if there are tool calls and no existing encrypted reasoning
+					if (toolCalls && toolCalls.length > 0) {
+						const hasEncrypted = existingDetails?.some((d) => d.type === "reasoning.encrypted") ?? false
+
+						if (!hasEncrypted) {
+							const fakeEncrypted = toolCalls.map((tc, idx) => ({
+								id: tc.id,
+								type: "reasoning.encrypted",
+								data: "skip_thought_signature_validator",
+								format: "google-gemini-v1",
+								index: (existingDetails?.length ?? 0) + idx,
+							}))
+
+							return {
+								...msg,
+								reasoning_details: [...(existingDetails ?? []), ...fakeEncrypted],
+							}
+						}
+					}
+				}
+				return msg
+			})
+		}
+
+		// Add cache breakpoints for supported models
+		if (modelId.startsWith("anthropic/claude") || modelId.startsWith("google/gemini")) {
+			if (modelId.startsWith("google")) {
+				addGeminiCacheBreakpoints(systemPrompt, openAiMessages)
+			} else {
+				addAnthropicCacheBreakpoints(systemPrompt, openAiMessages)
+			}
+		}
+
+		let stream
+		try {
+			stream = await this.createZenMuxStream(
+				this.client,
+				systemPrompt,
+				messages,
+				model,
+				this.options.reasoningEffort,
+				this.options.modelMaxThinkingTokens,
+				this.options.zenmuxProviderSort,
+				metadata?.tools,
+			)
+		} catch (error) {
+			const errorMessage = error instanceof Error ? error.message : String(error)
+			const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "createMessage")
+			TelemetryService.instance.captureException(apiError)
+			throw error
+		}
+
+		let lastUsage: CompletionUsage | undefined = undefined
+		let inferenceProvider: string | undefined
+		// Accumulator for reasoning_details: accumulate text by type-index key
+		const reasoningDetailsAccumulator = new Map<
+			string,
+			{
+				type: string
+				text?: string
+				summary?: string
+				data?: string
+				id?: string | null
+				format?: string
+				signature?: string
+				index: number
+			}
+		>()
+
+		for await (const chunk of stream) {
+			// Handle ZenMux streaming error response
+			if ("error" in chunk) {
+				this.handleStreamingError(chunk.error as ZenMuxErrorResponse, modelId, "createMessage")
+			}
+
+			const kiloCodeChunk = KiloCodeChunkSchema.safeParse(chunk).data
+			inferenceProvider =
+				kiloCodeChunk?.choices?.[0]?.delta?.provider_metadata?.gateway?.routing?.resolvedProvider ??
+				kiloCodeChunk?.provider ??
+				inferenceProvider
+
+			verifyFinishReason(chunk.choices[0])
+			const delta = chunk.choices[0]?.delta
+			const finishReason = chunk.choices[0]?.finish_reason
+
+			if (delta) {
+				// Handle reasoning_details array format
+				const deltaWithReasoning = delta as typeof delta & {
+					reasoning_details?: Array<{
+						type: string
+						text?: string
+						summary?: string
+						data?: string
+						id?: string | null
+						format?: string
+						signature?: string
+						index?: number
+					}>
+				}
+
+				if (deltaWithReasoning.reasoning_details && Array.isArray(deltaWithReasoning.reasoning_details)) {
+					for (const detail of deltaWithReasoning.reasoning_details) {
+						const index = detail.index ?? 0
+						const key = `${detail.type}-${index}`
+						const existing = reasoningDetailsAccumulator.get(key)
+
+						if (existing) {
+							// Accumulate text/summary/data for existing reasoning detail
+							if (detail.text !== undefined) {
+								existing.text = (existing.text || "") + detail.text
+							}
+							if (detail.summary !== undefined) {
+								existing.summary = (existing.summary || "") + detail.summary
+							}
+							if (detail.data !== undefined) {
+								existing.data = (existing.data || "") + detail.data
+							}
+							// Update other fields if provided
+							if (detail.id !== undefined) existing.id = detail.id
+							if (detail.format !== undefined) existing.format = detail.format
+							if (detail.signature !== undefined) existing.signature = detail.signature
+						} else {
+							// Start new reasoning detail accumulation
+							reasoningDetailsAccumulator.set(key, {
+								type: detail.type,
+								text: detail.text,
+								summary: detail.summary,
+								data: detail.data,
+								id: detail.id,
+								format: detail.format,
+								signature: detail.signature,
+								index,
+							})
+						}
+
+						// Yield text for display (still fragmented for live streaming)
+						let reasoningText: string | undefined
+						if (detail.type === "reasoning.text" && typeof detail.text === "string") {
+							reasoningText = detail.text
+						} else if (detail.type === "reasoning.summary" && typeof detail.summary === "string") {
+							reasoningText = detail.summary
+						}
+
+						if (reasoningText) {
+							yield { type: "reasoning", text: reasoningText } as ApiStreamChunk
+						}
+					}
+				} else if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") {
+					// Handle legacy reasoning format
+					yield { type: "reasoning", text: delta.reasoning } as ApiStreamChunk
+				}
+
+				if (delta && "reasoning_content" in delta && typeof delta.reasoning_content === "string") {
+					yield { type: "reasoning", text: delta.reasoning_content } as ApiStreamChunk
+				}
+
+				// Check for tool calls in delta
+				if ("tool_calls" in delta && Array.isArray(delta.tool_calls)) {
+					for (const toolCall of delta.tool_calls) {
+						yield {
+							type: "tool_call_partial",
+							index: toolCall.index,
+							id: toolCall.id,
+							name: toolCall.function?.name,
+							arguments: toolCall.function?.arguments,
+						}
+					}
+				}
+
+				if (delta.content) {
+					yield { type: "text", text: delta.content }
+				}
+			}
+
+			// Process finish_reason to emit tool_call_end events
+			if (finishReason) {
+				const endEvents = NativeToolCallParser.processFinishReason(finishReason)
+				for (const event of endEvents) {
+					yield event
+				}
+			}
+
+			if (chunk.usage) {
+				lastUsage = chunk.usage
+			}
+		}
+
+		// After streaming completes, store the accumulated reasoning_details
+		if (reasoningDetailsAccumulator.size > 0) {
+			this.currentReasoningDetails = Array.from(reasoningDetailsAccumulator.values())
+		}
+
+		if (lastUsage) {
+			yield {
+				type: "usage",
+				inputTokens: lastUsage.prompt_tokens || 0,
+				outputTokens: lastUsage.completion_tokens || 0,
+				cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens,
+				reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens,
+				totalCost: this.getTotalCost(lastUsage),
+				inferenceProvider,
+			}
+		}
+	}
+
+	getReasoningDetails(): any[] | undefined {
+		return this.currentReasoningDetails.length > 0 ? this.currentReasoningDetails : undefined
+	}
+	public async fetchModel() {
+		const models = await getModels({ provider: "zenmux" })
+		this.models = models
+		return this.getModel()
+	}
+
+	override getModel() {
+		const id = this.options.zenmuxModelId ?? zenmuxDefaultModelId
+		let info = this.models[id] ?? zenmuxDefaultModelInfo
+
+		const isDeepSeekR1 = id.startsWith("deepseek/deepseek-r1") || id === "perplexity/sonar-reasoning"
+
+		const params = getModelParams({
+			format: "zenmux",
+			modelId: id,
+			model: info,
+			settings: this.options,
+			defaultTemperature: isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0,
+		})
+
+		return { id, info, topP: isDeepSeekR1 ? 0.95 : undefined, ...params }
+	}
+
+	async completePrompt(prompt: string) {
+		let { id: modelId, maxTokens, temperature, reasoning, verbosity } = await this.fetchModel()
+
+		// ZenMux `verbosity` supports "low" | "medium" | "high" (and sometimes null),
+		// while our shared model params may include "max". Map "max" to the closest
+		// supported value to satisfy the API/SDK typing.
+		const zenMuxVerbosity: "low" | "medium" | "high" | null | undefined = verbosity === "max" ? "high" : verbosity
+
+		const completionParams: ZenMuxChatCompletionParams = {
+			model: modelId,
+			max_tokens: maxTokens,
+			temperature,
+			messages: [{ role: "user", content: prompt }],
+			stream: false,
+			...(reasoning && { reasoning }),
+			verbosity: zenMuxVerbosity,
+		}
+
+		let response
+
+		try {
+			response = await this.client.chat.completions.create(completionParams)
+		} catch (error) {
+			const errorMessage = error instanceof Error ? error.message : String(error)
+			const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "completePrompt")
+			TelemetryService.instance.captureException(apiError)
+			throw error
+		}
+
+		if ("error" in response) {
+			this.handleStreamingError(response.error as ZenMuxErrorResponse, modelId, "completePrompt")
+		}
+
+		const completion = response as OpenAI.Chat.ChatCompletion
+		return completion.choices[0]?.message?.content || ""
+	}
+}
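One detail worth calling out in the handler above: streamed reasoning_details fragments are merged per `${type}-${index}` key, so getReasoningDetails() returns one consolidated entry per reasoning block rather than one fragment per delta. A reduced sketch of that merge (helper name hypothetical):

	// Illustrative merge of streamed reasoning_details fragments, mirroring the accumulator above.
	type ReasoningDetailDelta = { type: string; index?: number; text?: string; summary?: string; data?: string }

	function mergeReasoningDetails(deltas: ReasoningDetailDelta[]) {
		const byKey = new Map<string, { type: string; index: number; text?: string; summary?: string; data?: string }>()
		for (const d of deltas) {
			const index = d.index ?? 0
			const key = `${d.type}-${index}`
			const entry = byKey.get(key) ?? { type: d.type, index }
			if (d.text !== undefined) entry.text = (entry.text ?? "") + d.text
			if (d.summary !== undefined) entry.summary = (entry.summary ?? "") + d.summary
			if (d.data !== undefined) entry.data = (entry.data ?? "") + d.data
			byKey.set(key, entry)
		}
		return Array.from(byKey.values())
	}

	// mergeReasoningDetails([
	// 	{ type: "reasoning.text", index: 0, text: "Step 1 " },
	// 	{ type: "reasoning.text", index: 0, text: "and step 2" },
	// ])
	// -> [{ type: "reasoning.text", index: 0, text: "Step 1 and step 2" }]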

+ 133 - 0
src/api/transform/__tests__/ai-sdk.spec.ts

@@ -284,6 +284,139 @@ describe("AI SDK conversion utilities", () => {
 			})
 		})
 
+		// kilocode_change start
+		it("preserves assistant text/tool-call/text ordering", () => {
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "assistant",
+					content: [
+						{ type: "text", text: "Before tool call" },
+						{
+							type: "tool_use",
+							id: "call_789",
+							name: "read_file",
+							input: { path: "before.ts" },
+						},
+						{ type: "text", text: "After tool call" },
+					],
+				},
+			]
+
+			const result = convertToAiSdkMessages(messages)
+
+			expect(result).toHaveLength(1)
+			expect(result[0]).toEqual({
+				role: "assistant",
+				content: [
+					{ type: "text", text: "Before tool call" },
+					{
+						type: "tool-call",
+						toolCallId: "call_789",
+						toolName: "read_file",
+						input: { path: "before.ts" },
+					},
+					{ type: "text", text: "After tool call" },
+				],
+			})
+		})
+
+		it("preserves user text before tool results", () => {
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "assistant",
+					content: [
+						{
+							type: "tool_use",
+							id: "call_999",
+							name: "read_file",
+							input: { path: "ordered.ts" },
+						},
+					],
+				},
+				{
+					role: "user",
+					content: [
+						{ type: "text", text: "Context before tool result" },
+						{
+							type: "tool_result",
+							tool_use_id: "call_999",
+							content: "ordered-result",
+						},
+					],
+				},
+			]
+
+			const result = convertToAiSdkMessages(messages)
+
+			expect(result).toHaveLength(3)
+			expect(result[0]).toEqual({
+				role: "assistant",
+				content: [
+					{
+						type: "tool-call",
+						toolCallId: "call_999",
+						toolName: "read_file",
+						input: { path: "ordered.ts" },
+					},
+				],
+			})
+			expect(result[1]).toEqual({
+				role: "user",
+				content: [{ type: "text", text: "Context before tool result" }],
+			})
+			expect(result[2]).toEqual({
+				role: "tool",
+				content: [
+					{
+						type: "tool-result",
+						toolCallId: "call_999",
+						toolName: "read_file",
+						output: { type: "text", value: "ordered-result" },
+					},
+				],
+			})
+		})
+
+		it("preserves assistant reasoning blocks via openaiCompatible metadata", () => {
+			const messages = [
+				{
+					role: "assistant",
+					content: [
+						{ type: "reasoning", text: "Step 1 reasoning" },
+						{ type: "text", text: "I will call a tool" },
+						{
+							type: "tool_use",
+							id: "call_reasoning",
+							name: "read_file",
+							input: { path: "reasoning.ts" },
+						},
+					],
+				},
+			] as Anthropic.Messages.MessageParam[]
+
+			const result = convertToAiSdkMessages(messages as any)
+
+			expect(result).toHaveLength(1)
+			expect(result[0]).toMatchObject({
+				role: "assistant",
+				content: [
+					{ type: "text", text: "I will call a tool" },
+					{
+						type: "tool-call",
+						toolCallId: "call_reasoning",
+						toolName: "read_file",
+						input: { path: "reasoning.ts" },
+					},
+				],
+				providerOptions: {
+					openaiCompatible: {
+						reasoning_content: "Step 1 reasoning",
+					},
+				},
+			})
+		})
+		// kilocode_change end
+
 		it("handles empty assistant content", () => {
 			const messages: Anthropic.Messages.MessageParam[] = [
 				{

+ 36 - 0
src/api/transform/__tests__/model-params.spec.ts

@@ -684,6 +684,42 @@ describe("getModelParams", () => {
 		})
 	})
 
+	// kilocode_change start
+	describe("Adaptive thinking models", () => {
+		it("should default to thinking temperature when adaptive thinking is enabled and unset", () => {
+			const model: ModelInfo = {
+				...baseModel,
+				supportsAdaptiveThinking: true,
+				defaultTemperature: 0.6,
+			}
+
+			const result = getModelParams({
+				...openaiParams,
+				settings: {},
+				model,
+			})
+
+			expect(result.temperature).toBe(1.0)
+		})
+
+		it("should use default non-thinking temperature when adaptive thinking is explicitly disabled", () => {
+			const model: ModelInfo = {
+				...baseModel,
+				supportsAdaptiveThinking: true,
+				defaultTemperature: 0.6,
+			}
+
+			const result = getModelParams({
+				...openaiParams,
+				settings: { enableReasoningEffort: false },
+				model,
+			})
+
+			expect(result.temperature).toBe(0.6)
+		})
+	})
+	// kilocode_change end
+
 	describe("Hybrid reasoning models (supportsReasoningEffort)", () => {
 		const model: ModelInfo = {
 			...baseModel,

+ 59 - 0
src/api/transform/__tests__/reasoning.spec.ts

@@ -838,6 +838,65 @@ describe("reasoning.ts", () => {
 			const result = getGeminiReasoning(options) as GeminiReasoningParams | undefined
 			expect(result).toEqual({ thinkingLevel: "medium", includeThoughts: true })
 		})
+
+		// kilocode_change start
+		it("should return undefined for budget-only models when budget is not enabled (fixes issue #4490)", () => {
+			// This test covers the bug where gemini-2.5-flash would fail with
+			// "Thinking level is not supported for this model" because thinkingLevel
+			// was being sent to a model that only supports thinkingBudget
+			const geminiFlashModel: ModelInfo = {
+				...baseModel,
+				// gemini-2.5-flash supports budget but NOT effort-based reasoning
+				supportsReasoningBudget: true,
+				// Note: no supportsReasoningEffort, no requiredReasoningBudget
+			}
+
+			const settings: ProviderSettings = {
+				apiProvider: "gemini",
+				// User may have a reasoningEffort set from a different model
+				reasoningEffort: "high",
+				// But enableReasoningEffort is not true, so budget won't be used
+			}
+
+			const options: GetModelReasoningOptions = {
+				model: geminiFlashModel,
+				reasoningBudget: 4096,
+				reasoningEffort: "high",
+				settings,
+			}
+
+			const result = getGeminiReasoning(options)
+			// Should return undefined, NOT { thinkingLevel: "high", includeThoughts: true }
+			// because this model doesn't support thinkingLevel
+			expect(result).toBeUndefined()
+		})
+
+		it("should return undefined for budget-only models even with explicit effort setting", () => {
+			// Models like gemini-2.5-flash only support budget-based reasoning
+			const budgetOnlyModel: ModelInfo = {
+				...baseModel,
+				supportsReasoningBudget: true,
+				maxThinkingTokens: 24576,
+				// Critically: no supportsReasoningEffort
+			}
+
+			const settings: ProviderSettings = {
+				apiProvider: "gemini",
+				reasoningEffort: "medium",
+			}
+
+			const options: GetModelReasoningOptions = {
+				model: budgetOnlyModel,
+				reasoningBudget: 8192,
+				reasoningEffort: "medium",
+				settings,
+			}
+
+			const result = getGeminiReasoning(options)
+			// Must not send thinkingLevel to a model that doesn't support it
+			expect(result).toBeUndefined()
+		})
+		// kilocode_change end
 	})
 
 	describe("Integration scenarios", () => {

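In plain terms, the two budget-only tests added to reasoning.spec.ts above pin down this decision: a thinkingLevel may only be emitted for models that advertise effort-based reasoning, a thinkingBudget only when the budget path is actually enabled, and otherwise nothing at all. A hypothetical summary helper capturing what the tests assert (not the real getGeminiReasoning implementation):

	// Hypothetical helper summarizing the behavior the new tests assert for budget-only Gemini models.
	type ReasoningCaps = {
		supportsReasoningEffort?: boolean
		supportsReasoningBudget?: boolean
		requiredReasoningBudget?: boolean
	}

	function expectedGeminiReasoningKind(model: ReasoningCaps, budgetEnabled: boolean): "thinkingLevel" | "thinkingBudget" | undefined {
		if (model.supportsReasoningEffort) {
			return "thinkingLevel" // effort-capable models may receive a thinking level
		}
		if ((model.supportsReasoningBudget || model.requiredReasoningBudget) && budgetEnabled) {
			return "thinkingBudget" // budget-only models get a budget, and only when it is enabled
		}
		return undefined // otherwise send no reasoning config at all (issue #4490)
	}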
+ 80 - 37
src/api/transform/ai-sdk.ts

@@ -5,7 +5,13 @@
 
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
-import { tool as createTool, jsonSchema, type ModelMessage, type TextStreamPart } from "ai"
+import {
+	tool as createTool,
+	jsonSchema,
+	type AssistantModelMessage,
+	type ModelMessage,
+	type TextStreamPart,
+} from "ai"
 import type { ApiStreamChunk } from "./stream"
 
 /**
@@ -38,6 +44,8 @@ export function convertToAiSdkMessages(messages: Anthropic.Messages.MessageParam
 			})
 		} else {
 			if (message.role === "user") {
+				// kilocode_change start
+				// Keep user text/image parts and tool results in their original order.
 				const parts: Array<
 					{ type: "text"; text: string } | { type: "image"; image: string; mimeType?: string }
 				> = []
@@ -48,10 +56,34 @@ export function convertToAiSdkMessages(messages: Anthropic.Messages.MessageParam
 					output: { type: "text"; value: string }
 				}> = []
 
+				const flushUserParts = () => {
+					if (parts.length === 0) {
+						return
+					}
+					modelMessages.push({
+						role: "user",
+						content: [...parts],
+					} as ModelMessage)
+					parts.length = 0
+				}
+
+				const flushToolResults = () => {
+					if (toolResults.length === 0) {
+						return
+					}
+					modelMessages.push({
+						role: "tool",
+						content: [...toolResults],
+					} as ModelMessage)
+					toolResults.length = 0
+				}
+
 				for (const part of message.content) {
 					if (part.type === "text") {
+						flushToolResults()
 						parts.push({ type: "text", text: part.text })
 					} else if (part.type === "image") {
+						flushToolResults()
 						// Handle both base64 and URL source types
 						const source = part.source as { type: string; media_type?: string; data?: string; url?: string }
 						if (source.type === "base64" && source.media_type && source.data) {
@@ -67,6 +99,7 @@ export function convertToAiSdkMessages(messages: Anthropic.Messages.MessageParam
 							})
 						}
 					} else if (part.type === "tool_result") {
+						flushUserParts()
 						// Convert tool results to string content
 						let content: string
 						if (typeof part.content === "string") {
@@ -92,59 +125,69 @@ export function convertToAiSdkMessages(messages: Anthropic.Messages.MessageParam
 					}
 				}
 
-				// AI SDK requires tool results in separate "tool" role messages
-				// UserContent only supports: string | Array<TextPart | ImagePart | FilePart>
-				// ToolContent (for role: "tool") supports: Array<ToolResultPart | ToolApprovalResponse>
-				if (toolResults.length > 0) {
-					modelMessages.push({
-						role: "tool",
-						content: toolResults,
-					} as ModelMessage)
-				}
-
-				// Add user message with only text/image content (no tool results)
-				if (parts.length > 0) {
-					modelMessages.push({
-						role: "user",
-						content: parts,
-					} as ModelMessage)
-				}
+				flushToolResults()
+				flushUserParts()
+				// kilocode_change end
 			} else if (message.role === "assistant") {
+				// kilocode_change start
+				// Keep assistant text and tool calls in original order.
 				const textParts: string[] = []
-				const toolCalls: Array<{
-					type: "tool-call"
-					toolCallId: string
-					toolName: string
-					input: unknown
-				}> = []
+				const content: Array<
+					| { type: "text"; text: string }
+					| { type: "tool-call"; toolCallId: string; toolName: string; input: unknown }
+				> = []
+				const reasoningParts: string[] = []
+
+				const flushText = () => {
+					if (textParts.length === 0) {
+						return
+					}
+					content.push({ type: "text", text: textParts.join("\n") })
+					textParts.length = 0
+				}
 
 				for (const part of message.content) {
 					if (part.type === "text") {
 						textParts.push(part.text)
 					} else if (part.type === "tool_use") {
-						toolCalls.push({
+						flushText()
+						const toolCall = {
 							type: "tool-call",
 							toolCallId: part.id,
 							toolName: part.name,
 							input: part.input,
-						})
+						} as const
+						content.push(toolCall)
+					} else if (
+						(part as { type?: string }).type === "reasoning" &&
+						typeof (part as { text?: unknown }).text === "string"
+					) {
+						const reasoningPart = part as { text?: unknown }
+						reasoningParts.push(reasoningPart.text as string)
 					}
 				}
 
-				const content: Array<
-					| { type: "text"; text: string }
-					| { type: "tool-call"; toolCallId: string; toolName: string; input: unknown }
-				> = []
-
-				if (textParts.length > 0) {
-					content.push({ type: "text", text: textParts.join("\n") })
-				}
-				content.push(...toolCalls)
+				flushText()
 
-				modelMessages.push({
+				const aiSdkAssistantMessage: AssistantModelMessage = {
 					role: "assistant",
 					content: content.length > 0 ? content : [{ type: "text", text: "" }],
-				} as ModelMessage)
+				}
+
+				const messageWithReasoning = message as { reasoning_content?: string }
+				const reasoningContent = messageWithReasoning.reasoning_content || reasoningParts.join("\n").trim()
+				if (reasoningContent) {
+					aiSdkAssistantMessage.providerOptions = {
+						...(aiSdkAssistantMessage.providerOptions || {}),
+						// OpenAI-compatible AI SDK models read per-message metadata from providerOptions.openaiCompatible.
+						openaiCompatible: {
+							reasoning_content: reasoningContent,
+						},
+					}
+				}
+
+				modelMessages.push(aiSdkAssistantMessage)
+				// kilocode_change end
 			}
 		}
 	}

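Note on the reordering above: buffered parts are flushed whenever the part type switches, so ordering within a single Anthropic message survives the conversion. A small sketch with made-up content (toolName resolution is not visible in this hunk and is elided):

import { Anthropic } from "@anthropic-ai/sdk"

// One Anthropic user message where plain text precedes the tool result.
const exampleInput: Anthropic.Messages.MessageParam[] = [
	{
		role: "user",
		content: [
			{ type: "text", text: "Here is some additional context." },
			{ type: "tool_result", tool_use_id: "call_1", content: "42" },
		],
	},
]

// Sketch of the output convertToAiSdkMessages(exampleInput) now produces:
//   [
//     { role: "user", content: [{ type: "text", text: "Here is some additional context." }] },
//     { role: "tool", content: [{ type: "tool-result", toolCallId: "call_1", output: { type: "text", value: "42" } }] },
//   ]
// Before this change the tool-result message was always emitted ahead of the user text,
// regardless of where it appeared in the original message.
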
+ 15 - 3
src/api/transform/model-params.ts

@@ -25,8 +25,7 @@ import {
 	getGeminiReasoning,
 	getOpenRouterReasoning,
 } from "./reasoning"
-
-type Format = "anthropic" | "openai" | "gemini" | "openrouter"
+type Format = "anthropic" | "openai" | "gemini" | "openrouter" | "zenmux"
 
 type GetModelParamsOptions<T extends Format> = {
 	format: T
@@ -65,13 +64,26 @@ type OpenRouterModelParams = {
 	reasoning: OpenRouterReasoningParams | undefined
 } & BaseModelParams
 
-export type ModelParams = AnthropicModelParams | OpenAiModelParams | GeminiModelParams | OpenRouterModelParams
+// kilocode_change start
+type ZenMuxModelParams = {
+	format: "zenmux"
+	reasoning: OpenRouterReasoningParams | undefined
+} & BaseModelParams
+// kilocode_change end
+
+export type ModelParams =
+	| AnthropicModelParams
+	| OpenAiModelParams
+	| GeminiModelParams
+	| OpenRouterModelParams
+	| ZenMuxModelParams // kilocode_change
 
 // Function overloads for specific return types
 export function getModelParams(options: GetModelParamsOptions<"anthropic">): AnthropicModelParams
 export function getModelParams(options: GetModelParamsOptions<"openai">): OpenAiModelParams
 export function getModelParams(options: GetModelParamsOptions<"gemini">): GeminiModelParams
 export function getModelParams(options: GetModelParamsOptions<"openrouter">): OpenRouterModelParams
+export function getModelParams(options: GetModelParamsOptions<"zenmux">): OpenRouterModelParams
 export function getModelParams({
 	format,
 	modelId,

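Note: the "zenmux" format deliberately mirrors OpenRouter by reusing OpenRouterReasoningParams, so callers can share one code path when narrowing the union. A hedged sketch (the import path is illustrative):

import type { ModelParams } from "../api/transform/model-params" // illustrative path

function usesOpenRouterStyleReasoning(params: ModelParams): boolean {
	// "zenmux" and "openrouter" carry the same reasoning shape, so they narrow together.
	return (params.format === "zenmux" || params.format === "openrouter") && params.reasoning !== undefined
}
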
+ 6 - 0
src/api/transform/reasoning.ts

@@ -147,6 +147,12 @@ export const getGeminiReasoning = ({
 		return { thinkingBudget: reasoningBudget!, includeThoughts: true }
 	}
 
+	// kilocode_change start
+	if (!model.supportsReasoningEffort) {
+		return undefined
+	}
+	// kilocode_change end
+
 	// For effort-based Gemini models, rely directly on the selected effort value.
 	// We intentionally ignore enableReasoningEffort here so that explicitly chosen
 	// efforts in the UI (e.g. "High" for gemini-3-pro-preview) always translate

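Note: combined with the budget branch just above it (only partially visible in this hunk), the guard yields a three-step decision order: budget first, then bail out for budget-only models, then effort. A simplified sketch under that assumption; parameter names are illustrative and the real function derives budgets and includeThoughts itself:

type GeminiReasoningSketch =
	| { thinkingBudget: number; includeThoughts: true }
	| { thinkingLevel: "low" | "medium" | "high"; includeThoughts: true }
	| undefined

function geminiReasoningSketch(
	model: { supportsReasoningEffort?: boolean },
	budgetEnabled: boolean,
	reasoningBudget: number,
	reasoningEffort: "low" | "medium" | "high",
): GeminiReasoningSketch {
	if (budgetEnabled) {
		return { thinkingBudget: reasoningBudget, includeThoughts: true }
	}
	if (!model.supportsReasoningEffort) {
		// Budget-only models such as gemini-2.5-flash must never receive thinkingLevel.
		return undefined
	}
	return { thinkingLevel: reasoningEffort, includeThoughts: true }
}
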
+ 29 - 0
src/core/prompts/responses.ts

@@ -1,6 +1,7 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import * as path from "path"
 import * as diff from "diff"
+import * as fs from "fs" // kilocode_change
 import { RooIgnoreController, LOCK_TEXT_SYMBOL } from "../ignore/RooIgnoreController"
 import { RooProtectedController } from "../protect/RooProtectedController"
 import * as vscode from "vscode"
@@ -233,6 +234,34 @@ Otherwise, if you have not completed the task and do not need additional informa
 				}
 			}
 		}
+
+		// kilocode_change start: Append character count to each file in the list
+		rooIgnoreParsed = rooIgnoreParsed.map((entry) => {
+			// Extract actual path by removing prefix symbols
+			let actualPath = entry
+			if (entry.startsWith(LOCK_TEXT_SYMBOL)) {
+				actualPath = entry.slice((LOCK_TEXT_SYMBOL + " ").length)
+			} else if (entry.startsWith("🛡️")) {
+				actualPath = entry.slice("🛡️ ".length)
+			}
+
+			// Skip directories (end with /)
+			if (actualPath.endsWith("/")) {
+				return entry
+			}
+
+			// Read file and get character count
+			try {
+				const absoluteFilePath = path.resolve(absolutePath, actualPath)
+				const content = fs.readFileSync(absoluteFilePath, "utf-8")
+				return `${entry}  # ${content.length} chars`
+			} catch {
+				// If reading fails, return original entry
+				return entry
+			}
+		})
+		// kilocode_change end
+
 		if (didHitLimit) {
 			return `${rooIgnoreParsed.join(
 				"\n",

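Note: the net effect is easiest to see on a sample listing. Paths and counts below are made up; only the "# N chars" suffix and the prefix handling come from the code above. Directories keep their original entry, and a failed synchronous read silently falls back to the unannotated entry.

	src/index.ts  # 1843 chars
	src/utils/
	🛡️ src/generated/schema.ts  # 920 chars
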
+ 38 - 0
src/core/task/Task.ts

@@ -158,6 +158,9 @@ const MAX_EXPONENTIAL_BACKOFF_SECONDS = 600 // 10 minutes
 const DEFAULT_USAGE_COLLECTION_TIMEOUT_MS = 5000 // 5 seconds
 const FORCED_CONTEXT_REDUCTION_PERCENT = 75 // Keep 75% of context (remove 25%) on context window errors
 const MAX_CONTEXT_WINDOW_RETRIES = 3 // Maximum retries for context window errors
+// kilocode_change start
+const MAX_CHUTES_TERMINATED_RETRY_ATTEMPTS = 2 // Allow up to 2 retries (3 total attempts) before failing fast
+// kilocode_change end
 
 export interface TaskOptions extends CreateTaskOptions {
 	context: vscode.ExtensionContext // kilocode_change
@@ -3553,6 +3556,17 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 
 						// Clean up partial state
 						await abortStream(cancelReason, streamingFailedMessage)
+						// kilocode_change start
+						// Bound retries for repeated Chutes "terminated" stream failures
+						// to prevent indefinite thinking/retry loops.
+						const retryAttempt = currentItem.retryAttempt ?? 0
+						if (this.hasExceededChutesTerminatedRetryLimit(error, retryAttempt)) {
+							console.error(
+								`[Task#${this.taskId}.${this.instanceId}] Chutes stream terminated repeatedly. Stopping retries after attempt ${retryAttempt}.`,
+							)
+							throw error
+						}
+						// kilocode_change end
 
 						if (this.abort) {
 							// User cancelled - abort the entire task
@@ -4279,6 +4293,22 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		}
 	}
 
+	// kilocode_change start
+	private isChutesTerminatedError(error: unknown): boolean {
+		if (this.apiConfiguration?.apiProvider !== "chutes") {
+			return false
+		}
+
+		const message =
+			error instanceof Error ? error.message : typeof error === "string" ? error : JSON.stringify(error)
+		return /\bterminated\b/i.test(message || "")
+	}
+
+	private hasExceededChutesTerminatedRetryLimit(error: unknown, retryAttempt: number): boolean {
+		return this.isChutesTerminatedError(error) && retryAttempt >= MAX_CHUTES_TERMINATED_RETRY_ATTEMPTS
+	}
+	// kilocode_change end
+
 	public async *attemptApiRequest(
 		retryAttempt: number = 0,
 		options: { skipProviderRateLimit?: boolean } = {},
@@ -4654,6 +4684,14 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				return
 			}
 			// kilocode_change end
+			// kilocode_change start
+			// Chutes can occasionally terminate streams abruptly; avoid recursive
+			// first-chunk auto-retries here and delegate retry policy to the
+			// outer request loop, which applies a bounded retry cap.
+			if (this.isChutesTerminatedError(error)) {
+				throw error
+			}
+			// kilocode_change end
 			// note that this api_req_failed ask is unique in that we only present this option if the api hasn't streamed any content yet (i.e. it fails on the first chunk), as it would allow them to hit a retry button. However, if the api failed mid-stream, it could be in any arbitrary state where some tools may have executed, so that error is handled differently and requires cancelling the task entirely.
 			if (autoApprovalEnabled) {
 				// Apply shared exponential backoff and countdown UX

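Note: the two guards cooperate: the first-chunk handler rethrows instead of recursing, and the outer loop counts attempts against the cap. Detection is a case-insensitive word match on the error message, restated here as a standalone sketch (not the Task internals):

const MAX_ATTEMPTS_SKETCH = 2 // mirrors MAX_CHUTES_TERMINATED_RETRY_ATTEMPTS

const isTerminatedMessage = (message: string) => /\bterminated\b/i.test(message)

function shouldStopRetryingSketch(errorMessage: string, retryAttempt: number): boolean {
	return isTerminatedMessage(errorMessage) && retryAttempt >= MAX_ATTEMPTS_SKETCH
}

// shouldStopRetryingSketch("TypeError: terminated", 0) -> false  (first retry allowed)
// shouldStopRetryingSketch("TypeError: terminated", 1) -> false  (second retry allowed)
// shouldStopRetryingSketch("TypeError: terminated", 2) -> true   (third terminated failure: give up)
// shouldStopRetryingSketch("429 rate limited", 5)      -> false  (non-terminated errors keep the normal retry path)
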
+ 76 - 0
src/core/task/__tests__/Task.spec.ts

@@ -900,6 +900,82 @@ describe("Cline", () => {
 				await task.catch(() => {})
 			})
 
+			// kilocode_change start
+			it("attemptApiRequest should not recursively auto-retry first-chunk Chutes terminated errors", async () => {
+				const chutesConfig = {
+					...mockApiConfig,
+					apiProvider: "chutes" as const,
+					apiModelId: "moonshotai/Kimi-K2.5-TEE",
+				}
+
+				const task = new Task({
+					provider: mockProvider,
+					apiConfiguration: chutesConfig,
+					task: "test task",
+					startTask: false,
+					context: mockExtensionContext,
+				})
+
+				const terminatedError = new Error("terminated")
+				const mockFailedStream = {
+					// eslint-disable-next-line require-yield
+					async *[Symbol.asyncIterator]() {
+						throw terminatedError
+					},
+					async next() {
+						throw terminatedError
+					},
+					async return() {
+						return { done: true, value: undefined }
+					},
+					async throw(e: any) {
+						throw e
+					},
+					async [Symbol.asyncDispose]() {
+						// Cleanup
+					},
+				} as AsyncGenerator<ApiStreamChunk>
+
+				const createMessageSpy = vi.spyOn(task.api, "createMessage").mockReturnValue(mockFailedStream)
+				const backoffSpy = vi.spyOn(task as any, "backoffAndAnnounce").mockResolvedValue(undefined)
+
+				mockProvider.getState = vi.fn().mockResolvedValue({
+					apiConfiguration: chutesConfig,
+					autoApprovalEnabled: true,
+					requestDelaySeconds: 1,
+					mode: "code",
+				})
+
+				const iterator = task.attemptApiRequest(0, { skipProviderRateLimit: true })
+				await expect(iterator.next()).rejects.toThrow("terminated")
+
+				expect(createMessageSpy).toHaveBeenCalledTimes(1)
+				expect(backoffSpy).not.toHaveBeenCalled()
+			})
+
+			it("should apply Chutes terminated retry cap at the configured threshold", async () => {
+				const chutesConfig = {
+					...mockApiConfig,
+					apiProvider: "chutes" as const,
+					apiModelId: "moonshotai/Kimi-K2.5-TEE",
+				}
+
+				const task = new Task({
+					provider: mockProvider,
+					apiConfiguration: chutesConfig,
+					task: "test task",
+					startTask: false,
+					context: mockExtensionContext,
+				})
+
+				const terminatedError = new Error("terminated")
+
+				expect((task as any).hasExceededChutesTerminatedRetryLimit(terminatedError, 0)).toBe(false)
+				expect((task as any).hasExceededChutesTerminatedRetryLimit(terminatedError, 1)).toBe(false)
+				expect((task as any).hasExceededChutesTerminatedRetryLimit(terminatedError, 2)).toBe(true)
+			})
+			// kilocode_change end
+
 			describe("processUserContentMentions", () => {
 				it("should process mentions in task and feedback tags", async () => {
 					const [cline, task] = Task.create({

+ 10 - 1
src/core/webview/__tests__/ClineProvider.spec.ts

@@ -2783,7 +2783,12 @@ describe("ClineProvider - Router Models", () => {
 			apiKey: "litellm-key",
 			baseUrl: "http://localhost:4000",
 		})
-		expect(getModels).toHaveBeenCalledWith({ provider: "chutes" })
+		expect(getModels).toHaveBeenCalledWith({ provider: "chutes", apiKey: undefined })
+		expect(getModels).toHaveBeenCalledWith({
+			provider: "zenmux",
+			apiKey: undefined,
+			baseUrl: "https://zenmux.ai/api/v1",
+		})
 
 		// Verify response was sent
 		expect(mockPostMessage).toHaveBeenCalledWith({
@@ -2809,6 +2814,7 @@ describe("ClineProvider - Router Models", () => {
 				"sap-ai-core": {}, // kilocode_change
 				huggingface: {},
 				"io-intelligence": {},
+				zenmux: mockModels,
 			},
 			values: undefined,
 		})
@@ -2861,6 +2867,7 @@ describe("ClineProvider - Router Models", () => {
 			.mockResolvedValueOnce(mockModels) // kilocode_change: synthetic success
 			.mockResolvedValueOnce(mockModels) // roo success
 			.mockRejectedValueOnce(new Error("Chutes API error")) // chutes fail
+			.mockResolvedValueOnce(mockModels) // zenmux success
 			.mockRejectedValueOnce(new Error("LiteLLM connection failed")) // litellm fail
 
 		await messageHandler({ type: "requestRouterModels" })
@@ -2889,6 +2896,7 @@ describe("ClineProvider - Router Models", () => {
 				"sap-ai-core": {}, // kilocode_change
 				huggingface: {},
 				"io-intelligence": {},
+				zenmux: mockModels,
 			},
 			values: undefined,
 		})
@@ -3045,6 +3053,7 @@ describe("ClineProvider - Router Models", () => {
 				"sap-ai-core": {}, // kilocode_change
 				huggingface: {},
 				"io-intelligence": {},
+				zenmux: mockModels,
 			},
 			values: undefined,
 		})

+ 10 - 0
src/core/webview/__tests__/webviewMessageHandler.spec.ts

@@ -310,6 +310,11 @@ describe("webviewMessageHandler - requestRouterModels", () => {
 		expect(mockGetModels).toHaveBeenCalledWith({ provider: "requesty", apiKey: "requesty-key" })
 		expect(mockGetModels).toHaveBeenCalledWith({ provider: "glama" }) // kilocode_change
 		expect(mockGetModels).toHaveBeenCalledWith({ provider: "unbound", apiKey: "unbound-key" })
+		expect(mockGetModels).toHaveBeenCalledWith({
+			provider: "zenmux",
+			apiKey: undefined,
+			baseUrl: "https://zenmux.ai/api/v1",
+		})
 		// kilocode_change start
 		expect(mockGetModels).toHaveBeenCalledWith({ provider: "chutes", apiKey: "chutes-key" })
 		expect(mockGetModels).toHaveBeenCalledWith({
@@ -360,6 +365,7 @@ describe("webviewMessageHandler - requestRouterModels", () => {
 				"nano-gpt": mockModels, // kilocode_change
 				roo: mockModels,
 				chutes: mockModels,
+				zenmux: mockModels,
 				ollama: mockModels, // kilocode_change
 				lmstudio: {},
 				"vercel-ai-gateway": mockModels,
@@ -463,6 +469,7 @@ describe("webviewMessageHandler - requestRouterModels", () => {
 				unbound: mockModels,
 				roo: mockModels,
 				chutes: mockModels,
+				zenmux: mockModels,
 				litellm: {},
 				kilocode: mockModels,
 				"nano-gpt": mockModels, // kilocode_change
@@ -506,6 +513,7 @@ describe("webviewMessageHandler - requestRouterModels", () => {
 			.mockRejectedValueOnce(new Error("Synthetic API error")) // kilocode_change
 			.mockResolvedValueOnce(mockModels) // roo
 			.mockRejectedValueOnce(new Error("Chutes API error")) // chutes
+			.mockResolvedValueOnce(mockModels) // zenmux
 			.mockRejectedValueOnce(new Error("LiteLLM connection failed")) // litellm
 
 		await webviewMessageHandler(mockClineProvider, {
@@ -568,6 +576,7 @@ describe("webviewMessageHandler - requestRouterModels", () => {
 				unbound: {},
 				roo: mockModels,
 				chutes: {},
+				zenmux: mockModels,
 				litellm: {},
 				ollama: {},
 				lmstudio: {},
@@ -606,6 +615,7 @@ describe("webviewMessageHandler - requestRouterModels", () => {
 			.mockRejectedValueOnce(new Error("Synthetic API error")) // kilocode_change synthetic
 			.mockRejectedValueOnce(new Error("Roo API error")) // roo
 			.mockRejectedValueOnce(new Error("Chutes API error")) // chutes
+			.mockResolvedValueOnce({}) // zenmux
 			.mockRejectedValueOnce(new Error("LiteLLM connection failed")) // litellm
 
 		await webviewMessageHandler(mockClineProvider, {

+ 9 - 2
src/core/webview/webviewMessageHandler.ts

@@ -902,8 +902,8 @@ export const webviewMessageHandler = async (
 						"sap-ai-core": {}, // kilocode_change
 						chutes: {},
 						"nano-gpt": {}, // kilocode_change
+						zenmux: {},
 					}
-
 			const safeGetModels = async (options: GetModelsOptions): Promise<ModelRecord> => {
 				try {
 					return await getModels(options)
@@ -1004,6 +1004,14 @@ export const webviewMessageHandler = async (
 					key: "chutes",
 					options: { provider: "chutes", apiKey: apiConfiguration.chutesApiKey },
 				},
+				{
+					key: "zenmux",
+					options: {
+						provider: "zenmux",
+						apiKey: apiConfiguration.zenmuxApiKey,
+						baseUrl: apiConfiguration.zenmuxBaseUrl ?? "https://zenmux.ai/api/v1",
+					},
+				},
 			]
 			// kilocode_change end
 
@@ -1052,7 +1060,6 @@ export const webviewMessageHandler = async (
 
 			results.forEach((result, index) => {
 				const routerName = modelFetchPromises[index].key
-
 				if (result.status === "fulfilled") {
 					routerModels[routerName] = result.value.models
 

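Note: the ZenMux entry joins the same fan-out the other routers use: each fetch is wrapped so a failure degrades to an empty model map instead of rejecting the whole request. A simplified sketch of that pattern (the real handler also posts per-router error messages to the webview):

type ModelRecordSketch = Record<string, unknown>

async function fetchAllRouterModelsSketch(
	entries: Array<{ key: string; fetch: () => Promise<ModelRecordSketch> }>,
): Promise<Record<string, ModelRecordSketch>> {
	const results = await Promise.allSettled(entries.map((entry) => entry.fetch()))
	const routerModels: Record<string, ModelRecordSketch> = {}
	results.forEach((result, index) => {
		// A rejected fetch (e.g. ZenMux unreachable) becomes {} so the other providers still load.
		routerModels[entries[index].key] = result.status === "fulfilled" ? result.value : {}
	})
	return routerModels
}
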
+ 1 - 1
src/esbuild.mjs

@@ -88,7 +88,7 @@ async function main() {
 					copyPaths([["walkthrough", "walkthrough"]], srcDir, distDir)
 
 					// Copy tree-sitter files to dist directory
-					copyPaths([["services/continuedev/tree-sitter", "tree-sitter"]], srcDir, distDir)
+					copyPaths([["services/ghost/continuedev/tree-sitter", "tree-sitter"]], srcDir, distDir)
 
 					// Copy JSDOM xhr-sync-worker.js to fix runtime resolution
 					const jsdomWorkerDest = path.join(distDir, "xhr-sync-worker.js")

+ 1 - 1
src/eslint.config.mjs

@@ -37,6 +37,6 @@ export default [
 		},
 	},
 	{
-		ignores: ["webview-ui", "out", "services/continuedev/core/llm/llamaTokenizer.js", "**/__fixtures__"],
+		ignores: ["webview-ui", "out", "services/ghost/continuedev/core/llm/llamaTokenizer.js", "**/__fixtures__"],
 	},
 ]

+ 4 - 0
src/integrations/terminal/ShellIntegrationManager.ts

@@ -36,6 +36,10 @@ export class ShellIntegrationManager {
 
 				const zshrcContent = `
 	source "${shellIntegrationPath}"
+	# Disable history expansion (!) to prevent commands with ! from failing
+	# Fixes issue where commands like --collectCoverageFrom="!pattern" would fail
+	# with "event not found" error
+	setopt NO_BANG_HIST
 	ZDOTDIR=\${ROO_ZDOTDIR:-$HOME}
 	unset ROO_ZDOTDIR
 	[ -f "$ZDOTDIR/.zshenv" ] && source "$ZDOTDIR/.zshenv"

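Note: without this option, zsh applies history expansion to an unescaped "!" even inside double quotes, so the command dies before it runs; setopt NO_BANG_HIST makes the "!" literal. An illustrative transcript using the flag named in the comment above:

	% npx jest --collectCoverageFrom="!pattern"
	zsh: event not found: pattern

	% setopt NO_BANG_HIST
	% npx jest --collectCoverageFrom="!pattern"    # the "!" now passes through to jest unchanged
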
+ 0 - 1
src/integrations/theme/getTheme.ts

@@ -72,7 +72,6 @@ export async function getTheme() {
 			const includeTheme = parseThemeString(includeThemeString)
 			parsed = mergeJson(parsed, includeTheme)
 		}
-
 		const converted = convertTheme(parsed)
 
 		converted.base = (

+ 1 - 1
src/package.json

@@ -3,7 +3,7 @@
 	"displayName": "%extension.displayName%",
 	"description": "%extension.description%",
 	"publisher": "kilocode",
-	"version": "5.5.0",
+	"version": "5.6.0",
 	"icon": "assets/icons/logo-outline-black.png",
 	"galleryBanner": {
 		"color": "#FFFFFF",

+ 0 - 201
src/services/continuedev/LICENSE

@@ -1,201 +0,0 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright 2023 Continue Dev, Inc.
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.

+ 0 - 245
src/services/continuedev/README.md

@@ -1,245 +0,0 @@
-# Continue: Autocomplete & NextEdit Library
-
-A focused TypeScript library extracted from [Continue](https://github.com/continuedev/continue) containing only the AI-powered autocomplete and NextEdit features. This library is integrated into the [Kilocode](https://github.com/kilocode/kilocode) monorepo as a service component.
-
-## Overview
-
-This library provides two core features from the Continue project:
-
-- **Autocomplete**: Intelligent, context-aware code completion powered by LLMs
-- **NextEdit**: Multi-location code edit predictions that understand your editing patterns
-
-All other Continue functionality (GUI, chat, agents, tools, etc.) has been removed to create a focused, reusable codebase.
-
-## Integration Context
-
-This library is part of the **Kilocode monorepo** at `src/services/continuedev/`. It contains pure TypeScript source code without independent build configuration:
-
-- **No package.json** - Dependencies managed by Kilocode's pnpm workspace
-- **No build config** - Uses Kilocode's TypeScript and build tooling
-- **Testing** - Integrated into Kilocode's vitest test suite
-- **Purpose** - Provides autocomplete and edit prediction capabilities to Kilocode
-
-For integration details, see [`INTEGRATION.md`](INTEGRATION.md).
-
-## Features
-
-### 🎯 Autocomplete
-
-- **Context-aware completions**: Analyzes surrounding code, imports, and recently edited files
-- **Multi-line support**: Generates complete code blocks, not just single lines
-- **Smart filtering**: Removes invalid completions using AST analysis and bracket matching
-- **Caching**: LRU cache for improved performance
-- **Debouncing**: Prevents excessive LLM calls during rapid typing
-- **Tree-sitter integration**: Accurate syntax-aware code analysis
-
-### ✨ NextEdit
-
-- **Predictive edits**: Suggests edits across multiple locations based on your changes
-- **Full-file and partial-file diffs**: Flexible edit region calculation
-- **Multiple model support**: Built-in support for Instinct and MercuryCoder models
-- **Visual feedback**: Jump navigation between edit regions
-- **Cursor positioning**: Intelligent cursor placement after edits
-
-### 🔧 Supporting Infrastructure
-
-- **Multiple LLM providers**: OpenAI, Anthropic, Gemini, Azure, Bedrock, and more
-- **Tree-sitter integration**: Accurate syntax-aware code analysis for multiple languages
-- **Comprehensive testing**: 857 tests covering autocomplete, NextEdit, and integrations
-- **TypeScript**: Full type safety and IntelliSense support
-
-## Library Structure
-
-```
-src/services/continuedev/
-├── core/                      # All autocomplete & NextEdit code
-│   ├── autocomplete/          # Autocomplete feature
-│   │   ├── CompletionProvider.ts
-│   │   ├── MinimalConfig.ts
-│   │   ├── context/          # Context gathering (tree-sitter based)
-│   │   ├── filtering/        # Bracket matching, stream transforms
-│   │   ├── generation/       # LLM completion streaming
-│   │   ├── postprocessing/   # Clean up completions
-│   │   ├── snippets/         # Code snippet retrieval
-│   │   └── templating/       # Prompt construction
-│   ├── nextEdit/             # NextEdit feature
-│   │   ├── NextEditProvider.ts
-│   │   ├── providers/        # Model-specific providers
-│   │   ├── context/          # Edit aggregation & context
-│   │   ├── diff/             # Diff calculation
-│   │   └── templating/       # NextEdit prompt engines
-│   ├── llm/                  # LLM integration
-│   │   ├── llms/             # Provider implementations
-│   │   └── openai-adapters/  # OpenAI-compatible adapters
-│   ├── diff/                 # Myers diff algorithm
-│   ├── util/                 # Shared utilities
-│   ├── indexing/             # Security checks & ignore patterns
-│   ├── fetch/                # HTTP client with cert support
-│   └── vscode-test-harness/  # VSCode integration example
-├── tree-sitter/              # Tree-sitter query files
-├── legacy_code_rewrite/      # Historical extraction documentation
-├── API_REFERENCE.md          # Complete API documentation
-├── ARCHITECTURE.md           # Technical architecture
-├── EXAMPLES.md               # Usage examples
-├── INTEGRATION.md            # Integration with Kilocode
-└── LICENSE                   # Apache 2.0 license
-```
-
-## Architecture
-
-### High-Level Architecture
-
-```
-┌─────────────────────────────────────────┐
-│         IDE Integration Layer           │
-│  (VSCode, JetBrains, or Custom IDE)     │
-└─────────────┬───────────────────────────┘
-              │
-              ├──────────────────┬─────────────────┐
-              │                  │                 │
-┌─────────────▼──────────┐  ┌───▼────────────┐  ┌▼─────────────┐
-│  CompletionProvider    │  │ NextEditProvider│  │ MinimalConfig│
-│  (Autocomplete)        │  │ (NextEdit)      │  │ Provider     │
-└────────┬───────────────┘  └────┬───────────┘  └──────────────┘
-         │                        │
-         │  ┌─────────────────────┤
-         │  │                     │
-    ┌────▼──▼────┐          ┌────▼──────────┐
-    │  LLM Layer │          │ Tree-sitter   │
-    │  (OpenAI,  │          │ (AST parsing) │
-    │   Ollama)  │          └───────────────┘
-    └────────────┘
-```
-
-### Core Components
-
-- **[`CompletionProvider`](core/autocomplete/CompletionProvider.ts)**: Main autocomplete engine
-- **[`NextEditProvider`](core/nextEdit/NextEditProvider.ts)**: NextEdit prediction engine
-- **[`MinimalConfigProvider`](core/autocomplete/MinimalConfig.ts)**: Configuration management
-- **IDE Interface**: Abstraction for editor integration
-- **ILLM Interface**: Abstraction for LLM providers
-
-## Documentation
-
-For detailed usage and API information:
-
-- **[`API_REFERENCE.md`](API_REFERENCE.md)** - Complete API documentation with method signatures and parameters
-- **[`EXAMPLES.md`](EXAMPLES.md)** - Practical code examples for common use cases
-- **[`ARCHITECTURE.md`](ARCHITECTURE.md)** - Technical architecture and design decisions
-- **[`INTEGRATION.md`](INTEGRATION.md)** - Integration with Kilocode monorepo
-
-The VSCode test harness at [`core/vscode-test-harness/`](core/vscode-test-harness/) provides a complete working integration example with 86 tests.
-
-## Current State
-
-### What's Included ✅
-
-- **Autocomplete**: Full tab autocomplete with context gathering, filtering, and LLM generation
-- **NextEdit**: Multi-location edit prediction with diff calculation
-- **LLM Integrations**: OpenAI, Anthropic, Gemini, Azure, Bedrock, and 15+ other providers
-- **Tree-sitter**: Multi-language syntax parsing for context extraction
-- **Test Harness**: VSCode integration example with 86 integration tests
-- **All Dependencies**: fetch, diff utilities, security checks, logging, analytics
-
-### What's Removed ❌
-
-- GUI/Webview interface
-- Chat functionality
-- Agent/tool system
-- CLI extension
-- IntelliJ extension
-- Documentation site
-- Media assets
-- Deployment scripts
-- Complex YAML configuration (replaced with minimal config)
-- Package monorepo structure (consolidated to single root)
-
-### Statistics
-
-- **857 passing tests** (autocomplete, NextEdit, integrations, dependencies)
-- **TypeScript compiles cleanly** (zero type errors)
-- **~50,000 lines of code** (down from ~112,000 - 55% reduction)
-- **Single test framework** (Vitest only, Jest removed)
-- **Consolidated structure** (all code in `core/`, no separate packages)
-
-## Testing
-
-Tests for this library are integrated into Kilocode's test suite. See [`INTEGRATION.md`](INTEGRATION.md) for details on running tests within the Kilocode project.
-
-The library includes comprehensive test coverage:
-
-- **Autocomplete tests**: Context gathering, filtering, generation, templating, caching
-- **NextEdit tests**: Edit prediction, diff calculation, template engines, history tracking
-- **Diff tests**: Myers algorithm, streaming diffs, line matching
-- **Integration tests**: VSCode test harness with real-world usage scenarios
-- **Dependency tests**: LRU cache, tree-sitter parsing, security filtering
-
-## Integration into Your IDE
-
-To integrate this library into your own IDE:
-
-1. Implement the [`IDE` interface](core/index.d.ts)
-2. Create [`CompletionProvider`](core/autocomplete/CompletionProvider.ts) instance
-3. Create [`NextEditProvider`](core/nextEdit/NextEditProvider.ts) instance (optional)
-4. See [`core/vscode-test-harness/`](core/vscode-test-harness/) for a complete VSCode example
-
-## License & Credits
-
-This library is extracted from [Continue](https://github.com/continuedev/continue), an open-source AI code assistant.
-
-**Original Project**: https://github.com/continuedev/continue  
-**Original Authors**: Continue Dev, Inc  
-**License**: Apache-2.0
-
-### Attribution
-
-The autocomplete and NextEdit functionality in this library was developed by the Continue team. This extraction preserves the original code structure and functionality while providing a minimal, reusable library.
-
-Key contributors to the original Continue project:
-
-- The Continue team and community
-- See the original repository for full contributor list
-
-### Changes in This Version
-
-This version removes everything except autocomplete and NextEdit:
-
-- ❌ **Removed**: GUI, chat interface, agents, tools, CLI, IntelliJ extension
-- ❌ **Removed**: Documentation site, media files, deployment scripts
-- ❌ **Removed**: Complex YAML config system (replaced with minimal config)
-- ❌ **Removed**: Package monorepo structure (consolidated into single root)
-- ✅ **Retained**: All autocomplete functionality with comprehensive test coverage
-- ✅ **Retained**: NextEdit feature with full test coverage
-- ✅ **Retained**: LLM integrations (OpenAI, Anthropic, Gemini, etc.)
-- ✅ **Retained**: Tree-sitter parsing and context gathering
-
-## Documentation
-
-- **[`README.md`](README.md)** - This file - Overview and integration
-- **[`INTEGRATION.md`](INTEGRATION.md)** - Integration with Kilocode monorepo
-- **[`ARCHITECTURE.md`](ARCHITECTURE.md)** - Technical architecture details
-- **[`API_REFERENCE.md`](API_REFERENCE.md)** - Complete API documentation
-- **[`EXAMPLES.md`](EXAMPLES.md)** - Comprehensive usage examples
-- **[`legacy_code_rewrite/`](legacy_code_rewrite/)** - Historical extraction documentation (49 files)
-
-## Links
-
-- **Kilocode Project**: https://github.com/kilocode/kilocode
-- **Original Continue Project**: https://github.com/continuedev/continue
-- **Continue Documentation**: https://docs.continue.dev
-- **Continue Discord**: https://discord.gg/continue
-
-## Support
-
-For questions about this library:
-
-- Check the [`ARCHITECTURE.md`](ARCHITECTURE.md) for technical details
-- Review [`EXAMPLES.md`](EXAMPLES.md) for usage patterns
-- Examine the test harness in [`core/vscode-test-harness/`](core/vscode-test-harness/)
-- See [`INTEGRATION.md`](INTEGRATION.md) for Kilocode integration details
-
-For questions about the original Continue project:
-
-- Visit https://docs.continue.dev
-- Join the Discord: https://discord.gg/continue
-- Open an issue: https://github.com/continuedev/continue/issues

+ 0 - 35
src/services/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/typescript/classMethods.ts

@@ -1,35 +0,0 @@
-// @ts-nocheck
-
-class Group {
-  getPersonAddress(person: Person): Address {
-    // TODO
-  }
-
-  getHardcodedAddress(): Address {
-    // TODO
-  }
-
-  addPerson(person: Person) {
-    // TODO
-  }
-
-  addPeople(people: Person[]) {
-    // TODO
-  }
-
-  getAddresses(people: Person[]): Address[] {
-    // TODO
-  }
-
-  logPersonWithAddress(person: Person<Address>): Person<Address> {
-    // TODO
-  }
-
-  logPersonOrAddress(person: Person | Address): Person | Address {
-    // TODO
-  }
-
-  logPersonAndAddress(person: Person, address: Address) {
-    // TODO
-  }
-}

+ 0 - 23
src/services/continuedev/core/diff/test-examples/README.md

@@ -1,23 +0,0 @@
-# Diff algorithm tests
-
-Tests are specified as
-
-```
-<CODE BEFORE>
-
----
-
-<CODE AFTER>
-
----
-
-<EXPECTED DIFF>
-```
-
-`---` is the delimeter, and surrounding whitespace will be trimmed.
-
-The expected diff can be generated with the `displayDiff` function.
-
-We make this explicit instead of comparing to the output of `myersDiff` in case the output from that is either unattainable or not exactly what we want.
-
-In order to generate the expected diff, you can first leave it empty and then run the test. The test will catch this and write the _computed_ diff to the test file. It is up to you to correct this to the expected diff.

+ 0 - 30
src/services/continuedev/core/indexing/README.md

@@ -1,30 +0,0 @@
-# Indexing
-
-Continue uses a tagging system along with content addressing to ensure that nothing needs to be indexed twice. When you change branches, Continue will only re-index the files that are newly modified and that we don't already have a copy of. This system can be used across many different "artifacts" just by implementing the `CodebaseIndex` class.
-
-_artifact_: something that is generated by indexing and then saved to be used later (e.g. emeddings, full-text search index, or a table of top-level code snippets in each file)
-
-_cacheKey_: a key that determines whether two files can be considered the same to avoid re-indexing (always hash of file contents at this point)
-
-_`CodebaseIndex`_: a class that makes it easy to use the indexing system to help you generate a new artifact
-
-The indexing process does the following:
-
-1. Check the modified timestamps of all files in the repo (this may seem extreme, but checking timestamps is significantly faster than actually reading a file. Git does the same thing.)
-2. Compare these to a "catalog" (stored in SQLite) of the last time that we indexed each of these files to get a list of files to "add" or "remove". If the file exists in the repo but not in the catalog, then we must "add" the file. If it exists in the catalog but not the repo, we must "remove" the file. If it exists in both and was modified after last indexed, then we must update the file. In this case we also add it to the "add" list.
-3. For each file to "add", check whether it was indexed on another branch. Here we use a SQLite table that acts as a cache for indexed files. If we find an entry in this table for a file with the same cacheKey, then we only need to add a tag to this entry for the current branch ("addTag"). Otherwise, we must "compute" the artifact.
-4. For each file in "remove", check whether it was indexed on another branch. If we find only one entry with the same cacheKey (presumably this should be the entry for the current branch, or something has gone wrong), then this entry should be removed and there will be no more branches that need the artifact, so we want to "delete" it. If there is more than one tag on this artifact, then we should just remove the tag for this branch ("removeTag").
-5. After having calculated these four lists of files ("compute", "delete", "addTag", "removeTag"), we pass them to the `CodebaseIndex` so that it can update whatever index-specific storage it might have. Many of them use SQLite and/or LanceDB. The `CodebaseIndex` implements a method called "update" that accepts the four lists and yields progress updates as it iterates over the lists. These progress updates are used to officially mark a file as having been indexed, so that if the extension is closed mid-indexing we don't falsely record progress.
-
-## Existing `CodebaseIndex`es
-
-All indexes must be returned by `getIndexesToBuild` in [`CodebaseIndexer.ts`](./CodebaseIndexer.ts) if they are to be used.
-
-`CodeSnippetsCodebaseIndex`: uses tree-sitter queries to get a list of functions, classes, and other top-level code objects in each file
-`FullTextSearchCodebaseIndex`: creates a full-text search index using SQLite FTS5
-`ChunkCodebaseIndex`: chunks files recursively by code structure, for use in other embeddings providers like `LanceDbIndex`
-`LanceDbIndex`: calculates embeddings for each chunk and adds them to the LanceDB vector database, with metadata going into SQLite. Note that for each branch, a unique table is created in LanceDB.
-
-## Known problems
-
-- `FullTextSearchCodebaseIndex` doesn't differentiate between tags (branch, repo), so results may come from any branch/repo. LanceDB does this by creating separate tables for each tag (see `tableNameForTag`). The chunk index does this with a second table

+ 1 - 1
src/services/ghost/GhostJetbrainsBridge.ts

@@ -4,7 +4,7 @@ import { z } from "zod"
 import { GhostServiceManager } from "./GhostServiceManager"
 import { ClineProvider } from "../../core/webview/ClineProvider"
 import { getKiloCodeWrapperProperties } from "../../core/kilocode/wrapper"
-import { languageForFilepath } from "../continuedev/core/autocomplete/constants/AutocompleteLanguageInfo"
+import { languageForFilepath } from "./continuedev/core/autocomplete/constants/AutocompleteLanguageInfo"
 import { GhostContextProvider } from "./types"
 import { FimPromptBuilder } from "./classic-auto-complete/FillInTheMiddle"
 import { HoleFiller } from "./classic-auto-complete/HoleFiller"

+ 1 - 1
src/services/ghost/chat-autocomplete/ChatTextAreaAutocomplete.ts

@@ -2,7 +2,7 @@ import * as vscode from "vscode"
 import { GhostModel } from "../GhostModel"
 import { ProviderSettingsManager } from "../../../core/config/ProviderSettingsManager"
 import { AutocompleteContext, VisibleCodeContext } from "../types"
-import { removePrefixOverlap } from "../../continuedev/core/autocomplete/postprocessing/removePrefixOverlap.js"
+import { removePrefixOverlap } from "../continuedev/core/autocomplete/postprocessing/removePrefixOverlap.js"
 import { AutocompleteTelemetry } from "../classic-auto-complete/AutocompleteTelemetry"
 import { postprocessGhostSuggestion } from "../classic-auto-complete/uselessSuggestionFilter"
 

+ 1 - 1
src/services/ghost/classic-auto-complete/FillInTheMiddle.ts

@@ -6,7 +6,7 @@ import {
 	FillInAtCursorSuggestion,
 } from "../types"
 import { getProcessedSnippets } from "./getProcessedSnippets"
-import { getTemplateForModel } from "../../continuedev/core/autocomplete/templating/AutocompleteTemplate"
+import { getTemplateForModel } from "../continuedev/core/autocomplete/templating/AutocompleteTemplate"
 import { GhostModel } from "../GhostModel"
 
 export type { FimGhostPrompt, FimCompletionResult }

+ 4 - 4
src/services/ghost/classic-auto-complete/GhostInlineCompletionProvider.ts

@@ -16,10 +16,10 @@ import {
 import { HoleFiller } from "./HoleFiller"
 import { FimPromptBuilder } from "./FillInTheMiddle"
 import { GhostModel } from "../GhostModel"
-import { ContextRetrievalService } from "../../continuedev/core/autocomplete/context/ContextRetrievalService"
-import { VsCodeIde } from "../../continuedev/core/vscode-test-harness/src/VSCodeIde"
-import { RecentlyVisitedRangesService } from "../../continuedev/core/vscode-test-harness/src/autocomplete/RecentlyVisitedRangesService"
-import { RecentlyEditedTracker } from "../../continuedev/core/vscode-test-harness/src/autocomplete/recentlyEdited"
+import { ContextRetrievalService } from "../continuedev/core/autocomplete/context/ContextRetrievalService"
+import { VsCodeIde } from "../continuedev/core/vscode-test-harness/src/VSCodeIde"
+import { RecentlyVisitedRangesService } from "../continuedev/core/vscode-test-harness/src/autocomplete/RecentlyVisitedRangesService"
+import { RecentlyEditedTracker } from "../continuedev/core/vscode-test-harness/src/autocomplete/recentlyEdited"
 import type { GhostServiceSettings } from "@roo-code/types"
 import { postprocessGhostSuggestion } from "./uselessSuggestionFilter"
 import { shouldSkipAutocomplete } from "./contextualSkip"

+ 1 - 1
src/services/ghost/classic-auto-complete/HoleFiller.ts

@@ -6,7 +6,7 @@ import {
 	ChatCompletionResult,
 } from "../types"
 import { getProcessedSnippets } from "./getProcessedSnippets"
-import { formatSnippets } from "../../continuedev/core/autocomplete/templating/formatting"
+import { formatSnippets } from "../continuedev/core/autocomplete/templating/formatting"
 import { GhostModel } from "../GhostModel"
 import { ApiStreamChunk } from "../../../api/transform/stream"
 

+ 15 - 15
src/services/ghost/classic-auto-complete/__tests__/GhostContextProvider.test.ts

@@ -1,12 +1,12 @@
 import { describe, it, expect, beforeEach, vi } from "vitest"
 import { getProcessedSnippets } from "../getProcessedSnippets"
 import { AutocompleteInput, GhostContextProvider } from "../../types"
-import { AutocompleteSnippetType } from "../../../continuedev/core/autocomplete/types"
+import { AutocompleteSnippetType } from "../../continuedev/core/autocomplete/types"
 import { GhostModel } from "../../GhostModel"
 import { RooIgnoreController } from "../../../../core/ignore/RooIgnoreController"
 import crypto from "crypto"
-import { ContextRetrievalService } from "../../../continuedev/core/autocomplete/context/ContextRetrievalService"
-import { VsCodeIde } from "../../../continuedev/core/vscode-test-harness/src/VSCodeIde"
+import { ContextRetrievalService } from "../../continuedev/core/autocomplete/context/ContextRetrievalService"
+import { VsCodeIde } from "../../continuedev/core/vscode-test-harness/src/VSCodeIde"
 
 vi.mock("vscode", () => ({
 	Uri: {
@@ -28,19 +28,19 @@ vi.mock("vscode", () => ({
 	},
 }))
 
-vi.mock("../../../continuedev/core/autocomplete/context/ContextRetrievalService", () => ({
+vi.mock("../../continuedev/core/autocomplete/context/ContextRetrievalService", () => ({
 	ContextRetrievalService: vi.fn().mockImplementation(() => ({
 		initializeForFile: vi.fn().mockResolvedValue(undefined),
 	})),
 }))
 
-vi.mock("../../../continuedev/core/vscode-test-harness/src/VSCodeIde", () => ({
+vi.mock("../../continuedev/core/vscode-test-harness/src/VSCodeIde", () => ({
 	VsCodeIde: vi.fn().mockImplementation(() => ({
 		getWorkspaceDirs: vi.fn().mockResolvedValue(["file:///workspace"]),
 	})),
 }))
 
-vi.mock("../../../continuedev/core/autocomplete/util/HelperVars", () => ({
+vi.mock("../../continuedev/core/autocomplete/util/HelperVars", () => ({
 	HelperVars: {
 		create: vi.fn().mockResolvedValue({
 			filepath: "file:///test.ts",
@@ -49,7 +49,7 @@ vi.mock("../../../continuedev/core/autocomplete/util/HelperVars", () => ({
 	},
 }))
 
-vi.mock("../../../continuedev/core/autocomplete/snippets/getAllSnippets", () => ({
+vi.mock("../../continuedev/core/autocomplete/snippets/getAllSnippets", () => ({
 	getAllSnippetsWithoutRace: vi.fn().mockResolvedValue({
 		recentlyOpenedFileSnippets: [],
 		importDefinitionSnippets: [],
@@ -63,7 +63,7 @@ vi.mock("../../../continuedev/core/autocomplete/snippets/getAllSnippets", () =>
 	}),
 }))
 
-vi.mock("../../../continuedev/core/autocomplete/templating/filtering", () => ({
+vi.mock("../../continuedev/core/autocomplete/templating/filtering", () => ({
 	getSnippets: vi
 		.fn()
 		.mockImplementation((_helper, payload) => [
@@ -126,7 +126,7 @@ describe("GhostContextProvider", () => {
 
 		it("should return processed snippets when snippets are available", async () => {
 			const { getAllSnippetsWithoutRace } = await import(
-				"../../../continuedev/core/autocomplete/snippets/getAllSnippets"
+				"../../continuedev/core/autocomplete/snippets/getAllSnippets"
 			)
 
 			;(getAllSnippetsWithoutRace as any).mockResolvedValueOnce({
@@ -167,7 +167,7 @@ describe("GhostContextProvider", () => {
 
 		it("should process multiple snippets correctly", async () => {
 			const { getAllSnippetsWithoutRace } = await import(
-				"../../../continuedev/core/autocomplete/snippets/getAllSnippets"
+				"../../continuedev/core/autocomplete/snippets/getAllSnippets"
 			)
 
 			;(getAllSnippetsWithoutRace as any).mockResolvedValueOnce({
@@ -219,7 +219,7 @@ describe("GhostContextProvider", () => {
 
 		it("should propagate errors from getAllSnippetsWithoutRace", async () => {
 			const { getAllSnippetsWithoutRace } = await import(
-				"../../../continuedev/core/autocomplete/snippets/getAllSnippets"
+				"../../continuedev/core/autocomplete/snippets/getAllSnippets"
 			)
 
 			;(getAllSnippetsWithoutRace as any).mockRejectedValueOnce(new Error("Test error"))
@@ -264,7 +264,7 @@ describe("GhostContextProvider", () => {
 
 		it("should filter out blocked files", async () => {
 			const { getAllSnippetsWithoutRace } = await import(
-				"../../../continuedev/core/autocomplete/snippets/getAllSnippets"
+				"../../continuedev/core/autocomplete/snippets/getAllSnippets"
 			)
 
 			// Mock validateAccess to block /blocked.ts
@@ -317,7 +317,7 @@ describe("GhostContextProvider", () => {
 
 		it("should keep snippets without file paths", async () => {
 			const { getAllSnippetsWithoutRace } = await import(
-				"../../../continuedev/core/autocomplete/snippets/getAllSnippets"
+				"../../continuedev/core/autocomplete/snippets/getAllSnippets"
 			)
 
 			const controller = await mockIgnoreController!
@@ -351,7 +351,7 @@ describe("GhostContextProvider", () => {
 				staticSnippet: [],
 			})
 
-			const { getSnippets } = await import("../../../continuedev/core/autocomplete/templating/filtering")
+			const { getSnippets } = await import("../../continuedev/core/autocomplete/templating/filtering")
 			;(getSnippets as any).mockImplementation((_helper: any, payload: any) => [
 				...payload.recentlyOpenedFileSnippets,
 				...payload.diffSnippets,
@@ -392,7 +392,7 @@ describe("GhostContextProvider", () => {
 			}
 
 			const { getAllSnippetsWithoutRace } = await import(
-				"../../../continuedev/core/autocomplete/snippets/getAllSnippets"
+				"../../continuedev/core/autocomplete/snippets/getAllSnippets"
 			)
 
 			;(getAllSnippetsWithoutRace as any).mockResolvedValueOnce({

+ 1 - 1
src/services/ghost/classic-auto-complete/__tests__/HoleFiller.test.ts

@@ -2,7 +2,7 @@ import { describe, it, expect, beforeEach, vi } from "vitest"
 import { HoleFiller, parseGhostResponse } from "../HoleFiller"
 import { AutocompleteInput, GhostContextProvider } from "../../types"
 import crypto from "crypto"
-import { AutocompleteSnippetType } from "../../../continuedev/core/autocomplete/types"
+import { AutocompleteSnippetType } from "../../continuedev/core/autocomplete/types"
 
 // Mock the getProcessedSnippets module
 vi.mock("../getProcessedSnippets", () => ({

+ 8 - 8
src/services/ghost/classic-auto-complete/getProcessedSnippets.ts

@@ -1,15 +1,15 @@
 import * as vscode from "vscode"
-import { ContextRetrievalService } from "../../continuedev/core/autocomplete/context/ContextRetrievalService"
-import { VsCodeIde } from "../../continuedev/core/vscode-test-harness/src/VSCodeIde"
+import { ContextRetrievalService } from "../continuedev/core/autocomplete/context/ContextRetrievalService"
+import { VsCodeIde } from "../continuedev/core/vscode-test-harness/src/VSCodeIde"
 import { AutocompleteInput } from "../types"
-import { HelperVars } from "../../continuedev/core/autocomplete/util/HelperVars"
-import { getAllSnippetsWithoutRace } from "../../continuedev/core/autocomplete/snippets/getAllSnippets"
-import { getDefinitionsFromLsp } from "../../continuedev/core/vscode-test-harness/src/autocomplete/lsp"
-import { DEFAULT_AUTOCOMPLETE_OPTS } from "../../continuedev/core/util/parameters"
-import { getSnippets } from "../../continuedev/core/autocomplete/templating/filtering"
+import { HelperVars } from "../continuedev/core/autocomplete/util/HelperVars"
+import { getAllSnippetsWithoutRace } from "../continuedev/core/autocomplete/snippets/getAllSnippets"
+import { getDefinitionsFromLsp } from "../continuedev/core/vscode-test-harness/src/autocomplete/lsp"
+import { DEFAULT_AUTOCOMPLETE_OPTS } from "../continuedev/core/util/parameters"
+import { getSnippets } from "../continuedev/core/autocomplete/templating/filtering"
 import { GhostModel } from "../GhostModel"
 import { RooIgnoreController } from "../../../core/ignore/RooIgnoreController"
-import { AutocompleteSnippet, AutocompleteSnippetType } from "../../continuedev/core/autocomplete/types"
+import { AutocompleteSnippet, AutocompleteSnippetType } from "../continuedev/core/autocomplete/types"
 
 function uriToFsPath(filepath: string): string {
 	if (filepath.startsWith("file://")) {

+ 1 - 1
src/services/ghost/classic-auto-complete/uselessSuggestionFilter.ts

@@ -1,4 +1,4 @@
-import { postprocessCompletion } from "../../continuedev/core/autocomplete/postprocessing/index.js"
+import { postprocessCompletion } from "../continuedev/core/autocomplete/postprocessing/index.js"
 import { applyLanguageFilter } from "./language-filters"
 
 export type AutocompleteSuggestion = {

+ 1 - 1
src/services/ghost/context/VisibleCodeTracker.ts

@@ -11,7 +11,7 @@
 import * as vscode from "vscode"
 
 import { toRelativePath } from "../../../utils/path"
-import { isSecurityConcern } from "../../continuedev/core/indexing/ignore"
+import { isSecurityConcern } from "../continuedev/core/indexing/ignore"
 import type { RooIgnoreController } from "../../../core/ignore/RooIgnoreController"
 
 import { VisibleCodeContext, VisibleEditorInfo, VisibleRange, DiffInfo } from "../types"

+ 1 - 1
src/services/ghost/context/__tests__/VisibleCodeTracker.spec.ts

@@ -10,7 +10,7 @@ vi.mock("vscode", () => ({
 	},
 }))
 
-vi.mock("../../../../services/continuedev/core/indexing/ignore", () => ({
+vi.mock("../../../../services/ghost/continuedev/core/indexing/ignore", () => ({
 	isSecurityConcern: vi.fn((filePath: string) => {
 		return filePath.includes(".env") || filePath.includes("credentials")
 	}),

+ 0 - 0
src/services/continuedev/.gitignore → src/services/ghost/continuedev/.gitignore


+ 0 - 0
src/services/continuedev/API_REFERENCE.md → src/services/ghost/continuedev/API_REFERENCE.md


+ 0 - 0
src/services/continuedev/ARCHITECTURE.md → src/services/ghost/continuedev/ARCHITECTURE.md


+ 0 - 0
src/services/continuedev/EXAMPLES.md → src/services/ghost/continuedev/EXAMPLES.md


+ 0 - 0
src/services/continuedev/INTEGRATION.md → src/services/ghost/continuedev/INTEGRATION.md


+ 0 - 0
src/services/continuedev/core/autocomplete/CompletionProvider.ts → src/services/ghost/continuedev/core/autocomplete/CompletionProvider.ts


+ 0 - 0
src/services/continuedev/core/autocomplete/MinimalConfig.ts → src/services/ghost/continuedev/core/autocomplete/MinimalConfig.ts


+ 0 - 0
src/services/continuedev/core/autocomplete/classification/shouldCompleteMultiline.ts → src/services/ghost/continuedev/core/autocomplete/classification/shouldCompleteMultiline.ts


+ 0 - 0
src/services/continuedev/core/autocomplete/constants/AutocompleteLanguageInfo.test.ts → src/services/ghost/continuedev/core/autocomplete/constants/AutocompleteLanguageInfo.test.ts


+ 0 - 0
src/services/continuedev/core/autocomplete/constants/AutocompleteLanguageInfo.ts → src/services/ghost/continuedev/core/autocomplete/constants/AutocompleteLanguageInfo.ts


+ 0 - 0
src/services/continuedev/core/autocomplete/context/ContextRetrievalService.ts → src/services/ghost/continuedev/core/autocomplete/context/ContextRetrievalService.ts


+ 0 - 0
src/services/continuedev/core/autocomplete/context/ImportDefinitionsService.test.ts → src/services/ghost/continuedev/core/autocomplete/context/ImportDefinitionsService.test.ts


+ 0 - 0
src/services/continuedev/core/autocomplete/context/ImportDefinitionsService.ts → src/services/ghost/continuedev/core/autocomplete/context/ImportDefinitionsService.ts


+ 0 - 0
src/services/continuedev/core/autocomplete/context/ranking/index.test.ts → src/services/ghost/continuedev/core/autocomplete/context/ranking/index.test.ts


+ 0 - 0
src/services/continuedev/core/autocomplete/context/ranking/index.ts → src/services/ghost/continuedev/core/autocomplete/context/ranking/index.ts


+ 0 - 0
src/services/continuedev/core/autocomplete/context/root-path-context/RootPathContextService.test.ts → src/services/ghost/continuedev/core/autocomplete/context/root-path-context/RootPathContextService.test.ts


+ 0 - 0
src/services/continuedev/core/autocomplete/context/root-path-context/RootPathContextService.ts → src/services/ghost/continuedev/core/autocomplete/context/root-path-context/RootPathContextService.ts


+ 0 - 0
src/services/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/__init__.py → src/services/ghost/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/__init__.py


+ 0 - 0
src/services/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/base_module.py → src/services/ghost/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/base_module.py


+ 0 - 0
src/services/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/file1.go → src/services/ghost/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/file1.go


+ 0 - 0
src/services/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/file1.php → src/services/ghost/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/file1.php


+ 0 - 0
src/services/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/python/classes.py → src/services/ghost/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/python/classes.py


+ 0 - 0
src/services/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/python/functions.py → src/services/ghost/continuedev/core/autocomplete/context/root-path-context/__fixtures__/files/python/functions.py


Some files were not shown because too many files have changed in this diff.