Browse Source

Merge branch 'main' into better-cost-format

Kevin van Dijk 19 hours ago
Parent
Commit
252ff650c8
100 changed files with 3232 additions and 399 deletions
1. .changeset/add-slovak-translation.md (+0 -5)
2. .changeset/fifty-baboons-shine.md (+5 -0)
3. .changeset/filter-internal-verification-tags.md (+5 -0)
4. .changeset/fix-context-flickering.md (+5 -0)
5. .changeset/fix-model-no-tools-used.md (+5 -0)
6. .changeset/gentle-laws-allow.md (+5 -0)
7. .changeset/kill-command-fix.md (+5 -0)
8. .changeset/lucky-lands-tickle.md (+5 -0)
9. .changeset/old-planes-start.md (+5 -0)
10. .changeset/persist-deleted-api-costs.md (+5 -0)
11. .changeset/thin-forks-draw.md (+5 -0)
12. .github/copilot-instructions.md (+1 -1)
13. AGENTS.md (+1 -1)
14. CHANGELOG.md (+54 -0)
15. apps/kilocode-docs/pages/ai-providers/zenmux.md (+197 -0)
16. apps/kilocode-docs/pages/collaborate/enterprise/sso.md (+5 -1)
17. apps/kilocode-docs/pages/collaborate/teams/about-plans.md (+1 -1)
18. apps/kilocode-docs/pages/customize/agents-md.md (+2 -2)
19. cli/src/config/mapper.ts (+2 -0)
20. cli/src/constants/providers/labels.ts (+2 -0)
21. cli/src/constants/providers/models.ts (+9 -0)
22. cli/src/constants/providers/settings.ts (+28 -0)
23. cli/src/constants/providers/validation.ts (+2 -0)
24. jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/commands/SetContextCommands.kt (+5 -5)
25. jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/core/ContextManager.kt (+3 -3)
26. jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/inline/InlineCompletionConstants.kt (+2 -2)
27. jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/inline/InlineCompletionService.kt (+1 -1)
28. jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/inline/KiloCodeInlineCompletionProvider.kt (+4 -4)
29. jetbrains/plugin/src/test/kotlin/ai/kilocode/jetbrains/util/ReflectUtilsStatusBarTest.kt (+4 -4)
30. jetbrains/plugin/turbo.json (+2 -2)
31. package.json (+3 -0)
32. packages/core-schemas/src/config/provider.ts (+17 -0)
33. packages/core/src/message-utils/consolidateTokenUsage.ts (+1 -9)
34. packages/types/src/__tests__/kilocode.test.ts (+5 -5)
35. packages/types/src/global-settings.ts (+3 -2)
36. packages/types/src/kilocode/kilocode.ts (+2 -2)
37. packages/types/src/provider-settings.ts (+34 -0)
38. packages/types/src/providers/corethink.ts (+20 -0)
39. packages/types/src/providers/fireworks.ts (+120 -46)
40. packages/types/src/providers/index.ts (+8 -0)
41. packages/types/src/providers/moonshot.ts (+18 -6)
42. packages/types/src/providers/zai.ts (+39 -2)
43. packages/types/src/providers/zenmux.ts (+17 -0)
44. packages/types/src/vscode-extension-host.ts (+1 -0)
45. pnpm-lock.yaml (+85 -45)
46. src/__tests__/extension.spec.ts (+4 -4)
47. src/api/index.ts (+6 -0)
48. src/api/providers/__tests__/bedrock-custom-arn.spec.ts (+39 -0)
49. src/api/providers/__tests__/chutes.spec.ts (+184 -5)
50. src/api/providers/__tests__/fireworks.spec.ts (+20 -111)
51. src/api/providers/__tests__/kilocode-openrouter.spec.ts (+14 -3)
52. src/api/providers/__tests__/mistral-fim.spec.ts (+2 -2)
53. src/api/providers/__tests__/moonshot.spec.ts (+299 -13)
54. src/api/providers/__tests__/zai.spec.ts (+133 -1)
55. src/api/providers/__tests__/zenmux.spec.ts (+47 -0)
56. src/api/providers/chutes.ts (+106 -9)
57. src/api/providers/corethink.ts (+18 -0)
58. src/api/providers/fetchers/modelCache.ts (+7 -1)
59. src/api/providers/fetchers/zenmux.ts (+72 -0)
60. src/api/providers/index.ts (+2 -0)
61. src/api/providers/kilocode-openrouter.ts (+7 -2)
62. src/api/providers/mistral.ts (+1 -1)
63. src/api/providers/moonshot.ts (+85 -0)
64. src/api/providers/nano-gpt.ts (+13 -0)
65. src/api/providers/openai-compatible.ts (+103 -3)
66. src/api/providers/zai.ts (+13 -9)
67. src/api/providers/zenmux.ts (+501 -0)
68. src/api/transform/__tests__/ai-sdk.spec.ts (+133 -0)
69. src/api/transform/__tests__/model-params.spec.ts (+36 -0)
70. src/api/transform/__tests__/reasoning.spec.ts (+59 -0)
71. src/api/transform/ai-sdk.ts (+80 -37)
72. src/api/transform/model-params.ts (+15 -3)
73. src/api/transform/openai-format.ts (+33 -1)
74. src/api/transform/reasoning.ts (+6 -0)
75. src/core/assistant-message/NativeToolCallParser.ts (+5 -1)
76. src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts (+47 -0)
77. src/core/assistant-message/presentAssistantMessage.ts (+3 -0)
78. src/core/auto-approval/__tests__/checkAutoApproval.test.ts (+65 -0)
79. src/core/prompts/responses.ts (+29 -0)
80. src/core/prompts/sections/__tests__/tool-use-guidelines.spec.ts (+1 -2)
81. src/core/prompts/sections/tool-use-guidelines.ts (+4 -0)
82. src/core/task-persistence/taskMetadata.ts (+9 -1)
83. src/core/task/Task.ts (+86 -2)
84. src/core/task/__tests__/Task.spec.ts (+76 -0)
85. src/core/tools/ExecuteCommandTool.ts (+8 -2)
86. src/core/webview/ClineProvider.ts (+10 -2)
87. src/core/webview/__tests__/ClineProvider.apiHandlerRebuild.spec.ts (+1 -0)
88. src/core/webview/__tests__/ClineProvider.spec.ts (+12 -1)
89. src/core/webview/__tests__/ClineProvider.sticky-mode.spec.ts (+11 -0)
90. src/core/webview/__tests__/ClineProvider.sticky-profile.spec.ts (+8 -0)
91. src/core/webview/__tests__/webviewMessageHandler.spec.ts (+10 -0)
92. src/core/webview/sttHandlers.ts (+1 -1)
93. src/core/webview/webviewMessageHandler.ts (+49 -22)
94. src/esbuild.mjs (+1 -1)
95. src/eslint.config.mjs (+1 -1)
96. src/extension.ts (+5 -5)
97. src/i18n/locales/ar/kilocode.json (+1 -1)
98. src/i18n/locales/ca/kilocode.json (+1 -1)
99. src/i18n/locales/cs/kilocode.json (+1 -1)
100. src/i18n/locales/de/kilocode.json (+1 -1)

+ 0 - 5
.changeset/add-slovak-translation.md

@@ -1,5 +0,0 @@
----
-"kilo-code": patch
----
-
-Add Slovak (sk) language translation for Kilo Code extension and UI

+ 5 - 0
.changeset/fifty-baboons-shine.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fix some visual bugs in the Agent Behaviour settings page

+ 5 - 0
.changeset/filter-internal-verification-tags.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Filter internal verification tags from assistant messages before displaying to users

+ 5 - 0
.changeset/fix-context-flickering.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+fix: prevent context token indicator flickering

+ 5 - 0
.changeset/fix-model-no-tools-used.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fix recurring MODEL_NO_TOOLS_USED error loop by detecting text-based tool call hallucinations and instructing the model to use the native API.

+ 5 - 0
.changeset/gentle-laws-allow.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+feat: support preserving reasoning content in OpenAI format conversion

+ 5 - 0
.changeset/kill-command-fix.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fix: "Kill Command" button now reliably terminates processes on all platforms, including those running in the background.

+ 5 - 0
.changeset/lucky-lands-tickle.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+fix(nano-gpt): Add native reasoning field extraction

+ 5 - 0
.changeset/old-planes-start.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Support custom embed dimensions for Ollama provider

+ 5 - 0
.changeset/persist-deleted-api-costs.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fix: Persist total API cost after message deletion

+ 5 - 0
.changeset/thin-forks-draw.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fix tool-use failures for providers that return numeric tool call IDs (e.g. MiniMax) by coercing the ID to a string in the shared stream parser
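Aside: a minimal sketch of the coercion this changeset describes. The `RawToolCall` shape here is hypothetical; the real normalization lives in the shared stream parser.

```typescript
// Hypothetical illustration: some providers (e.g. MiniMax) emit numeric tool
// call IDs, while downstream consumers expect strings. Coercing once in the
// parser keeps every consumer consistent.
interface RawToolCall {
	id: string | number
	name: string
}

function normalizeToolCallId(raw: RawToolCall): string {
	return String(raw.id) // 12345 -> "12345"; "call_abc" stays "call_abc"
}
```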

+ 1 - 1
.github/copilot-instructions.md

@@ -65,5 +65,5 @@ If you're creating a completely new file that doesn't exist in Roo, add this com
 - all the following folders are kilocode-specific and need no marking with comments:
     - jetbrains/
     - cli/
-    - src/services/ghost/
+    - src/services/autocomplete/
     - src/services/continuedev/

+ 1 - 1
AGENTS.md

@@ -180,7 +180,7 @@ Code in these directories is Kilo Code-specific and doesn't need markers:
 - `jetbrains/` - JetBrains plugin
 - `agent-manager/` directories
 - Any path containing `kilocode` in filename or directory name
-- `src/services/ghost/` - Ghost service
+- `src/services/autocomplete/` - Autocomplete service
 
 ### When markers ARE needed
 

+ 54 - 0
CHANGELOG.md

@@ -1,5 +1,59 @@
 # kilo-code
 
+## 5.7.0
+
+### Minor Changes
+
+- [#4768](https://github.com/Kilo-Org/kilocode/pull/4768) [`626f18a`](https://github.com/Kilo-Org/kilocode/commit/626f18a91fde30b9a303708b3c42897aa91bcd98) Thanks [@hsp-sz](https://github.com/hsp-sz)! - feat: add Zenmux provider
+
+### Patch Changes
+
+- [#4714](https://github.com/Kilo-Org/kilocode/pull/4714) [`69b36b5`](https://github.com/Kilo-Org/kilocode/commit/69b36b537d5a5f6817dbc60567623ffcdfac9acf) Thanks [@otterDeveloper](https://github.com/otterDeveloper)! - feat (fireworks.ai): add minimax 2.1, glm 4.7, updated other models
+
+- [#4926](https://github.com/Kilo-Org/kilocode/pull/4926) [`079dffd`](https://github.com/Kilo-Org/kilocode/commit/079dffd17e2612ac22f5aaf9430f18363088c4cd) Thanks [@YuriNachos](https://github.com/YuriNachos)! - fix: disable zsh history expansion (#4926)
+
+- [#5162](https://github.com/Kilo-Org/kilocode/pull/5162) [`cad3c68`](https://github.com/Kilo-Org/kilocode/commit/cad3c688dc2493ef7a750fc47c60db9507da4a9d) Thanks [@hdcodedev](https://github.com/hdcodedev)! - Fix attached images being lost when editing a message with checkpoint
+
+    When editing a message that has a checkpoint, the images attached to the edited message were not being included in the `editMessageConfirm` webview message. This caused images to be silently dropped and not sent to the backend.
+
+    The fix adds the `images` field to the message payload in both the checkpoint and non-checkpoint edit confirmation paths.
+
+    Fixes #3489
+
+- [#5139](https://github.com/Kilo-Org/kilocode/pull/5139) [`932c692`](https://github.com/Kilo-Org/kilocode/commit/932c692b2f35e7bd4ffa59f74640ab27e984ef2c) Thanks [@naga-k](https://github.com/naga-k)! - Prevent sending thinkingLevel to Gemini models that do not support it
+
+- [#4945](https://github.com/Kilo-Org/kilocode/pull/4945) [`43bc7ac`](https://github.com/Kilo-Org/kilocode/commit/43bc7acc815d81ba0f775c9e2d7965336c0feb50) Thanks [@CaiDingxian](https://github.com/CaiDingxian)! - feat: add chars count to ListFilesTool
+
+- [#5805](https://github.com/Kilo-Org/kilocode/pull/5805) [`918f767`](https://github.com/Kilo-Org/kilocode/commit/918f767136cb073a71767d76708da40e25c03f06) Thanks [@Neonsy](https://github.com/Neonsy)! - Add support for GLM 5, set the Z.ai default to `glm-5`, and align Z.ai API line model selection in VS Code and webview settings
+
+## 5.6.0
+
+### Minor Changes
+
+- [#5040](https://github.com/Kilo-Org/kilocode/pull/5040) [`abe3047`](https://github.com/Kilo-Org/kilocode/commit/abe30473feffb84e885fc8abd5595033fe8b5431) Thanks [@luthraansh](https://github.com/luthraansh)! - Added Corethink as a new AI provider
+
+### Patch Changes
+
+- [#5749](https://github.com/Kilo-Org/kilocode/pull/5749) [`b2fa0a9`](https://github.com/Kilo-Org/kilocode/commit/b2fa0a9b239a396feee39d14eb60eafb088c0ed4) Thanks [@skaldamramra](https://github.com/skaldamramra)! - Add Slovak (sk) language translation for Kilo Code extension and UI
+
+- [#5681](https://github.com/Kilo-Org/kilocode/pull/5681) [`b5ef707`](https://github.com/Kilo-Org/kilocode/commit/b5ef70717068a791da5c3b3068eadb8e189ff484) Thanks [@Drilmo](https://github.com/Drilmo)! - fix(agent-manager): Fix double scrollbar in mode selector dropdowns
+
+- [#5722](https://github.com/Kilo-Org/kilocode/pull/5722) [`f7cf4fd`](https://github.com/Kilo-Org/kilocode/commit/f7cf4fd5002b697f1e41e744b01f096e57666acf) Thanks [@Neonsy](https://github.com/Neonsy)! - Improve Chutes Kimi reliability by preventing terminated-stream retry loops and handling tool/reasoning chunks more safely.
+
+- [#5747](https://github.com/Kilo-Org/kilocode/pull/5747) [`95be119`](https://github.com/Kilo-Org/kilocode/commit/95be1193449184869e49d44b7fe9f09e1620b3ce) Thanks [@Githubguy132010](https://github.com/Githubguy132010)! - Fix JetBrains build failure by adding missing vsix dependency for build pipeline
+
+- [#5733](https://github.com/Kilo-Org/kilocode/pull/5733) [`1b5c4f4`](https://github.com/Kilo-Org/kilocode/commit/1b5c4f4fab28f03b81a9bdf3cd789b1425108765) Thanks [@krisztian-gajdar](https://github.com/krisztian-gajdar)! - Show loading spinner immediately when opening review scope dialog while scope information is being computed, improving perceived performance for repositories with many changes
+
+- [#5699](https://github.com/Kilo-Org/kilocode/pull/5699) [`e560e47`](https://github.com/Kilo-Org/kilocode/commit/e560e47e39f605f78a6d18fdbfc0dd680ceb5557) Thanks [@Patel230](https://github.com/Patel230)! - Fix unreadable text and poor contrast issues in Agent Manager
+
+- [#5722](https://github.com/Kilo-Org/kilocode/pull/5722) [`a834092`](https://github.com/Kilo-Org/kilocode/commit/a8340925c72e9ee0494e1bffd47dbc1aaddc1c8e) Thanks [@Neonsy](https://github.com/Neonsy)! - Fixed Moonshot Kimi tool-calling and thinking-mode behavior for `kimi-k2.5` and `kimi-for-coding`.
+
+- [#4749](https://github.com/Kilo-Org/kilocode/pull/4749) [`ed70dad`](https://github.com/Kilo-Org/kilocode/commit/ed70dad320a80160dc793bf34f52b87d995285ff) Thanks [@lgrgic](https://github.com/lgrgic)! - Fix 'Delete' toggle button in Auto Approve settings
+
+- [#5756](https://github.com/Kilo-Org/kilocode/pull/5756) [`5d9d4d1`](https://github.com/Kilo-Org/kilocode/commit/5d9d4d1c4a6236fccf7082ea9e8d83d95bbd207a) Thanks [@bernaferrari](https://github.com/bernaferrari)! - Remove duplicate "Kilo Code Marketplace" title in toolbar (thanks @bernaferrari!)
+
+- [#3807](https://github.com/Kilo-Org/kilocode/pull/3807) [`e37717e`](https://github.com/Kilo-Org/kilocode/commit/e37717ee2fad8efb53bea92752dd9ea25f79bbed) Thanks [@davidraedev](https://github.com/davidraedev)! - Hook embedding timeout into settings for ollama
+
 ## 5.5.0
 
 ### Minor Changes

+ 197 - 0
apps/kilocode-docs/pages/ai-providers/zenmux.md

@@ -0,0 +1,197 @@
+---
+title: ZenMux
+---
+
+import Codicon from "@site/src/components/Codicon";
+
+# Using ZenMux With Kilo Code
+
+[ZenMux](https://zenmux.ai) provides a unified API gateway to access multiple AI models from different providers through a single endpoint. It supports OpenAI, Anthropic, Google, and other major AI providers, automatically handling routing, fallbacks, and cost optimization.
+
+## Getting Started
+
+1. **Sign up for ZenMux:** Visit [zenmux.ai](https://zenmux.ai) to create an account.
+2. **Get your API key:** After signing up, navigate to your dashboard to generate an API key.
+3. **Configure in Kilo Code:** Add your API key to Kilo Code settings.
+
+## Configuration in Kilo Code
+
+1. **Open Kilo Code Settings:** Click the gear icon (<Codicon name="gear" />) in the Kilo Code panel.
+2. **Select Provider:** Choose "ZenMux" from the "API Provider" dropdown.
+3. **Enter API Key:** Paste your ZenMux API key into the "ZenMux API Key" field.
+4. **Select Model:** Choose your desired model from the "Model" dropdown.
+5. **(Optional) Custom Base URL:** If you need to use a custom base URL for the ZenMux API, check "Use custom base URL" and enter the URL. Leave this blank for most users.
+
+## Supported Models
+
+ZenMux supports a wide range of models from various providers:
+
+Visit [zenmux.ai/models](https://zenmux.ai/models) to see the complete list of available models.
+
+### Other Providers
+
+ZenMux also supports models from Meta, Mistral, and many other providers. Check your ZenMux dashboard for the complete list of available models.
+
+## API Compatibility
+
+ZenMux provides multiple API endpoints for different protocols:
+
+### OpenAI Compatible API
+
+Use the standard OpenAI SDK with ZenMux's base URL:
+
+```javascript
+import OpenAI from "openai"
+
+const openai = new OpenAI({
+	baseURL: "https://zenmux.ai/api/v1",
+	apiKey: "<ZENMUX_API_KEY>",
+})
+
+async function main() {
+	const completion = await openai.chat.completions.create({
+		model: "openai/gpt-5",
+		messages: [
+			{
+				role: "user",
+				content: "What is the meaning of life?",
+			},
+		],
+	})
+
+	console.log(completion.choices[0].message)
+}
+
+main()
+```
+
+### Anthropic API
+
+For Anthropic models, use the dedicated endpoint:
+
+```typescript
+import Anthropic from "@anthropic-ai/sdk"
+
+// 1. Initialize the Anthropic client
+const anthropic = new Anthropic({
+	// 2. Replace with the API key from your ZenMux console
+	apiKey: "<YOUR_ZENMUX_API_KEY>",
+	// 3. Point the base URL to the ZenMux endpoint
+	baseURL: "https://zenmux.ai/api/anthropic",
+})
+
+async function main() {
+	const msg = await anthropic.messages.create({
+		model: "anthropic/claude-sonnet-4.5",
+		max_tokens: 1024,
+		messages: [{ role: "user", content: "Hello, Claude" }],
+	})
+	console.log(msg)
+}
+
+main()
+```
+
+### Platform API
+
+The generation endpoint returns information about a completed generation, such as token usage and cost:
+
+```bash
+curl https://zenmux.ai/api/v1/generation?id=<generation_id> \
+  -H "Authorization: Bearer $ZENMUX_API_KEY"
+```
+
+### Google Vertex AI API
+
+For Google models:
+
+```typescript
+import * as genai from "@google/genai"
+
+const client = new genai.GoogleGenAI({
+	apiKey: "$ZENMUX_API_KEY",
+	vertexai: true,
+	httpOptions: {
+		baseUrl: "https://zenmux.ai/api/vertex-ai",
+		apiVersion: "v1",
+	},
+})
+
+const response = await client.models.generateContent({
+	model: "google/gemini-2.5-pro",
+	contents: "How does AI work?",
+})
+console.log(response)
+```
+
+## Features
+
+### Automatic Routing
+
+ZenMux automatically routes your requests to the best available provider based on:
+
+- Model availability
+- Response time
+- Cost optimization
+- Provider health status
+
+### Fallback Support
+
+If a provider is unavailable, ZenMux automatically falls back to alternative providers that support the same model capabilities.
+
+### Cost Optimization
+
+ZenMux can be configured to optimize for cost, routing requests to the most cost-effective provider while maintaining quality.
+
+### Zero Data Retention (ZDR)
+
+Enable ZDR mode to ensure that no request or response data is stored by ZenMux, providing maximum privacy for sensitive applications.
+
+## Advanced Configuration
+
+### Provider Routing
+
+You can specify routing preferences:
+
+- **Price**: Route to the lowest cost provider
+- **Throughput**: Route to the provider with highest tokens/second
+- **Latency**: Route to the provider with fastest response time
+
+### Data Collection Settings
+
+Control how ZenMux handles your data:
+
+- **Allow**: Allow data collection for service improvement
+- **Deny**: Disable all data collection
+
+### Middle-Out Transform
+
+Enable the middle-out transform feature to optimize prompts that exceed model context limits.
+
+## Troubleshooting
+
+### API Key Issues
+
+- Ensure your API key is correctly copied without any extra spaces
+- Check that your ZenMux account is active and has available credits
+- Verify the API key has the necessary permissions
+
+### Model Availability
+
+- Some models may have regional restrictions
+- Check the ZenMux dashboard for current model availability
+- Ensure your account tier has access to the desired models
+
+### Connection Issues
+
+- Verify your internet connection
+- Check if you're behind a firewall that might block API requests
+- Try using a custom base URL if the default endpoint is blocked
+
+## Support
+
+For additional support:
+
+- Visit the [ZenMux documentation](https://zenmux.ai/docs)
+- Contact ZenMux support through their dashboard
+- Check the [Kilo Code GitHub repository](https://github.com/kilocode/kilocode) for integration-specific issues
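To make the Platform API example above concrete, here is a minimal TypeScript sketch of the same generation lookup. It assumes Node 18+ global `fetch`; the response shape is not documented in this diff, so it is left untyped.

```typescript
// Hypothetical sketch of the "Get generation" call shown in the doc above.
async function getGeneration(generationId: string, apiKey: string): Promise<unknown> {
	const url = `https://zenmux.ai/api/v1/generation?id=${encodeURIComponent(generationId)}`
	const res = await fetch(url, {
		headers: { Authorization: `Bearer ${apiKey}` },
	})
	if (!res.ok) {
		throw new Error(`ZenMux generation lookup failed: HTTP ${res.status}`)
	}
	return res.json() // usage and cost fields, per the ZenMux docs
}
```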

+ 5 - 1
apps/kilocode-docs/pages/collaborate/enterprise/sso.md

@@ -5,7 +5,11 @@ description: "Configure Single Sign-On for your organization"
 
 # SSO
 
-Kilo Enterprise lets your organization securely manage access using **Single Sign-On (SSO)**. With SSO enabled, team members can sign in to Kilo using your company’s existing identity provider, such as Okta, Github, Google Workspace, etc.
+Kilo Enterprise lets your organization securely manage access using **Single Sign-On (SSO)**. With SSO enabled, team members can sign in to Kilo using your company's existing identity provider, such as Okta, GitHub, or Google Workspace.
+
+{% callout type="warning" %}
+**IdP-initiated logins are not currently supported.** Users must navigate to the [Kilo Web App](https://app.kilo.ai) to log in; signing in directly from your identity provider's dashboard does not work at this time.
+{% /callout %}
 
 ## Why Enable SSO?
 

+ 1 - 1
apps/kilocode-docs/pages/collaborate/teams/about-plans.md

@@ -40,4 +40,4 @@ No credits are included with a Teams or Enterprise plan purchase.
 - **SLA commitments** for support issues
 - **Dedicated support channels** for private, direct communication
 
-**Cost:** $150 per user per month
+**Cost:** [Contact Sales](https://kilo.ai/contact-sales)

+ 2 - 2
apps/kilocode-docs/pages/customize/agents-md.md

@@ -8,7 +8,7 @@ description: "Project-level configuration with agents.md files"
 AGENTS.md files provide a standardized way to configure AI agent behavior across different AI coding tools. They allow you to define project-specific instructions, coding standards, and guidelines that AI agents should follow when working with your codebase.
 
 {% callout type="note" title="Memory Bank Deprecation" %}
-The Kilo Code **memory bank** feature has been deprecated in favor of AGENTS.md. 
+The Kilo Code **memory bank** feature has been deprecated in favor of AGENTS.md.
 
 **Existing memory bank rules will continue to work.**
 
@@ -16,7 +16,7 @@ If you'd like to migrate your memory bank content to AGENTS.md:
 
 1. Examine the contents in `.kilocode/rules/memory-bank/`
 2. Move that content into your project's `AGENTS.md` file (or ask Kilo to do it for you)
-{% /callout %}
+   {% /callout %}
 
 ## What is AGENTS.md?
 

+ 2 - 0
cli/src/config/mapper.ts

@@ -106,6 +106,8 @@ export function getModelIdForProvider(provider: ProviderConfig): string {
 			return provider.apiModelId || ""
 		case "openrouter":
 			return provider.openRouterModelId || ""
+		case "zenmux":
+			return provider.zenmuxModelId || ""
 		case "ollama":
 			return provider.ollamaModelId || ""
 		case "lmstudio":

+ 2 - 0
cli/src/constants/providers/labels.ts

@@ -11,6 +11,7 @@ export const PROVIDER_LABELS: Record<ProviderName, string> = {
 	"openai-codex": "OpenAI - ChatGPT Plus/Pro",
 	"openai-responses": "OpenAI Compatible (Responses)",
 	openrouter: "OpenRouter",
+	zenmux: "ZenMux",
 	bedrock: "Amazon Bedrock",
 	gemini: "Google Gemini",
 	vertex: "GCP Vertex AI",
@@ -51,6 +52,7 @@ export const PROVIDER_LABELS: Record<ProviderName, string> = {
 	synthetic: "Synthetic",
 	"sap-ai-core": "SAP AI Core",
 	baseten: "BaseTen",
+	corethink: "Corethink"
 }
 
 /**

+ 9 - 0
cli/src/constants/providers/models.ts

@@ -46,6 +46,7 @@ import {
 	minimaxModels,
 	minimaxDefaultModelId,
 	ovhCloudAiEndpointsDefaultModelId,
+	zenmuxDefaultModelId,
 } from "@roo-code/types"
 
 /**
@@ -64,6 +65,7 @@ export type RouterName =
 	| "deepinfra"
 	| "vercel-ai-gateway"
 	| "ovhcloud"
+	| "zenmux"
 	| "nano-gpt"
 
 /**
@@ -120,6 +122,7 @@ export type RouterModels = Record<RouterName, ModelRecord>
 export const PROVIDER_TO_ROUTER_NAME: Record<ProviderName, RouterName | null> = {
 	kilocode: "kilocode",
 	openrouter: "openrouter",
+	zenmux: "zenmux", // kilocode_change
 	ollama: "ollama",
 	lmstudio: "lmstudio",
 	litellm: "litellm",
@@ -165,6 +168,7 @@ export const PROVIDER_TO_ROUTER_NAME: Record<ProviderName, RouterName | null> =
 	synthetic: null,
 	"sap-ai-core": null,
 	baseten: null,
+	corethink: null,
 }
 
 /**
@@ -173,6 +177,7 @@ export const PROVIDER_TO_ROUTER_NAME: Record<ProviderName, RouterName | null> =
 export const PROVIDER_MODEL_FIELD: Record<ProviderName, string | null> = {
 	kilocode: "kilocodeModel",
 	openrouter: "openRouterModelId",
+	zenmux: "zenmuxModelId", // kilocode_change
 	ollama: "ollamaModelId",
 	lmstudio: "lmStudioModelId",
 	litellm: "litellmModelId",
@@ -218,6 +223,7 @@ export const PROVIDER_MODEL_FIELD: Record<ProviderName, string | null> = {
 	synthetic: null,
 	"sap-ai-core": "sapAiCoreModelId",
 	baseten: null,
+	corethink: null,
 }
 
 /**
@@ -283,6 +289,7 @@ export const DEFAULT_MODEL_IDS: Partial<Record<ProviderName, string>> = {
 	zai: internationalZAiDefaultModelId,
 	roo: rooDefaultModelId,
 	ovhcloud: ovhCloudAiEndpointsDefaultModelId,
+	zenmux: zenmuxDefaultModelId,
 }
 
 /**
@@ -458,6 +465,8 @@ export function getModelIdKey(provider: ProviderName): string {
 			return "vercelAiGatewayModelId"
 		case "ovhcloud":
 			return "ovhCloudAiEndpointsModelId"
+		case "zenmux":
+			return "zenmuxModelId"
 		case "nano-gpt":
 			return "nanoGptModelId"
 		default:

+ 28 - 0
cli/src/constants/providers/settings.ts

@@ -91,6 +91,25 @@ export const FIELD_REGISTRY: Record<string, FieldMetadata> = {
 		placeholder: "Enter base URL (or leave empty for default)...",
 		isOptional: true,
 	},
+
+	// kilocode_change start - ZenMux fields
+	zenmuxApiKey: {
+		label: "API Key",
+		type: "password",
+		placeholder: "Enter ZenMux API key...",
+	},
+	zenmuxModelId: {
+		label: "Model",
+		type: "text",
+		placeholder: "Enter model name...",
+	},
+	zenmuxBaseUrl: {
+		label: "Base URL",
+		type: "text",
+		placeholder: "Enter base URL (or leave empty for default)...",
+		isOptional: true,
+	},
+	// kilocode_change end
 	openRouterProviderDataCollection: {
 		label: "Provider Data Collection",
 		type: "select",
@@ -791,6 +810,13 @@ export const getProviderSettings = (provider: ProviderName, config: ProviderSett
 				createFieldConfig("openRouterBaseUrl", config, "Default"),
 			]
 
+		case "zenmux": // kilocode_change
+			return [
+				createFieldConfig("zenmuxApiKey", config),
+				createFieldConfig("zenmuxModelId", config, "openai/gpt-5"),
+				createFieldConfig("zenmuxBaseUrl", config, "Default"),
+			]
+
 		case "openai-native":
 			return [
 				createFieldConfig("openAiNativeApiKey", config),
@@ -1043,6 +1069,7 @@ export const PROVIDER_DEFAULT_MODELS: Record<ProviderName, string> = {
 	"openai-codex": "gpt-4o",
 	"openai-responses": "gpt-4o",
 	openrouter: "anthropic/claude-3-5-sonnet",
+	zenmux: "openai/gpt-5", // kilocode_change
 	bedrock: "anthropic.claude-3-5-sonnet-20241022-v2:0",
 	gemini: "gemini-1.5-pro-latest",
 	vertex: "claude-3-5-sonnet@20241022",
@@ -1083,6 +1110,7 @@ export const PROVIDER_DEFAULT_MODELS: Record<ProviderName, string> = {
 	synthetic: "synthetic-model",
 	"sap-ai-core": "gpt-4o",
 	baseten: "zai-org/GLM-4.6",
+	corethink: "corethink"
 }
 
 /**

+ 2 - 0
cli/src/constants/providers/validation.ts

@@ -10,6 +10,7 @@ export const PROVIDER_REQUIRED_FIELDS: Record<ProviderName, string[]> = {
 	"openai-native": ["openAiNativeApiKey", "apiModelId"],
 	"openai-codex": ["apiModelId"],
 	openrouter: ["openRouterApiKey", "openRouterModelId"],
+	zenmux: ["zenmuxApiKey", "zenmuxModelId"], // kilocode_change
 	ollama: ["ollamaBaseUrl", "ollamaModelId"],
 	lmstudio: ["lmStudioBaseUrl", "lmStudioModelId"],
 	bedrock: ["awsRegion", "apiModelId"], // Auth fields handled in handleSpecialValidations (supports API key, profile, or direct credentials)
@@ -52,4 +53,5 @@ export const PROVIDER_REQUIRED_FIELDS: Record<ProviderName, string[]> = {
 	"virtual-quota-fallback": [], // Has array validation
 	minimax: ["minimaxBaseUrl", "minimaxApiKey", "apiModelId"],
 	baseten: ["basetenApiKey", "apiModelId"],
+	corethink: ["corethinkApiKey", "corethinkModelId"]
 }
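For context, a hedged sketch of how a required-fields registry like `PROVIDER_REQUIRED_FIELDS` is typically consumed; `missingRequiredFields` is a hypothetical helper, not part of this diff:

```typescript
// Hypothetical helper: report which required fields are absent for a provider.
function missingRequiredFields(provider: ProviderName, config: Record<string, unknown>): string[] {
	return (PROVIDER_REQUIRED_FIELDS[provider] ?? []).filter((field) => {
		const value = config[field]
		return value === undefined || value === null || value === ""
	})
}

// e.g. missingRequiredFields("zenmux", { zenmuxApiKey: "sk-..." })
// -> ["zenmuxModelId"]
```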

+ 5 - 5
jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/commands/SetContextCommands.kt

@@ -9,7 +9,7 @@ import com.intellij.openapi.project.Project
  * 
  * The setContext command allows the extension to set context values that can be used
  * to control UI state and feature availability. This is commonly used by features like
- * the GhostProvider (autocomplete) to enable/disable keybindings dynamically.
+ * the AutocompleteProvider (autocomplete) to enable/disable keybindings dynamically.
  * 
  * @param project The current IntelliJ project
  * @param registry The command registry to register commands with
@@ -47,9 +47,9 @@ fun registerSetContextCommands(project: Project, registry: CommandRegistry) {
  * setting context key-value pairs that can be used throughout the plugin to control
  * feature availability and UI state.
  * 
- * Example context keys used by GhostProvider:
- * - kilocode.ghost.enableQuickInlineTaskKeybinding
- * - kilocode.ghost.enableSmartInlineTaskKeybinding
+ * Example context keys used by AutocompleteProvider:
+ * - kilocode.autocomplete.enableQuickInlineTaskKeybinding
+ * - kilocode.autocomplete.enableSmartInlineTaskKeybinding
  */
 class SetContextCommands(val project: Project) {
     private val logger = Logger.getInstance(SetContextCommands::class.java)
@@ -61,7 +61,7 @@ class SetContextCommands(val project: Project) {
      * This method is called when the setContext command is executed from the extension.
      * It stores the key-value pair in the ContextManager for later retrieval.
      * 
-     * @param key The context key to set (e.g., "kilocode.ghost.enableQuickInlineTaskKeybinding")
+     * @param key The context key to set (e.g., "kilocode.autocomplete.enableQuickInlineTaskKeybinding")
      * @param value The value to set (typically Boolean, but can be String, Number, etc.)
      * @return null (void return type)
      */

+ 3 - 3
jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/core/ContextManager.kt

@@ -17,8 +17,8 @@ import java.util.concurrent.ConcurrentHashMap
  * Example usage:
  * ```kotlin
  * val contextManager = project.getService(ContextManager::class.java)
- * contextManager.setContext("kilocode.ghost.enableQuickInlineTaskKeybinding", true)
- * val value = contextManager.getContext("kilocode.ghost.enableQuickInlineTaskKeybinding")
+ * contextManager.setContext("kilocode.autocomplete.enableQuickInlineTaskKeybinding", true)
+ * val value = contextManager.getContext("kilocode.autocomplete.enableQuickInlineTaskKeybinding")
  * ```
  */
 @Service(Service.Level.PROJECT)
@@ -34,7 +34,7 @@ class ContextManager {
      * Sets a context value for the given key.
      * If the value is null, the context key will be removed.
      * 
-     * @param key The context key (e.g., "kilocode.ghost.enableQuickInlineTaskKeybinding")
+     * @param key The context key (e.g., "kilocode.autocomplete.enableQuickInlineTaskKeybinding")
      * @param value The value to set (can be Boolean, String, Number, or any serializable type)
      */
     fun setContext(key: String, value: Any?) {

+ 2 - 2
jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/inline/InlineCompletionConstants.kt

@@ -11,9 +11,9 @@ object InlineCompletionConstants {
 
     /**
      * Command ID registered in the VSCode extension for tracking acceptance events.
-     * This matches the command registered in GhostInlineCompletionProvider.
+     * This matches the command registered in AutocompleteInlineCompletionProvider.
      */
-    const val INLINE_COMPLETION_ACCEPTED_COMMAND = "kilocode.ghost.inline-completion.accepted"
+    const val INLINE_COMPLETION_ACCEPTED_COMMAND = "kilocode.autocomplete.inline-completion.accepted"
 
     /**
      * Default timeout in milliseconds for inline completion requests.

+ 1 - 1
jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/inline/InlineCompletionService.kt

@@ -16,7 +16,7 @@ import java.util.concurrent.atomic.AtomicReference
 
 /**
  * Service responsible for getting inline completions via RPC communication
- * with the VSCode extension's Ghost service. Encapsulates all RPC logic,
+ * with the VSCode extension's Autocomplete service. Encapsulates all RPC logic,
  * error handling, and result processing for inline completion generation.
  */
 class InlineCompletionService {

+ 4 - 4
jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/inline/KiloCodeInlineCompletionProvider.kt

@@ -12,12 +12,12 @@ import com.intellij.openapi.fileEditor.FileDocumentManager
 import com.intellij.openapi.project.Project
 
 /**
- * IntelliJ inline completion provider that bridges to VSCode extension's Ghost service.
+ * IntelliJ inline completion provider that bridges to VSCode extension's Autocomplete service.
  * This provider uses the new InlineCompletionService which sends full file content
- * to the Ghost service via RPC for accurate completions.
+ * to the Autocomplete service via RPC for accurate completions.
  *
  * The provider handles triggering and rendering, while all AI logic (debouncing,
- * caching, context gathering, and telemetry) is handled by the Ghost service.
+ * caching, context gathering, and telemetry) is handled by the Autocomplete service.
  */
 class KiloCodeInlineCompletionProvider(
     private val handle: Int,
@@ -42,7 +42,7 @@ class KiloCodeInlineCompletionProvider(
     override val id: InlineCompletionProviderID = InlineCompletionProviderID("kilocode-inline-completion-$extensionId-$handle")
 
     /**
-     * Gets inline completion suggestions using the Ghost service.
+     * Gets inline completion suggestions using the Autocomplete service.
      * Sends full file content to ensure accurate completions.
      */
     override suspend fun getSuggestion(request: InlineCompletionRequest): InlineCompletionSingleSuggestion {

+ 4 - 4
jetbrains/plugin/src/test/kotlin/ai/kilocode/jetbrains/util/ReflectUtilsStatusBarTest.kt

@@ -107,10 +107,10 @@ class ReflectUtilsStatusBarTest {
 
         val args = listOf(
             1.0,                    // id
-            "ghost-extension",      // extensionId
-            "ghost-status",         // entryId
-            "Ghost Status",         // name
-            "Ghost (5)",            // text
+            "autocomplete-extension",      // extensionId
+            "autocomplete-status",         // entryId
+            "Autocomplete Status",         // name
+            "Autocomplete (5)",            // text
             markdownTooltip,        // tooltip - MarkdownString object
             false,                  // showProgress
             null,                   // command

+ 2 - 2
jetbrains/plugin/turbo.json

@@ -22,11 +22,11 @@
 		},
 		"copy:kilocode": {
 			"cache": false,
-			"dependsOn": ["clean:kilocode", "kilo-code#vsix:unpacked"]
+			"dependsOn": ["clean:kilocode", "kilo-code#vsix", "kilo-code#vsix:unpacked"]
 		},
 		"copy:resource-kilocode": {
 			"cache": false,
-			"dependsOn": ["clean:resource-kilocode", "kilo-code#vsix:unpacked"]
+			"dependsOn": ["clean:resource-kilocode", "kilo-code#vsix", "kilo-code#vsix:unpacked"]
 		},
 		"copy:resource-host": {
 			"cache": false,

+ 3 - 0
package.json

@@ -83,6 +83,9 @@
 			"glob": ">=11.1.0",
 			"@types/react": "^18.3.23",
 			"@types/react-dom": "^18.3.5",
+			"qs": "^6.14.1",
+			"body-parser": "^2.2.2",
+			"preact": "^10.28.2",
 			"zod": "3.25.76",
 			"@sap-ai-sdk/prompt-registry>zod": "^4.0.0"
 		}

+ 17 - 0
packages/core-schemas/src/config/provider.ts

@@ -88,6 +88,21 @@ export const openRouterProviderSchema = baseProviderSchema.extend({
 	openRouterZdr: z.boolean().optional(),
 })
 
+// kilocode_change start
+// ZenMux provider
+export const zenmuxProviderSchema = baseProviderSchema.extend({
+	provider: z.literal("zenmux"),
+	zenmuxModelId: z.string().optional(),
+	zenmuxApiKey: z.string().optional(),
+	zenmuxBaseUrl: z.string().optional(),
+	zenmuxSpecificProvider: z.string().optional(),
+	zenmuxUseMiddleOutTransform: z.boolean().optional(),
+	zenmuxProviderDataCollection: z.enum(["allow", "deny"]).optional(),
+	zenmuxProviderSort: z.enum(["price", "throughput", "latency"]).optional(),
+	zenmuxZdr: z.boolean().optional(),
+})
+// kilocode_change end
+
 // Ollama provider
 export const ollamaProviderSchema = baseProviderSchema.extend({
 	provider: z.literal("ollama"),
@@ -407,6 +422,7 @@ export const providerConfigSchema = z.discriminatedUnion("provider", [
 	openAIProviderSchema,
 	openAIResponsesProviderSchema, // kilocode_change
 	openRouterProviderSchema,
+	zenmuxProviderSchema, // kilocode_change
 	ollamaProviderSchema,
 	lmStudioProviderSchema,
 	glamaProviderSchema,
@@ -453,6 +469,7 @@ export type OpenAICodexProviderConfig = z.infer<typeof openAICodexProviderSchema
 export type OpenAIProviderConfig = z.infer<typeof openAIProviderSchema>
 export type OpenAIResponsesProviderConfig = z.infer<typeof openAIResponsesProviderSchema> // kilocode_change
 export type OpenRouterProviderConfig = z.infer<typeof openRouterProviderSchema>
+export type ZenmuxProviderConfig = z.infer<typeof zenmuxProviderSchema> // kilocode_change
 export type OllamaProviderConfig = z.infer<typeof ollamaProviderSchema>
 export type LMStudioProviderConfig = z.infer<typeof lmStudioProviderSchema>
 export type GlamaProviderConfig = z.infer<typeof glamaProviderSchema>
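A minimal usage sketch for the new discriminated-union member, assuming `baseProviderSchema` adds no further required fields (the values below are made up):

```typescript
// Hypothetical parse: "provider": "zenmux" routes to zenmuxProviderSchema.
const parsed = providerConfigSchema.safeParse({
	provider: "zenmux",
	zenmuxApiKey: "sk-...",
	zenmuxModelId: "openai/gpt-5",
	zenmuxProviderSort: "price",
})

if (!parsed.success) {
	console.error(parsed.error.issues)
}
```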

+ 1 - 9
packages/core/src/message-utils/consolidateTokenUsage.ts

@@ -99,7 +99,6 @@ export function consolidateTokenUsage(messages: ClineMessage[]): TokenUsage {
 	// with only apiProtocol (no token data). We need to skip these placeholders and
 	// find the last message with actual token data to avoid showing 0% context.
 	result.contextTokens = 0
-	let foundValidTokenData = false
 
 	for (let i = messages.length - 1; i >= 0; i--) {
 		const message = messages[i]
@@ -112,21 +111,14 @@ export function consolidateTokenUsage(messages: ClineMessage[]): TokenUsage {
 				const hasTokenData = typeof tokensIn === "number" || typeof tokensOut === "number"
 
 				if (hasTokenData) {
-					// Since tokensIn now stores TOTAL input tokens (including cache tokens),
-					// we no longer need to add cacheWrites and cacheReads separately.
-					// This applies to both Anthropic and OpenAI protocols.
 					result.contextTokens = (tokensIn || 0) + (tokensOut || 0)
-					foundValidTokenData = true
+					break
 				}
 			} catch {
-				// Ignore JSON parse errors
 				continue
 			}
 		} else if (message.type === "say" && message.say === "condense_context") {
 			result.contextTokens = message.contextCondense?.newContextTokens ?? 0
-			foundValidTokenData = true
-		}
-		if (foundValidTokenData) {
 			break
 		}
 	}
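The simplified scan now walks the messages backwards and breaks at the first entry carrying real token data (or a `condense_context` message), instead of tracking a separate `foundValidTokenData` flag. A hedged illustration with abbreviated message shapes:

```typescript
// Hypothetical illustration of the backward scan (message fields abbreviated).
const messages = [
	{ type: "say", say: "api_req_started", text: JSON.stringify({ tokensIn: 1000, tokensOut: 200 }) },
	// Trailing placeholder with no token data is skipped, not counted as zero:
	{ type: "say", say: "api_req_started", text: JSON.stringify({ apiProtocol: "anthropic" }) },
]
// contextTokens resolves to 1000 + 200 = 1200 from the last message with data.
```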

+ 5 - 5
packages/types/src/__tests__/kilocode.test.ts

@@ -2,16 +2,16 @@
 
 import { describe, it, expect, vi, afterEach } from "vitest"
 import {
-	ghostServiceSettingsSchema,
+	autocompleteServiceSettingsSchema,
 	getAppUrl,
 	getApiUrl,
 	getKiloUrlFromToken,
 	getExtensionConfigUrl,
 } from "../kilocode/kilocode.js"
 
-describe("ghostServiceSettingsSchema", () => {
+describe("autocompleteServiceSettingsSchema", () => {
 	it("should accept all boolean settings", () => {
-		const result = ghostServiceSettingsSchema.safeParse({
+		const result = autocompleteServiceSettingsSchema.safeParse({
 			enableAutoTrigger: true,
 			enableQuickInlineTaskKeybinding: false,
 			enableSmartInlineTaskKeybinding: true,
@@ -20,7 +20,7 @@ describe("ghostServiceSettingsSchema", () => {
 	})
 
 	it("should accept combined settings", () => {
-		const result = ghostServiceSettingsSchema.safeParse({
+		const result = autocompleteServiceSettingsSchema.safeParse({
 			enableAutoTrigger: true,
 			enableQuickInlineTaskKeybinding: true,
 			enableSmartInlineTaskKeybinding: true,
@@ -29,7 +29,7 @@ describe("ghostServiceSettingsSchema", () => {
 	})
 
 	it("should be optional", () => {
-		const result = ghostServiceSettingsSchema.safeParse({
+		const result = autocompleteServiceSettingsSchema.safeParse({
 			enableAutoTrigger: true,
 		})
 		expect(result.success).toBe(true)

+ 3 - 2
packages/types/src/global-settings.ts

@@ -14,7 +14,7 @@ import { telemetrySettingsSchema } from "./telemetry.js"
 import { modeConfigSchema } from "./mode.js"
 import { customModePromptsSchema, customSupportPromptsSchema } from "./mode.js"
 import { languagesSchema } from "./vscode.js"
-import { fastApplyModelSchema, ghostServiceSettingsSchema, fastApplyApiProviderSchema } from "./kilocode/kilocode.js"
+import { fastApplyModelSchema, autocompleteServiceSettingsSchema, fastApplyApiProviderSchema } from "./kilocode/kilocode.js"
 
 /**
  * Default delay in milliseconds after writes to allow diagnostics to detect potential problems.
@@ -227,7 +227,7 @@ export const globalSettingsSchema = z.object({
 	dismissedNotificationIds: z.string().array().optional(), // kilocode_change
 	commitMessageApiConfigId: z.string().optional(), // kilocode_change
 	terminalCommandApiConfigId: z.string().optional(), // kilocode_change
-	ghostServiceSettings: ghostServiceSettingsSchema, // kilocode_change
+	ghostServiceSettings: autocompleteServiceSettingsSchema, // kilocode_change
 	hasPerformedOrganizationAutoSwitch: z.boolean().optional(), // kilocode_change
 	includeTaskHistoryInEnhance: z.boolean().optional(),
 	historyPreviewCollapsed: z.boolean().optional(),
@@ -310,6 +310,7 @@ export const SECRET_STATE_KEYS = [
 	"vercelAiGatewayApiKey",
 	"sapAiCoreServiceKey", // kilocode_change
 	"basetenApiKey",
+	"corethinkApiKey",
 ] as const
 
 // Global secrets that are part of GlobalSettings (not ProviderSettings)

+ 2 - 2
packages/types/src/kilocode/kilocode.ts

@@ -6,7 +6,7 @@ declare global {
 	}
 }
 
-export const ghostServiceSettingsSchema = z
+export const autocompleteServiceSettingsSchema = z
 	.object({
 		enableAutoTrigger: z.boolean().optional(),
 		enableSmartInlineTaskKeybinding: z.boolean().optional(),
@@ -18,7 +18,7 @@ export const ghostServiceSettingsSchema = z
 	})
 	.optional()
 
-export type GhostServiceSettings = z.infer<typeof ghostServiceSettingsSchema>
+export type AutocompleteServiceSettings = z.infer<typeof autocompleteServiceSettingsSchema>
 
 /**
  * Map of provider names to their default autocomplete models.

+ 34 - 0
packages/types/src/provider-settings.ts

@@ -6,6 +6,7 @@ import { profileTypeSchema } from "./profile-type.js" // kilocode_change
 import {
 	anthropicModels,
 	basetenModels,
+	corethinkModels,
 	bedrockModels,
 	cerebrasModels,
 	claudeCodeModels,
@@ -56,6 +57,7 @@ export const dynamicProviders = [
 	"inception",
 	"synthetic",
 	"sap-ai-core",
+	"zenmux",
 	// kilocode_change end
 	"deepinfra",
 	"io-intelligence",
@@ -139,6 +141,7 @@ export const providerNames = [
 	"anthropic",
 	"bedrock",
 	"baseten",
+	"corethink",
 	"cerebras",
 	"claude-code",
 	"doubao",
@@ -161,6 +164,7 @@ export const providerNames = [
 	"virtual-quota-fallback",
 	"synthetic",
 	"inception",
+	"zenmux",
 	// kilocode_change end
 	"sambanova",
 	"vertex",
@@ -248,6 +252,10 @@ const nanoGptSchema = baseProviderSettingsSchema.extend({
 
 export const openRouterProviderDataCollectionSchema = z.enum(["allow", "deny"])
 export const openRouterProviderSortSchema = z.enum(["price", "throughput", "latency"])
+
+// ZenMux provider schemas - kilocode_change
+export const zenmuxProviderDataCollectionSchema = z.enum(["allow", "deny"])
+export const zenmuxProviderSortSchema = z.enum(["price", "throughput", "latency"])
 // kilocode_change end
 
 const openRouterSchema = baseProviderSettingsSchema.extend({
@@ -262,6 +270,19 @@ const openRouterSchema = baseProviderSettingsSchema.extend({
 	// kilocode_change end
 })
 
+// kilocode_change start
+const zenmuxSchema = baseProviderSettingsSchema.extend({
+	zenmuxApiKey: z.string().optional(),
+	zenmuxModelId: z.string().optional(),
+	zenmuxBaseUrl: z.string().optional(),
+	zenmuxSpecificProvider: z.string().optional(),
+	zenmuxUseMiddleOutTransform: z.boolean().optional(),
+	zenmuxProviderDataCollection: zenmuxProviderDataCollectionSchema.optional(),
+	zenmuxProviderSort: zenmuxProviderSortSchema.optional(),
+	zenmuxZdr: z.boolean().optional(),
+})
+// kilocode_change end
+
 const bedrockSchema = apiModelIdProviderModelSchema.extend({
 	awsAccessKey: z.string().optional(),
 	awsSecretKey: z.string().optional(),
@@ -555,6 +576,10 @@ const basetenSchema = apiModelIdProviderModelSchema.extend({
 	basetenApiKey: z.string().optional(),
 })
 
+const corethinkSchema = apiModelIdProviderModelSchema.extend({
+	corethinkApiKey: z.string().optional(),
+})
+
 const defaultSchema = z.object({
 	apiProvider: z.undefined(),
 })
@@ -565,6 +590,7 @@ export const providerSettingsSchemaDiscriminated = z.discriminatedUnion("apiProv
 	glamaSchema.merge(z.object({ apiProvider: z.literal("glama") })), // kilocode_change
 	nanoGptSchema.merge(z.object({ apiProvider: z.literal("nano-gpt") })), // kilocode_change
 	openRouterSchema.merge(z.object({ apiProvider: z.literal("openrouter") })),
+	zenmuxSchema.merge(z.object({ apiProvider: z.literal("zenmux") })), // kilocode_change
 	bedrockSchema.merge(z.object({ apiProvider: z.literal("bedrock") })),
 	vertexSchema.merge(z.object({ apiProvider: z.literal("vertex") })),
 	openAiSchema.merge(z.object({ apiProvider: z.literal("openai") })),
@@ -595,6 +621,7 @@ export const providerSettingsSchemaDiscriminated = z.discriminatedUnion("apiProv
 	// kilocode_change end
 	groqSchema.merge(z.object({ apiProvider: z.literal("groq") })),
 	basetenSchema.merge(z.object({ apiProvider: z.literal("baseten") })),
+	corethinkSchema.merge(z.object({ apiProvider: z.literal("corethink") })),
 	huggingFaceSchema.merge(z.object({ apiProvider: z.literal("huggingface") })),
 	chutesSchema.merge(z.object({ apiProvider: z.literal("chutes") })),
 	litellmSchema.merge(z.object({ apiProvider: z.literal("litellm") })),
@@ -618,6 +645,7 @@ export const providerSettingsSchema = z.object({
 	...glamaSchema.shape, // kilocode_change
 	...nanoGptSchema.shape, // kilocode_change
 	...openRouterSchema.shape,
+	...zenmuxSchema.shape, // kilocode_change
 	...bedrockSchema.shape,
 	...vertexSchema.shape,
 	...openAiSchema.shape,
@@ -648,6 +676,7 @@ export const providerSettingsSchema = z.object({
 	...xaiSchema.shape,
 	...groqSchema.shape,
 	...basetenSchema.shape,
+	...corethinkSchema.shape,
 	...huggingFaceSchema.shape,
 	...chutesSchema.shape,
 	...litellmSchema.shape,
@@ -686,6 +715,7 @@ export const modelIdKeys = [
 	"glamaModelId", // kilocode_change
 	"nanoGptModelId", // kilocode_change
 	"openRouterModelId",
+	"zenmuxModelId", // kilocode_change
 	"openAiModelId",
 	"ollamaModelId",
 	"lmStudioModelId",
@@ -748,9 +778,11 @@ export const modelIdKeysByProvider: Record<TypicalProvider, ModelIdKey> = {
 	ovhcloud: "ovhCloudAiEndpointsModelId",
 	inception: "inceptionLabsModelId",
 	"sap-ai-core": "sapAiCoreModelId",
+	zenmux: "zenmuxModelId", // kilocode_change
 	// kilocode_change end
 	groq: "apiModelId",
 	baseten: "apiModelId",
+	corethink: "apiModelId",
 	chutes: "apiModelId",
 	litellm: "litellmModelId",
 	huggingface: "huggingFaceModelId",
@@ -904,6 +936,7 @@ export const MODELS_BY_PROVIDER: Record<
 	xai: { id: "xai", label: "xAI (Grok)", models: Object.keys(xaiModels) },
 	zai: { id: "zai", label: "Z.ai", models: Object.keys(internationalZAiModels) },
 	baseten: { id: "baseten", label: "Baseten", models: Object.keys(basetenModels) },
+	corethink: { id: "corethink", label: "Corethink", models: Object.keys(corethinkModels) },
 
 	// Dynamic providers; models pulled from remote APIs.
 	glama: { id: "glama", label: "Glama", models: [] }, // kilocode_change
@@ -920,6 +953,7 @@ export const MODELS_BY_PROVIDER: Record<
 	inception: { id: "inception", label: "Inception", models: [] },
 	kilocode: { id: "kilocode", label: "Kilocode", models: [] },
 	"virtual-quota-fallback": { id: "virtual-quota-fallback", label: "Virtual Quota Fallback", models: [] },
+	zenmux: { id: "zenmux", label: "ZenMux", models: [] }, // kilocode_change
 	// kilocode_change end
 	deepinfra: { id: "deepinfra", label: "DeepInfra", models: [] },
 	"vercel-ai-gateway": { id: "vercel-ai-gateway", label: "Vercel AI Gateway", models: [] },

+ 20 - 0
packages/types/src/providers/corethink.ts

@@ -0,0 +1,20 @@
+import type { ModelInfo } from "../model.js"
+
+export const corethinkModels = {
+	"corethink": {
+		maxTokens: 8192,
+		contextWindow: 79000,
+		supportsImages: true,
+		supportsPromptCache: false,
+		supportsNativeTools: true,
+		inputPrice: 1.0,
+		outputPrice: 1.0,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0,
+		description: "Corethink 1 - AI coding assistant powered by Corethink.",
+	},
+} as const satisfies Record<string, ModelInfo>
+
+export type CorethinkModelId = keyof typeof corethinkModels
+
+export const corethinkDefaultModelId = "corethink" satisfies CorethinkModelId
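Usage is the usual lookup against the `as const` table; a tiny sketch:

```typescript
// Look up the default Corethink model's metadata from the table above.
const info = corethinkModels[corethinkDefaultModelId]
console.log(info.contextWindow) // 79000
console.log(info.supportsNativeTools) // true
```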

+ 120 - 46
packages/types/src/providers/fireworks.ts

@@ -1,18 +1,24 @@
 import type { ModelInfo } from "../model.js"
 
 export type FireworksModelId =
+	| "accounts/fireworks/models/kimi-k2p5"
 	| "accounts/fireworks/models/kimi-k2-instruct"
 	| "accounts/fireworks/models/kimi-k2-instruct-0905"
 	| "accounts/fireworks/models/kimi-k2-thinking"
 	| "accounts/fireworks/models/minimax-m2"
+	| "accounts/fireworks/models/minimax-m2p1"
+	| "accounts/fireworks/models/qwen3-235b-a22b"
 	| "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507"
 	| "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct"
 	| "accounts/fireworks/models/deepseek-r1-0528"
 	| "accounts/fireworks/models/deepseek-v3"
+	| "accounts/fireworks/models/deepseek-v3-0324"
 	| "accounts/fireworks/models/deepseek-v3p1"
+	| "accounts/fireworks/models/deepseek-v3p2"
 	| "accounts/fireworks/models/glm-4p5"
 	| "accounts/fireworks/models/glm-4p5-air"
 	| "accounts/fireworks/models/glm-4p6"
+	| "accounts/fireworks/models/glm-4p7"
 	| "accounts/fireworks/models/gpt-oss-20b"
 	| "accounts/fireworks/models/gpt-oss-120b"
 
@@ -28,7 +34,8 @@ export const fireworksModels = {
 		defaultToolProtocol: "native",
 		inputPrice: 0.6,
 		outputPrice: 2.5,
-		cacheReadsPrice: 0.15,
+		cacheReadsPrice: 0.3,
+		displayName: "Kimi K2 Instruct 0905",
 		description:
 			"Kimi K2 model gets a new version update: Agentic coding: more accurate, better generalization across scaffolds. Frontend coding: improved aesthetics and functionalities on web, 3d, and other tasks. Context length: extended from 128k to 256k, providing better long-horizon support.",
 	},
@@ -41,8 +48,18 @@ export const fireworksModels = {
 		defaultToolProtocol: "native",
 		inputPrice: 0.6,
 		outputPrice: 2.5,
-		description:
-			"Kimi K2 is a state-of-the-art mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters. Trained with the Muon optimizer, Kimi K2 achieves exceptional performance across frontier knowledge, reasoning, and coding tasks while being meticulously optimized for agentic capabilities.",
+		deprecated: true,
+	},
+	"accounts/fireworks/models/kimi-k2p5": {
+		maxTokens: 256000,
+		contextWindow: 256000,
+		supportsImages: true,
+		supportsPromptCache: true,
+		supportsNativeTools: true,
+		inputPrice: 0.6,
+		outputPrice: 3,
+		cacheReadsPrice: 0.1,
+		displayName: "Kimi K2.5",
 	},
 	"accounts/fireworks/models/kimi-k2-thinking": {
 		maxTokens: 16000,
@@ -60,19 +77,41 @@ export const fireworksModels = {
 			"The kimi-k2-thinking model is a general-purpose agentic reasoning model developed by Moonshot AI. Thanks to its strength in deep reasoning and multi-turn tool use, it can solve even the hardest problems.",
 	},
 	"accounts/fireworks/models/minimax-m2": {
-		maxTokens: 4096,
-		contextWindow: 204800,
+		maxTokens: 192000,
+		contextWindow: 192000,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		supportsNativeTools: true,
 		defaultToolProtocol: "native",
 		inputPrice: 0.3,
 		outputPrice: 1.2,
-		description:
-			"MiniMax M2 is a high-performance language model with 204.8K context window, optimized for long-context understanding and generation tasks.",
+		cacheReadsPrice: 0.15,
+		displayName: "MiniMax-M2",
+	},
+	"accounts/fireworks/models/minimax-m2p1": {
+		maxTokens: 200000,
+		contextWindow: 200000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsNativeTools: true,
+		inputPrice: 0.3,
+		outputPrice: 1.2,
+		cacheReadsPrice: 0.15,
+		displayName: "MiniMax-M2.1",
+	},
+	"accounts/fireworks/models/qwen3-235b-a22b": {
+		maxTokens: 16384,
+		contextWindow: 128000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsNativeTools: true,
+		inputPrice: 0.22,
+		outputPrice: 0.88,
+		cacheReadsPrice: 0.11,
+		displayName: "Qwen3 235B A22B",
 	},
 	"accounts/fireworks/models/qwen3-235b-a22b-instruct-2507": {
-		maxTokens: 32768,
+		maxTokens: 256000,
 		contextWindow: 256000,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -81,20 +120,22 @@ export const fireworksModels = {
 		inputPrice: 0.22,
 		outputPrice: 0.88,
 		description: "Latest Qwen3 thinking model, competitive against the best closed source models in Jul 2025.",
+		displayName: "Qwen3 235B A22B Instruct 2507",
 	},
 	"accounts/fireworks/models/qwen3-coder-480b-a35b-instruct": {
-		maxTokens: 32768,
-		contextWindow: 256000,
+		maxTokens: 256_000,
+		contextWindow: 256_000,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		supportsNativeTools: true,
 		defaultToolProtocol: "native",
 		inputPrice: 0.45,
 		outputPrice: 1.8,
-		description: "Qwen3's most agentic code model to date.",
+		cacheReadsPrice: 0.23,
+		displayName: "Qwen3 Coder 480B A35B Instruct",
 	},
 	"accounts/fireworks/models/deepseek-r1-0528": {
-		maxTokens: 20480,
+		maxTokens: 160000,
 		contextWindow: 160000,
 		supportsImages: false,
 		supportsPromptCache: false,
@@ -102,8 +143,7 @@ export const fireworksModels = {
 		defaultToolProtocol: "native",
 		inputPrice: 3,
 		outputPrice: 8,
-		description:
-			"05/28 updated checkpoint of Deepseek R1. Its overall performance is now approaching that of leading models, such as O3 and Gemini 2.5 Pro. Compared to the previous version, the upgraded model shows significant improvements in handling complex reasoning tasks, and this version also offers a reduced hallucination rate, enhanced support for function calling, and better experience for vibe coding. Note that fine-tuning for this model is only available through contacting fireworks at https://fireworks.ai/company/contact-us.",
+		displayName: "DeepSeek R1 0528",
 	},
 	"accounts/fireworks/models/deepseek-v3": {
 		maxTokens: 16384,
@@ -114,79 +154,113 @@ export const fireworksModels = {
 		defaultToolProtocol: "native",
 		inputPrice: 0.9,
 		outputPrice: 0.9,
+		deprecated: true,
 		description:
-			"A strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token from Deepseek. Note that fine-tuning for this model is only available through contacting fireworks at https://fireworks.ai/company/contact-us.",
+			"A strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token from Deepseek.",
+	},
+	"accounts/fireworks/models/deepseek-v3-0324": {
+		maxTokens: 160000,
+		contextWindow: 160000,
+		supportsImages: false,
+		supportsPromptCache: false,
+		supportsNativeTools: true,
+		inputPrice: 0.9,
+		outputPrice: 0.9,
+		displayName: "DeepSeek V3 0324",
 	},
 	"accounts/fireworks/models/deepseek-v3p1": {
-		maxTokens: 16384,
-		contextWindow: 163840,
+		maxTokens: 160_000,
+		contextWindow: 160_000,
 		supportsImages: false,
 		supportsPromptCache: false,
 		supportsNativeTools: true,
 		defaultToolProtocol: "native",
 		inputPrice: 0.56,
 		outputPrice: 1.68,
-		description:
-			"DeepSeek v3.1 is an improved version of the v3 model with enhanced performance, better reasoning capabilities, and improved code generation. This Mixture-of-Experts (MoE) model maintains the same 671B total parameters with 37B activated per token.",
+		displayName: "DeepSeek V3.1",
+	},
+	"accounts/fireworks/models/deepseek-v3p2": {
+		maxTokens: 160_000,
+		contextWindow: 160_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsNativeTools: true,
+		inputPrice: 0.56,
+		outputPrice: 1.68,
+		cacheReadsPrice: 0.28,
+		displayName: "Deepseek v3.2",
 	},
 	"accounts/fireworks/models/glm-4p5": {
-		maxTokens: 16384,
-		contextWindow: 128000,
+		maxTokens: 131_072,
+		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: false,
 		supportsNativeTools: true,
 		defaultToolProtocol: "native",
 		inputPrice: 0.55,
 		outputPrice: 2.19,
-		description:
-			"Z.ai GLM-4.5 with 355B total parameters and 32B active parameters. Features unified reasoning, coding, and intelligent agent capabilities.",
+		displayName: "GLM-4.5",
 	},
 	"accounts/fireworks/models/glm-4p5-air": {
-		maxTokens: 16384,
-		contextWindow: 128000,
+		maxTokens: 131_072,
+		contextWindow: 131_072,
 		supportsImages: false,
 		supportsPromptCache: false,
 		supportsNativeTools: true,
-		defaultToolProtocol: "native",
-		inputPrice: 0.55,
-		outputPrice: 2.19,
+		defaultToolProtocol: "native",
+		inputPrice: 0.22,
+		outputPrice: 0.88,
+		displayName: "GLM-4.5 Air",
 		description:
 			"Z.ai GLM-4.5-Air with 106B total parameters and 12B active parameters. Features unified reasoning, coding, and intelligent agent capabilities.",
 	},
 	"accounts/fireworks/models/glm-4p6": {
-		maxTokens: 25344,
-		contextWindow: 198000,
+		maxTokens: 198_000,
+		contextWindow: 198_000,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		supportsNativeTools: true,
 		defaultToolProtocol: "native",
 		inputPrice: 0.55,
 		outputPrice: 2.19,
-		description:
-			"Z.ai GLM-4.6 is an advanced coding model with exceptional performance on complex programming tasks. Features improved reasoning capabilities and enhanced code generation quality, making it ideal for software development workflows.",
+		cacheReadsPrice: 0.28,
+		displayName: "GLM-4.6",
+	},
+	"accounts/fireworks/models/glm-4p7": {
+		maxTokens: 198_000,
+		contextWindow: 198_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsNativeTools: true,
+		inputPrice: 0.6,
+		outputPrice: 2.2,
+		cacheReadsPrice: 0.3,
+		displayName: "GLM-4.7",
 	},
 	"accounts/fireworks/models/gpt-oss-20b": {
-		maxTokens: 16384,
-		contextWindow: 128000,
+		maxTokens: 128_000,
+		contextWindow: 128_000,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		supportsNativeTools: true,
-		defaultToolProtocol: "native",
-		inputPrice: 0.07,
-		outputPrice: 0.3,
+		defaultToolProtocol: "native",
+		inputPrice: 0.05,
+		outputPrice: 0.2,
+		cacheReadsPrice: 0.04,
+		displayName: "GPT-OSS 20B",
 		description:
 			"OpenAI gpt-oss-20b: Compact model for local/edge deployments. Optimized for low-latency and resource-constrained environments with chain-of-thought output, adjustable reasoning, and agentic workflows.",
 	},
 	"accounts/fireworks/models/gpt-oss-120b": {
-		maxTokens: 16384,
-		contextWindow: 128000,
+		maxTokens: 128_000,
+		contextWindow: 128_000,
 		supportsImages: false,
-		supportsPromptCache: false,
+		supportsPromptCache: true,
 		supportsNativeTools: true,
 		defaultToolProtocol: "native",
 		inputPrice: 0.15,
 		outputPrice: 0.6,
-		description:
-			"OpenAI gpt-oss-120b: Production-grade, general-purpose model that fits on a single H100 GPU. Features complex reasoning, configurable effort, full chain-of-thought transparency, and supports function calling, tool use, and structured outputs.",
+		cacheReadsPrice: 0.08,
+		displayName: "GPT-OSS 120B",
 	},
 } as const satisfies Record<string, ModelInfo>
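Editor's note: several Fireworks entries above gain a `cacheReadsPrice` alongside `inputPrice`/`outputPrice`. A minimal sketch of how such a `ModelInfo` entry could feed a per-request cost estimate, assuming cached input tokens are billed at the cache-read rate; the helper name and usage shape below are illustrative, not the repo's actual API:

```typescript
// Sketch only: cost estimation from a ModelInfo-style pricing entry.
interface PricingInfo {
	inputPrice?: number // USD per million input tokens (cache miss)
	outputPrice?: number // USD per million output tokens
	cacheReadsPrice?: number // USD per million cached input tokens
}

interface Usage {
	inputTokens: number
	outputTokens: number
	cacheReadTokens: number
}

function estimateCostUsd(info: PricingInfo, usage: Usage): number {
	const perToken = (pricePerMillion = 0) => pricePerMillion / 1_000_000
	// Cached reads are assumed to bill at cacheReadsPrice instead of inputPrice.
	const freshInput = usage.inputTokens - usage.cacheReadTokens
	return (
		freshInput * perToken(info.inputPrice) +
		usage.cacheReadTokens * perToken(info.cacheReadsPrice ?? info.inputPrice) +
		usage.outputTokens * perToken(info.outputPrice)
	)
}

// With the GLM-4.6 pricing above: 100k fresh input, 50k cached, 10k output.
estimateCostUsd(
	{ inputPrice: 0.55, outputPrice: 2.19, cacheReadsPrice: 0.28 },
	{ inputTokens: 150_000, outputTokens: 10_000, cacheReadTokens: 50_000 },
) // 0.055 + 0.014 + 0.0219 ≈ $0.091
```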

+ 8 - 0
packages/types/src/providers/index.ts

@@ -1,5 +1,6 @@
 export * from "./anthropic.js"
 export * from "./baseten.js"
+export * from "./corethink.js"
 export * from "./bedrock.js"
 export * from "./cerebras.js"
 export * from "./chutes.js"
@@ -15,6 +16,7 @@ export * from "./synthetic.js"
 export * from "./inception.js"
 export * from "./minimax.js"
 export * from "./glama.js"
+export * from "./zenmux.js"
 // kilocode_change end
 export * from "./groq.js"
 export * from "./huggingface.js"
@@ -44,6 +46,7 @@ export * from "./minimax.js"
 
 import { anthropicDefaultModelId } from "./anthropic.js"
 import { basetenDefaultModelId } from "./baseten.js"
+import { corethinkDefaultModelId } from "./corethink.js"
 import { bedrockDefaultModelId } from "./bedrock.js"
 import { cerebrasDefaultModelId } from "./cerebras.js"
 import { chutesDefaultModelId } from "./chutes.js"
@@ -54,6 +57,7 @@ import { featherlessDefaultModelId } from "./featherless.js"
 import { fireworksDefaultModelId } from "./fireworks.js"
 import { geminiDefaultModelId } from "./gemini.js"
 import { glamaDefaultModelId } from "./glama.js" // kilocode_change
+import { zenmuxDefaultModelId } from "./zenmux.js" // kilocode_change
 import { groqDefaultModelId } from "./groq.js"
 import { ioIntelligenceDefaultModelId } from "./io-intelligence.js"
 import { litellmDefaultModelId } from "./lite-llm.js"
@@ -89,6 +93,8 @@ export function getProviderDefaultModelId(
 	switch (provider) {
 		case "openrouter":
 			return openRouterDefaultModelId
+		case "zenmux": // kilocode_change
+			return zenmuxDefaultModelId // kilocode_change
 		case "requesty":
 			return requestyDefaultModelId
 		// kilocode_change start
@@ -109,6 +115,8 @@ export function getProviderDefaultModelId(
 			return chutesDefaultModelId
 		case "baseten":
 			return basetenDefaultModelId
+		case "corethink":
+			return corethinkDefaultModelId
 		case "bedrock":
 			return bedrockDefaultModelId
 		case "vertex":

+ 18 - 6
packages/types/src/providers/moonshot.ts

@@ -6,21 +6,26 @@ export type MoonshotModelId = keyof typeof moonshotModels
 export const moonshotDefaultModelId: MoonshotModelId = "kimi-k2-thinking"
 
 export const moonshotModels = {
+	// kilocode_change start
 	"kimi-for-coding": {
 		maxTokens: 32_000,
 		contextWindow: 131_072,
-		supportsImages: false,
+		supportsImages: true,
 		supportsPromptCache: true,
-		supportsReasoningBudget: true,
-		supportsReasoningEffort: true,
+		supportsReasoningBinary: true,
+		supportsAdaptiveThinking: true,
 		inputPrice: 0.6, // $0.60 per million tokens (cache miss)
 		outputPrice: 2.5, // $2.50 per million tokens
 		cacheWritesPrice: 0, // $0 per million tokens (cache miss)
 		cacheReadsPrice: 0.15, // $0.15 per million tokens (cache hit)
 		preserveReasoning: true,
 		supportsNativeTools: true,
+		defaultToolProtocol: "native",
+		supportsTemperature: false,
+		defaultTemperature: 0.6,
 		description: `Kimi for coding`,
 	},
+	// kilocode_change end
 	"kimi-k2-0711-preview": {
 		maxTokens: 32_000,
 		contextWindow: 131_072,
@@ -91,19 +96,26 @@ export const moonshotModels = {
 		defaultTemperature: 1.0,
 		description: `The kimi-k2-thinking model is a general-purpose agentic reasoning model developed by Moonshot AI. Thanks to its strength in deep reasoning and multi-turn tool use, it can solve even the hardest problems.`,
 	},
+	// kilocode_change start
 	"kimi-k2.5": {
 		maxTokens: 16_384,
 		contextWindow: 262_144,
-		supportsImages: false,
+		supportsImages: true,
 		supportsPromptCache: true,
+		supportsReasoningBinary: true,
+		supportsAdaptiveThinking: true,
+		preserveReasoning: true,
+		supportsNativeTools: true,
+		defaultToolProtocol: "native",
 		inputPrice: 0.6, // $0.60 per million tokens (cache miss)
 		outputPrice: 3.0, // $3.00 per million tokens
 		cacheReadsPrice: 0.1, // $0.10 per million tokens (cache hit)
-		supportsTemperature: true,
-		defaultTemperature: 1.0,
+		supportsTemperature: false,
+		defaultTemperature: 0.6,
 		description:
 			"Kimi K2.5 is the latest generation of Moonshot AI's Kimi series, featuring improved reasoning capabilities and enhanced performance across diverse tasks.",
 	},
+	// kilocode_change end
 } as const satisfies Record<string, ModelInfo>
 
 export const MOONSHOT_DEFAULT_TEMPERATURE = 0.6
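Editor's note: the kimi entries above set `supportsTemperature: false` with `defaultTemperature: 0.6`. The Moonshot specs later in this diff pin down the resulting behavior for these strict models (1.0 while thinking is enabled, 0.6 otherwise, regardless of the user's `modelTemperature`). A sketch of that policy; the function name is made up and the constants are taken from those tests, not from the handler source:

```typescript
// Strict Kimi models ignore user temperature; the value depends on thinking mode.
function resolveStrictKimiTemperature(thinkingEnabled: boolean): number {
	return thinkingEnabled ? 1.0 : 0.6 // 0.6 matches MOONSHOT_DEFAULT_TEMPERATURE
}

resolveStrictKimiTemperature(true) // 1.0, even if the user configured 0.1
resolveStrictKimiTemperature(false) // 0.6, even if the user configured 1.9
```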

+ 39 - 2
packages/types/src/providers/zai.ts

@@ -6,11 +6,12 @@ import { ZaiApiLine } from "../provider-settings.js"
 // https://docs.z.ai/guides/llm/glm-4.5
 // https://docs.z.ai/guides/llm/glm-4.6
 // https://docs.z.ai/guides/llm/glm-4.7
+// https://docs.z.ai/guides/llm/glm-5 // kilocode_change
 // https://docs.z.ai/guides/overview/pricing
 // https://bigmodel.cn/pricing
 
 export type InternationalZAiModelId = keyof typeof internationalZAiModels
-export const internationalZAiDefaultModelId: InternationalZAiModelId = "glm-4.7"
+export const internationalZAiDefaultModelId: InternationalZAiModelId = "glm-5" // kilocode_change
 export const internationalZAiModels = {
 	"glm-4.5": {
 		maxTokens: 16_384,
@@ -157,6 +158,24 @@ export const internationalZAiModels = {
 		preferredIndex: 1,
 	},
 	// kilocode_change start
+	"glm-5": {
+		maxTokens: 131_072,
+		contextWindow: 200_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsNativeTools: true,
+		defaultToolProtocol: "native",
+		supportsReasoningEffort: ["disable", "medium"],
+		reasoningEffort: "medium",
+		preserveReasoning: true,
+		inputPrice: 1,
+		outputPrice: 3.2,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.2,
+		description:
+			"GLM-5 is Z.AI's flagship text model with 200K context, 128K max output, thinking mode, function calling, and context caching.",
+		preferredIndex: 0,
+	},
 	"glm-4.7-flash": {
 		maxTokens: 16_384,
 		contextWindow: 200_000,
@@ -189,7 +208,7 @@ export const internationalZAiModels = {
 } as const satisfies Record<string, ModelInfo>
 
 export type MainlandZAiModelId = keyof typeof mainlandZAiModels
-export const mainlandZAiDefaultModelId: MainlandZAiModelId = "glm-4.7"
+export const mainlandZAiDefaultModelId: MainlandZAiModelId = "glm-5" // kilocode_change
 export const mainlandZAiModels = {
 	"glm-4.5": {
 		maxTokens: 16_384,
@@ -306,6 +325,24 @@ export const mainlandZAiModels = {
 		preferredIndex: 1,
 	},
 	// kilocode_change start
+	"glm-5": {
+		maxTokens: 131_072,
+		contextWindow: 200_000,
+		supportsImages: false,
+		supportsPromptCache: true,
+		supportsNativeTools: true,
+		defaultToolProtocol: "native",
+		supportsReasoningEffort: ["disable", "medium"],
+		reasoningEffort: "medium",
+		preserveReasoning: true,
+		inputPrice: 0.57,
+		outputPrice: 2.57,
+		cacheWritesPrice: 0,
+		cacheReadsPrice: 0.14,
+		description:
+			"GLM-5 is Z.AI's flagship text model with 200K context, 128K max output, thinking mode, function calling, and context caching.",
+		preferredIndex: 0,
+	},
 	"glm-4.7-flash": {
 		maxTokens: 16_384,
 		contextWindow: 204_800,
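Editor's note: the new `glm-5` entries advertise `supportsReasoningEffort: ["disable", "medium"]` with a `"medium"` default. A plausible mapping from that setting to the request-side `thinking` flag the Z.ai specs further down this diff assert on; the helper is hypothetical, but the `{ thinking: { type } }` payload shape is taken from those tests:

```typescript
// Sketch: reasoning-effort setting -> Z.ai thinking parameter.
type ReasoningEffort = "disable" | "medium"

function buildThinkingParam(effort: ReasoningEffort | undefined) {
	if (effort === undefined) {
		return undefined // non-thinking models (e.g. glm-4.6) send no flag at all
	}
	return { type: effort === "disable" ? "disabled" : "enabled" } as const
}

buildThinkingParam("medium") // { type: "enabled" }, the glm-5 default
buildThinkingParam("disable") // { type: "disabled" }
```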

+ 17 - 0
packages/types/src/providers/zenmux.ts

@@ -0,0 +1,17 @@
+// kilocode_change - new file
+import type { ModelInfo } from "../model.js"
+
+// Default model for ZenMux - using Anthropic Claude Opus 4 as default
+export const zenmuxDefaultModelId = "anthropic/claude-opus-4"
+
+export const zenmuxDefaultModelInfo: ModelInfo = {
+	maxTokens: 8192,
+	contextWindow: 200_000,
+	supportsImages: true,
+	supportsPromptCache: true,
+	inputPrice: 15.0,
+	outputPrice: 75.0,
+	cacheWritesPrice: 18.75,
+	cacheReadsPrice: 1.5,
+	description: "Claude Opus 4 via ZenMux",
+}
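Editor's note: a rough sketch of how a ZenMux handler might resolve model info from these constants, defaulting when no model id is configured. The `zenmuxModelId` option name mirrors the test file later in this diff; the resolution logic itself is an assumption, not the handler's code:

```typescript
// Hypothetical model resolution against the ZenMux defaults above.
import { zenmuxDefaultModelId, zenmuxDefaultModelInfo } from "./zenmux.js"

function resolveZenMuxModel(zenmuxModelId?: string) {
	return {
		id: zenmuxModelId ?? zenmuxDefaultModelId,
		info: zenmuxDefaultModelInfo, // static fallback until live model metadata is fetched
	}
}

resolveZenMuxModel("openai/gpt-4") // { id: "openai/gpt-4", info: { maxTokens: 8192, ... } }
```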

+ 1 - 0
packages/types/src/vscode-extension-host.ts

@@ -574,6 +574,7 @@ export type ExtensionState = Pick<
 	clineMessages: ClineMessage[]
 	currentTaskItem?: HistoryItem
 	currentTaskTodos?: TodoItem[] // Initial todos for the current task
+	currentTaskCumulativeCost?: number // kilocode_change: cumulative cost including deleted messages
 	apiConfiguration: ProviderSettings
 	uriScheme?: string
 	uiKind?: string // kilocode_change

+ 85 - 45
pnpm-lock.yaml

@@ -14,6 +14,9 @@ overrides:
   glob: '>=11.1.0'
   '@types/react': ^18.3.23
   '@types/react-dom': ^18.3.5
+  qs: ^6.14.1
+  body-parser: ^2.2.2
+  preact: ^10.28.2
   zod: 3.25.76
   '@sap-ai-sdk/prompt-registry>zod': ^4.0.0
 
@@ -2415,8 +2418,8 @@ importers:
         specifier: ^11.4.1
         version: 11.10.0
       posthog-js:
-        specifier: ^1.227.2
-        version: 1.242.1
+        specifier: ^1.337.0
+        version: 1.337.0
       pretty-bytes:
         specifier: ^7.0.0
         version: 7.0.0
@@ -5315,9 +5318,15 @@ packages:
  '@posthog/core@1.10.0':
     resolution: {integrity: sha512-havjGYHwL8Gy6LXIR911h+M/sYlJLQbepxP/cc1M7Cp3v8F92bzpqkbuvUIUyb7/izkxfGwc9wMqKAo0QxMTrg==}
 
+  '@posthog/core@1.18.0':
+    resolution: {integrity: sha512-irPbrcopCT0LCRgGM4V8jFuMNCFos6EM4QFf5KA2sHQFC/6pGaHZYoyHcjRUDUKFw4vmpLlmGEXA5ah8x5K4LQ==}
+
   '@posthog/[email protected]':
     resolution: {integrity: sha512-mReFmfI+ep5sH3cnFhjvWfOcl3j6olKpN5lHFbOomLGxYTHMXcyMUBE3/o8WfrAgR1qxKQUsWMNcv6BhLr/GKA==}
 
+  '@posthog/types@1.337.0':
+    resolution: {integrity: sha512-R7J5BIeulNbjmUNfc8FICRa57K1IizSpJBRI6IuJvRFnm3eeczWOw6DKH0NCHXHZiE3XzVcUrJUOKaKXBcdQxQ==}
+
   '@protobufjs/[email protected]':
     resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==}
 
@@ -9055,8 +9064,8 @@ packages:
   [email protected]:
     resolution: {integrity: sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg==}
 
-  body-parser@2.2.0:
-    resolution: {integrity: sha512-02qvAaxv8tp7fBa/mw1ga98OGm+eCbqzJOKoRt70sLmfEEi+jyBYVTDGfCL/k06/4EMk/z01gCe7HoCH/f2LTg==}
+  body-parser@2.2.2:
+    resolution: {integrity: sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==}
     engines: {node: '>=18'}
 
   [email protected]:
@@ -12224,6 +12233,10 @@ packages:
     resolution: {integrity: sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==}
     engines: {node: '>= 0.8'}
 
+  http-errors@2.0.1:
+    resolution: {integrity: sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==}
+    engines: {node: '>= 0.8'}
+
   [email protected]:
     resolution: {integrity: sha512-n2hY8YdoRE1i7r6M0w9DIw5GgZN0G25P8zLCRQ8rjXtTU3vsNFBI/vWK/UIeE6g5MUUz6avwAPXmL6Fy9D/90w==}
     engines: {node: '>= 6'}
@@ -14034,8 +14047,8 @@ packages:
   [email protected]:
     resolution: {integrity: sha512-JoPBfJ3gBnHZ18icCwHR50orC9kNH81tiR1gs01D8Q5YpV6adHNO9nKNuFBCJQ941/32PT1a63UF/DitmS3amQ==}
 
-  mdast-util-to-hast@13.2.0:
-    resolution: {integrity: sha512-QGYKEuUsYT9ykKBCMOEDLsU5JRObWQusAolFMeko/tYPufNkRffBAQjIE+99jbA87xv6FgmjLtwjh9wBWajwAA==}
+  mdast-util-to-hast@13.2.1:
+    resolution: {integrity: sha512-cctsq2wp5vTsLIcaymblUriiTcZd0CwWtCbLvrOzYCDZoWyMNV8sZ7krj09FSnsiJi3WVsHLM4k6Dq/yaPyCXA==}
 
   [email protected]:
     resolution: {integrity: sha512-xj68wMTvGXVOKonmog6LwyJKrYXZPvlwabaryTjLh9LuvovB/KAH+kvi8Gjj+7rJjsFi23nkUxRQv1KqSroMqA==}
@@ -15686,17 +15699,6 @@ packages:
     resolution: {integrity: sha512-Jtc2612XINuBjIl/QTWsV5UvE8UHuNblcO3vVADSrKsrc6RqGX6lOW1cEo3CM2v0XG4Nat8nI+YM7/f26VxXLw==}
     engines: {node: '>=12'}
 
-  posthog-js@1.242.1:
-    resolution: {integrity: sha512-j2mzw0eukyuw/Qm3tNZ6pfaXmc7eglWj6ftmvR1Lz9GtMr85ndGNXJvIGO+6PBrQW2o0D1G0k/KV93ehta0hFA==}
-    peerDependencies:
-      '@rrweb/types': 2.0.0-alpha.17
-      rrweb-snapshot: 2.0.0-alpha.17
-    peerDependenciesMeta:
-      '@rrweb/types':
-        optional: true
-      rrweb-snapshot:
-        optional: true
-
   [email protected]:
     resolution: {integrity: sha512-OMXCO/IfcJBjYTuebVynMbp8Kq329yKEQSCAnkqLmi8W2Bt5bi7S5xxMwDM3Pm7818Uh0C40XMG3rAtYozId6Q==}
     peerDependencies:
@@ -15711,6 +15713,9 @@ packages:
   [email protected]:
     resolution: {integrity: sha512-ZQg3ozgsPom+SZtAxMN97Zx9Vqkdsv1D4TZU/OqbAZdm27PswV6+ShBurm3nKm9jrlUU1cGHMRn2ZJZf249znQ==}
 
+  posthog-js@1.337.0:
+    resolution: {integrity: sha512-wtzPoMGlCAJGgfjOIjimFRj+8SH4Aojfhc2+s8HdL54ivdQGK24JvwQoz12bKssvDSpFiBPwX1xunkt0f7mpcg==}
+
   [email protected]:
     resolution: {integrity: sha512-6VISkNdxO24ehXiDA4dugyCSIV7lpGVaEu5kn/dlAj+SJ1lgcDru9PQ8p/+GSXsXVxohd1t7kHL2JKc9NoGb0w==}
     engines: {node: '>=20'}
@@ -15719,9 +15724,6 @@ packages:
     resolution: {integrity: sha512-xFRlaZTrVfIVrRfEZsI/DM6pdJqeX6iaRlo46nexhB1wfRcuIy1mtp76nkZSw3DDRXBczTo41K7raO2yS3dxzA==}
     engines: {node: '>=20'}
 
-  preact@10.26.6:
-    resolution: {integrity: sha512-5SRRBinwpwkaD+OqlBDeITlRgvd8I8QlxHJw9AxSdMNV6O+LodN9nUyYGpSF7sadHjs6RzeFShMexC6DbtWr9g==}
-
   [email protected]:
     resolution: {integrity: sha512-lbteaWGzGHdlIuiJ0l2Jq454m6kcpI1zNje6d8MlGAFlYvP2GO4ibnat7P74Esfz4sPTdM6UxtTwh/d3pwM9JA==}
 
@@ -15908,8 +15910,8 @@ packages:
     engines: {node: '>=10.13.0'}
     hasBin: true
 
-  qs@6.14.0:
-    resolution: {integrity: sha512-YWWTjgABSKcvs/nWBi9PycY/JiPJqOD4JA6o9Sej2AtvSGarXxKC3OQSk4pAarbdQlKAh5D4FCQkJNkW+GAn3w==}
+  qs@6.14.1:
+    resolution: {integrity: sha512-4EK3+xJl8Ts67nLYNwqw/dsFVnCf+qR7RgXSK9jEEm9unao3njwMDdmsdvoKBKHzxd7tCYz5e5M+SnMjdtXGQQ==}
     engines: {node: '>=0.6'}
 
   [email protected]:
@@ -15953,6 +15955,10 @@ packages:
     resolution: {integrity: sha512-RmkhL8CAyCRPXCE28MMH0z2PNWQBNk2Q09ZdxM9IOOXwxwZbN+qbWaatPkdkWIKL2ZVDImrN/pK5HTRz2PcS4g==}
     engines: {node: '>= 0.8'}
 
+  raw-body@3.0.2:
+    resolution: {integrity: sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==}
+    engines: {node: '>= 0.10'}
+
   [email protected]:
     resolution: {integrity: sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==}
     hasBin: true
@@ -17020,6 +17026,10 @@ packages:
     resolution: {integrity: sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==}
     engines: {node: '>= 0.8'}
 
+  statuses@2.0.2:
+    resolution: {integrity: sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==}
+    engines: {node: '>= 0.8'}
+
   [email protected]:
     resolution: {integrity: sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==}
 
@@ -22683,8 +22693,14 @@ snapshots:
     dependencies:
       cross-spawn: 7.0.6
 
+  '@posthog/core@1.18.0':
+    dependencies:
+      cross-spawn: 7.0.6
+
   '@posthog/[email protected]': {}
 
+  '@posthog/types@1.337.0': {}
+
   '@protobufjs/[email protected]': {}
 
   '@protobufjs/[email protected]': {}
@@ -27206,16 +27222,16 @@ snapshots:
 
   [email protected]: {}
 
-  body-parser@2.2.0:
+  body-parser@2.2.2:
     dependencies:
       bytes: 3.1.2
       content-type: 1.0.5
       debug: 4.4.3([email protected])
       http-errors: 2.0.0
-      iconv-lite: 0.6.3
+      iconv-lite: 0.7.2
       on-finished: 2.4.1
-      qs: 6.14.0
-      raw-body: 3.0.0
+      qs: 6.14.1
+      raw-body: 3.0.2
       type-is: 2.0.1
     transitivePeerDependencies:
       - supports-color
@@ -29620,7 +29636,7 @@ snapshots:
   [email protected]:
     dependencies:
       accepts: 2.0.0
-      body-parser: 2.2.0
+      body-parser: 2.2.2
       content-disposition: 1.0.0
       content-type: 1.0.5
       cookie: 0.7.2
@@ -29638,7 +29654,7 @@ snapshots:
       once: 1.4.0
       parseurl: 1.3.3
       proxy-addr: 2.0.7
-      qs: 6.14.0
+      qs: 6.14.1
       range-parser: 1.2.1
       router: 2.2.0
       send: 1.2.0
@@ -30766,7 +30782,7 @@ snapshots:
       hast-util-from-parse5: 8.0.3
       hast-util-to-parse5: 8.0.0
       html-void-elements: 3.0.0
-      mdast-util-to-hast: 13.2.0
+      mdast-util-to-hast: 13.2.1
       parse5: 7.3.0
       unist-util-position: 5.0.0
       unist-util-visit: 5.0.0
@@ -30782,7 +30798,7 @@ snapshots:
       comma-separated-tokens: 2.0.3
       hast-util-whitespace: 3.0.0
       html-void-elements: 3.0.0
-      mdast-util-to-hast: 13.2.0
+      mdast-util-to-hast: 13.2.1
       property-information: 7.1.0
       space-separated-tokens: 2.0.2
       stringify-entities: 4.0.4
@@ -30940,6 +30956,14 @@ snapshots:
       statuses: 2.0.1
       toidentifier: 1.0.1
 
+  http-errors@2.0.1:
+    dependencies:
+      depd: 2.0.0
+      inherits: 2.0.4
+      setprototypeof: 1.2.0
+      statuses: 2.0.2
+      toidentifier: 1.0.1
+
   [email protected]:
     dependencies:
       '@tootallnate/once': 2.0.0
@@ -33178,7 +33202,7 @@ snapshots:
       unist-util-position: 3.1.0
       unist-util-visit: 2.0.3
 
-  mdast-util-to-hast@13.2.0:
+  mdast-util-to-hast@13.2.1:
     dependencies:
       '@types/hast': 3.0.4
       '@types/mdast': 4.0.4
@@ -35075,18 +35099,11 @@ snapshots:
 
   [email protected]: {}
 
-  posthog-js@1.242.1:
-    dependencies:
-      core-js: 3.42.0
-      fflate: 0.4.8
-      preact: 10.26.6
-      web-vitals: 4.2.4
-
   [email protected]:
     dependencies:
       core-js: 3.42.0
       fflate: 0.4.8
-      preact: 10.26.6
+      preact: 10.28.2
       web-vitals: 4.2.4
 
   [email protected]:
@@ -35105,14 +35122,28 @@ snapshots:
       query-selector-shadow-dom: 1.0.1
       web-vitals: 5.1.0
 
+  posthog-js@1.337.0:
+    dependencies:
+      '@opentelemetry/api': 1.9.0
+      '@opentelemetry/api-logs': 0.208.0
+      '@opentelemetry/exporter-logs-otlp-http': 0.208.0(@opentelemetry/[email protected])
+      '@opentelemetry/resources': 2.5.0(@opentelemetry/[email protected])
+      '@opentelemetry/sdk-logs': 0.208.0(@opentelemetry/[email protected])
+      '@posthog/core': 1.18.0
+      '@posthog/types': 1.337.0
+      core-js: 3.42.0
+      dompurify: 3.3.1
+      fflate: 0.4.8
+      preact: 10.28.2
+      query-selector-shadow-dom: 1.0.1
+      web-vitals: 5.1.0
+
   [email protected]: {}
 
   [email protected]:
     dependencies:
       '@posthog/core': 1.10.0
 
-  preact@10.26.6: {}
-
   [email protected]: {}
 
   [email protected]:
@@ -35350,7 +35381,7 @@ snapshots:
       pngjs: 5.0.0
       yargs: 15.4.1
 
-  qs@6.14.0:
+  qs@6.14.1:
     dependencies:
       side-channel: 1.1.0
 
@@ -35389,6 +35420,13 @@ snapshots:
       iconv-lite: 0.6.3
       unpipe: 1.0.0
 
+  raw-body@3.0.2:
+    dependencies:
+      bytes: 3.1.2
+      http-errors: 2.0.1
+      iconv-lite: 0.7.2
+      unpipe: 1.0.0
+
   [email protected]:
     dependencies:
       deep-extend: 0.6.0
@@ -35511,7 +35549,7 @@ snapshots:
       devlop: 1.1.0
       hast-util-to-jsx-runtime: 2.3.6
       html-url-attributes: 3.0.1
-      mdast-util-to-hast: 13.2.0
+      mdast-util-to-hast: 13.2.1
       react: 18.3.1
       remark-parse: 11.0.0
       remark-rehype: 11.1.2
@@ -35905,7 +35943,7 @@ snapshots:
     dependencies:
       '@types/hast': 3.0.4
       '@types/mdast': 4.0.4
-      mdast-util-to-hast: 13.2.0
+      mdast-util-to-hast: 13.2.1
       unified: 11.0.5
       vfile: 6.0.3
 
@@ -36757,6 +36795,8 @@ snapshots:
 
   [email protected]: {}
 
+  statuses@2.0.2: {}
+
   [email protected]: {}
 
   [email protected]: {}
@@ -37773,7 +37813,7 @@ snapshots:
 
   [email protected]:
     dependencies:
-      qs: 6.14.0
+      qs: 6.14.1
       tunnel: 0.0.6
       underscore: 1.13.7
 

+ 4 - 4
src/__tests__/extension.spec.ts

@@ -253,8 +253,8 @@ vi.mock("../i18n", () => ({
 	}),
 }))
 
-vi.mock("../services/ghost/GhostServiceManager", () => ({
-	GhostServiceManager: {
+vi.mock("../services/autocomplete/AutocompleteServiceManager", () => ({
+	AutocompleteServiceManager: {
 		initialize: vi.fn().mockReturnValue({
 			load: vi.fn(),
 		}),
@@ -263,8 +263,8 @@ vi.mock("../services/ghost/GhostServiceManager", () => ({
 	},
 }))
 
-vi.mock("../services/ghost", () => ({
-	registerGhostProvider: vi.fn(),
+vi.mock("../services/autocomplete", () => ({
+	registerAutocompleteProvider: vi.fn(),
 }))
 
 vi.mock("../services/commit-message", () => ({

+ 6 - 0
src/api/index.ts

@@ -11,6 +11,7 @@ import {
 	AwsBedrockHandler,
 	CerebrasHandler,
 	OpenRouterHandler,
+	ZenMuxHandler, // kilocode_change
 	VertexHandler,
 	AnthropicVertexHandler,
 	OpenAiHandler,
@@ -51,6 +52,7 @@ import {
 	DeepInfraHandler,
 	MiniMaxHandler,
 	BasetenHandler,
+	CorethinkHandler,
 	OpenAiCompatibleResponsesHandler, // kilocode_change
 } from "./providers"
 // kilocode_change start
@@ -180,6 +182,8 @@ export function buildApiHandler(configuration: ProviderSettings): ApiHandler {
 		// kilocode_change end
 		case "openrouter":
 			return new OpenRouterHandler(options)
+		case "zenmux": // kilocode_change
+			return new ZenMuxHandler(options) // kilocode_change
 		case "bedrock":
 			return new AwsBedrockHandler(options)
 		case "vertex":
@@ -268,6 +272,8 @@ export function buildApiHandler(configuration: ProviderSettings): ApiHandler {
 			return new MiniMaxHandler(options)
 		case "baseten":
 			return new BasetenHandler(options)
+		case "corethink":
+			return new CorethinkHandler(options)
 		default:
 			apiProvider satisfies undefined
 			return new AnthropicHandler(options)
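Editor's note: the `apiProvider satisfies undefined` in the default branch is a compile-time exhaustiveness check: once every provider literal, now including "zenmux" and "corethink", has its own case, `undefined` is the only type left in the default branch. A standalone toy illustration of the trick; the `Provider` union and handler names here are stand-ins, not the repo's types:

```typescript
// Toy version of the `satisfies undefined` exhaustiveness pattern.
type Provider = "openrouter" | "zenmux" | "corethink" | undefined

function pick(provider: Provider): string {
	switch (provider) {
		case "openrouter":
			return "OpenRouterHandler"
		case "zenmux":
			return "ZenMuxHandler"
		case "corethink":
			return "CorethinkHandler"
		default:
			// If a new literal is added to Provider without a case above, the
			// narrowed type here is no longer `undefined` and this line stops
			// compiling, forcing the switch to be updated.
			provider satisfies undefined
			return "AnthropicHandler"
	}
}
```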

+ 39 - 0
src/api/providers/__tests__/bedrock-custom-arn.spec.ts

@@ -163,6 +163,45 @@ describe("Bedrock ARN Handling", () => {
 			expect(result.crossRegionInference).toBe(false)
 		})
 
+		it("should correctly parse GovCloud inference-profile ARN", () => {
+			const handler = createHandler()
+			const arn =
+				"arn:aws-us-gov:bedrock:us-gov-west-1:123456789012:inference-profile/us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0"
+
+			const result = (handler as any).parseArn(arn, "us-gov-west-1")
+
+			expect(result.isValid).toBe(true)
+			expect(result.region).toBe("us-gov-west-1")
+			expect(result.modelType).toBe("inference-profile")
+			expect(result.modelId).toBe("us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0")
+			expect(result.crossRegionInference).toBe(false)
+		})
+
+		it("should correctly parse GovCloud foundation-model ARN", () => {
+			const handler = createHandler()
+			const arn = "arn:aws-us-gov:bedrock:us-gov-west-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0"
+
+			const result = (handler as any).parseArn(arn, "us-gov-west-1")
+
+			expect(result.isValid).toBe(true)
+			expect(result.region).toBe("us-gov-west-1")
+			expect(result.modelType).toBe("foundation-model")
+			expect(result.modelId).toBe("anthropic.claude-3-sonnet-20240229-v1:0")
+			expect(result.crossRegionInference).toBe(false)
+		})
+
+		it("should correctly parse China partition ARN", () => {
+			const handler = createHandler()
+			const arn = "arn:aws-cn:bedrock:cn-north-1::foundation-model/anthropic.claude-v2"
+
+			const result = (handler as any).parseArn(arn, "cn-north-1")
+
+			expect(result.isValid).toBe(true)
+			expect(result.region).toBe("cn-north-1")
+			expect(result.modelType).toBe("foundation-model")
+			expect(result.modelId).toBe("anthropic.claude-v2")
+		})
+
 		it("should return isValid: false for simple ARN format", () => {
 			const handler = createHandler()
 			const arn = "arn:aws:bedrock:us-east-1:123456789012:some-other-resource"

+ 184 - 5
src/api/providers/__tests__/chutes.spec.ts

@@ -153,6 +153,39 @@ describe("ChutesHandler", () => {
 		])
 	})
 
+	// kilocode_change start
+	it("should handle non-DeepSeek reasoning field", async () => {
+		mockCreate.mockImplementationOnce(async () => ({
+			[Symbol.asyncIterator]: async function* () {
+				yield {
+					choices: [
+						{
+							delta: { reasoning: "Thinking through it..." },
+							index: 0,
+						},
+					],
+					usage: null,
+				}
+			},
+		}))
+
+		const systemPrompt = "You are a helpful assistant."
+		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+		mockFetchModel.mockResolvedValueOnce({
+			id: "some-other-model",
+			info: { maxTokens: 1024, temperature: 0.7 },
+		})
+
+		const stream = handler.createMessage(systemPrompt, messages)
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		expect(chunks).toEqual([{ type: "reasoning", text: "Thinking through it..." }])
+	})
+	// kilocode_change end
+
 	it("should return default model when no model is specified", async () => {
 		const model = await handler.fetchModel()
 		expect(model.id).toBe(chutesDefaultModelId)
@@ -275,6 +308,131 @@ describe("ChutesHandler", () => {
 		})
 	})
 
+	// kilocode_change start
+	it("createMessage should yield tool_call_end on finish_reason tool_calls", async () => {
+		mockCreate.mockImplementationOnce(() => {
+			return {
+				[Symbol.asyncIterator]: () => ({
+					next: vi
+						.fn()
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								choices: [
+									{
+										delta: {
+											tool_calls: [
+												{
+													index: 0,
+													id: "call_finish",
+													function: { name: "test_tool", arguments: '{"arg":"value"}' },
+												},
+											],
+										},
+										finish_reason: null,
+									},
+								],
+							},
+						})
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								choices: [
+									{
+										delta: {},
+										finish_reason: "tool_calls",
+									},
+								],
+							},
+						})
+						.mockResolvedValueOnce({ done: true }),
+				}),
+			}
+		})
+
+		const stream = handler.createMessage("system prompt", [])
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		expect(chunks).toEqual([
+			{
+				type: "tool_call_partial",
+				index: 0,
+				id: "call_finish",
+				name: "test_tool",
+				arguments: '{"arg":"value"}',
+			},
+			{
+				type: "tool_call_end",
+				id: "call_finish",
+			},
+		])
+	})
+
+	it("createMessage should synthesize tool call ids when provider omits them", async () => {
+		mockCreate.mockImplementationOnce(() => {
+			return {
+				[Symbol.asyncIterator]: () => ({
+					next: vi
+						.fn()
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								choices: [
+									{
+										delta: {
+											tool_calls: [
+												{
+													index: 0,
+													function: { name: "test_tool", arguments: '{"arg":"value"}' },
+												},
+											],
+										},
+										finish_reason: null,
+									},
+								],
+							},
+						})
+						.mockResolvedValueOnce({
+							done: false,
+							value: {
+								choices: [
+									{
+										delta: {},
+										finish_reason: "tool_calls",
+									},
+								],
+							},
+						})
+						.mockResolvedValueOnce({ done: true }),
+				}),
+			}
+		})
+
+		const stream = handler.createMessage("system prompt", [])
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		expect(chunks).toEqual([
+			{
+				type: "tool_call_partial",
+				index: 0,
+				id: "chutes_tool_call_0",
+				name: "test_tool",
+				arguments: '{"arg":"value"}',
+			},
+			{
+				type: "tool_call_end",
+				id: "chutes_tool_call_0",
+			},
+		])
+	})
+	// kilocode_change end
+
 	it("createMessage should pass tools and tool_choice to API", async () => {
 		const tools = [
 			{
@@ -307,6 +465,9 @@ describe("ChutesHandler", () => {
 				tools,
 				tool_choice,
 			}),
+			expect.objectContaining({
+				timeout: expect.any(Number),
+			}),
 		)
 	})
 
@@ -326,11 +487,29 @@ describe("ChutesHandler", () => {
 			apiModelId: testModelId,
 			chutesApiKey: "test-chutes-api-key",
 		})
-		// Note: getModel() returns fallback default without calling fetchModel
-		// Since we haven't called fetchModel, it returns the default chutesDefaultModelId
-		// which is DeepSeek-R1-0528, therefore temperature will be DEEP_SEEK_DEFAULT_TEMPERATURE
+		;(handlerWithModel as any).models = {
+			[testModelId]: {
+				...chutesDefaultModelInfo,
+				temperature: 0.7,
+			},
+		}
 		const model = handlerWithModel.getModel()
-		// The default model is DeepSeek-R1, so it returns DEEP_SEEK_DEFAULT_TEMPERATURE
-		expect(model.info.temperature).toBe(DEEP_SEEK_DEFAULT_TEMPERATURE)
+		expect(model.id).toBe(testModelId)
+		expect(model.info.temperature).toBe(0.5)
+	})
+
+	// kilocode_change start
+	it("should preserve explicit Chutes model id when it is unavailable in cached model list", () => {
+		const unsupportedModelId = "moonshotai/Kimi-K2.5-TEE"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: unsupportedModelId,
+			chutesApiKey: "test-chutes-api-key",
+		})
+
+		const model = handlerWithModel.getModel()
+
+		expect(model.id).toBe(unsupportedModelId)
+		expect(model.info.temperature).toBe(0.5)
 	})
+	// kilocode_change end
 })
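Editor's note: the "synthesize tool call ids" test pins down a fallback: when a streamed tool-call delta arrives without an `id`, a stable id is derived from the chunk index so later deltas and the final `tool_call_end` refer to the same call. A sketch mirroring `getToolCallId` from the chutes.ts diff at the end of this changeset; the `chutes_tool_call_` prefix matches the expectations above:

```typescript
// Sketch: reuse the provider-supplied id when present, otherwise synthesize
// one per tool-call index and remember it for subsequent deltas.
function getOrSynthesizeToolCallId(
	toolCall: { id?: string; index?: number },
	idsByIndex: Map<number, string>,
): string {
	const index = toolCall.index ?? 0
	if (toolCall.id) {
		idsByIndex.set(index, toolCall.id)
		return toolCall.id
	}
	const existing = idsByIndex.get(index)
	if (existing) {
		return existing
	}
	const synthesized = `chutes_tool_call_${index}`
	idsByIndex.set(index, synthesized)
	return synthesized
}

const ids = new Map<number, string>()
getOrSynthesizeToolCallId({ index: 0 }, ids) // "chutes_tool_call_0"
getOrSynthesizeToolCallId({ index: 0 }, ids) // same id on subsequent deltas
```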

+ 20 - 111
src/api/providers/__tests__/fireworks.spec.ts

@@ -102,17 +102,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 16384,
-				contextWindow: 128000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.6,
-				outputPrice: 2.5,
-				description: expect.stringContaining("Kimi K2 is a state-of-the-art mixture-of-experts"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return Kimi K2 Thinking model with correct configuration", () => {
@@ -148,17 +139,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 4096,
-				contextWindow: 204800,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.3,
-				outputPrice: 1.2,
-				description: expect.stringContaining("MiniMax M2 is a high-performance language model"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return Qwen3 235B model with correct configuration", () => {
@@ -169,18 +151,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 32768,
-				contextWindow: 256000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.22,
-				outputPrice: 0.88,
-				description:
-					"Latest Qwen3 thinking model, competitive against the best closed source models in Jul 2025.",
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return DeepSeek R1 model with correct configuration", () => {
@@ -191,17 +163,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 20480,
-				contextWindow: 160000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 3,
-				outputPrice: 8,
-				description: expect.stringContaining("05/28 updated checkpoint of Deepseek R1"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return DeepSeek V3 model with correct configuration", () => {
@@ -233,17 +196,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 16384,
-				contextWindow: 163840,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.56,
-				outputPrice: 1.68,
-				description: expect.stringContaining("DeepSeek v3.1 is an improved version"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return GLM-4.5 model with correct configuration", () => {
@@ -254,17 +208,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 16384,
-				contextWindow: 128000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.55,
-				outputPrice: 2.19,
-				description: expect.stringContaining("Z.ai GLM-4.5 with 355B total parameters"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return GLM-4.5-Air model with correct configuration", () => {
@@ -275,17 +220,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 16384,
-				contextWindow: 128000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.55,
-				outputPrice: 2.19,
-				description: expect.stringContaining("Z.ai GLM-4.5-Air with 106B total parameters"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return GLM-4.6 model with correct configuration", () => {
@@ -296,17 +232,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 25344,
-				contextWindow: 198000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.55,
-				outputPrice: 2.19,
-				description: expect.stringContaining("Z.ai GLM-4.6 is an advanced coding model"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return gpt-oss-20b model with correct configuration", () => {
@@ -317,17 +244,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 16384,
-				contextWindow: 128000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.07,
-				outputPrice: 0.3,
-				description: expect.stringContaining("OpenAI gpt-oss-20b: Compact model for local/edge deployments"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("should return gpt-oss-120b model with correct configuration", () => {
@@ -338,17 +256,8 @@ describe("FireworksHandler", () => {
 		})
 		const model = handlerWithModel.getModel()
 		expect(model.id).toBe(testModelId)
-		expect(model.info).toEqual(
-			expect.objectContaining({
-				maxTokens: 16384,
-				contextWindow: 128000,
-				supportsImages: false,
-				supportsPromptCache: false,
-				inputPrice: 0.15,
-				outputPrice: 0.6,
-				description: expect.stringContaining("OpenAI gpt-oss-120b: Production-grade, general-purpose model"),
-			}),
-		)
+		// Keep this test aligned with the shared model registry.
+		expect(model.info).toEqual(expect.objectContaining(fireworksModels[testModelId]))
 	})
 
 	it("completePrompt method should return text from Fireworks API", async () => {

+ 14 - 3
src/api/providers/__tests__/kilocode-openrouter.spec.ts

@@ -8,6 +8,8 @@ vitest.mock("vscode", () => ({
 		language: "en",
 		uiKind: 1,
 		appName: "Visual Studio Code",
+		isTelemetryEnabled: true,
+		machineId: "test-machine-id",
 	},
 	version: "1.85.0",
 }))
@@ -22,12 +24,13 @@ import {
 	X_KILOCODE_ORGANIZATIONID,
 	X_KILOCODE_PROJECTID,
 	X_KILOCODE_EDITORNAME,
+	X_KILOCODE_MACHINEID,
 	X_KILOCODE_MODE,
 } from "../../../shared/kilocode/headers"
-import { streamSse } from "../../../services/continuedev/core/fetch/stream"
+import { streamSse } from "../../../services/autocomplete/continuedev/core/fetch/stream"
 
 // Mock the stream module
-vitest.mock("../../../services/continuedev/core/fetch/stream", () => ({
+vitest.mock("../../../services/autocomplete/continuedev/core/fetch/stream", () => ({
 	streamSse: vitest.fn(),
 }))
 
@@ -85,6 +88,7 @@ describe("KilocodeOpenrouterHandler", () => {
 					[X_KILOCODE_MODE]: "code",
 					[X_KILOCODE_TASKID]: "test-task-id",
 					[X_KILOCODE_EDITORNAME]: "Visual Studio Code 1.85.0",
+					[X_KILOCODE_MACHINEID]: "test-machine-id",
 				},
 			})
 		})
@@ -102,6 +106,7 @@ describe("KilocodeOpenrouterHandler", () => {
 					[X_KILOCODE_TASKID]: "test-task-id",
 					[X_KILOCODE_ORGANIZATIONID]: "test-org-id",
 					[X_KILOCODE_EDITORNAME]: "Visual Studio Code 1.85.0",
+					[X_KILOCODE_MACHINEID]: "test-machine-id",
 				},
 			})
 		})
@@ -124,6 +129,7 @@ describe("KilocodeOpenrouterHandler", () => {
 					[X_KILOCODE_ORGANIZATIONID]: "test-org-id",
 					[X_KILOCODE_PROJECTID]: "https://github.com/user/repo.git",
 					[X_KILOCODE_EDITORNAME]: "Visual Studio Code 1.85.0",
+					[X_KILOCODE_MACHINEID]: "test-machine-id",
 				},
 			})
 		})
@@ -146,6 +152,7 @@ describe("KilocodeOpenrouterHandler", () => {
 					[X_KILOCODE_PROJECTID]: "https://github.com/user/repo.git",
 					[X_KILOCODE_ORGANIZATIONID]: "test-org-id",
 					[X_KILOCODE_EDITORNAME]: "Visual Studio Code 1.85.0",
+					[X_KILOCODE_MACHINEID]: "test-machine-id",
 				},
 			})
 		})
@@ -163,6 +170,7 @@ describe("KilocodeOpenrouterHandler", () => {
 					[X_KILOCODE_TASKID]: "test-task-id",
 					[X_KILOCODE_ORGANIZATIONID]: "test-org-id",
 					[X_KILOCODE_EDITORNAME]: "Visual Studio Code 1.85.0",
+					[X_KILOCODE_MACHINEID]: "test-machine-id",
 				},
 			})
 			expect(result?.headers).not.toHaveProperty(X_KILOCODE_PROJECTID)
@@ -181,18 +189,20 @@ describe("KilocodeOpenrouterHandler", () => {
 					[X_KILOCODE_MODE]: "code",
 					[X_KILOCODE_TASKID]: "test-task-id",
 					[X_KILOCODE_EDITORNAME]: "Visual Studio Code 1.85.0",
+					[X_KILOCODE_MACHINEID]: "test-machine-id",
 				},
 			})
 			expect(result?.headers).not.toHaveProperty(X_KILOCODE_PROJECTID)
 		})
 
-		it("returns only editorName header when no other headers are needed", () => {
+		it("returns editorName and machineId headers when no other headers are needed", () => {
 			const handler = new KilocodeOpenrouterHandler(mockOptions)
 			const result = handler.customRequestOptions()
 
 			expect(result).toEqual({
 				headers: {
 					[X_KILOCODE_EDITORNAME]: "Visual Studio Code 1.85.0",
+					[X_KILOCODE_MACHINEID]: "test-machine-id",
 				},
 			})
 		})
@@ -241,6 +251,7 @@ describe("KilocodeOpenrouterHandler", () => {
 						[X_KILOCODE_PROJECTID]: "https://github.com/user/repo.git",
 						[X_KILOCODE_ORGANIZATIONID]: "test-org-id",
 						[X_KILOCODE_EDITORNAME]: "Visual Studio Code 1.85.0",
+						[X_KILOCODE_MACHINEID]: "test-machine-id",
 					}),
 				}),
 				// kilocode_change end

+ 2 - 2
src/api/providers/__tests__/mistral-fim.spec.ts

@@ -6,10 +6,10 @@ vitest.mock("vscode", () => ({}))
 
 import { MistralHandler } from "../mistral"
 import { ApiHandlerOptions } from "../../../shared/api"
-import { streamSse } from "../../../services/continuedev/core/fetch/stream"
+import { streamSse } from "../../../services/autocomplete/continuedev/core/fetch/stream"
 
 // Mock the stream module
-vitest.mock("../../../services/continuedev/core/fetch/stream", () => ({
+vitest.mock("../../../services/autocomplete/continuedev/core/fetch/stream", () => ({
 	streamSse: vitest.fn(),
 }))
 

+ 299 - 13
src/api/providers/__tests__/moonshot.spec.ts

@@ -117,6 +117,31 @@ describe("MoonshotHandler", () => {
 			expect(model).toHaveProperty("temperature")
 			expect(model).toHaveProperty("maxTokens")
 		})
+
+		// kilocode_change start
+		it("should expose native tools for kimi-k2.5", () => {
+			const strictHandler = new MoonshotHandler({
+				...mockOptions,
+				apiModelId: "kimi-k2.5",
+			})
+			const model = strictHandler.getModel()
+			const strictModelInfo = model.info as { supportsNativeTools?: boolean; defaultToolProtocol?: string }
+
+			expect(strictModelInfo.supportsNativeTools).toBe(true)
+			expect(strictModelInfo.defaultToolProtocol).toBe("native")
+			expect(model.info.supportsImages).toBe(true)
+		})
+
+		it("should expose image capability for kimi-for-coding", () => {
+			const strictHandler = new MoonshotHandler({
+				...mockOptions,
+				apiModelId: "kimi-for-coding",
+			})
+			const model = strictHandler.getModel()
+
+			expect(model.info.supportsImages).toBe(true)
+		})
+		// kilocode_change end
 	})
 
 	describe("createMessage", () => {
@@ -221,6 +246,187 @@ describe("MoonshotHandler", () => {
 			expect(usageChunks[0].cacheWriteTokens).toBe(0)
 			expect(usageChunks[0].cacheReadTokens).toBe(2)
 		})
+
+		// kilocode_change start
+		it("should include prompt_cache_key for moonshot requests when taskId is provided", async () => {
+			async function* mockFullStream() {
+				yield { type: "text-delta", text: "Test response" }
+			}
+
+			mockStreamText.mockReturnValue({
+				fullStream: mockFullStream(),
+				usage: Promise.resolve({
+					inputTokens: 1,
+					outputTokens: 1,
+					details: {},
+					raw: {},
+				}),
+			})
+
+			for await (const _chunk of handler.createMessage(systemPrompt, messages, { taskId: "task-cache-1" })) {
+				// Drain stream
+			}
+
+			expect(mockStreamText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					providerOptions: {
+						moonshot: {
+							prompt_cache_key: "task-cache-1",
+						},
+					},
+				}),
+			)
+		})
+
+		it("should enforce strict thinking temperature/provider options for kimi-k2.5 by default", async () => {
+			const strictHandler = new MoonshotHandler({
+				...mockOptions,
+				apiModelId: "kimi-k2.5",
+				modelTemperature: 0.1,
+			})
+
+			async function* mockFullStream() {
+				yield { type: "text-delta", text: "Test response" }
+			}
+
+			mockStreamText.mockReturnValue({
+				fullStream: mockFullStream(),
+				usage: Promise.resolve({
+					inputTokens: 1,
+					outputTokens: 1,
+					details: {},
+					raw: {},
+				}),
+			})
+
+			for await (const _chunk of strictHandler.createMessage(systemPrompt, messages)) {
+				// Drain stream
+			}
+
+			expect(mockStreamText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					temperature: 1.0,
+					providerOptions: {
+						moonshot: {
+							thinking: { type: "enabled" },
+						},
+					},
+				}),
+			)
+		})
+
+		it("should include prompt_cache_key alongside strict thinking controls when taskId is provided", async () => {
+			const strictHandler = new MoonshotHandler({
+				...mockOptions,
+				apiModelId: "kimi-for-coding",
+			})
+
+			async function* mockFullStream() {
+				yield { type: "text-delta", text: "Test response" }
+			}
+
+			mockStreamText.mockReturnValue({
+				fullStream: mockFullStream(),
+				usage: Promise.resolve({
+					inputTokens: 1,
+					outputTokens: 1,
+					details: {},
+					raw: {},
+				}),
+			})
+
+			for await (const _chunk of strictHandler.createMessage(systemPrompt, messages, { taskId: "task-cache-2" })) {
+				// Drain stream
+			}
+
+			expect(mockStreamText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					temperature: 1.0,
+					providerOptions: {
+						moonshot: {
+							prompt_cache_key: "task-cache-2",
+							thinking: { type: "enabled" },
+						},
+					},
+				}),
+			)
+		})
+
+		it("should enforce strict thinking temperature/provider options for kimi-for-coding by default", async () => {
+			const strictHandler = new MoonshotHandler({
+				...mockOptions,
+				apiModelId: "kimi-for-coding",
+				modelTemperature: 0.1,
+			})
+
+			async function* mockFullStream() {
+				yield { type: "text-delta", text: "Test response" }
+			}
+
+			mockStreamText.mockReturnValue({
+				fullStream: mockFullStream(),
+				usage: Promise.resolve({
+					inputTokens: 1,
+					outputTokens: 1,
+					details: {},
+					raw: {},
+				}),
+			})
+
+			for await (const _chunk of strictHandler.createMessage(systemPrompt, messages)) {
+				// Drain stream
+			}
+
+			expect(mockStreamText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					temperature: 1.0,
+					providerOptions: {
+						moonshot: {
+							thinking: { type: "enabled" },
+						},
+					},
+				}),
+			)
+		})
+
+		it("should enforce strict non-thinking temperature/provider options when reasoning is disabled", async () => {
+			const strictHandler = new MoonshotHandler({
+				...mockOptions,
+				apiModelId: "kimi-for-coding",
+				enableReasoningEffort: false,
+				modelTemperature: 1.9,
+			})
+
+			async function* mockFullStream() {
+				yield { type: "text-delta", text: "Test response" }
+			}
+
+			mockStreamText.mockReturnValue({
+				fullStream: mockFullStream(),
+				usage: Promise.resolve({
+					inputTokens: 1,
+					outputTokens: 1,
+					details: {},
+					raw: {},
+				}),
+			})
+
+			for await (const _chunk of strictHandler.createMessage(systemPrompt, messages)) {
+				// Drain stream
+			}
+
+			expect(mockStreamText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					temperature: 0.6,
+					providerOptions: {
+						moonshot: {
+							thinking: { type: "disabled" },
+						},
+					},
+				}),
+			)
+		})
+		// kilocode_change end
 	})
 
 	describe("completePrompt", () => {
@@ -238,6 +444,34 @@ describe("MoonshotHandler", () => {
 				}),
 			)
 		})
+
+		// kilocode_change start
+		it("should enforce strict thinking controls for completePrompt on strict Kimi models", async () => {
+			const strictHandler = new MoonshotHandler({
+				...mockOptions,
+				apiModelId: "kimi-k2.5",
+				enableReasoningEffort: false,
+				modelTemperature: 1.8,
+			})
+
+			mockGenerateText.mockResolvedValue({
+				text: "Test completion",
+			})
+
+			await strictHandler.completePrompt("Test prompt")
+
+			expect(mockGenerateText).toHaveBeenCalledWith(
+				expect.objectContaining({
+					temperature: 0.6,
+					providerOptions: {
+						moonshot: {
+							thinking: { type: "disabled" },
+						},
+					},
+				}),
+			)
+		})
+		// kilocode_change end
 	})
 
 	describe("processUsageMetrics", () => {
@@ -404,19 +638,13 @@ describe("MoonshotHandler", () => {
 				chunks.push(chunk)
 			}
 
-			const toolCallStartChunks = chunks.filter((c) => c.type === "tool_call_start")
-			const toolCallDeltaChunks = chunks.filter((c) => c.type === "tool_call_delta")
-			const toolCallEndChunks = chunks.filter((c) => c.type === "tool_call_end")
-
-			expect(toolCallStartChunks.length).toBe(1)
-			expect(toolCallStartChunks[0].id).toBe("tool-call-1")
-			expect(toolCallStartChunks[0].name).toBe("read_file")
-
-			expect(toolCallDeltaChunks.length).toBe(1)
-			expect(toolCallDeltaChunks[0].delta).toBe('{"path":"test.ts"}')
-
-			expect(toolCallEndChunks.length).toBe(1)
-			expect(toolCallEndChunks[0].id).toBe("tool-call-1")
+			// kilocode_change start
+			const toolCallChunks = chunks.filter((c) => c.type === "tool_call")
+			expect(toolCallChunks.length).toBe(1)
+			expect(toolCallChunks[0].id).toBe("tool-call-1")
+			expect(toolCallChunks[0].name).toBe("read_file")
+			expect(toolCallChunks[0].arguments).toBe('{"path":"test.ts"}')
+			// kilocode_change end
 		})
 
 		it("should handle complete tool calls", async () => {
@@ -470,5 +698,63 @@ describe("MoonshotHandler", () => {
 			expect(toolCallChunks[0].name).toBe("read_file")
 			expect(toolCallChunks[0].arguments).toBe('{"path":"test.ts"}')
 		})
+
+		// kilocode_change start
+		it("should flush pending tool-input stream as tool_call when tool-input-end is missing", async () => {
+			async function* mockFullStream() {
+				yield {
+					type: "tool-input-start",
+					id: "tool-call-2",
+					toolName: "read_file",
+				}
+				yield {
+					type: "tool-input-delta",
+					id: "tool-call-2",
+					delta: '{"path":"missing-end.ts"}',
+				}
+			}
+
+			mockStreamText.mockReturnValue({
+				fullStream: mockFullStream(),
+				usage: Promise.resolve({
+					inputTokens: 10,
+					outputTokens: 5,
+					details: {},
+					raw: {},
+				}),
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages, {
+				taskId: "test-task",
+				tools: [
+					{
+						type: "function",
+						function: {
+							name: "read_file",
+							description: "Read a file",
+							parameters: {
+								type: "object",
+								properties: { path: { type: "string" } },
+								required: ["path"],
+							},
+						},
+					},
+				],
+			})
+
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const toolCallChunks = chunks.filter((c) => c.type === "tool_call")
+			expect(toolCallChunks).toHaveLength(1)
+			expect(toolCallChunks[0]).toMatchObject({
+				id: "tool-call-2",
+				name: "read_file",
+				arguments: '{"path":"missing-end.ts"}',
+			})
+		})
+		// kilocode_change end
 	})
 })
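Editor's note: the final test above asserts that a tool-input stream with no `tool-input-end` event is still surfaced as a complete `tool_call` chunk. A sketch of that flush step, assuming deltas are buffered per call id while streaming; the chunk shape mirrors the test expectations, and the surrounding handler plumbing is an assumption:

```typescript
// Sketch: emit any still-buffered tool calls as complete chunks at stream end.
type ToolCallChunk = { type: "tool_call"; id: string; name: string; arguments: string }

function flushPendingToolCalls(
	pending: Map<string, { name: string; args: string }>,
): ToolCallChunk[] {
	const chunks: ToolCallChunk[] = []
	for (const [id, call] of pending) {
		chunks.push({ type: "tool_call", id, name: call.name, arguments: call.args })
	}
	pending.clear()
	return chunks
}

const pending = new Map([["tool-call-2", { name: "read_file", args: '{"path":"missing-end.ts"}' }]])
flushPendingToolCalls(pending)
// [{ type: "tool_call", id: "tool-call-2", name: "read_file", arguments: '{"path":"missing-end.ts"}' }]
```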

+ 133 - 1
src/api/providers/__tests__/zai.spec.ts

@@ -108,6 +108,25 @@ describe("ZAiHandler", () => {
 			expect(model.info.preserveReasoning).toBe(true)
 		})
 
+		// kilocode_change start
+		it("should return GLM-5 international model with documented limits", () => {
+			const testModelId: InternationalZAiModelId = "glm-5"
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: testModelId,
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_coding",
+			})
+			const model = handlerWithModel.getModel()
+			expect(model.id).toBe(testModelId)
+			expect(model.info).toEqual(internationalZAiModels[testModelId])
+			expect(model.info.contextWindow).toBe(200_000)
+			expect(model.info.maxTokens).toBe(131_072)
+			expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
+			expect(model.info.reasoningEffort).toBe("medium")
+			expect(model.info.preserveReasoning).toBe(true)
+		})
+		// kilocode_change end
+
 		it("should return GLM-4.5v international model with vision support", () => {
 			const testModelId: InternationalZAiModelId = "glm-4.5v"
 			const handlerWithModel = new ZAiHandler({
@@ -203,6 +222,25 @@ describe("ZAiHandler", () => {
 			expect(model.info.reasoningEffort).toBe("medium")
 			expect(model.info.preserveReasoning).toBe(true)
 		})
+
+		// kilocode_change start
+		it("should return GLM-5 China model with documented limits", () => {
+			const testModelId: MainlandZAiModelId = "glm-5"
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: testModelId,
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "china_coding",
+			})
+			const model = handlerWithModel.getModel()
+			expect(model.id).toBe(testModelId)
+			expect(model.info).toEqual(mainlandZAiModels[testModelId])
+			expect(model.info.contextWindow).toBe(200_000)
+			expect(model.info.maxTokens).toBe(131_072)
+			expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
+			expect(model.info.reasoningEffort).toBe("medium")
+			expect(model.info.preserveReasoning).toBe(true)
+		})
+		// kilocode_change end
 	})
 
 	describe("International API", () => {
@@ -242,6 +280,23 @@ describe("ZAiHandler", () => {
 			expect(model.id).toBe(testModelId)
 			expect(model.info).toEqual(internationalZAiModels[testModelId])
 		})
+
+		// kilocode_change start
+		it("should return GLM-5 international API model with documented limits", () => {
+			const testModelId: InternationalZAiModelId = "glm-5"
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: testModelId,
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_api",
+			})
+			const model = handlerWithModel.getModel()
+			expect(model.id).toBe(testModelId)
+			expect(model.info).toEqual(internationalZAiModels[testModelId])
+			expect(model.info.contextWindow).toBe(200_000)
+			expect(model.info.maxTokens).toBe(131_072)
+			expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
+		})
+		// kilocode_change end
 	})
 
 	describe("China API", () => {
@@ -281,6 +336,23 @@ describe("ZAiHandler", () => {
 			expect(model.id).toBe(testModelId)
 			expect(model.info).toEqual(mainlandZAiModels[testModelId])
 		})
+
+		// kilocode_change start
+		it("should return GLM-5 China API model with documented limits", () => {
+			const testModelId: MainlandZAiModelId = "glm-5"
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: testModelId,
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "china_api",
+			})
+			const model = handlerWithModel.getModel()
+			expect(model.id).toBe(testModelId)
+			expect(model.info).toEqual(mainlandZAiModels[testModelId])
+			expect(model.info.contextWindow).toBe(200_000)
+			expect(model.info.maxTokens).toBe(131_072)
+			expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
+		})
+		// kilocode_change end
 	})
 
 	describe("Default behavior", () => {
@@ -414,7 +486,8 @@ describe("ZAiHandler", () => {
 		})
 	})
 
-	describe("GLM-4.7 Thinking Mode", () => {
+	// kilocode_change start
+	describe("Z.ai Thinking Mode", () => {
 		it("should enable thinking by default for GLM-4.7 (default reasoningEffort is medium)", async () => {
 			const handlerWithModel = new ZAiHandler({
 				apiModelId: "glm-4.7",
@@ -507,6 +580,64 @@ describe("ZAiHandler", () => {
 			)
 		})
 
+		it("should enable thinking by default for GLM-5 (default reasoningEffort is medium)", async () => {
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: "glm-5",
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_coding",
+			})
+
+			mockCreate.mockImplementationOnce(() => {
+				return {
+					[Symbol.asyncIterator]: () => ({
+						async next() {
+							return { done: true }
+						},
+					}),
+				}
+			})
+
+			const messageGenerator = handlerWithModel.createMessage("system prompt", [])
+			await messageGenerator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "glm-5",
+					thinking: { type: "enabled" },
+				}),
+			)
+		})
+
+		it("should disable thinking for GLM-5 when reasoningEffort is set to disable", async () => {
+			const handlerWithModel = new ZAiHandler({
+				apiModelId: "glm-5",
+				zaiApiKey: "test-zai-api-key",
+				zaiApiLine: "international_coding",
+				enableReasoningEffort: true,
+				reasoningEffort: "disable",
+			})
+
+			mockCreate.mockImplementationOnce(() => {
+				return {
+					[Symbol.asyncIterator]: () => ({
+						async next() {
+							return { done: true }
+						},
+					}),
+				}
+			})
+
+			const messageGenerator = handlerWithModel.createMessage("system prompt", [])
+			await messageGenerator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "glm-5",
+					thinking: { type: "disabled" },
+				}),
+			)
+		})
+
 		it("should NOT add thinking parameter for non-thinking models like GLM-4.6", async () => {
 			const handlerWithModel = new ZAiHandler({
 				apiModelId: "glm-4.6",
@@ -532,4 +663,5 @@ describe("ZAiHandler", () => {
 			expect(callArgs.thinking).toBeUndefined()
 		})
 	})
+	// kilocode_change end
 })
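
A minimal runtime sketch of what the thinking-mode tests above drive, assuming the same mocked OpenAI client (mockCreate) and the option names used in this spec:

import { ZAiHandler } from "../zai"

// Turning reasoning off maps to an explicit thinking flag on the outgoing request,
// because the Z.ai API enables thinking by default for these models.
const handler = new ZAiHandler({
	apiModelId: "glm-5",
	zaiApiKey: "test-zai-api-key",
	zaiApiLine: "international_coding",
	enableReasoningEffort: true,
	reasoningEffort: "disable",
})

// The request issued by createMessage carries thinking: { type: "disabled" }.
await handler.createMessage("system prompt", []).next()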

+ 47 - 0
src/api/providers/__tests__/zenmux.spec.ts

@@ -0,0 +1,47 @@
+// kilocode_change - new test file for ZenMux provider
+import { ZenMuxHandler } from "../zenmux"
+import { ApiHandlerOptions } from "../../../shared/api"
+
+describe("ZenMuxHandler", () => {
+	let mockOptions: ApiHandlerOptions
+
+	beforeEach(() => {
+		mockOptions = {
+			zenmuxApiKey: "test-api-key",
+			zenmuxModelId: "openai/gpt-4",
+			zenmuxBaseUrl: "https://test.zenmux.ai/api/v1",
+		}
+	})
+
+	test("should use default base URL when not provided", () => {
+		const optionsWithoutBaseUrl = {
+			...mockOptions,
+			zenmuxBaseUrl: undefined,
+		}
+		const handler = new ZenMuxHandler(optionsWithoutBaseUrl)
+		// The handler should initialize without errors
+		expect(handler).toBeDefined()
+	})
+
+	test("should use provided base URL", () => {
+		const handler = new ZenMuxHandler(mockOptions)
+		expect(handler).toBeDefined()
+		// Construction is the only observable effect here; asserting the base URL
+		// itself would require mocking the OpenAI client (see the sketch below).
+	})
+
+	test("should handle missing API key gracefully", () => {
+		const optionsWithoutKey = {
+			...mockOptions,
+			zenmuxApiKey: undefined,
+		}
+		const handler = new ZenMuxHandler(optionsWithoutKey)
+		expect(handler).toBeDefined()
+	})
+
+	test("should return correct model info", () => {
+		const handler = new ZenMuxHandler(mockOptions)
+		const model = handler.getModel()
+		expect(model.id).toBe("openai/gpt-4")
+		expect(model.info).toBeDefined()
+	})
+})
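
The base-URL tests above only assert that construction succeeds. A stricter variant is sketched below, assuming vitest and relying on ZenMuxHandler building its OpenAI client in the constructor (see src/api/providers/zenmux.ts later in this diff); the mock must be hoisted so it applies before the handler module loads:

import { vi } from "vitest"

const { openAiCtor } = vi.hoisted(() => ({ openAiCtor: vi.fn() }))
vi.mock("openai", () => ({ default: openAiCtor }))

// After new ZenMuxHandler(mockOptions):
// expect(openAiCtor).toHaveBeenCalledWith(
// 	expect.objectContaining({ baseURL: "https://test.zenmux.ai/api/v1" }),
// )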

+ 106 - 9
src/api/providers/chutes.ts

@@ -11,6 +11,7 @@ import { ApiStream } from "../transform/stream"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 
 import { RouterProvider } from "./router-provider"
+import { getApiRequestTimeout } from "./utils/timeout-config"
 
 export class ChutesHandler extends RouterProvider implements SingleCompletionHandler {
 	constructor(options: ApiHandlerOptions) {
@@ -25,6 +26,14 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 		})
 	}
 
+	// kilocode_change start
+	private getRequestOptions() {
+		return {
+			timeout: getApiRequestTimeout(),
+		}
+	}
+	// kilocode_change end
+
 	private getCompletionParams(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
@@ -59,6 +68,32 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 		return params
 	}
 
+	// kilocode_change start
+	private getToolCallId(
+		toolCall: {
+			id?: string
+			index?: number
+		},
+		toolCallIdsByIndex: Map<number, string>,
+	): string {
+		const toolCallIndex = toolCall.index ?? 0
+
+		if (toolCall.id) {
+			toolCallIdsByIndex.set(toolCallIndex, toolCall.id)
+			return toolCall.id
+		}
+
+		const existingId = toolCallIdsByIndex.get(toolCallIndex)
+		if (existingId) {
+			return existingId
+		}
+
+		const syntheticId = `chutes_tool_call_${toolCallIndex}`
+		toolCallIdsByIndex.set(toolCallIndex, syntheticId)
+		return syntheticId
+	}
+	// kilocode_change end
+
 	override async *createMessage(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
@@ -70,7 +105,7 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 			const stream = await this.client.chat.completions.create({
 				...this.getCompletionParams(systemPrompt, messages, metadata),
 				messages: convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]),
-			})
+			}, this.getRequestOptions())
 
 			const matcher = new XmlMatcher(
 				"think",
@@ -80,9 +115,16 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 						text: chunk.data,
 					}) as const,
 			)
+			// kilocode_change start
+			const activeToolCallIds = new Set<string>()
+			const toolCallIdsByIndex = new Map<number, string>()
+			// kilocode_change end
 
 			for await (const chunk of stream) {
 				const delta = chunk.choices[0]?.delta
+				// kilocode_change start
+				const finishReason = chunk.choices[0]?.finish_reason
+				// kilocode_change end
 
 				if (delta?.content) {
 					for (const processedChunk of matcher.update(delta.content)) {
@@ -93,15 +135,27 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 				// Emit raw tool call chunks - NativeToolCallParser handles state management
 				if (delta && "tool_calls" in delta && Array.isArray(delta.tool_calls)) {
 					for (const toolCall of delta.tool_calls) {
+						// kilocode_change start
+						const toolCallId = this.getToolCallId(toolCall, toolCallIdsByIndex)
+						activeToolCallIds.add(toolCallId)
+						// kilocode_change end
 						yield {
 							type: "tool_call_partial",
 							index: toolCall.index,
-							id: toolCall.id,
+							id: toolCallId,
 							name: toolCall.function?.name,
 							arguments: toolCall.function?.arguments,
 						}
 					}
 				}
+				// kilocode_change start
+				if (finishReason === "tool_calls" && activeToolCallIds.size > 0) {
+					for (const id of activeToolCallIds) {
+						yield { type: "tool_call_end", id }
+					}
+					activeToolCallIds.clear()
+				}
+				// kilocode_change end
 
 				if (chunk.usage) {
 					yield {
@@ -120,31 +174,61 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 			// For non-DeepSeek-R1 models, use standard OpenAI streaming
 			const stream = await this.client.chat.completions.create(
 				this.getCompletionParams(systemPrompt, messages, metadata),
+				this.getRequestOptions(),
 			)
+			// kilocode_change start
+			const activeToolCallIds = new Set<string>()
+			const toolCallIdsByIndex = new Map<number, string>()
+			// kilocode_change end
 
 			for await (const chunk of stream) {
 				const delta = chunk.choices[0]?.delta
+				// kilocode_change start
+				const finishReason = chunk.choices[0]?.finish_reason
+				// kilocode_change end
 
 				if (delta?.content) {
 					yield { type: "text", text: delta.content }
 				}
 
-				if (delta && "reasoning_content" in delta && delta.reasoning_content) {
-					yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" }
+				// kilocode_change start
+				if (delta) {
+					for (const key of ["reasoning_content", "reasoning"] as const) {
+						if (key in delta) {
+							const reasoningContent = ((delta as any)[key] as string | undefined) || ""
+							if (reasoningContent.trim()) {
+								yield { type: "reasoning", text: reasoningContent }
+							}
+							break
+						}
+					}
 				}
+				// kilocode_change end
 
 				// Emit raw tool call chunks - NativeToolCallParser handles state management
 				if (delta && "tool_calls" in delta && Array.isArray(delta.tool_calls)) {
 					for (const toolCall of delta.tool_calls) {
+						// kilocode_change start
+						const toolCallId = this.getToolCallId(toolCall, toolCallIdsByIndex)
+						activeToolCallIds.add(toolCallId)
+						// kilocode_change end
 						yield {
 							type: "tool_call_partial",
 							index: toolCall.index,
-							id: toolCall.id,
+							id: toolCallId,
 							name: toolCall.function?.name,
 							arguments: toolCall.function?.arguments,
 						}
 					}
 				}
+				// kilocode_change start
+				if (finishReason === "tool_calls" && activeToolCallIds.size > 0) {
+					for (const id of activeToolCallIds) {
+						yield { type: "tool_call_end", id }
+					}
+					activeToolCallIds.clear()
+				}
+				// kilocode_change end
 
 				if (chunk.usage) {
 					yield {
@@ -184,7 +268,7 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 				requestParams.temperature = this.options.modelTemperature ?? defaultTemperature
 			}
 
-			const response = await this.client.chat.completions.create(requestParams)
+			const response = await this.client.chat.completions.create(requestParams, this.getRequestOptions())
 			return response.choices[0]?.message.content || ""
 		} catch (error) {
 			if (error instanceof Error) {
@@ -196,12 +280,25 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan
 
 	override getModel() {
 		const model = super.getModel()
-		const isDeepSeekR1 = model.id.includes("DeepSeek-R1")
+		const configuredModelId = this.options.apiModelId
+		// kilocode_change start
+		// Keep explicit Chutes model IDs instead of silently switching to the provider default.
+		// This prevents hidden model substitution when model lists are stale/unavailable.
+		const shouldPreserveExplicitModelId =
+			!!configuredModelId &&
+			configuredModelId !== this.defaultModelId &&
+			model.id === this.defaultModelId &&
+			!this.models[configuredModelId]
+
+		const effectiveModelId = shouldPreserveExplicitModelId ? configuredModelId : model.id
+		const baseInfo = shouldPreserveExplicitModelId ? this.defaultModelInfo : model.info
+		// kilocode_change end
+		const isDeepSeekR1 = effectiveModelId.includes("DeepSeek-R1")
 
 		return {
-			...model,
+			id: effectiveModelId,
 			info: {
-				...model.info,
+				...baseInfo,
 				temperature: isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0.5,
 			},
 		}
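
The ID-stabilizing rule in getToolCallId is easiest to see in isolation; a standalone sketch of the same logic with illustrative names:

const toolCallIdsByIndex = new Map<number, string>()

function resolveId(toolCall: { id?: string; index?: number }): string {
	const index = toolCall.index ?? 0
	if (toolCall.id) {
		toolCallIdsByIndex.set(index, toolCall.id)
		return toolCall.id
	}
	// Deltas without an id reuse whatever was recorded (or synthesized) for their index.
	const existing = toolCallIdsByIndex.get(index)
	if (existing) {
		return existing
	}
	const synthetic = `chutes_tool_call_${index}`
	toolCallIdsByIndex.set(index, synthetic)
	return synthetic
}

console.log(resolveId({ index: 0 })) // "chutes_tool_call_0" (synthesized)
console.log(resolveId({ index: 0 })) // "chutes_tool_call_0" (reused)
console.log(resolveId({ id: "call_abc", index: 0 })) // "call_abc" (real id wins)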

+ 18 - 0
src/api/providers/corethink.ts

@@ -0,0 +1,18 @@
+import { type CorethinkModelId, corethinkDefaultModelId, corethinkModels } from "@roo-code/types"
+
+import type { ApiHandlerOptions } from "../../shared/api"
+import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider"
+
+export class CorethinkHandler extends BaseOpenAiCompatibleProvider<CorethinkModelId> {
+	constructor(options: ApiHandlerOptions) {
+		super({
+			...options,
+			providerName: "Corethink",
+			baseURL: "https://api.corethink.ai/v1/kilo",
+			apiKey: options.corethinkApiKey || "API_KEY_NOT_NEEDED_FOR_NOW",
+			defaultProviderModelId: corethinkDefaultModelId,
+			providerModels: corethinkModels,
+			defaultTemperature: 0.5,
+		})
+	}
+}
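
A minimal usage sketch for the new handler; the key shown is a placeholder mirroring the constructor fallback above, and the printed id comes from corethinkDefaultModelId:

import { CorethinkHandler } from "./corethink"

const corethink = new CorethinkHandler({ corethinkApiKey: "sk-example" }) // placeholder key
const { id, info } = corethink.getModel()
console.log(id, info.contextWindow)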

+ 7 - 1
src/api/providers/fetchers/modelCache.ts

@@ -40,6 +40,7 @@ import { getHuggingFaceModels } from "./huggingface"
 import { getRooModels } from "./roo"
 import { getChutesModels } from "./chutes"
 import { getNanoGptModels } from "./nano-gpt" //kilocode_change
+import { getZenmuxModels } from "./zenmux"
 
 const memoryCache = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 })
 
@@ -75,7 +76,6 @@ async function fetchModelsFromProvider(options: GetModelsOptions): Promise<Model
 	const { provider } = options
 
 	let models: ModelRecord
-
 	switch (provider) {
 		case "openrouter":
 			// kilocode_change start: base url and bearer token
@@ -85,6 +85,12 @@ async function fetchModelsFromProvider(options: GetModelsOptions): Promise<Model
 			})
 			// kilocode_change end
 			break
+		case "zenmux":
+			models = await getZenmuxModels({
+				openRouterBaseUrl: options.baseUrl || "https://zenmux.ai/api/v1",
+				headers: options.apiKey ? { Authorization: `Bearer ${options.apiKey}` } : undefined,
+			})
+			break
 		case "requesty":
 			// Requesty models endpoint requires an API key for per-user custom policies.
 			models = await getRequestyModels(options.baseUrl, options.apiKey)

+ 72 - 0
src/api/providers/fetchers/zenmux.ts

@@ -0,0 +1,72 @@
+import { z } from "zod"
+
+import { type ModelInfo } from "@roo-code/types"
+import type { ApiHandlerOptions } from "../../../shared/api"
+import { DEFAULT_HEADERS } from "../constants"
+import { parseApiPrice } from "../../../shared/cost"
+
+/**
+ * Schema for a single model entry returned by the ZenMux /models endpoint.
+ */
+const zenMuxModelSchema = z.object({
+	id: z.string(),
+	object: z.string(),
+	created: z.number(),
+	owned_by: z.string(),
+})
+
+export type ZenMuxModel = z.infer<typeof zenMuxModelSchema>
+
+/**
+ * Schema for the /models response envelope.
+ */
+const zenMuxModelsResponseSchema = z.object({
+	data: z.array(zenMuxModelSchema),
+	object: z.string(),
+})
+
+/**
+ * Fetches the ZenMux model list and maps each entry to a minimal ModelInfo.
+ * The endpoint exposes no pricing or context metadata, so those fields default to 0.
+ */
+export async function getZenmuxModels(
+	options?: ApiHandlerOptions & { headers?: Record<string, string> },
+): Promise<Record<string, ModelInfo>> {
+	const models: Record<string, ModelInfo> = {}
+	const baseURL = options?.openRouterBaseUrl || "https://zenmux.ai/api/v1"
+	try {
+		const response = await fetch(`${baseURL}/models`, {
+			headers: { ...DEFAULT_HEADERS, ...(options?.headers ?? {}) },
+		})
+		const json = await response.json()
+		const result = zenMuxModelsResponseSchema.safeParse(json)
+
+		if (!result.success) {
+			throw new Error("ZenMux models response is invalid: " + JSON.stringify(result.error.format(), undefined, 2))
+		}
+
+		const data = result.data.data
+
+		for (const model of data) {
+			const { id, owned_by } = model
+
+			const modelInfo: ModelInfo = {
+				maxTokens: 0,
+				contextWindow: 0,
+				supportsPromptCache: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: `${owned_by || "ZenMux"} model`,
+				displayName: id,
+			}
+
+			models[id] = modelInfo
+		}
+
+		console.log(`Successfully fetched ${Object.keys(models).length} ZenMux models`)
+	} catch (error) {
+		console.error(`Error fetching ZenMux models: ${JSON.stringify(error, Object.getOwnPropertyNames(error), 2)}`)
+		throw error
+	}
+
+	return models
+}
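
Calling the fetcher directly looks like this; the Authorization header is optional and the key shown is a placeholder:

import { getZenmuxModels } from "./zenmux"

const models = await getZenmuxModels({
	headers: { Authorization: "Bearer zm-example-key" }, // placeholder key
})
console.log(Object.keys(models).length, "ZenMux models")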

+ 2 - 0
src/api/providers/index.ts

@@ -25,6 +25,7 @@ export { OpenAiCompatibleResponsesHandler } from "./openai-responses" // kilocod
 export { OpenAICompatibleHandler } from "./openai-compatible"
 export type { OpenAICompatibleConfig } from "./openai-compatible"
 export { OpenRouterHandler } from "./openrouter"
+export { ZenMuxHandler } from "./zenmux" // kilocode_change
 export { QwenCodeHandler } from "./qwen-code"
 export { RequestyHandler } from "./requesty"
 export { SambaNovaHandler } from "./sambanova"
@@ -47,3 +48,4 @@ export { VercelAiGatewayHandler } from "./vercel-ai-gateway"
 export { DeepInfraHandler } from "./deepinfra"
 export { MiniMaxHandler } from "./minimax"
 export { BasetenHandler } from "./baseten"
+export { CorethinkHandler } from "./corethink"

+ 7 - 2
src/api/providers/kilocode-openrouter.ts

@@ -16,12 +16,13 @@ import {
 	X_KILOCODE_MODE,
 	X_KILOCODE_TESTER,
 	X_KILOCODE_EDITORNAME,
+	X_KILOCODE_MACHINEID,
 } from "../../shared/kilocode/headers"
-import { KILOCODE_TOKEN_REQUIRED_ERROR } from "../../shared/kilocode/errorUtils"
 import { DEFAULT_HEADERS } from "./constants"
-import { streamSse } from "../../services/continuedev/core/fetch/stream"
+import { streamSse } from "../../services/autocomplete/continuedev/core/fetch/stream"
 import { getEditorNameHeader } from "../../core/kilocode/wrapper"
 import type { FimHandler } from "./kilocode/FimHandler"
+import * as vscode from "vscode"
 
 /**
  * A custom OpenRouter handler that overrides the getModel function
@@ -60,6 +61,10 @@ export class KilocodeOpenrouterHandler extends OpenRouterHandler {
 			[X_KILOCODE_EDITORNAME]: getEditorNameHeader(),
 		}
 
+		if (vscode?.env?.isTelemetryEnabled && vscode.env.machineId) {
+			headers[X_KILOCODE_MACHINEID] = vscode.env.machineId
+		}
+
 		if (metadata?.mode) {
 			headers[X_KILOCODE_MODE] = metadata.mode
 		}
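
The telemetry gate reduces to a few lines; a sketch with an illustrative literal header name (the real name comes from the X_KILOCODE_MACHINEID constant imported above):

import * as vscode from "vscode"

const headers: Record<string, string> = {}
if (vscode.env.isTelemetryEnabled && vscode.env.machineId) {
	// "X-KiloCode-MachineId" is illustrative; use the shared constant in real code.
	headers["X-KiloCode-MachineId"] = vscode.env.machineId
}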

+ 1 - 1
src/api/providers/mistral.ts

@@ -20,7 +20,7 @@ import { handleProviderError } from "./utils/error-handler"
 import { BaseProvider } from "./base-provider"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 import { DEFAULT_HEADERS } from "./constants" // kilocode_change
-import { streamSse } from "../../services/continuedev/core/fetch/stream" // kilocode_change
+import { streamSse } from "../../services/autocomplete/continuedev/core/fetch/stream" // kilocode_change
 import type { CompletionUsage } from "./openrouter" // kilocode_change
 import type { FimHandler } from "./kilocode/FimHandler" // kilocode_change
 

+ 85 - 0
src/api/providers/moonshot.ts

@@ -7,6 +7,22 @@ import { getModelParams } from "../transform/model-params"
 
 import { OpenAICompatibleHandler, OpenAICompatibleConfig } from "./openai-compatible"
 
+// kilocode_change start
+const STRICT_KIMI_TEMPERATURES = {
+	"kimi-k2.5": {
+		thinkingEnabled: 1.0,
+		thinkingDisabled: moonshotModels["kimi-k2.5"].defaultTemperature ?? 0.6,
+	},
+	"kimi-for-coding": {
+		thinkingEnabled: 1.0,
+		thinkingDisabled: moonshotModels["kimi-for-coding"].defaultTemperature ?? 0.6,
+	},
+} as const
+
+type StrictKimiModelId = keyof typeof STRICT_KIMI_TEMPERATURES
+const STRICT_KIMI_MODELS = new Set(Object.keys(STRICT_KIMI_TEMPERATURES))
+// kilocode_change end
+
 export class MoonshotHandler extends OpenAICompatibleHandler {
 	constructor(options: ApiHandlerOptions) {
 		const modelId = options.apiModelId ?? moonshotDefaultModelId
@@ -67,4 +83,73 @@ export class MoonshotHandler extends OpenAICompatibleHandler {
 		// Moonshot always requires max_tokens
 		return this.options.modelMaxTokens || modelInfo.maxTokens || undefined
 	}
+
+	// kilocode_change start
+	private isStrictKimiModel(modelId: string): boolean {
+		return STRICT_KIMI_MODELS.has(modelId)
+	}
+
+	private getStrictKimiTemperatureConfig(modelId: string) {
+		if (!this.isStrictKimiModel(modelId)) {
+			return undefined
+		}
+
+		return STRICT_KIMI_TEMPERATURES[modelId as StrictKimiModelId]
+	}
+
+	private isStrictKimiThinkingEnabled(): boolean {
+		return this.options.enableReasoningEffort !== false
+	}
+
+	protected override getRequestTemperature(model: { id: string; temperature?: number }): number | undefined {
+		const strictTemperatureConfig = this.getStrictKimiTemperatureConfig(model.id)
+		if (strictTemperatureConfig) {
+			return this.isStrictKimiThinkingEnabled()
+				? strictTemperatureConfig.thinkingEnabled
+				: strictTemperatureConfig.thinkingDisabled
+		}
+
+		return super.getRequestTemperature(model)
+	}
+
+	protected override getProviderOptions(
+		model: { id: string; info: ModelInfo },
+		metadata?: Parameters<OpenAICompatibleHandler["getProviderOptions"]>[1],
+	): ReturnType<OpenAICompatibleHandler["getProviderOptions"]> {
+		const inheritedProviderOptions = super.getProviderOptions(model, metadata)
+		const existingMoonshotOptions =
+			inheritedProviderOptions?.moonshot &&
+			typeof inheritedProviderOptions.moonshot === "object" &&
+			!Array.isArray(inheritedProviderOptions.moonshot)
+				? inheritedProviderOptions.moonshot
+				: {}
+		const moonshotOptions = {
+			...existingMoonshotOptions,
+			...(metadata?.taskId ? { prompt_cache_key: metadata.taskId } : {}),
+		}
+
+		if (!this.isStrictKimiModel(model.id)) {
+			if (Object.keys(moonshotOptions).length === 0) {
+				return inheritedProviderOptions
+			}
+
+			return {
+				...inheritedProviderOptions,
+				moonshot: moonshotOptions,
+			}
+		}
+
+		const thinking = {
+			type: (this.isStrictKimiThinkingEnabled() ? "enabled" : "disabled") as "enabled" | "disabled",
+		}
+
+		return {
+			...inheritedProviderOptions,
+			moonshot: {
+				...moonshotOptions,
+				thinking,
+			},
+		}
+	}
+	// kilocode_change end
 }
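
The strict-temperature rule collapses to a small decision table; an illustrative helper whose values mirror STRICT_KIMI_TEMPERATURES above:

function strictKimiTemperature(thinkingEnabled: boolean, defaultTemperature = 0.6): number {
	// Thinking mode pins the temperature to 1.0; otherwise the model default applies.
	return thinkingEnabled ? 1.0 : defaultTemperature
}

console.log(strictKimiTemperature(true)) // 1.0
console.log(strictKimiTemperature(false)) // 0.6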

+ 13 - 0
src/api/providers/nano-gpt.ts

@@ -98,6 +98,19 @@ export class NanoGptHandler extends BaseProvider implements SingleCompletionHand
 				}
 			}
 
+			// Handle native reasoning fields (reasoning_content or reasoning)
+			if (delta) {
+				for (const key of ["reasoning_content", "reasoning"] as const) {
+					if (key in delta) {
+						const reasoning_content = ((delta as any)[key] as string | undefined) || ""
+						if (reasoning_content?.trim()) {
+							yield { type: "reasoning", text: reasoning_content }
+						}
+						break
+					}
+				}
+			}
+
 			// Handle native tool calls
 			if (delta?.tool_calls) {
 				for (const toolCall of delta.tool_calls) {

+ 103 - 3
src/api/providers/openai-compatible.ts

@@ -13,12 +13,14 @@ import type { ModelInfo } from "@roo-code/types"
 import type { ApiHandlerOptions } from "../../shared/api"
 
 import { convertToAiSdkMessages, convertToolsForAiSdk, processAiSdkStreamPart } from "../transform/ai-sdk"
-import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
+import { ApiStream, ApiStreamChunk, ApiStreamUsageChunk } from "../transform/stream"
 
 import { DEFAULT_HEADERS } from "./constants"
 import { BaseProvider } from "./base-provider"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 
+type StreamTextProviderOptions = Parameters<typeof streamText>[0]["providerOptions"]
+
 /**
  * Configuration options for creating an OpenAI-compatible provider.
  */
@@ -147,6 +149,27 @@ export abstract class OpenAICompatibleHandler extends BaseProvider implements Si
 		return maxTokens ?? undefined
 	}
 
+	// kilocode_change start
+	/**
+	 * Get the temperature to use for a request.
+	 * Subclasses can override this to enforce provider/model-specific behavior.
+	 */
+	protected getRequestTemperature(model: { temperature?: number }): number | undefined {
+		return model.temperature ?? this.config.temperature ?? 0
+	}
+
+	/**
+	 * Get provider-specific AI SDK options.
+	 * Subclasses can override this to pass provider-specific request fields.
+	 */
+	protected getProviderOptions(
+		_model: { id: string; info: ModelInfo },
+		_metadata?: ApiHandlerCreateMessageMetadata,
+	): StreamTextProviderOptions {
+		return undefined
+	}
+	// kilocode_change end
+
 	/**
 	 * Create a message stream using the AI SDK.
 	 */
@@ -170,23 +193,97 @@ export abstract class OpenAICompatibleHandler extends BaseProvider implements Si
 			model: languageModel,
 			system: systemPrompt,
 			messages: aiSdkMessages,
-			temperature: model.temperature ?? this.config.temperature ?? 0,
+			temperature: this.getRequestTemperature(model),
 			maxOutputTokens: this.getMaxOutputTokens(),
 			tools: aiSdkTools,
 			toolChoice: this.mapToolChoice(metadata?.tool_choice),
+			// kilocode_change
+			providerOptions: this.getProviderOptions(model, metadata),
 		}
 
 		// Use streamText for streaming responses
 		const result = streamText(requestOptions)
 
+		// kilocode_change start
+		// Moonshot/Kimi can stream tool calls as tool-input-* events without a final tool-call event.
+		// Accumulate these events and emit a complete tool_call chunk so Task can execute tools reliably.
+		const pendingToolInputs = new Map<string, { toolName: string; input: string }>()
+		const emittedToolCallIds = new Set<string>()
+
+		const emitToolCallFromPendingInput = (toolCallId: string): ApiStreamChunk | undefined => {
+			if (emittedToolCallIds.has(toolCallId)) {
+				pendingToolInputs.delete(toolCallId)
+				return undefined
+			}
+
+			const pending = pendingToolInputs.get(toolCallId)
+			pendingToolInputs.delete(toolCallId)
+
+			emittedToolCallIds.add(toolCallId)
+
+			return {
+				type: "tool_call",
+				id: toolCallId,
+				name: pending?.toolName || "unknown_tool",
+				arguments: pending?.input || "{}",
+			}
+		}
+		// kilocode_change end
+
 		// Process the full stream to get all events
 		for await (const part of result.fullStream) {
+			// kilocode_change start
+			if (part.type === "tool-input-start") {
+				const existing = pendingToolInputs.get(part.id)
+				pendingToolInputs.set(part.id, {
+					toolName: part.toolName || existing?.toolName || "unknown_tool",
+					input: existing?.input || "",
+				})
+				continue
+			}
+
+			if (part.type === "tool-input-delta") {
+				const existing = pendingToolInputs.get(part.id)
+				pendingToolInputs.set(part.id, {
+					toolName: existing?.toolName || "unknown_tool",
+					input: (existing?.input || "") + part.delta,
+				})
+				continue
+			}
+
+			if (part.type === "tool-input-end") {
+				const toolCallChunk = emitToolCallFromPendingInput(part.id)
+				if (toolCallChunk) {
+					yield toolCallChunk
+				}
+				continue
+			}
+
+			if (part.type === "tool-call") {
+				if (emittedToolCallIds.has(part.toolCallId)) {
+					continue
+				}
+				emittedToolCallIds.add(part.toolCallId)
+				pendingToolInputs.delete(part.toolCallId)
+			}
+			// kilocode_change end
+
 			// Use the processAiSdkStreamPart utility to convert stream parts
 			for (const chunk of processAiSdkStreamPart(part)) {
 				yield chunk
 			}
 		}
 
+		// kilocode_change start
+		// Flush any unfinished tool-input streams at end-of-stream.
+		for (const toolCallId of pendingToolInputs.keys()) {
+			const toolCallChunk = emitToolCallFromPendingInput(toolCallId)
+			if (toolCallChunk) {
+				yield toolCallChunk
+			}
+		}
+		// kilocode_change end
+
 		// Yield usage metrics at the end
 		const usage = await result.usage
 		if (usage) {
@@ -199,12 +296,15 @@ export abstract class OpenAICompatibleHandler extends BaseProvider implements Si
 	 */
 	async completePrompt(prompt: string): Promise<string> {
 		const languageModel = this.getLanguageModel()
+		const model = this.getModel()
 
 		const { text } = await generateText({
 			model: languageModel,
 			prompt,
 			maxOutputTokens: this.getMaxOutputTokens(),
-			temperature: this.config.temperature ?? 0,
+			temperature: this.getRequestTemperature(model),
+			// kilocode_change
+			providerOptions: this.getProviderOptions(model),
 		})
 
 		return text
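
The tool-input accumulation added to createMessage follows a simple start/delta/end contract; a standalone sketch of the same bookkeeping:

const pending = new Map<string, { toolName: string; input: string }>()

function onStart(id: string, toolName: string) {
	pending.set(id, { toolName, input: "" })
}

function onDelta(id: string, delta: string) {
	const entry = pending.get(id) ?? { toolName: "unknown_tool", input: "" }
	pending.set(id, { ...entry, input: entry.input + delta })
}

function onEnd(id: string) {
	const entry = pending.get(id)
	pending.delete(id)
	// Flush one complete tool call, falling back to the same defaults as above.
	return { type: "tool_call", id, name: entry?.toolName ?? "unknown_tool", arguments: entry?.input || "{}" }
}

onStart("t1", "read_file")
onDelta("t1", '{"path":')
onDelta("t1", '"a.ts"}')
console.log(onEnd("t1")) // { type: "tool_call", id: "t1", name: "read_file", arguments: '{"path":"a.ts"}' }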

+ 13 - 9
src/api/providers/zai.ts

@@ -39,10 +39,11 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
 		})
 	}
 
+	// kilocode_change start
 	/**
-	 * Override createStream to handle GLM-4.7's thinking mode.
-	 * GLM-4.7 has thinking enabled by default in the API, so we need to
-	 * explicitly send { type: "disabled" } when the user turns off reasoning.
+	 * Override createStream to handle Z.ai models with thinking mode.
+	 * Thinking-capable models have reasoning enabled by default in the API,
+	 * so we explicitly send { type: "disabled" } when users turn reasoning off.
 	 */
 	protected override createStream(
 		systemPrompt: string,
@@ -50,13 +51,13 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
 		metadata?: ApiHandlerCreateMessageMetadata,
 		requestOptions?: OpenAI.RequestOptions,
 	) {
-		const { id: modelId, info } = this.getModel()
+		const { info } = this.getModel()
 
-		// Check if this is a GLM-4.7 model with thinking support
-		const isThinkingModel = modelId === "glm-4.7" && Array.isArray(info.supportsReasoningEffort)
+		// Thinking models advertise explicit reasoning effort support.
+		const isThinkingModel = Array.isArray(info.supportsReasoningEffort)
 
 		if (isThinkingModel) {
-			// For GLM-4.7, thinking is ON by default in the API.
+			// For thinking-enabled models, thinking is ON by default in the API.
 			// We need to explicitly disable it when reasoning is off.
 			const useReasoning = shouldUseReasoningEffort({ model: info, settings: this.options })
 
@@ -67,9 +68,11 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
 		// For non-thinking models, use the default behavior
 		return super.createStream(systemPrompt, messages, metadata, requestOptions)
 	}
+	// kilocode_change end
 
+	// kilocode_change start
 	/**
-	 * Creates a stream with explicit thinking control for GLM-4.7
+	 * Creates a stream with explicit thinking control for Z.ai thinking models.
 	 */
 	private createStreamWithThinking(
 		systemPrompt: string,
@@ -99,7 +102,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
 			messages: [{ role: "system", content: systemPrompt }, ...convertedMessages],
 			stream: true,
 			stream_options: { include_usage: true },
-			// For GLM-4.7: thinking is ON by default, so we explicitly disable when needed
+			// Thinking is ON by default, so we explicitly disable when needed.
 			thinking: useReasoning ? { type: "enabled" } : { type: "disabled" },
 			...(metadata?.tools && { tools: this.convertToolsForOpenAI(metadata.tools) }),
 			...(metadata?.tool_choice && { tool_choice: metadata.tool_choice }),
@@ -110,4 +113,5 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<string> {
 
 		return this.client.chat.completions.create(params)
 	}
+	// kilocode_change end
 }

+ 501 - 0
src/api/providers/zenmux.ts

@@ -0,0 +1,501 @@
+// kilocode_change - new file
+import OpenAI from "openai"
+import type Anthropic from "@anthropic-ai/sdk"
+import type { ModelInfo } from "@roo-code/types"
+import { zenmuxDefaultModelId, zenmuxDefaultModelInfo } from "@roo-code/types"
+import { ApiProviderError } from "@roo-code/types"
+import { TelemetryService } from "@roo-code/telemetry"
+
+import { ApiHandlerOptions, ModelRecord } from "../../shared/api"
+
+import { addCacheBreakpoints as addAnthropicCacheBreakpoints } from "../transform/caching/anthropic"
+import { addCacheBreakpoints as addGeminiCacheBreakpoints } from "../transform/caching/gemini"
+import type { OpenRouterReasoningParams } from "../transform/reasoning"
+import { getModelParams } from "../transform/model-params"
+
+import { getModels } from "./fetchers/modelCache"
+
+import { DEFAULT_HEADERS } from "./constants"
+import { BaseProvider } from "./base-provider"
+import { verifyFinishReason } from "./kilocode/verifyFinishReason"
+
+import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from "../index"
+import { ChatCompletionTool } from "openai/resources"
+import { convertToOpenAiMessages } from "../transform/openai-format"
+import { convertToR1Format } from "../transform/r1-format"
+import { resolveToolProtocol } from "../../utils/resolveToolProtocol"
+import { TOOL_PROTOCOL } from "@roo-code/types"
+import { ApiStreamChunk } from "../transform/stream"
+import { NativeToolCallParser } from "../../core/assistant-message/NativeToolCallParser"
+import { KiloCodeChunkSchema } from "./kilocode/chunk-schema"
+
+// ZenMux provider parameters
+type ZenMuxProviderParams = {
+	order?: string[]
+	only?: string[]
+	allow_fallbacks?: boolean
+	data_collection?: "allow" | "deny"
+	sort?: "price" | "throughput" | "latency"
+	zdr?: boolean
+}
+
+// ZenMux-specific response types
+type ZenMuxChatCompletionParams = Omit<OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming, "model"> & {
+	model: string
+	provider?: ZenMuxProviderParams
+	reasoning?: OpenRouterReasoningParams
+}
+
+// ZenMux error structure
+interface ZenMuxErrorResponse {
+	message?: string
+	code?: number
+	metadata?: { raw?: string }
+}
+
+// Usage interface for cost calculation
+interface CompletionUsage {
+	completion_tokens?: number
+	completion_tokens_details?: {
+		reasoning_tokens?: number
+	}
+	prompt_tokens?: number
+	prompt_tokens_details?: {
+		cached_tokens?: number
+	}
+	total_tokens?: number
+	cost?: number
+	cost_details?: {
+		upstream_inference_cost?: number
+	}
+}
+
+const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3
+
+export class ZenMuxHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
+	private client: OpenAI
+	protected models: ModelRecord = {}
+	protected endpoints: ModelRecord = {}
+	lastGenerationId?: string
+
+	protected get providerName(): "ZenMux" {
+		return "ZenMux" as const
+	}
+
+	private currentReasoningDetails: any[] = []
+
+	constructor(options: ApiHandlerOptions) {
+		super()
+		this.options = options
+
+		const baseURL = this.options.zenmuxBaseUrl || "https://zenmux.ai/api/v1"
+		const apiKey = this.options.zenmuxApiKey ?? "not-provided"
+
+		this.client = new OpenAI({
+			baseURL: baseURL,
+			apiKey: apiKey,
+			defaultHeaders: DEFAULT_HEADERS,
+		})
+
+		// Load models asynchronously to populate cache before getModel() is called
+		this.loadDynamicModels().catch((error) => {
+			console.error("[ZenMuxHandler] Failed to load dynamic models:", error)
+		})
+	}
+
+	private async loadDynamicModels(): Promise<void> {
+		try {
+			const models = await getModels({ provider: "zenmux" })
+			this.models = models
+		} catch (error) {
+			console.error("[ZenMuxHandler] Error loading dynamic models:", {
+				error: error instanceof Error ? error.message : String(error),
+				stack: error instanceof Error ? error.stack : undefined,
+			})
+		}
+	}
+	async createZenMuxStream(
+		client: OpenAI,
+		openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[],
+		model: { id: string; info: ModelInfo },
+		_reasoningEffort?: string,
+		thinkingBudgetTokens?: number,
+		zenMuxProviderSorting?: string,
+		tools?: Array<ChatCompletionTool>,
+		_geminiThinkingLevel?: string,
+	) {
+		// Messages arrive already converted and preprocessed by createMessage
+		// (R1 format, Gemini reasoning injection, cache breakpoints), so they are
+		// used as-is rather than rebuilt from the raw Anthropic messages.
+
+		// Build reasoning config if thinking budget is set
+		let reasoning: { max_tokens: number } | undefined
+		if (thinkingBudgetTokens && thinkingBudgetTokens > 0) {
+			reasoning = { max_tokens: thinkingBudgetTokens }
+		}
+
+		// @ts-ignore: extended params (reasoning, provider) are not in the OpenAI SDK types
+		const stream = await client.chat.completions.create({
+			model: model.id,
+			messages: openAiMessages,
+			stream: true,
+			stream_options: { include_usage: true },
+			...(reasoning ? { reasoning } : {}),
+			...(zenMuxProviderSorting && zenMuxProviderSorting !== ""
+				? {
+						provider: {
+							routing: {
+								type: "priority",
+								primary_factor: zenMuxProviderSorting,
+							},
+						},
+					}
+				: {}),
+			...this.getOpenAIToolParams(tools),
+		})
+
+		return stream
+	}
+	getOpenAIToolParams(tools?: ChatCompletionTool[], enableParallelToolCalls: boolean = false) {
+		return tools?.length
+			? {
+					tools,
+					tool_choice: "auto",
+					parallel_tool_calls: enableParallelToolCalls,
+				}
+			: {
+					tools: undefined,
+				}
+	}
+
+	getTotalCost(lastUsage: CompletionUsage): number {
+		return (lastUsage.cost_details?.upstream_inference_cost || 0) + (lastUsage.cost || 0)
+	}
+
+	private handleStreamingError(error: ZenMuxErrorResponse, modelId: string, operation: string): never {
+		const rawErrorMessage = error?.metadata?.raw || error?.message
+
+		const apiError = Object.assign(
+			new ApiProviderError(
+				rawErrorMessage ?? "Unknown error",
+				this.providerName,
+				modelId,
+				operation,
+				error?.code,
+			),
+			{ status: error?.code, error: { message: error?.message, metadata: error?.metadata } },
+		)
+
+		TelemetryService.instance.captureException(apiError)
+
+		throw new Error(`ZenMux API Error ${error?.code}: ${rawErrorMessage}`)
+	}
+	async *createMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): AsyncGenerator<ApiStreamChunk> {
+		this.lastGenerationId = undefined
+		const model = await this.fetchModel()
+
+		let { id: modelId } = model
+
+		// Reset reasoning_details accumulator for this request
+		this.currentReasoningDetails = []
+
+		// Convert Anthropic messages to OpenAI format.
+		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]
+
+		// DeepSeek strongly recommends using the user role instead of the system role.
+		if (modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning") {
+			openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+		}
+
+		// Process reasoning_details when switching models to Gemini for native tool call compatibility
+		const toolProtocol = resolveToolProtocol(this.options, model.info)
+		const isNativeProtocol = toolProtocol === TOOL_PROTOCOL.NATIVE
+		const isGemini = modelId.startsWith("google/gemini")
+
+		// For Gemini with native protocol: inject fake reasoning.encrypted blocks for tool calls
+		// This is required when switching from other models to Gemini to satisfy API validation
+		if (isNativeProtocol && isGemini) {
+			openAiMessages = openAiMessages.map((msg) => {
+				if (msg.role === "assistant") {
+					const toolCalls = (msg as any).tool_calls as any[] | undefined
+					const existingDetails = (msg as any).reasoning_details as any[] | undefined
+
+					// Only inject if there are tool calls and no existing encrypted reasoning
+					if (toolCalls && toolCalls.length > 0) {
+						const hasEncrypted = existingDetails?.some((d) => d.type === "reasoning.encrypted") ?? false
+
+						if (!hasEncrypted) {
+							const fakeEncrypted = toolCalls.map((tc, idx) => ({
+								id: tc.id,
+								type: "reasoning.encrypted",
+								data: "skip_thought_signature_validator",
+								format: "google-gemini-v1",
+								index: (existingDetails?.length ?? 0) + idx,
+							}))
+
+							return {
+								...msg,
+								reasoning_details: [...(existingDetails ?? []), ...fakeEncrypted],
+							}
+						}
+					}
+				}
+				return msg
+			})
+		}
+
+		// Add cache breakpoints for supported models
+		if (modelId.startsWith("anthropic/claude") || modelId.startsWith("google/gemini")) {
+			if (modelId.startsWith("google")) {
+				addGeminiCacheBreakpoints(systemPrompt, openAiMessages)
+			} else {
+				addAnthropicCacheBreakpoints(systemPrompt, openAiMessages)
+			}
+		}
+
+		let stream
+		try {
+			stream = await this.createZenMuxStream(
+				this.client,
+				openAiMessages,
+				model,
+				this.options.reasoningEffort,
+				this.options.modelMaxThinkingTokens,
+				this.options.zenmuxProviderSort,
+				metadata?.tools,
+			)
+		} catch (error) {
+			const errorMessage = error instanceof Error ? error.message : String(error)
+			const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "createMessage")
+			TelemetryService.instance.captureException(apiError)
+			throw error
+		}
+
+		let lastUsage: CompletionUsage | undefined = undefined
+		let inferenceProvider: string | undefined
+		// Accumulator for reasoning_details: accumulate text by type-index key
+		const reasoningDetailsAccumulator = new Map<
+			string,
+			{
+				type: string
+				text?: string
+				summary?: string
+				data?: string
+				id?: string | null
+				format?: string
+				signature?: string
+				index: number
+			}
+		>()
+
+		for await (const chunk of stream) {
+			// Handle ZenMux streaming error response
+			if ("error" in chunk) {
+				this.handleStreamingError(chunk.error as ZenMuxErrorResponse, modelId, "createMessage")
+			}
+
+			const kiloCodeChunk = KiloCodeChunkSchema.safeParse(chunk).data
+			inferenceProvider =
+				kiloCodeChunk?.choices?.[0]?.delta?.provider_metadata?.gateway?.routing?.resolvedProvider ??
+				kiloCodeChunk?.provider ??
+				inferenceProvider
+
+			verifyFinishReason(chunk.choices[0])
+			const delta = chunk.choices[0]?.delta
+			const finishReason = chunk.choices[0]?.finish_reason
+
+			if (delta) {
+				// Handle reasoning_details array format
+				const deltaWithReasoning = delta as typeof delta & {
+					reasoning_details?: Array<{
+						type: string
+						text?: string
+						summary?: string
+						data?: string
+						id?: string | null
+						format?: string
+						signature?: string
+						index?: number
+					}>
+				}
+
+				if (deltaWithReasoning.reasoning_details && Array.isArray(deltaWithReasoning.reasoning_details)) {
+					for (const detail of deltaWithReasoning.reasoning_details) {
+						const index = detail.index ?? 0
+						const key = `${detail.type}-${index}`
+						const existing = reasoningDetailsAccumulator.get(key)
+
+						if (existing) {
+							// Accumulate text/summary/data for existing reasoning detail
+							if (detail.text !== undefined) {
+								existing.text = (existing.text || "") + detail.text
+							}
+							if (detail.summary !== undefined) {
+								existing.summary = (existing.summary || "") + detail.summary
+							}
+							if (detail.data !== undefined) {
+								existing.data = (existing.data || "") + detail.data
+							}
+							// Update other fields if provided
+							if (detail.id !== undefined) existing.id = detail.id
+							if (detail.format !== undefined) existing.format = detail.format
+							if (detail.signature !== undefined) existing.signature = detail.signature
+						} else {
+							// Start new reasoning detail accumulation
+							reasoningDetailsAccumulator.set(key, {
+								type: detail.type,
+								text: detail.text,
+								summary: detail.summary,
+								data: detail.data,
+								id: detail.id,
+								format: detail.format,
+								signature: detail.signature,
+								index,
+							})
+						}
+
+						// Yield text for display (still fragmented for live streaming)
+						let reasoningText: string | undefined
+						if (detail.type === "reasoning.text" && typeof detail.text === "string") {
+							reasoningText = detail.text
+						} else if (detail.type === "reasoning.summary" && typeof detail.summary === "string") {
+							reasoningText = detail.summary
+						}
+
+						if (reasoningText) {
+							yield { type: "reasoning", text: reasoningText } as ApiStreamChunk
+						}
+					}
+				} else if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") {
+					// Handle legacy reasoning format
+					yield { type: "reasoning", text: delta.reasoning } as ApiStreamChunk
+				}
+
+				if ("reasoning_content" in delta && typeof delta.reasoning_content === "string") {
+					yield { type: "reasoning", text: delta.reasoning_content } as ApiStreamChunk
+				}
+
+				// Check for tool calls in delta
+				if ("tool_calls" in delta && Array.isArray(delta.tool_calls)) {
+					for (const toolCall of delta.tool_calls) {
+						yield {
+							type: "tool_call_partial",
+							index: toolCall.index,
+							id: toolCall.id,
+							name: toolCall.function?.name,
+							arguments: toolCall.function?.arguments,
+						}
+					}
+				}
+
+				if (delta.content) {
+					yield { type: "text", text: delta.content }
+				}
+			}
+
+			// Process finish_reason to emit tool_call_end events
+			if (finishReason) {
+				const endEvents = NativeToolCallParser.processFinishReason(finishReason)
+				for (const event of endEvents) {
+					yield event
+				}
+			}
+
+			if (chunk.usage) {
+				lastUsage = chunk.usage
+			}
+		}
+
+		// After streaming completes, store the accumulated reasoning_details
+		if (reasoningDetailsAccumulator.size > 0) {
+			this.currentReasoningDetails = Array.from(reasoningDetailsAccumulator.values())
+		}
+
+		if (lastUsage) {
+			yield {
+				type: "usage",
+				inputTokens: lastUsage.prompt_tokens || 0,
+				outputTokens: lastUsage.completion_tokens || 0,
+				cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens,
+				reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens,
+				totalCost: this.getTotalCost(lastUsage),
+				inferenceProvider,
+			}
+		}
+	}
+
+	getReasoningDetails(): any[] | undefined {
+		return this.currentReasoningDetails.length > 0 ? this.currentReasoningDetails : undefined
+	}
+	public async fetchModel() {
+		const models = await getModels({ provider: "zenmux" })
+		this.models = models
+		return this.getModel()
+	}
+
+	override getModel() {
+		const id = this.options.zenmuxModelId ?? zenmuxDefaultModelId
+		let info = this.models[id] ?? zenmuxDefaultModelInfo
+
+		const isDeepSeekR1 = id.startsWith("deepseek/deepseek-r1") || id === "perplexity/sonar-reasoning"
+
+		const params = getModelParams({
+			format: "zenmux",
+			modelId: id,
+			model: info,
+			settings: this.options,
+			defaultTemperature: isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0,
+		})
+
+		return { id, info, topP: isDeepSeekR1 ? 0.95 : undefined, ...params }
+	}
+
+	async completePrompt(prompt: string) {
+		let { id: modelId, maxTokens, temperature, reasoning, verbosity } = await this.fetchModel()
+
+		// ZenMux `verbosity` supports "low" | "medium" | "high" (and sometimes null),
+		// while our shared model params may include "max". Map "max" to the closest
+		// supported value to satisfy the API/SDK typing.
+		const zenMuxVerbosity: "low" | "medium" | "high" | null | undefined = verbosity === "max" ? "high" : verbosity
+
+		const completionParams: ZenMuxChatCompletionParams = {
+			model: modelId,
+			max_tokens: maxTokens,
+			temperature,
+			messages: [{ role: "user", content: prompt }],
+			stream: false,
+			...(reasoning && { reasoning }),
+			verbosity: zenMuxVerbosity,
+		}
+
+		let response
+
+		try {
+			response = await this.client.chat.completions.create(completionParams)
+		} catch (error) {
+			const errorMessage = error instanceof Error ? error.message : String(error)
+			const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "completePrompt")
+			TelemetryService.instance.captureException(apiError)
+			throw error
+		}
+
+		if ("error" in response) {
+			this.handleStreamingError(response.error as ZenMuxErrorResponse, modelId, "completePrompt")
+		}
+
+		const completion = response as OpenAI.Chat.ChatCompletion
+		return completion.choices[0]?.message?.content || ""
+	}
+}
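
The reasoning_details merge keys fragments by type and index and concatenates their text; a standalone sketch of the accumulation:

const acc = new Map<string, { type: string; text: string; index: number }>()

function addDetail(detail: { type: string; text?: string; index?: number }) {
	const index = detail.index ?? 0
	const key = `${detail.type}-${index}`
	const existing = acc.get(key)
	if (existing) {
		existing.text += detail.text ?? ""
	} else {
		acc.set(key, { type: detail.type, text: detail.text ?? "", index })
	}
}

addDetail({ type: "reasoning.text", text: "Step 1: ", index: 0 })
addDetail({ type: "reasoning.text", text: "inspect the file", index: 0 })
console.log([...acc.values()])
// [{ type: "reasoning.text", text: "Step 1: inspect the file", index: 0 }]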

+ 133 - 0
src/api/transform/__tests__/ai-sdk.spec.ts

@@ -284,6 +284,139 @@ describe("AI SDK conversion utilities", () => {
 			})
 		})
 
+		// kilocode_change start
+		it("preserves assistant text/tool-call/text ordering", () => {
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "assistant",
+					content: [
+						{ type: "text", text: "Before tool call" },
+						{
+							type: "tool_use",
+							id: "call_789",
+							name: "read_file",
+							input: { path: "before.ts" },
+						},
+						{ type: "text", text: "After tool call" },
+					],
+				},
+			]
+
+			const result = convertToAiSdkMessages(messages)
+
+			expect(result).toHaveLength(1)
+			expect(result[0]).toEqual({
+				role: "assistant",
+				content: [
+					{ type: "text", text: "Before tool call" },
+					{
+						type: "tool-call",
+						toolCallId: "call_789",
+						toolName: "read_file",
+						input: { path: "before.ts" },
+					},
+					{ type: "text", text: "After tool call" },
+				],
+			})
+		})
+
+		it("preserves user text before tool results", () => {
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "assistant",
+					content: [
+						{
+							type: "tool_use",
+							id: "call_999",
+							name: "read_file",
+							input: { path: "ordered.ts" },
+						},
+					],
+				},
+				{
+					role: "user",
+					content: [
+						{ type: "text", text: "Context before tool result" },
+						{
+							type: "tool_result",
+							tool_use_id: "call_999",
+							content: "ordered-result",
+						},
+					],
+				},
+			]
+
+			const result = convertToAiSdkMessages(messages)
+
+			expect(result).toHaveLength(3)
+			expect(result[0]).toEqual({
+				role: "assistant",
+				content: [
+					{
+						type: "tool-call",
+						toolCallId: "call_999",
+						toolName: "read_file",
+						input: { path: "ordered.ts" },
+					},
+				],
+			})
+			expect(result[1]).toEqual({
+				role: "user",
+				content: [{ type: "text", text: "Context before tool result" }],
+			})
+			expect(result[2]).toEqual({
+				role: "tool",
+				content: [
+					{
+						type: "tool-result",
+						toolCallId: "call_999",
+						toolName: "read_file",
+						output: { type: "text", value: "ordered-result" },
+					},
+				],
+			})
+		})
+
+		it("preserves assistant reasoning blocks via openaiCompatible metadata", () => {
+			const messages = [
+				{
+					role: "assistant",
+					content: [
+						{ type: "reasoning", text: "Step 1 reasoning" },
+						{ type: "text", text: "I will call a tool" },
+						{
+							type: "tool_use",
+							id: "call_reasoning",
+							name: "read_file",
+							input: { path: "reasoning.ts" },
+						},
+					],
+				},
+			] as Anthropic.Messages.MessageParam[]
+
+			const result = convertToAiSdkMessages(messages as any)
+
+			expect(result).toHaveLength(1)
+			expect(result[0]).toMatchObject({
+				role: "assistant",
+				content: [
+					{ type: "text", text: "I will call a tool" },
+					{
+						type: "tool-call",
+						toolCallId: "call_reasoning",
+						toolName: "read_file",
+						input: { path: "reasoning.ts" },
+					},
+				],
+				providerOptions: {
+					openaiCompatible: {
+						reasoning_content: "Step 1 reasoning",
+					},
+				},
+			})
+		})
+		// kilocode_change end
+
 		it("handles empty assistant content", () => {
 			const messages: Anthropic.Messages.MessageParam[] = [
 				{

+ 36 - 0
src/api/transform/__tests__/model-params.spec.ts

@@ -684,6 +684,42 @@ describe("getModelParams", () => {
 		})
 	})
 
+	// kilocode_change start
+	describe("Adaptive thinking models", () => {
+		it("should default to thinking temperature when adaptive thinking is enabled and unset", () => {
+			const model: ModelInfo = {
+				...baseModel,
+				supportsAdaptiveThinking: true,
+				defaultTemperature: 0.6,
+			}
+
+			const result = getModelParams({
+				...openaiParams,
+				settings: {},
+				model,
+			})
+
+			expect(result.temperature).toBe(1.0)
+		})
+
+		it("should use default non-thinking temperature when adaptive thinking is explicitly disabled", () => {
+			const model: ModelInfo = {
+				...baseModel,
+				supportsAdaptiveThinking: true,
+				defaultTemperature: 0.6,
+			}
+
+			const result = getModelParams({
+				...openaiParams,
+				settings: { enableReasoningEffort: false },
+				model,
+			})
+
+			expect(result.temperature).toBe(0.6)
+		})
+	})
+	// kilocode_change end
+
 	describe("Hybrid reasoning models (supportsReasoningEffort)", () => {
 		const model: ModelInfo = {
 			...baseModel,

+ 59 - 0
src/api/transform/__tests__/reasoning.spec.ts

@@ -838,6 +838,65 @@ describe("reasoning.ts", () => {
 			const result = getGeminiReasoning(options) as GeminiReasoningParams | undefined
 			expect(result).toEqual({ thinkingLevel: "medium", includeThoughts: true })
 		})
+
+		// kilocode_change start
+		it("should return undefined for budget-only models when budget is not enabled (fixes issue #4490)", () => {
+			// This test covers the bug where gemini-2.5-flash would fail with
+			// "Thinking level is not supported for this model" because thinkingLevel
+			// was being sent to a model that only supports thinkingBudget
+			const geminiFlashModel: ModelInfo = {
+				...baseModel,
+				// gemini-2.5-flash supports budget but NOT effort-based reasoning
+				supportsReasoningBudget: true,
+				// Note: no supportsReasoningEffort, no requiredReasoningBudget
+			}
+
+			const settings: ProviderSettings = {
+				apiProvider: "gemini",
+				// User may have a reasoningEffort set from a different model
+				reasoningEffort: "high",
+				// But enableReasoningEffort is not true, so budget won't be used
+			}
+
+			const options: GetModelReasoningOptions = {
+				model: geminiFlashModel,
+				reasoningBudget: 4096,
+				reasoningEffort: "high",
+				settings,
+			}
+
+			const result = getGeminiReasoning(options)
+			// Should return undefined, NOT { thinkingLevel: "high", includeThoughts: true }
+			// because this model doesn't support thinkingLevel
+			expect(result).toBeUndefined()
+		})
+
+		it("should return undefined for budget-only models even with explicit effort setting", () => {
+			// Models like gemini-2.5-flash only support budget-based reasoning
+			const budgetOnlyModel: ModelInfo = {
+				...baseModel,
+				supportsReasoningBudget: true,
+				maxThinkingTokens: 24576,
+				// Critically: no supportsReasoningEffort
+			}
+
+			const settings: ProviderSettings = {
+				apiProvider: "gemini",
+				reasoningEffort: "medium",
+			}
+
+			const options: GetModelReasoningOptions = {
+				model: budgetOnlyModel,
+				reasoningBudget: 8192,
+				reasoningEffort: "medium",
+				settings,
+			}
+
+			const result = getGeminiReasoning(options)
+			// Must not send thinkingLevel to a model that doesn't support it
+			expect(result).toBeUndefined()
+		})
+		// kilocode_change end
 	})
 
 	describe("Integration scenarios", () => {

+ 80 - 37
src/api/transform/ai-sdk.ts

@@ -5,7 +5,13 @@
 
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
-import { tool as createTool, jsonSchema, type ModelMessage, type TextStreamPart } from "ai"
+import {
+	tool as createTool,
+	jsonSchema,
+	type AssistantModelMessage,
+	type ModelMessage,
+	type TextStreamPart,
+} from "ai"
 import type { ApiStreamChunk } from "./stream"
 
 /**
@@ -38,6 +44,8 @@ export function convertToAiSdkMessages(messages: Anthropic.Messages.MessageParam
 			})
 		} else {
 			if (message.role === "user") {
+				// kilocode_change start
+				// Keep user text/image parts and tool results in their original order.
 				const parts: Array<
 					{ type: "text"; text: string } | { type: "image"; image: string; mimeType?: string }
 				> = []
@@ -48,10 +56,34 @@ export function convertToAiSdkMessages(messages: Anthropic.Messages.MessageParam
 					output: { type: "text"; value: string }
 				}> = []
 
+				const flushUserParts = () => {
+					if (parts.length === 0) {
+						return
+					}
+					modelMessages.push({
+						role: "user",
+						content: [...parts],
+					} as ModelMessage)
+					parts.length = 0
+				}
+
+				const flushToolResults = () => {
+					if (toolResults.length === 0) {
+						return
+					}
+					modelMessages.push({
+						role: "tool",
+						content: [...toolResults],
+					} as ModelMessage)
+					toolResults.length = 0
+				}
+
 				for (const part of message.content) {
 					if (part.type === "text") {
+						flushToolResults()
 						parts.push({ type: "text", text: part.text })
 					} else if (part.type === "image") {
+						flushToolResults()
 						// Handle both base64 and URL source types
 						const source = part.source as { type: string; media_type?: string; data?: string; url?: string }
 						if (source.type === "base64" && source.media_type && source.data) {
@@ -67,6 +99,7 @@ export function convertToAiSdkMessages(messages: Anthropic.Messages.MessageParam
 							})
 						}
 					} else if (part.type === "tool_result") {
+						flushUserParts()
 						// Convert tool results to string content
 						let content: string
 						if (typeof part.content === "string") {
@@ -92,59 +125,69 @@ export function convertToAiSdkMessages(messages: Anthropic.Messages.MessageParam
 					}
 				}
 
-				// AI SDK requires tool results in separate "tool" role messages
-				// UserContent only supports: string | Array<TextPart | ImagePart | FilePart>
-				// ToolContent (for role: "tool") supports: Array<ToolResultPart | ToolApprovalResponse>
-				if (toolResults.length > 0) {
-					modelMessages.push({
-						role: "tool",
-						content: toolResults,
-					} as ModelMessage)
-				}
-
-				// Add user message with only text/image content (no tool results)
-				if (parts.length > 0) {
-					modelMessages.push({
-						role: "user",
-						content: parts,
-					} as ModelMessage)
-				}
+				flushToolResults()
+				flushUserParts()
+				// kilocode_change end
 			} else if (message.role === "assistant") {
+				// kilocode_change start
+				// Keep assistant text and tool calls in original order.
 				const textParts: string[] = []
-				const toolCalls: Array<{
-					type: "tool-call"
-					toolCallId: string
-					toolName: string
-					input: unknown
-				}> = []
+				const content: Array<
+					| { type: "text"; text: string }
+					| { type: "tool-call"; toolCallId: string; toolName: string; input: unknown }
+				> = []
+				const reasoningParts: string[] = []
+
+				const flushText = () => {
+					if (textParts.length === 0) {
+						return
+					}
+					content.push({ type: "text", text: textParts.join("\n") })
+					textParts.length = 0
+				}
 
 				for (const part of message.content) {
 					if (part.type === "text") {
 						textParts.push(part.text)
 					} else if (part.type === "tool_use") {
-						toolCalls.push({
+						flushText()
+						const toolCall = {
 							type: "tool-call",
 							toolCallId: part.id,
 							toolName: part.name,
 							input: part.input,
-						})
+						} as const
+						content.push(toolCall)
+					} else if (
+						(part as { type?: string }).type === "reasoning" &&
+						typeof (part as { text?: unknown }).text === "string"
+					) {
+						const reasoningPart = part as { text?: unknown }
+						reasoningParts.push(reasoningPart.text as string)
 					}
 				}
 
-				const content: Array<
-					| { type: "text"; text: string }
-					| { type: "tool-call"; toolCallId: string; toolName: string; input: unknown }
-				> = []
-
-				if (textParts.length > 0) {
-					content.push({ type: "text", text: textParts.join("\n") })
-				}
-				content.push(...toolCalls)
+				flushText()
 
-				modelMessages.push({
+				const aiSdkAssistantMessage: AssistantModelMessage = {
 					role: "assistant",
 					content: content.length > 0 ? content : [{ type: "text", text: "" }],
-				} as ModelMessage)
+				}
+
+				const messageWithReasoning = message as { reasoning_content?: string }
+				const reasoningContent = messageWithReasoning.reasoning_content || reasoningParts.join("\n").trim()
+				if (reasoningContent) {
+					aiSdkAssistantMessage.providerOptions = {
+						...(aiSdkAssistantMessage.providerOptions || {}),
+						// OpenAI-compatible AI SDK models read per-message metadata from providerOptions.openaiCompatible.
+						openaiCompatible: {
+							reasoning_content: reasoningContent,
+						},
+					}
+				}
+
+				modelMessages.push(aiSdkAssistantMessage)
+				// kilocode_change end
 			}
 		}
 	}
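
For illustration, a minimal sketch of the ordering this change preserves (the input is a hypothetical Anthropic-format assistant turn; the expected output is inferred from the flush logic above, not taken from a test):

// Assumed input: text and tool calls interleaved in one assistant message.
const assistantTurn = {
	role: "assistant",
	content: [
		{ type: "text", text: "Let me check the first file." },
		{ type: "tool_use", id: "call_1", name: "read_file", input: { path: "a.ts" } },
		{ type: "text", text: "Now the second one." },
		{ type: "tool_use", id: "call_2", name: "read_file", input: { path: "b.ts" } },
	],
}
// Resulting AI SDK assistant content keeps the original order:
// [ {text "Let me check the first file."}, {tool-call call_1},
//   {text "Now the second one."}, {tool-call call_2} ]
// Previously, both tool calls would have been appended after all text parts.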

+ 15 - 3
src/api/transform/model-params.ts

@@ -25,8 +25,7 @@ import {
 	getGeminiReasoning,
 	getOpenRouterReasoning,
 } from "./reasoning"
-
-type Format = "anthropic" | "openai" | "gemini" | "openrouter"
+type Format = "anthropic" | "openai" | "gemini" | "openrouter" | "zenmux"
 
 type GetModelParamsOptions<T extends Format> = {
 	format: T
@@ -65,13 +64,26 @@ type OpenRouterModelParams = {
 	reasoning: OpenRouterReasoningParams | undefined
 } & BaseModelParams
 
-export type ModelParams = AnthropicModelParams | OpenAiModelParams | GeminiModelParams | OpenRouterModelParams
+// kilocode_change start
+type ZenMuxModelParams = {
+	format: "zenmux"
+	reasoning: OpenRouterReasoningParams | undefined
+} & BaseModelParams
+// kilocode_change end
+
+export type ModelParams =
+	| AnthropicModelParams
+	| OpenAiModelParams
+	| GeminiModelParams
+	| OpenRouterModelParams
+	| ZenMuxModelParams // kilocode_change
 
 // Function overloads for specific return types
 export function getModelParams(options: GetModelParamsOptions<"anthropic">): AnthropicModelParams
 export function getModelParams(options: GetModelParamsOptions<"openai">): OpenAiModelParams
 export function getModelParams(options: GetModelParamsOptions<"gemini">): GeminiModelParams
 export function getModelParams(options: GetModelParamsOptions<"openrouter">): OpenRouterModelParams
+export function getModelParams(options: GetModelParamsOptions<"zenmux">): OpenRouterModelParams
 export function getModelParams({
 	format,
 	modelId,
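
A small sketch of narrowing on the widened ModelParams union (the helper below is illustrative, not part of the change):

// ZenMux params carry the OpenRouter reasoning shape, so narrowing on
// format === "zenmux" yields OpenRouterReasoningParams | undefined.
function describeReasoning(params: ModelParams): string {
	if (params.format === "zenmux") {
		return params.reasoning ? "zenmux reasoning requested" : "zenmux reasoning off"
	}
	return `no special handling for ${params.format}`
}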

+ 33 - 1
src/api/transform/openai-format.ts

@@ -273,6 +273,22 @@ export interface ConvertToOpenAiMessagesOptions {
 	mergeToolResultText?: boolean
 }
 
+// kilocode_change start
+type ReasoningBlockParam = {
+	/**
+	 * Non-Anthropic block type used by some providers. We preserve it so we can
+	 * round-trip it through OpenAI-format messages.
+	 */
+	type: "reasoning"
+	text?: string
+	thinking?: string
+}
+// kilocode_change end
+
+function isReasoningBlockParam(part: unknown): part is ReasoningBlockParam {
+	return typeof part === "object" && part !== null && (part as { type?: unknown }).type === "reasoning"
+}
+
 export function convertToOpenAiMessages(
 	anthropicMessages: Anthropic.Messages.MessageParam[],
 	options?: ConvertToOpenAiMessagesOptions,
@@ -442,7 +458,14 @@ export function convertToOpenAiMessages(
 				}
 			} else if (anthropicMessage.role === "assistant") {
 				const { nonToolMessages, toolMessages } = anthropicMessage.content.reduce<{
-					nonToolMessages: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[]
+					// kilocode_change start
+					nonToolMessages: (
+						| Anthropic.TextBlockParam
+						| Anthropic.ImageBlockParam
+						| Anthropic.ThinkingBlockParam
+						| ReasoningBlockParam
+					)[]
+					// kilocode_change end
 					toolMessages: Anthropic.ToolUseBlockParam[]
 				}>(
 					(acc, part) => {
@@ -450,7 +473,11 @@ export function convertToOpenAiMessages(
 							acc.toolMessages.push(part)
 						} else if (part.type === "text" || part.type === "image") {
 							acc.nonToolMessages.push(part)
+							// kilocode_change start
+						} else if (part.type === "thinking" || isReasoningBlockParam(part)) {
+							acc.nonToolMessages.push(part)
 						} // assistant cannot send tool_result messages
+						// kilocode_change end
 						return acc
 					},
 					{ nonToolMessages: [], toolMessages: [] },
@@ -463,6 +490,11 @@ export function convertToOpenAiMessages(
 						.map((part) => {
 							if (part.type === "image") {
 								return "" // impossible as the assistant cannot send images
+							} else if (part.type === "thinking") {
+								return "<think>" + part.thinking + "</think>"
+							} else if (part.type === "reasoning") {
+								// kilocode_change - support custom "reasoning" type used by some providers
+								return "<think>" + (part.text || part.thinking || "") + "</think>"
 							}
 							return part.text
 						})
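
A sketch of the round-trip this enables (the input is hypothetical; it also assumes the surrounding code, not shown in this hunk, joins the mapped parts with newlines):

// Assumed assistant message containing an Anthropic thinking block:
const input: Anthropic.Messages.MessageParam[] = [
	{
		role: "assistant",
		content: [
			{ type: "thinking", thinking: "compare both options", signature: "" },
			{ type: "text", text: "Option A is better." },
		],
	},
]
const [message] = convertToOpenAiMessages(input)
// message.content === "<think>compare both options</think>\nOption A is better."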

+ 6 - 0
src/api/transform/reasoning.ts

@@ -147,6 +147,12 @@ export const getGeminiReasoning = ({
 		return { thinkingBudget: reasoningBudget!, includeThoughts: true }
 	}
 
+	// kilocode_change start
+	if (!model.supportsReasoningEffort) {
+		return undefined
+	}
+	// kilocode_change end
+
 	// For effort-based Gemini models, rely directly on the selected effort value.
 	// We intentionally ignore enableReasoningEffort here so that explicitly chosen
 	// efforts in the UI (e.g. "High" for gemini-3-pro-preview) always translate
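
In effect (a behavioural sketch; only supportsReasoningEffort comes from this hunk, and the rest of the options object is elided):

// getGeminiReasoning({ model: { supportsReasoningEffort: false, ... }, ... })
//   -> undefined            (new early return: no thinking config is sent)
// getGeminiReasoning({ model: { supportsReasoningEffort: true, ... }, ... })
//   -> effort-based config  (unchanged path below the guard)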

+ 5 - 1
src/core/assistant-message/NativeToolCallParser.ts

@@ -87,7 +87,11 @@ export class NativeToolCallParser {
 		arguments?: string
 	}): ToolCallStreamEvent[] {
 		const events: ToolCallStreamEvent[] = []
-		const { index, id, name, arguments: args } = chunk
+		// kilocode_change start: Some providers (e.g. MiniMax) return tool call id as a number; coerce to string.
+		const { index, id: rawId, name, arguments: args } = chunk
+
+		const id = rawId != null ? String(rawId) : undefined
+		// kilocode_change end
 
 		let tracked = this.rawChunkTracker.get(index)
 

+ 47 - 0
src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts

@@ -238,4 +238,51 @@ describe("NativeToolCallParser", () => {
 			})
 		})
 	})
+
+	// kilocode_change start
+	describe("processRawChunk", () => {
+		it("should coerce numeric tool call id to string", () => {
+			const events = NativeToolCallParser.processRawChunk({
+				index: 0,
+				id: 42 as unknown as string,
+				name: "read_file",
+				arguments: '{"path":"test.ts"}',
+			})
+
+			expect(events).toHaveLength(2) // start + delta
+			expect(events[0]).toMatchObject({
+				type: "tool_call_start",
+				id: "42",
+				name: "read_file",
+			})
+			expect(typeof events[0].id).toBe("string")
+		})
+
+		it("should leave undefined id as undefined", () => {
+			const events = NativeToolCallParser.processRawChunk({
+				index: 0,
+				id: undefined,
+				name: "read_file",
+			})
+
+			// No id means no tracking is initialized, so no events emitted
+			expect(events).toHaveLength(0)
+		})
+
+		it("should pass through string id unchanged", () => {
+			const events = NativeToolCallParser.processRawChunk({
+				index: 0,
+				id: "call_abc123",
+				name: "read_file",
+			})
+
+			expect(events).toHaveLength(1)
+			expect(events[0]).toMatchObject({
+				type: "tool_call_start",
+				id: "call_abc123",
+				name: "read_file",
+			})
+		})
+	})
+	// kilocode_change end
 })

+ 3 - 0
src/core/assistant-message/presentAssistantMessage.ts

@@ -336,6 +336,9 @@ export async function presentAssistantMessage(cline: Task) {
 				content = content.replace(/<thinking>\s?/g, "")
 				content = content.replace(/\s?<\/thinking>/g, "")
 
+				// Remove internal verification tags (for skill evaluation control flow)
+				content = content.replace(/<internal_verification>[\s\S]*?<\/internal_verification>/g, "") // kilocode_change
+
 				// Remove partial XML tag at the very end of the content (for
 				// tool use and thinking tags), Prevents scrollview from
 				// jumping when tags are automatically removed.
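
A minimal illustration of the new stripping step (the input string is hypothetical):

const raw = "Done.<internal_verification>self-check passed</internal_verification> Next step."
const cleaned = raw.replace(/<internal_verification>[\s\S]*?<\/internal_verification>/g, "")
// cleaned === "Done. Next step."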

+ 65 - 0
src/core/auto-approval/__tests__/checkAutoApproval.test.ts

@@ -0,0 +1,65 @@
+import { checkAutoApproval } from "../index"
+import { ExtensionState } from "@roo-code/types"
+
+describe("checkAutoApproval", () => {
+	const mockAsk = "tool"
+
+	it("should approve deleteFile when alwaysAllowDelete is true", async () => {
+		const state = {
+			alwaysAllowDelete: true,
+			autoApprovalEnabled: true,
+		} as ExtensionState
+
+		const text = JSON.stringify({
+			tool: "deleteFile",
+			path: "/path/to/file",
+		})
+
+		const result = await checkAutoApproval({ state, ask: mockAsk, text })
+		expect(result).toEqual({ decision: "approve" })
+	})
+
+	it("should ask for deleteFile when alwaysAllowDelete is false", async () => {
+		const state = {
+			alwaysAllowDelete: false,
+			autoApprovalEnabled: true,
+		} as ExtensionState
+
+		const text = JSON.stringify({
+			tool: "deleteFile",
+			path: "/path/to/file",
+		})
+
+		const result = await checkAutoApproval({ state, ask: mockAsk, text })
+		expect(result).toEqual({ decision: "ask" })
+	})
+
+	it("should ask for deleteFile when alwaysAllowDelete is undefined", async () => {
+		const state = {
+			autoApprovalEnabled: true,
+		} as ExtensionState
+
+		const text = JSON.stringify({
+			tool: "deleteFile",
+			path: "/path/to/file",
+		})
+
+		const result = await checkAutoApproval({ state, ask: mockAsk, text })
+		expect(result).toEqual({ decision: "ask" })
+	})
+
+	it("should ask when autoApprovalEnabled is false even if alwaysAllowDelete is true", async () => {
+		const state = {
+			alwaysAllowDelete: true,
+			autoApprovalEnabled: false,
+		} as ExtensionState
+
+		const text = JSON.stringify({
+			tool: "deleteFile",
+			path: "/path/to/file",
+		})
+
+		const result = await checkAutoApproval({ state, ask: mockAsk, text })
+		expect(result).toEqual({ decision: "ask" })
+	})
+})

+ 29 - 0
src/core/prompts/responses.ts

@@ -1,6 +1,7 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import * as path from "path"
 import * as diff from "diff"
+import * as fs from "fs" // kilocode_change
 import { RooIgnoreController, LOCK_TEXT_SYMBOL } from "../ignore/RooIgnoreController"
 import { RooProtectedController } from "../protect/RooProtectedController"
 import * as vscode from "vscode"
@@ -233,6 +234,34 @@ Otherwise, if you have not completed the task and do not need additional informa
 				}
 			}
 		}
+
+		// kilocode_change start: Append character count to each file in the list
+		rooIgnoreParsed = rooIgnoreParsed.map((entry) => {
+			// Extract actual path by removing prefix symbols
+			let actualPath = entry
+			if (entry.startsWith(LOCK_TEXT_SYMBOL)) {
+				actualPath = entry.slice((LOCK_TEXT_SYMBOL + " ").length)
+			} else if (entry.startsWith("🛡️")) {
+				actualPath = entry.slice("🛡️ ".length)
+			}
+
+			// Skip directories (end with /)
+			if (actualPath.endsWith("/")) {
+				return entry
+			}
+
+			// Read file and get character count
+			try {
+				const absoluteFilePath = path.resolve(absolutePath, actualPath)
+				const content = fs.readFileSync(absoluteFilePath, "utf-8")
+				return `${entry}  # ${content.length} chars`
+			} catch {
+				// If reading fails, return original entry
+				return entry
+			}
+		})
+		// kilocode_change end
+
 		if (didHitLimit) {
 			return `${rooIgnoreParsed.join(
 				"\n",

+ 1 - 2
src/core/prompts/sections/__tests__/tool-use-guidelines.spec.ts

@@ -54,9 +54,8 @@ describe("getToolUseGuidelinesSection", () => {
 				expect(guidelines).toContain("1. Assess what information")
 				expect(guidelines).toContain("2. Choose the most appropriate tool")
 				expect(guidelines).toContain("3. If multiple actions are needed")
-				expect(guidelines).toContain("4. After each tool use")
+				expect(guidelines).toContain("5. After each tool use")
 			})
-
 			it("should include single-tool-per-message guidance when experiment disabled", () => {
 				const guidelines = getToolUseGuidelinesSection(TOOL_PROTOCOL.NATIVE, {})
 

+ 4 - 0
src/core/prompts/sections/tool-use-guidelines.ts

@@ -37,6 +37,10 @@ export function getToolUseGuidelinesSection(
 				`${itemNumber++}. If multiple actions are needed, use one tool at a time per message to accomplish the task iteratively, with each tool use being informed by the result of the previous tool use. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result.`,
 			)
 		}
+
+		guidelinesList.push(
+			`${itemNumber++}. CRITICAL: You must use the API's native tool format. Do NOT simply write text describing the tool use (e.g., "[Tool Use: ...]" or JSON blocks in text). The system will strictly reject any text that mimics a tool call. You must use the proper API structure for function calling.`,
+		)
 	} else {
 		guidelinesList.push(
 			`${itemNumber++}. If multiple actions are needed, use one tool at a time per message to accomplish the task iteratively, with each tool use being informed by the result of the previous tool use. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result.`,

+ 9 - 1
src/core/task-persistence/taskMetadata.ts

@@ -30,6 +30,13 @@ export type TaskMetadataOptions = {
 	 * continue using this protocol even if user settings change.
 	 */
 	toolProtocol?: ToolProtocol
+	// kilocode_change start
+	/**
+	 * cumulative total cost including deleted messages.
+	 * if provided, this overrides the calculated totalCost from messages.
+	 */
+	cumulativeTotalCost?: number
+	// kilocode_change end
 }
 
 export async function taskMetadata({
@@ -44,6 +51,7 @@ export async function taskMetadata({
 	apiConfigName,
 	initialStatus,
 	toolProtocol,
+	cumulativeTotalCost, // kilocode_change
 }: TaskMetadataOptions) {
 	const taskDir = await getTaskDirectoryPath(globalStoragePath, id)
 
@@ -114,7 +122,7 @@ export async function taskMetadata({
 		tokensOut: tokenUsage.totalTokensOut,
 		cacheWrites: tokenUsage.totalCacheWrites,
 		cacheReads: tokenUsage.totalCacheReads,
-		totalCost: tokenUsage.totalCost,
+		totalCost: cumulativeTotalCost !== undefined ? cumulativeTotalCost : tokenUsage.totalCost, // kilocode_change
 		size: taskDirSize,
 		workspace,
 		mode,
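
A worked example of the override semantics (amounts are illustrative):

// The session spent $0.05 in total, then messages worth $0.02 were deleted.
// tokenUsage.totalCost (recomputed from surviving messages) -> 0.03
// cumulativeTotalCost (from Task.getCumulativeTotalCost())  -> 0.05
// persisted totalCost -> 0.05, so task history keeps the true spend.
// Note: a caller-supplied 0 also wins; only undefined falls back.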

+ 86 - 2
src/core/task/Task.ts

@@ -158,6 +158,9 @@ const MAX_EXPONENTIAL_BACKOFF_SECONDS = 600 // 10 minutes
 const DEFAULT_USAGE_COLLECTION_TIMEOUT_MS = 5000 // 5 seconds
 const FORCED_CONTEXT_REDUCTION_PERCENT = 75 // Keep 75% of context (remove 25%) on context window errors
 const MAX_CONTEXT_WINDOW_RETRIES = 3 // Maximum retries for context window errors
+// kilocode_change start
+const MAX_CHUTES_TERMINATED_RETRY_ATTEMPTS = 2 // Allow up to 2 retries (3 total attempts) before failing fast
+// kilocode_change end
 
 export interface TaskOptions extends CreateTaskOptions {
 	context: vscode.ExtensionContext // kilocode_change
@@ -369,6 +372,13 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 	apiConversationHistory: ApiMessage[] = []
 	clineMessages: ClineMessage[] = []
 
+	/**
+	 * cumulative cost of API calls from messages that were deleted during this session.
+	 * this ensures the total cost displayed to the user reflects all API usage,
+	 * even if messages are removed from the conversation history.
+	 */
+	private _deletedApiCost: number = 0 // kilocode_change
+
 	// Ask
 	private askResponse?: ClineAskResponse
 	private askResponseText?: string
@@ -1320,6 +1330,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				apiConfigName: this._taskApiConfigName, // Use the task's own provider profile, not the current provider profile.
 				initialStatus: this.initialStatus,
 				toolProtocol: this._taskToolProtocol, // Persist the locked tool protocol.
+				cumulativeTotalCost: this.getCumulativeTotalCost(), // kilocode_change: include deleted message costs.
 			})
 
 			// Emit token/tool usage updates using debounced function
@@ -3553,6 +3564,17 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 
 						// Clean up partial state
 						await abortStream(cancelReason, streamingFailedMessage)
+						// kilocode_change start
+						// Bound retries for repeated Chutes "terminated" stream failures
+						// to prevent indefinite thinking/retry loops.
+						const retryAttempt = currentItem.retryAttempt ?? 0
+						if (this.hasExceededChutesTerminatedRetryLimit(error, retryAttempt)) {
+							console.error(
+								`[Task#${this.taskId}.${this.instanceId}] Chutes stream terminated repeatedly. Stopping retries after attempt ${retryAttempt}.`,
+							)
+							throw error
+						}
+						// kilocode_change end
 
 						if (this.abort) {
 							// User cancelled - abort the entire task
@@ -3804,6 +3826,11 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 					)
 
 					if (!didToolUse) {
+						// Check for hallucinated tool use pattern
+						const hallucinatedTool = this.assistantMessageContent.find(
+							(block) => block.type === "text" && block.content.trim().match(/^\[Tool Use: .+\]/i),
+						)
+
 						// Increment consecutive no-tool-use counter
 						this.consecutiveNoToolUseCount++
 
@@ -3814,10 +3841,17 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 							this.consecutiveMistakeCount++
 						}
 
+						let responseText = formatResponse.noToolsUsed(this._taskToolProtocol ?? "xml")
+
+						if (hallucinatedTool) {
+							responseText +=
+								"\n\n[ERROR] You are outputting tool calls as text (e.g. '[Tool Use: ...]'). This is invalid. You MUST use the native tool calling capability provided by the API. Do not write the tool use in the text response."
+						}
+
 						// Use the task's locked protocol for consistent behavior
 						this.userMessageContent.push({
 							type: "text",
-							text: formatResponse.noToolsUsed(this._taskToolProtocol ?? "xml"),
+							text: responseText,
 						})
 					} else {
 						// Reset counter when tools are used successfully
@@ -4279,6 +4313,22 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		}
 	}
 
+	// kilocode_change start
+	private isChutesTerminatedError(error: unknown): boolean {
+		if (this.apiConfiguration?.apiProvider !== "chutes") {
+			return false
+		}
+
+		const message =
+			error instanceof Error ? error.message : typeof error === "string" ? error : JSON.stringify(error)
+		return /\bterminated\b/i.test(message || "")
+	}
+
+	private hasExceededChutesTerminatedRetryLimit(error: unknown, retryAttempt: number): boolean {
+		return this.isChutesTerminatedError(error) && retryAttempt >= MAX_CHUTES_TERMINATED_RETRY_ATTEMPTS
+	}
+	// kilocode_change end
+
 	public async *attemptApiRequest(
 		retryAttempt: number = 0,
 		options: { skipProviderRateLimit?: boolean } = {},
@@ -4654,6 +4704,14 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				return
 			}
 			// kilocode_change end
+			// kilocode_change start
+			// Chutes can occasionally terminate streams abruptly; avoid recursive
+			// first-chunk auto-retries here and delegate retry policy to the
+			// outer request loop, which applies a bounded retry cap.
+			if (this.isChutesTerminatedError(error)) {
+				throw error
+			}
+			// kilocode_change end
			// note that this api_req_failed ask is unique in that we only present this option if the api hasn't streamed any content yet (i.e., it fails on the first chunk), as it would allow them to hit a retry button. However, if the api failed mid-stream, it could be in any arbitrary state where some tools may have executed, so that error is handled differently and requires cancelling the task entirely.
 			if (autoApprovalEnabled) {
 				// Apply shared exponential backoff and countdown UX
@@ -4945,9 +5003,35 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		return combineApiRequests(combineCommandSequences(messages))
 	}
 
+	// kilocode_change start
 	public getTokenUsage(): TokenUsage {
-		return getApiMetrics(this.combineMessages(this.clineMessages.slice(1)))
+		const metrics = getApiMetrics(this.combineMessages(this.clineMessages.slice(1)))
+		// add deleted API costs to the total cost
+		return {
+			...metrics,
+			totalCost: metrics.totalCost + this._deletedApiCost,
+		}
+	}
+
+	/**
+	 * get cumulative total cost including deleted messages.
+	 * this is the cost that should be persisted in history.
+	 */
+	public getCumulativeTotalCost(): number {
+		const metrics = getApiMetrics(this.combineMessages(this.clineMessages.slice(1)))
+		return metrics.totalCost + this._deletedApiCost
+	}
+
+	/**
+	 * add cost from deleted messages to the cumulative total.
+	 * called by message deletion handlers to preserve true session cost.
+	 */
+	public addDeletedApiCost(cost: number): void {
+		if (cost > 0) {
+			this._deletedApiCost += cost
+		}
 	}
+	// kilocode_change end
 
 	public recordToolUsage(toolName: ToolName) {
 		if (!this.toolUsage[toolName]) {
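
Putting the cost-accounting pieces together (amounts are illustrative):

// 1. The user deletes messages whose api_req_started entries sum to $0.02:
task.addDeletedApiCost(0.02) // _deletedApiCost = 0.02
// 2. Metrics recomputed from the surviving messages come to $0.03:
task.getTokenUsage().totalCost // 0.03 + 0.02 = 0.05
// 3. Persistence and the webview read the same cumulative figure:
task.getCumulativeTotalCost() // 0.05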

+ 76 - 0
src/core/task/__tests__/Task.spec.ts

@@ -900,6 +900,82 @@ describe("Cline", () => {
 				await task.catch(() => {})
 			})
 
+			// kilocode_change start
+			it("attemptApiRequest should not recursively auto-retry first-chunk Chutes terminated errors", async () => {
+				const chutesConfig = {
+					...mockApiConfig,
+					apiProvider: "chutes" as const,
+					apiModelId: "moonshotai/Kimi-K2.5-TEE",
+				}
+
+				const task = new Task({
+					provider: mockProvider,
+					apiConfiguration: chutesConfig,
+					task: "test task",
+					startTask: false,
+					context: mockExtensionContext,
+				})
+
+				const terminatedError = new Error("terminated")
+				const mockFailedStream = {
+					// eslint-disable-next-line require-yield
+					async *[Symbol.asyncIterator]() {
+						throw terminatedError
+					},
+					async next() {
+						throw terminatedError
+					},
+					async return() {
+						return { done: true, value: undefined }
+					},
+					async throw(e: any) {
+						throw e
+					},
+					async [Symbol.asyncDispose]() {
+						// Cleanup
+					},
+				} as AsyncGenerator<ApiStreamChunk>
+
+				const createMessageSpy = vi.spyOn(task.api, "createMessage").mockReturnValue(mockFailedStream)
+				const backoffSpy = vi.spyOn(task as any, "backoffAndAnnounce").mockResolvedValue(undefined)
+
+				mockProvider.getState = vi.fn().mockResolvedValue({
+					apiConfiguration: chutesConfig,
+					autoApprovalEnabled: true,
+					requestDelaySeconds: 1,
+					mode: "code",
+				})
+
+				const iterator = task.attemptApiRequest(0, { skipProviderRateLimit: true })
+				await expect(iterator.next()).rejects.toThrow("terminated")
+
+				expect(createMessageSpy).toHaveBeenCalledTimes(1)
+				expect(backoffSpy).not.toHaveBeenCalled()
+			})
+
+			it("should apply Chutes terminated retry cap at the configured threshold", async () => {
+				const chutesConfig = {
+					...mockApiConfig,
+					apiProvider: "chutes" as const,
+					apiModelId: "moonshotai/Kimi-K2.5-TEE",
+				}
+
+				const task = new Task({
+					provider: mockProvider,
+					apiConfiguration: chutesConfig,
+					task: "test task",
+					startTask: false,
+					context: mockExtensionContext,
+				})
+
+				const terminatedError = new Error("terminated")
+
+				expect((task as any).hasExceededChutesTerminatedRetryLimit(terminatedError, 0)).toBe(false)
+				expect((task as any).hasExceededChutesTerminatedRetryLimit(terminatedError, 1)).toBe(false)
+				expect((task as any).hasExceededChutesTerminatedRetryLimit(terminatedError, 2)).toBe(true)
+			})
+			// kilocode_change end
+
 			describe("processUserContentMentions", () => {
 				it("should process mentions in task and feedback tags", async () => {
 					const [cline, task] = Task.create({

+ 8 - 2
src/core/tools/ExecuteCommandTool.ts

@@ -308,14 +308,20 @@ export async function executeCommandInTerminal(
 				clearTimeout(timeoutId)
 			}
 
-			task.terminalProcess = undefined
+			// Don't clear if running in background - user may still want to kill it
+			if (!runInBackground) {
+				task.terminalProcess = undefined
+			}
 		}
 	} else {
 		// No timeout - just wait for the process to complete.
 		try {
 			await process
 		} finally {
-			task.terminalProcess = undefined
+			// Don't clear if running in background - user may still want to kill it
+			if (!runInBackground) {
+				task.terminalProcess = undefined
+			}
 		}
 	}
 

+ 10 - 2
src/core/webview/ClineProvider.ts

@@ -1993,8 +1993,8 @@ export class ClineProvider
 		try {
 			// get the task directory full path
 			const { taskDirPath } = await this.getTaskWithId(id)
-	
-				// remove task from stack if it's the current task
+
+			// remove task from stack if it's the current task
 			if (id === this.getCurrentTask()?.taskId) {
 				// Close the current task instance; delegation flows will be handled via metadata if applicable.
 				await this.removeClineFromStack()
@@ -2397,6 +2397,7 @@ export class ClineProvider
 				: undefined,
 			clineMessages: this.getCurrentTask()?.clineMessages || [],
 			currentTaskTodos: this.getCurrentTask()?.todoList || [],
+			currentTaskCumulativeCost: this.getCurrentTask()?.getCumulativeTotalCost(), // kilocode_change
 			messageQueue: this.getCurrentTask()?.messageQueueService?.messages,
 			taskHistoryFullLength: taskHistory.length, // kilocode_change
 			taskHistoryVersion: this.kiloCodeTaskHistoryVersion, // kilocode_change
@@ -3391,6 +3392,13 @@ export class ClineProvider
 				return
 			}
 
+			// Phase 1: Show dialog immediately with loading state
+			await this.postMessageToWebview({
+				type: "askReviewScope",
+				reviewScopeInfo: undefined,
+			})
+
+			// Phase 2: Compute scope info and hydrate
 			const { ReviewService } = await import("../../services/review")
 			const reviewService = new ReviewService({ cwd })
 			const scopeInfo = await reviewService.getScopeInfo()
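
The intended message sequence, from the webview's perspective (the hydration post presumably follows after this hunk ends, so the second payload is an assumption):

// Phase 1: the dialog opens immediately in a loading state.
//   { type: "askReviewScope", reviewScopeInfo: undefined }
// Phase 2: once ReviewService.getScopeInfo() resolves, the same dialog
// is hydrated, e.g.:
//   { type: "askReviewScope", reviewScopeInfo: scopeInfo }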

+ 1 - 0
src/core/webview/__tests__/ClineProvider.apiHandlerRebuild.spec.ts

@@ -118,6 +118,7 @@ vi.mock("../../task/Task", () => ({
 			updateApiConfiguration: vi.fn().mockImplementation(function (this: any, newConfig: any) {
 				this.apiConfiguration = newConfig
 			}),
+			getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 		}
 		// Define apiConfiguration as a property so tests can read it
 		Object.defineProperty(mockTask, "apiConfiguration", {

+ 12 - 1
src/core/webview/__tests__/ClineProvider.spec.ts

@@ -235,6 +235,7 @@ vi.mock("../../task/Task", () => ({
 		setRootTask: vi.fn(),
 		taskId: options?.historyItem?.id || "test-task-id",
 		emit: vi.fn(),
+		getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 	})),
 }))
 
@@ -379,6 +380,7 @@ describe("ClineProvider", () => {
 				setRootTask: vi.fn(),
 				taskId: options?.historyItem?.id || "test-task-id",
 				emit: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			Object.defineProperty(task, "messageManager", {
@@ -2783,7 +2785,12 @@ describe("ClineProvider - Router Models", () => {
 			apiKey: "litellm-key",
 			baseUrl: "http://localhost:4000",
 		})
-		expect(getModels).toHaveBeenCalledWith({ provider: "chutes" })
+		expect(getModels).toHaveBeenCalledWith({ provider: "chutes", apiKey: undefined })
+		expect(getModels).toHaveBeenCalledWith({
+			provider: "zenmux",
+			apiKey: undefined,
+			baseUrl: "https://zenmux.ai/api/v1",
+		})
 
 		// Verify response was sent
 		expect(mockPostMessage).toHaveBeenCalledWith({
@@ -2809,6 +2816,7 @@ describe("ClineProvider - Router Models", () => {
 				"sap-ai-core": {}, // kilocode_change
 				huggingface: {},
 				"io-intelligence": {},
+				zenmux: mockModels,
 			},
 			values: undefined,
 		})
@@ -2861,6 +2869,7 @@ describe("ClineProvider - Router Models", () => {
 			.mockResolvedValueOnce(mockModels) // kilocode_change: synthetic success
 			.mockResolvedValueOnce(mockModels) // roo success
 			.mockRejectedValueOnce(new Error("Chutes API error")) // chutes fail
+			.mockResolvedValueOnce(mockModels) // zenmux success
 			.mockRejectedValueOnce(new Error("LiteLLM connection failed")) // litellm fail
 
 		await messageHandler({ type: "requestRouterModels" })
@@ -2889,6 +2898,7 @@ describe("ClineProvider - Router Models", () => {
 				"sap-ai-core": {}, // kilocode_change
 				huggingface: {},
 				"io-intelligence": {},
+				zenmux: mockModels,
 			},
 			values: undefined,
 		})
@@ -3045,6 +3055,7 @@ describe("ClineProvider - Router Models", () => {
 				"sap-ai-core": {}, // kilocode_change
 				huggingface: {},
 				"io-intelligence": {},
+				zenmux: mockModels,
 			},
 			values: undefined,
 		})

+ 11 - 0
src/core/webview/__tests__/ClineProvider.sticky-mode.spec.ts

@@ -82,6 +82,7 @@ vi.mock("../../task/Task", () => ({
 		emit: vi.fn(),
 		parentTask: options.parentTask,
 		updateApiConfiguration: vi.fn(),
+		getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 	})),
 }))
 
@@ -359,6 +360,7 @@ describe("ClineProvider - Sticky Mode", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			// Add task to provider stack
@@ -812,6 +814,7 @@ describe("ClineProvider - Sticky Mode", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			// Add task to provider stack
@@ -879,6 +882,7 @@ describe("ClineProvider - Sticky Mode", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			// Add task to provider stack
@@ -931,6 +935,7 @@ describe("ClineProvider - Sticky Mode", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			// Add task to provider stack
@@ -966,6 +971,7 @@ describe("ClineProvider - Sticky Mode", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			// Add task to provider stack
@@ -1022,6 +1028,7 @@ describe("ClineProvider - Sticky Mode", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			// Add task to provider stack
@@ -1069,6 +1076,7 @@ describe("ClineProvider - Sticky Mode", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			const task2 = {
@@ -1079,6 +1087,7 @@ describe("ClineProvider - Sticky Mode", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			const task3 = {
@@ -1089,6 +1098,7 @@ describe("ClineProvider - Sticky Mode", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			// Add tasks to provider stack
@@ -1231,6 +1241,7 @@ describe("ClineProvider - Sticky Mode", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}))
 
 			// Add all tasks to provider

+ 8 - 0
src/core/webview/__tests__/ClineProvider.sticky-profile.spec.ts

@@ -82,6 +82,7 @@ vi.mock("../../task/Task", () => ({
 		setTaskApiConfigName: vi.fn(),
 		_taskApiConfigName: options.historyItem?.apiConfigName,
 		taskApiConfigName: options.historyItem?.apiConfigName,
+		getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 	})),
 }))
 
@@ -300,6 +301,7 @@ describe("ClineProvider - Sticky Provider Profile", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			// Add task to provider stack
@@ -365,6 +367,7 @@ describe("ClineProvider - Sticky Provider Profile", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			// Add task to provider stack
@@ -421,6 +424,7 @@ describe("ClineProvider - Sticky Provider Profile", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			await provider.addClineToStack(mockTask as any)
@@ -610,6 +614,7 @@ describe("ClineProvider - Sticky Provider Profile", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			// Mock getGlobalState to return task history with our task
@@ -677,6 +682,7 @@ describe("ClineProvider - Sticky Provider Profile", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			// Create task 2 with profile B
@@ -691,6 +697,7 @@ describe("ClineProvider - Sticky Provider Profile", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			// Add task 1 to stack
@@ -775,6 +782,7 @@ describe("ClineProvider - Sticky Provider Profile", () => {
 				clineMessages: [],
 				apiConversationHistory: [],
 				updateApiConfiguration: vi.fn(),
+				getCumulativeTotalCost: vi.fn().mockReturnValue(0), // kilocode_change
 			}
 
 			// Add task to provider stack

+ 10 - 0
src/core/webview/__tests__/webviewMessageHandler.spec.ts

@@ -310,6 +310,11 @@ describe("webviewMessageHandler - requestRouterModels", () => {
 		expect(mockGetModels).toHaveBeenCalledWith({ provider: "requesty", apiKey: "requesty-key" })
 		expect(mockGetModels).toHaveBeenCalledWith({ provider: "glama" }) // kilocode_change
 		expect(mockGetModels).toHaveBeenCalledWith({ provider: "unbound", apiKey: "unbound-key" })
+		expect(mockGetModels).toHaveBeenCalledWith({
+			provider: "zenmux",
+			apiKey: undefined,
+			baseUrl: "https://zenmux.ai/api/v1",
+		})
 		// kilocode_change start
 		expect(mockGetModels).toHaveBeenCalledWith({ provider: "chutes", apiKey: "chutes-key" })
 		expect(mockGetModels).toHaveBeenCalledWith({
@@ -360,6 +365,7 @@ describe("webviewMessageHandler - requestRouterModels", () => {
 				"nano-gpt": mockModels, // kilocode_change
 				roo: mockModels,
 				chutes: mockModels,
+				zenmux: mockModels,
 				ollama: mockModels, // kilocode_change
 				lmstudio: {},
 				"vercel-ai-gateway": mockModels,
@@ -463,6 +469,7 @@ describe("webviewMessageHandler - requestRouterModels", () => {
 				unbound: mockModels,
 				roo: mockModels,
 				chutes: mockModels,
+				zenmux: mockModels,
 				litellm: {},
 				kilocode: mockModels,
 				"nano-gpt": mockModels, // kilocode_change
@@ -506,6 +513,7 @@ describe("webviewMessageHandler - requestRouterModels", () => {
 			.mockRejectedValueOnce(new Error("Synthetic API error")) // kilocode_change
 			.mockResolvedValueOnce(mockModels) // roo
 			.mockRejectedValueOnce(new Error("Chutes API error")) // chutes
+			.mockResolvedValueOnce(mockModels) // zenmux
 			.mockRejectedValueOnce(new Error("LiteLLM connection failed")) // litellm
 
 		await webviewMessageHandler(mockClineProvider, {
@@ -568,6 +576,7 @@ describe("webviewMessageHandler - requestRouterModels", () => {
 				unbound: {},
 				roo: mockModels,
 				chutes: {},
+				zenmux: mockModels,
 				litellm: {},
 				ollama: {},
 				lmstudio: {},
@@ -606,6 +615,7 @@ describe("webviewMessageHandler - requestRouterModels", () => {
 			.mockRejectedValueOnce(new Error("Synthetic API error")) // kilocode_change synthetic
 			.mockRejectedValueOnce(new Error("Roo API error")) // roo
 			.mockRejectedValueOnce(new Error("Chutes API error")) // chutes
+			.mockResolvedValueOnce({}) // zenmux
 			.mockRejectedValueOnce(new Error("LiteLLM connection failed")) // litellm
 
 		await webviewMessageHandler(mockClineProvider, {

+ 1 - 1
src/core/webview/sttHandlers.ts

@@ -4,7 +4,7 @@ import type { STTCommand, STTSegment as ContractSTTSegment, MicrophoneDevice } f
 import { STTService } from "../../services/stt"
 import { STTEventEmitter } from "../../services/stt/types"
 import { getOpenAiApiKey } from "../../services/stt/utils/getOpenAiCredentials"
-import { VisibleCodeTracker } from "../../services/ghost/context/VisibleCodeTracker"
+import { VisibleCodeTracker } from "../../services/autocomplete/context/VisibleCodeTracker"
 import { extractCodeGlossary, formatGlossaryAsPrompt } from "../../services/stt/context/codeGlossaryExtractor"
 import { listMicrophoneDevices } from "../../services/stt/FFmpegDeviceEnumerator"
 import { checkSpeechToTextAvailable } from "./speechToTextCheck"

+ 49 - 22
src/core/webview/webviewMessageHandler.ts

@@ -30,7 +30,7 @@ import {
 	type EditQueuedMessagePayload,
 	TelemetryEventName,
 	// kilocode_change start
-	ghostServiceSettingsSchema,
+	autocompleteServiceSettingsSchema,
 	fastApplyModelSchema,
 	// kilocode_change end
 	DEFAULT_CHECKPOINT_TIMEOUT_SECONDS,
@@ -90,9 +90,9 @@ import {
 	fetchKilocodeNotificationsHandler,
 	deviceAuthMessageHandler,
 } from "../kilocode/webview/webviewMessageHandlerUtils"
-import { GhostServiceManager } from "../../services/ghost/GhostServiceManager"
-import { handleChatCompletionRequest } from "../../services/ghost/chat-autocomplete/handleChatCompletionRequest"
-import { handleChatCompletionAccepted } from "../../services/ghost/chat-autocomplete/handleChatCompletionAccepted"
+import { AutocompleteServiceManager } from "../../services/autocomplete/AutocompleteServiceManager"
+import { handleChatCompletionRequest } from "../../services/autocomplete/chat-autocomplete/handleChatCompletionRequest"
+import { handleChatCompletionAccepted } from "../../services/autocomplete/chat-autocomplete/handleChatCompletionAccepted"
 // kilocode_change end
 
 const ALLOWED_VSCODE_SETTINGS = new Set(["terminal.integrated.inheritEnv"])
@@ -256,6 +256,28 @@ export const webviewMessageHandler = async (
 					vscode.window.showWarningMessage("No checkpoint found before this message")
 				}
 			} else {
+				// kilocode_change start: calculate the cost of messages being deleted before removing them
+				const messagesToDelete = currentCline.clineMessages.slice(messageIndex)
+				let deletedCost = 0
+				for (const msg of messagesToDelete) {
+					if (msg.say === "api_req_started" && msg.text) {
+						try {
+							const apiReqInfo = JSON.parse(msg.text)
+							if (apiReqInfo.cost && typeof apiReqInfo.cost === "number") {
+								deletedCost += apiReqInfo.cost
+							}
+						} catch {
+							// ignore parse errors
+						}
+					}
+				}
+
+				// add the deleted cost to the task's cumulative total
+				if (deletedCost > 0) {
+					currentCline.addDeletedApiCost(deletedCost)
+				}
+				// kilocode_change end
+
 				// For non-checkpoint deletes, preserve checkpoint associations for remaining messages
 				// Store checkpoints from messages that will be preserved
 				const preservedCheckpoints = new Map<number, any>()
@@ -806,9 +828,7 @@ export const webviewMessageHandler = async (
 					await provider.postStateToWebview()
 					console.log(`Batch deletion completed: ${ids.length} tasks processed`)
 				} catch (error) {
-					console.log(
-						`Batch deletion failed: ${error instanceof Error ? error.message : String(error)}`,
-					)
+					console.log(`Batch deletion failed: ${error instanceof Error ? error.message : String(error)}`)
 				}
 				// kilocode_change end
 			}
@@ -904,8 +924,8 @@ export const webviewMessageHandler = async (
 						"sap-ai-core": {}, // kilocode_change
 						chutes: {},
 						"nano-gpt": {}, // kilocode_change
+						zenmux: {},
 					}
-
 			const safeGetModels = async (options: GetModelsOptions): Promise<ModelRecord> => {
 				try {
 					return await getModels(options)
@@ -1006,6 +1026,14 @@ export const webviewMessageHandler = async (
 					key: "chutes",
 					options: { provider: "chutes", apiKey: apiConfiguration.chutesApiKey },
 				},
+				{
+					key: "zenmux",
+					options: {
+						provider: "zenmux",
+						apiKey: apiConfiguration.zenmuxApiKey,
+						baseUrl: apiConfiguration.zenmuxBaseUrl ?? "https://zenmux.ai/api/v1",
+					},
+				},
 			]
 			// kilocode_change end
 
@@ -1054,7 +1082,6 @@ export const webviewMessageHandler = async (
 
 			results.forEach((result, index) => {
 				const routerName = modelFetchPromises[index].key
-
 				if (result.status === "fulfilled") {
 					routerModels[routerName] = result.value.models
 
@@ -1970,16 +1997,16 @@ export const webviewMessageHandler = async (
 				return
 			}
 			// Validate ghostServiceSettings structure
-			const ghostServiceSettings = ghostServiceSettingsSchema.parse(message.values)
-			await updateGlobalState("ghostServiceSettings", ghostServiceSettings)
+			const validatedSettings = autocompleteServiceSettingsSchema.parse(message.values)
+			await updateGlobalState("ghostServiceSettings", validatedSettings)
 			await provider.postStateToWebview()
-			vscode.commands.executeCommand("kilo-code.ghost.reload")
+			vscode.commands.executeCommand("kilo-code.autocomplete.reload")
 			break
 		case "snoozeAutocomplete":
 			if (typeof message.value === "number" && message.value > 0) {
-				await GhostServiceManager.getInstance()?.snooze(message.value)
+				await AutocompleteServiceManager.getInstance()?.snooze(message.value)
 			} else {
-				await GhostServiceManager.getInstance()?.unsnooze()
+				await AutocompleteServiceManager.getInstance()?.unsnooze()
 			}
 			break
 		// kilocode_change end
@@ -2223,7 +2250,7 @@ export const webviewMessageHandler = async (
 					await provider.providerSettingsManager.saveConfig(message.text, message.apiConfiguration)
 					const listApiConfig = await provider.providerSettingsManager.listConfig()
 					await updateGlobalState("listApiConfigMeta", listApiConfig)
-					vscode.commands.executeCommand("kilo-code.ghost.reload") // kilocode_change: Reload ghost model when API provider settings change
+					vscode.commands.executeCommand("kilo-code.autocomplete.reload") // kilocode_change: Reload autocomplete model when API provider settings change
 				} catch (error) {
 					provider.log(
 						`Error save api configuration: ${JSON.stringify(error, Object.getOwnPropertyNames(error), 2)}`,
@@ -2286,7 +2313,7 @@ export const webviewMessageHandler = async (
 				const currentApiConfigName = getGlobalState("currentApiConfigName") || "default"
 				const isActiveProfile = message.text === currentApiConfigName
 				await provider.upsertProviderProfile(message.text, configToSave, isActiveProfile) // Activate if it's the current active profile
-				vscode.commands.executeCommand("kilo-code.ghost.reload")
+				vscode.commands.executeCommand("kilo-code.autocomplete.reload")
 				// kilocode_change end
 
 				// Ensure state is posted to webview after profile update to reflect organization mode changes
@@ -2294,8 +2321,8 @@ export const webviewMessageHandler = async (
 					await provider.postStateToWebview()
 				}
 
-				// kilocode_change: Reload ghost model when API provider settings change
-				vscode.commands.executeCommand("kilo-code.ghost.reload")
+				// kilocode_change: Reload autocomplete model when API provider settings change
+				vscode.commands.executeCommand("kilo-code.autocomplete.reload")
 			}
 			// kilocode_change end: check for kilocodeToken change to remove organizationId and fetch organization modes
 			break
@@ -2321,8 +2348,8 @@ export const webviewMessageHandler = async (
 					// currently activated provider profile.
 					await provider.activateProviderProfile({ name: newName })
 
-					// kilocode_change: Reload ghost model when API provider settings change
-					vscode.commands.executeCommand("kilo-code.ghost.reload")
+					// kilocode_change: Reload autocomplete model when API provider settings change
+					vscode.commands.executeCommand("kilo-code.autocomplete.reload")
 				} catch (error) {
 					provider.log(
 						`Error rename api configuration: ${JSON.stringify(error, Object.getOwnPropertyNames(error), 2)}`,
@@ -2398,8 +2425,8 @@ export const webviewMessageHandler = async (
 					await provider.providerSettingsManager.deleteConfig(oldName)
 					await provider.activateProviderProfile({ name: newName })
 
-					// kilocode_change: Reload ghost model when API provider settings change
-					vscode.commands.executeCommand("kilo-code.ghost.reload")
+					// kilocode_change: Reload autocomplete model when API provider settings change
+					vscode.commands.executeCommand("kilo-code.autocomplete.reload")
 				} catch (error) {
 					provider.log(
 						`Error delete api configuration: ${JSON.stringify(error, Object.getOwnPropertyNames(error), 2)}`,

+ 1 - 1
src/esbuild.mjs

@@ -88,7 +88,7 @@ async function main() {
 					copyPaths([["walkthrough", "walkthrough"]], srcDir, distDir)
 
 					// Copy tree-sitter files to dist directory
-					copyPaths([["services/continuedev/tree-sitter", "tree-sitter"]], srcDir, distDir)
+					copyPaths([["services/autocomplete/continuedev/tree-sitter", "tree-sitter"]], srcDir, distDir)
 
 					// Copy JSDOM xhr-sync-worker.js to fix runtime resolution
 					const jsdomWorkerDest = path.join(distDir, "xhr-sync-worker.js")

+ 1 - 1
src/eslint.config.mjs

@@ -37,6 +37,6 @@ export default [
 		},
 	},
 	{
-		ignores: ["webview-ui", "out", "services/continuedev/core/llm/llamaTokenizer.js", "**/__fixtures__"],
+		ignores: ["webview-ui", "out", "services/autocomplete/continuedev/core/llm/llamaTokenizer.js", "**/__fixtures__"],
 	},
 ]

+ 5 - 5
src/extension.ts

@@ -46,7 +46,7 @@ import {
 	CodeActionProvider,
 } from "./activate"
 import { initializeI18n } from "./i18n"
-import { registerGhostProvider } from "./services/ghost" // kilocode_change
+import { registerAutocompleteProvider } from "./services/autocomplete" // kilocode_change
 import { registerMainThreadForwardingLogger } from "./utils/fowardingLogger" // kilocode_change
 import { getKiloCodeWrapperProperties } from "./core/kilocode/wrapper" // kilocode_change
 import { checkAnthropicApiKeyConflict } from "./utils/anthropicApiKeyWarning" // kilocode_change
@@ -400,9 +400,9 @@ export async function activate(context: vscode.ExtensionContext) {
 			// Enable autocomplete by default for new installs, but not for JetBrains IDEs
 			// JetBrains users can manually enable it if they want to test the feature
 			const { kiloCodeWrapperJetbrains } = getKiloCodeWrapperProperties()
-			const currentGhostSettings = contextProxy.getValue("ghostServiceSettings")
+			const currentAutocompleteSettings = contextProxy.getValue("ghostServiceSettings")
 			await contextProxy.setValue("ghostServiceSettings", {
-				...currentGhostSettings,
+				...currentAutocompleteSettings,
 				enableAutoTrigger: !kiloCodeWrapperJetbrains,
 				enableSmartInlineTaskKeybinding: true,
 			})
@@ -514,9 +514,9 @@ export async function activate(context: vscode.ExtensionContext) {
		// Only forward logs in JetBrains
 		registerMainThreadForwardingLogger(context)
 	}
-	// Don't register the ghost provider for the CLI
+	// Don't register the autocomplete provider for the CLI
 	if (kiloCodeWrapperCode !== "cli") {
-		registerGhostProvider(context, provider)
+		registerAutocompleteProvider(context, provider)
 	}
 	registerCommitMessageProvider(context, outputChannel) // kilocode_change
 	// kilocode_change end - Kilo Code specific registrations

+ 1 - 1
src/i18n/locales/ar/kilocode.json

@@ -101,7 +101,7 @@
 			"generateButtonTooltip": "ينشئ رسالة الالتزام (commit message) باستخدام الذكاء الاصطناعي لتحليل تغييرات الكود الخاصة بك"
 		}
 	},
-	"ghost": {
+	"autocomplete": {
 		"statusBar": {
 			"enabled": "$(kilo-logo) Autocomplete",
 			"snoozed": "متوقف مؤقتاً",

+ 1 - 1
src/i18n/locales/ca/kilocode.json

@@ -97,7 +97,7 @@
 			"generateButton": "Genera Missatge de Commit"
 		}
 	},
-	"ghost": {
+	"autocomplete": {
 		"statusBar": {
 			"enabled": "$(kilo-logo) Autocomplete",
 			"snoozed": "pausat",

+ 1 - 1
src/i18n/locales/cs/kilocode.json

@@ -127,7 +127,7 @@
 			"authReminder": "If this is your first install, run `kilocode auth` to sign in before starting the Agent Manager."
 		}
 	},
-	"ghost": {
+	"autocomplete": {
 		"statusBar": {
 			"enabled": "$(kilo-logo) Autocomplete",
 			"snoozed": "pozastaveno",

+ 1 - 1
src/i18n/locales/de/kilocode.json

@@ -97,7 +97,7 @@
 			"generateButton": "Commit-Nachricht generieren"
 		}
 	},
-	"ghost": {
+	"autocomplete": {
 		"statusBar": {
 			"enabled": "$(kilo-logo) Autocomplete",
 			"snoozed": "pausiert",

Some files were not shown because too many files changed in this diff