View Source Code

Merge branch 'main' into fix-JetBrains-ExtensionHostManager-not-available

SkipperQ93 2 days ago
Parent
Commit
da0ba04aad
100 changed files with 2699 additions and 148 deletions
  1. .changeset/add-apertis-provider.md (+5 -0)
  2. .changeset/clever-moles-smile.md (+5 -0)
  3. .changeset/cool-students-battle.md (+5 -0)
  4. .changeset/easy-stamps-shop.md (+5 -0)
  5. .changeset/fifty-baboons-shine.md (+5 -0)
  6. .changeset/filter-internal-verification-tags.md (+5 -0)
  7. .changeset/fix-context-flickering.md (+5 -0)
  8. .changeset/fix-file-deletion-auto-approve.md (+5 -0)
  9. .changeset/fix-model-no-tools-used.md (+5 -0)
  10. .changeset/fix-settings-search-ui.md (+5 -0)
  11. .changeset/fix-user-message-visibility.md (+5 -0)
  12. .changeset/flat-eels-press.md (+5 -0)
  13. .changeset/free-toes-hammer.md (+14 -0)
  14. .changeset/gentle-laws-allow.md (+5 -0)
  15. .changeset/heavy-cases-sing.md (+5 -0)
  16. .changeset/kill-command-fix.md (+5 -0)
  17. .changeset/lucky-lands-tickle.md (+5 -0)
  18. .changeset/old-planes-start.md (+5 -0)
  19. .changeset/persist-deleted-api-costs.md (+5 -0)
  20. .changeset/shiny-zebras-jump.md (+5 -0)
  21. .changeset/strange-files-unite.md (+5 -0)
  22. .changeset/thin-forks-draw.md (+5 -0)
  23. .changeset/weak-seas-add.md (+5 -0)
  24. .changeset/young-emus-obey.md (+5 -0)
  25. .devcontainer/README.md (+87 -0)
  26. .devcontainer/devcontainer.json (+11 -1)
  27. .github/copilot-instructions.md (+1 -1)
  28. AGENTS.md (+1 -1)
  29. apps/kilocode-docs/components/CodeBlock.tsx (+18 -6)
  30. apps/kilocode-docs/components/SideNav.tsx (+2 -0)
  31. apps/kilocode-docs/components/TopNav.tsx (+40 -0)
  32. apps/kilocode-docs/docs/getting-started/devcontainer-persistence.md (+107 -0)
  33. apps/kilocode-docs/lib/nav/customize.ts (+5 -0)
  34. apps/kilocode-docs/lib/nav/gateway.ts (+32 -0)
  35. apps/kilocode-docs/lib/nav/index.ts (+2 -0)
  36. apps/kilocode-docs/pages/_app.tsx (+9 -1)
  37. apps/kilocode-docs/pages/ai-providers/kilocode.md (+6 -23)
  38. apps/kilocode-docs/pages/ai-providers/ollama.md (+3 -3)
  39. apps/kilocode-docs/pages/ai-providers/v0.md (+1 -1)
  40. apps/kilocode-docs/pages/ai-providers/virtual-quota-fallback.md (+2 -2)
  41. apps/kilocode-docs/pages/automate/extending/shell-integration.md (+1 -1)
  42. apps/kilocode-docs/pages/code-with-ai/features/browser-use.md (+0 -12)
  43. apps/kilocode-docs/pages/code-with-ai/platforms/cli.md (+74 -0)
  44. apps/kilocode-docs/pages/collaborate/adoption-dashboard/overview.md (+2 -2)
  45. apps/kilocode-docs/pages/collaborate/adoption-dashboard/understanding-your-score.md (+3 -3)
  46. apps/kilocode-docs/pages/collaborate/enterprise/audit-logs.md (+1 -1)
  47. apps/kilocode-docs/pages/collaborate/enterprise/model-access-controls.md (+1 -1)
  48. apps/kilocode-docs/pages/collaborate/enterprise/sso.md (+1 -1)
  49. apps/kilocode-docs/pages/collaborate/teams/dashboard.md (+1 -3)
  50. apps/kilocode-docs/pages/collaborate/teams/getting-started.md (+4 -4)
  51. apps/kilocode-docs/pages/contributing/development-environment.md (+1 -1)
  52. apps/kilocode-docs/pages/customize/context/codebase-indexing.md (+6 -0)
  53. apps/kilocode-docs/pages/customize/context/context-condensing.md (+102 -0)
  54. apps/kilocode-docs/pages/customize/custom-modes.md (+7 -4)
  55. apps/kilocode-docs/pages/customize/custom-rules.md (+2 -2)
  56. apps/kilocode-docs/pages/customize/index.md (+15 -10)
  57. apps/kilocode-docs/pages/deploy-secure/deploy.md (+2 -2)
  58. apps/kilocode-docs/pages/gateway/api-reference.md (+364 -0)
  59. apps/kilocode-docs/pages/gateway/authentication.md (+121 -0)
  60. apps/kilocode-docs/pages/gateway/index.md (+54 -0)
  61. apps/kilocode-docs/pages/gateway/models-and-providers.md (+155 -0)
  62. apps/kilocode-docs/pages/gateway/quickstart.md (+151 -0)
  63. apps/kilocode-docs/pages/gateway/sdks-and-frameworks.md (+326 -0)
  64. apps/kilocode-docs/pages/gateway/streaming.md (+197 -0)
  65. apps/kilocode-docs/pages/gateway/usage-and-billing.md (+137 -0)
  66. apps/kilocode-docs/pages/getting-started/faq.md (+1 -1)
  67. apps/kilocode-docs/pages/getting-started/installing.md (+37 -0)
  68. apps/kilocode-docs/pages/index.tsx (+36 -1)
  69. apps/kilocode-docs/public/globals.css (+108 -17)
  70. apps/kilocode-docs/public/img/browser-use/KiloCodeBrowser.png (binary)
  71. apps/kilocode-docs/public/img/browser-use/browser-use-1.png (binary)
  72. apps/kilocode-docs/public/img/browser-use/browser-use-2.png (binary)
  73. apps/kilocode-docs/public/img/browser-use/browser-use-3.png (binary)
  74. apps/kilocode-docs/public/img/browser-use/browser-use-4.png (binary)
  75. apps/kilocode-docs/public/img/browser-use/browser-use-5.png (binary)
  76. apps/kilocode-docs/public/img/browser-use/browser-use.png (binary)
  77. apps/kilocode-docs/tsconfig.tsbuildinfo (+0 -0)
  78. cli/src/constants/providers/labels.ts (+2 -1)
  79. cli/src/constants/providers/models.ts (+4 -0)
  80. cli/src/constants/providers/settings.ts (+2 -1)
  81. cli/src/constants/providers/validation.ts (+2 -1)
  82. docs/plans/2026-01-20-apertis-provider-design.md (+231 -0)
  83. jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/commands/SetContextCommands.kt (+5 -5)
  84. jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/core/ContextManager.kt (+3 -3)
  85. jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/inline/InlineCompletionConstants.kt (+2 -2)
  86. jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/inline/InlineCompletionService.kt (+1 -1)
  87. jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/inline/KiloCodeInlineCompletionProvider.kt (+4 -4)
  88. jetbrains/plugin/src/test/kotlin/ai/kilocode/jetbrains/util/ReflectUtilsStatusBarTest.kt (+4 -4)
  89. packages/agent-runtime/src/host/__tests__/VSCode.applyEdit.spec.ts (+1 -1)
  90. packages/core/src/message-utils/consolidateTokenUsage.ts (+1 -9)
  91. packages/types/src/__tests__/kilocode.test.ts (+5 -5)
  92. packages/types/src/codebase-index.ts (+3 -0)
  93. packages/types/src/embedding.ts (+2 -1)
  94. packages/types/src/global-settings.ts (+7 -2)
  95. packages/types/src/kilocode/kilocode.ts (+2 -2)
  96. packages/types/src/provider-settings.ts (+16 -0)
  97. packages/types/src/providers/apertis.ts (+28 -0)
  98. packages/types/src/providers/corethink.ts (+1 -1)
  99. packages/types/src/providers/index.ts (+4 -0)
  100. packages/types/src/vscode-extension-host.ts (+3 -0)

+ 5 - 0
.changeset/add-apertis-provider.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": minor
+---
+
+Add Apertis as a new API provider

+ 5 - 0
.changeset/clever-moles-smile.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fix Moonshot coding endpoint model selection so it includes all Moonshot models while keeping `kimi-for-coding` hidden on non-coding endpoints.

+ 5 - 0
.changeset/cool-students-battle.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Add dev container persistence for threads and settings

+ 5 - 0
.changeset/easy-stamps-shop.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+fix: treat maxReadFileLine=0 as unlimited (same as -1)

+ 5 - 0
.changeset/fifty-baboons-shine.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Updates some visual bugs in Agent Behaviour settings page

+ 5 - 0
.changeset/filter-internal-verification-tags.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Filter internal verification tags from assistant messages before displaying to users

+ 5 - 0
.changeset/fix-context-flickering.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+fix: prevent context token indicator flickering

+ 5 - 0
.changeset/fix-file-deletion-auto-approve.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fix file deletion auto-approve checkbox not being clickable

+ 5 - 0
.changeset/fix-model-no-tools-used.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fix recurring MODEL_NO_TOOLS_USED error loop by detecting text-based tool call hallucinations and instructing the model to use the native API.

+ 5 - 0
.changeset/fix-settings-search-ui.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fixed UI issues in Settings search bar: clipping of results and layout shift when expanding

+ 5 - 0
.changeset/fix-user-message-visibility.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fix user message visibility by using distinctive theme-aware colors

+ 5 - 0
.changeset/flat-eels-press.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": minor
+---
+
+Added Voyage AI embedder support

+ 14 - 0
.changeset/free-toes-hammer.md

@@ -0,0 +1,14 @@
+---
+"kilo-code": patch
+---
+
+fix(mentions): process slash commands in tool_result blocks
+
+Previously, parseKiloSlashCommands was only called for text blocks,
+causing slash commands in tool_result blocks to be ignored. This fix
+extends the processing to tool_result blocks by using the new
+processTextContent helper function that combines parseMentions and
+parseKiloSlashCommands.
+
+The regression test ensures that slash commands in tool responses are
+properly processed and transformed.

+ 5 - 0
.changeset/gentle-laws-allow.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+feat: support preserving reasoning content in OpenAI format conversion

+ 5 - 0
.changeset/heavy-cases-sing.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fix: Honor explicit 'disable' for reasoning effort

+ 5 - 0
.changeset/kill-command-fix.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fix: "Kill Command" button now reliably terminates processes on all platforms, including those running in the background.

+ 5 - 0
.changeset/lucky-lands-tickle.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+fix(nano-gpt): Add native reasoning field extraction

+ 5 - 0
.changeset/old-planes-start.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Support custom embed dimensions for Ollama provider

+ 5 - 0
.changeset/persist-deleted-api-costs.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fix: Persist total API cost after message deletion

+ 5 - 0
.changeset/shiny-zebras-jump.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+fix: prevent duplicate tool_use/tool_result IDs in conversation history (#4482)

+ 5 - 0
.changeset/strange-files-unite.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Enhance Anthropic extended thinking compatibility

+ 5 - 0
.changeset/thin-forks-draw.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Fix tool use failure for providers returning numeric tool call IDs (e.g. MiniMax) by coercing ID to string in the shared stream parser

+ 5 - 0
.changeset/weak-seas-add.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+fix: improve symlink handling in skills directory

+ 5 - 0
.changeset/young-emus-obey.md

@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Implement better formatting for low cost values

+ 87 - 0
.devcontainer/README.md

@@ -0,0 +1,87 @@
+# Kilo Code Development Container
+
+This development container provides a standardized environment for developing Kilo Code.
+
+## Persistence
+
+Kilo Code stores thread conversations, settings, and caches in the following locations:
+
+- **Threads/Conversations**: `~/.vscode-remote/data/User/globalStorage/kilocode.kilo-code/`
+- **Settings**: `~/.vscode-remote/data/User/settings/`
+- **Cache**: `~/.vscode-remote/data/User/globalStorage/kilocode.kilo-code/cache/`
+- **Vector Store**: `~/.vscode-remote/data/User/globalStorage/kilocode.kilo-code/vector/`
+
+### Volume Mounts
+
+The dev container is configured with named volumes to persist this data across container rebuilds:
+
+| Volume                    | Target                                                            | Purpose                      |
+| ------------------------- | ----------------------------------------------------------------- | ---------------------------- |
+| `kilocode-global-storage` | `/root/.vscode-remote/data/User/globalStorage/kilocode.kilo-code` | Threads, cache, vector store |
+| `kilocode-settings`       | `/root/.vscode-remote/data/User/settings`                         | VS Code settings             |
+
+### Preserving Threads Across Rebuilds
+
+When you rebuild the dev container, these volumes persist your data:
+
+1. **Before rebuilding**: Your threads are automatically preserved in the named volumes
+2. **After rebuilding**: Threads restore automatically when you reopen the container
+3. **If threads disappear**: Check that the volumes are still attached
+
+### Troubleshooting Thread Recovery
+
+If threads don't appear after a container rebuild:
+
+1. **Verify volumes exist**:
+
+    ```bash
+    docker volume ls | grep kilocode
+    ```
+
+2. **Inspect volume contents**:
+
+    ```bash
+    docker volume inspect kilocode-global-storage
+    ```
+
+3. **Reattach volumes**: If volumes were detached, rebuild with:
+
+    ```bash
+    devcontainer rebuild
+    ```
+
+4. **Manual recovery**: If volumes are lost, threads cannot be recovered. Start new conversations and consider backing up important threads.
+
+### Backing Up Threads
+
+To back up your threads:
+
+1. Copy the global storage directory:
+
+    ```bash
+    cp -r ~/.vscode-remote/data/User/globalStorage/kilocode.kilo-code ~/kilocode-backup
+    ```
+
+2. Store the backup outside the dev container environment.
+
+### Custom Storage Path
+
+If you need threads stored in a different location, configure a custom storage path in VS Code settings:
+
+1. Open VS Code settings (`Ctrl+,` or `Cmd+,`)
+2. Search for "Kilo Code: Custom Storage Path"
+3. Enter an absolute path that's mounted into the container
+
+Example `devcontainer.json` mount for custom path:
+
+```json
+"mounts": [
+  {
+    "source": "/path/on/host/kilocode-data",
+    "target": "/home/vscode/kilocode-data",
+    "type": "bind"
+  }
+]
+```
+
+Then set the custom storage path to `/home/vscode/kilocode-data`.
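The README above pins down where threads live inside the container. As a quick post-rebuild sanity check, a small script can confirm the persisted storage is actually mounted and non-empty. A minimal sketch, assuming the `/root/.vscode-remote/...` path listed above and a `tasks/` subdirectory for threads (as in the persistence doc added in this commit); run it with Node inside the container:

```typescript
import { existsSync, readdirSync } from "node:fs"
import { join } from "node:path"

// Path taken from the README above; adjust if your remoteUser is not root.
const globalStorage = "/root/.vscode-remote/data/User/globalStorage/kilocode.kilo-code"

if (!existsSync(globalStorage)) {
	console.error(`Global storage not found at ${globalStorage} - is the named volume mounted?`)
} else {
	const tasksDir = join(globalStorage, "tasks")
	const taskCount = existsSync(tasksDir) ? readdirSync(tasksDir).length : 0
	console.log(`Global storage is mounted; ${taskCount} persisted task folder(s) found.`)
}
```

If the count is zero right after a rebuild that previously had threads, check `docker volume ls | grep kilocode` as described in the troubleshooting steps.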

+ 11 - 1
.devcontainer/devcontainer.json

@@ -54,5 +54,15 @@
 	"remoteUser": "root",
 	"containerUser": "root",
 
-	"mounts": ["source=${localWorkspaceFolder}/.git,target=/workspace/.git,type=bind,consistency=cached"]
+	// Mounts for persisting Kilo Code state across container rebuilds
+	// These mounts preserve threads, settings, and caches
+	"mounts": [
+		"source=${localWorkspaceFolder}/.git,target=/workspace/.git,type=bind,consistency=cached",
+		"source=kilocode-global-storage,target=/root/.vscode-remote/data/User/globalStorage/kilocode.kilo-code,type=volume",
+		"source=kilocode-settings,target=/root/.vscode-remote/data/User,type=volume"
+	],
+
+	// Configure custom properties for workspace storage
+	"workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind",
+	"workspaceFolder": "/workspace"
 }

+ 1 - 1
.github/copilot-instructions.md

@@ -65,5 +65,5 @@ If you're creating a completely new file that doesn't exist in Roo, add this com
 - all the following folders are kilocode-specific and need no marking with comments:
     - jetbrains/
     - cli/
-    - src/services/ghost/
+    - src/services/autocomplete/
     - src/services/continuedev/

+ 1 - 1
AGENTS.md

@@ -180,7 +180,7 @@ Code in these directories is Kilo Code-specific and doesn't need markers:
 - `jetbrains/` - JetBrains plugin
 - `agent-manager/` directories
 - Any path containing `kilocode` in filename or directory name
-- `src/services/ghost/` - Ghost service
+- `src/services/autocomplete/` - Autocomplete service
 
 ### When markers ARE needed
 

+ 18 - 6
apps/kilocode-docs/components/CodeBlock.tsx

@@ -58,10 +58,10 @@ export function CodeBlock({ children, "data-language": language }) {
 						top: 8px;
 						right: 8px;
 						padding: 6px 8px;
-						background: #1e1e1e;
-						border: 1px solid rgba(255, 255, 255, 0.2);
+						background: rgba(0, 0, 0, 0.05);
+						border: 1px solid rgba(0, 0, 0, 0.1);
 						border-radius: 4px;
-						color: rgba(255, 255, 255, 0.7);
+						color: rgba(0, 0, 0, 0.4);
 						cursor: pointer;
 						display: flex;
 						align-items: center;
@@ -71,9 +71,21 @@ export function CodeBlock({ children, "data-language": language }) {
 					}
 
 					.copy-button:hover {
-						background: #2d2d2d;
-						color: rgba(255, 255, 255, 1);
-						border-color: rgba(255, 255, 255, 0.3);
+						background: rgba(0, 0, 0, 0.1);
+						color: rgba(0, 0, 0, 0.6);
+						border-color: rgba(0, 0, 0, 0.2);
+					}
+
+					:global(.dark) .copy-button {
+						background: rgba(255, 255, 255, 0.05);
+						border-color: rgba(255, 255, 255, 0.1);
+						color: rgba(255, 255, 255, 0.4);
+					}
+
+					:global(.dark) .copy-button:hover {
+						background: rgba(255, 255, 255, 0.1);
+						color: rgba(255, 255, 255, 0.7);
+						border-color: rgba(255, 255, 255, 0.2);
 					}
 
 					.copy-button:active {

+ 2 - 0
apps/kilocode-docs/components/SideNav.tsx

@@ -15,6 +15,7 @@ const sectionNavItems: SectionNav = {
 	"deploy-secure": Nav.DeploySecureNav,
 	contributing: Nav.ContributingNav,
 	"ai-providers": Nav.AiProvidersNav,
+	gateway: Nav.GatewayNav,
 }
 
 // Main nav items with their section keys
@@ -26,6 +27,7 @@ const mainNavItems = [
 	{ label: "Collaborate", href: "/collaborate", sectionKey: "collaborate" },
 	{ label: "Automate", href: "/automate", sectionKey: "automate" },
 	{ label: "Deploy & Secure", href: "/deploy-secure", sectionKey: "deploy-secure" },
+	{ label: "AI Gateway", href: "/gateway", sectionKey: "gateway" },
 	{ label: "Contributing", href: "/contributing", sectionKey: "contributing" },
 ]
 

+ 40 - 0
apps/kilocode-docs/components/TopNav.tsx

@@ -30,6 +30,7 @@ const mainNavItems: NavItem[] = [
 	{ label: "Collaborate", href: "/collaborate" },
 	{ label: "Automate", href: "/automate" },
 	{ label: "Deploy & Secure", href: "/deploy-secure" },
+	{ label: "Kilo Gateway", href: "/gateway" },
 	{ label: "Contributing", href: "/contributing" },
 ]
 
@@ -331,6 +332,16 @@ export function TopNav({ onMobileMenuToggle, isMobileMenuOpen = false, showMobil
 				</div>
 			</div>
 
+			{/* Announcement banner */}
+			<div className="announcement-banner">
+				<p>
+					We're{" "}
+					<Link href="https://blog.kilo.ai/p/kilo-cli">replatforming our extensions on the new Kilo CLI</Link>
+					. Contribute to the new CLI and pre-release extensions at{" "}
+					<Link href="https://github.com/Kilo-Org/kilo">Kilo-Org/kilo</Link>.
+				</p>
+			</div>
+
 			<style jsx>{`
 				.top-header {
 					position: fixed;
@@ -517,6 +528,35 @@ export function TopNav({ onMobileMenuToggle, isMobileMenuOpen = false, showMobil
 						gap: 0.5rem;
 					}
 				}
+
+				.announcement-banner {
+					background: #1a1a18;
+					color: #a3a3a2;
+					padding: 0.5rem 1rem;
+					text-align: center;
+					font-size: 0.875rem;
+					border-bottom: 1px solid #3f3f3f;
+				}
+
+				.announcement-banner p {
+					margin: 0;
+				}
+
+				.announcement-banner :global(a) {
+					color: #f8f674;
+					text-decoration: underline;
+					text-underline-offset: 2px;
+				}
+
+				.announcement-banner :global(a:hover) {
+					color: #ffff8d;
+				}
+
+				@media (max-width: 768px) {
+					.announcement-banner {
+						font-size: 0.8rem;
+					}
+				}
 			`}</style>
 		</header>
 	)

+ 107 - 0
apps/kilocode-docs/docs/getting-started/devcontainer-persistence.md

@@ -0,0 +1,107 @@
+---
+title: Dev Container Persistence
+description: How to preserve Kilo Code threads and settings in dev containers
+---
+
+# Dev Container Persistence
+
+When using Kilo Code in development containers (VS Code Dev Containers, GitHub Codespaces, etc.), your threads and settings can persist across container rebuilds by properly configuring volume mounts.
+
+## Why Persistence Matters
+
+Dev containers are ephemeral by default - when you rebuild the container, all data is lost unless explicitly persisted. Kilo Code stores important data including:
+
+- **Conversation threads**: Your ongoing discussions with Kilo Code
+- **Settings**: API configurations, custom modes, and preferences
+- **Cache**: Vector store for code indexing and browser tool data
+
+## Required Configuration
+
+The Kilo Code dev container is pre-configured with named volumes to preserve your data. If you're setting up your own dev container, add these mounts to your `devcontainer.json`:
+
+```json
+{
+	"name": "Your Project",
+	"image": "mcr.microsoft.com/devcontainers/base:ubuntu",
+	"mounts": [
+		{
+			"source": "kilocode-global-storage",
+			"target": "/root/.vscode-remote/data/User/globalStorage/kilocode.kilo-code",
+			"type": "volume"
+		},
+		{
+			"source": "kilocode-settings",
+			"target": "/root/.vscode-remote/data/User/settings",
+			"type": "volume"
+		}
+	]
+}
+```
+
+## Storage Locations
+
+| Data Type    | Container Path                                                            |
+| ------------ | ------------------------------------------------------------------------- |
+| Threads      | `/root/.vscode-remote/data/User/globalStorage/kilocode.kilo-code/tasks/`  |
+| Settings     | `/root/.vscode-remote/data/User/settings/`                                |
+| Cache        | `/root/.vscode-remote/data/User/globalStorage/kilocode.kilo-code/cache/`  |
+| Vector Store | `/root/.vscode-remote/data/User/globalStorage/kilocode.kilo-code/vector/` |
+
+## Troubleshooting
+
+### Threads Don't Appear After Rebuild
+
+1. **Check volume attachment**: Ensure the dev container has the volumes attached
+2. **Verify volume contents**: Check that the volume contains your data
+3. **Rebuild with volumes**: Use `devcontainer rebuild` instead of `devcontainer up --rebuild`
+
+### Volumes Lost
+
+If named volumes are accidentally deleted:
+
+1. Threads cannot be automatically recovered
+2. Start new conversations with Kilo Code
+3. Consider implementing a backup strategy for important threads
+
+### Manual Backup
+
+To manually back up your threads:
+
+```bash
+# Copy thread data from the container
+docker cp <container-name>:/root/.vscode-remote/data/User/globalStorage/kilocode.kilo-code ./kilocode-backup
+```
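The `docker cp` command above runs from the host. The same backup can be taken from inside the container with plain Node, which avoids needing the Docker CLI. A minimal sketch, assuming the container paths from the Storage Locations table and Node 16.7+ for `fs.cpSync`; the destination folder is just an example:

```typescript
import { cpSync, existsSync, mkdirSync } from "node:fs"
import { join } from "node:path"

const source = "/root/.vscode-remote/data/User/globalStorage/kilocode.kilo-code"
// Example destination: a folder inside the bind-mounted workspace so the copy survives the container.
const destination = join("/workspace", ".kilocode-backup", new Date().toISOString().slice(0, 10))

if (!existsSync(source)) {
	throw new Error(`Nothing to back up: ${source} does not exist`)
}
mkdirSync(destination, { recursive: true })
cpSync(source, destination, { recursive: true })
console.log(`Copied Kilo Code storage to ${destination}`)
```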
+
+## Custom Storage Path
+
+For advanced configurations, you can specify a custom storage path:
+
+1. Add a bind mount to your `devcontainer.json`:
+
+```json
+"mounts": [
+  {
+    "source": "${localWorkspaceFolder}/.kilocode-data",
+    "target": "/home/vscode/kilocode-data",
+    "type": "bind"
+  }
+]
+```
+
+2. Set the custom storage path in VS Code settings:
+    - Open Settings (`Ctrl+,` or `Cmd+,`)
+    - Search for "Kilo Code: Custom Storage Path"
+    - Enter: `/home/vscode/kilocode-data`
+
+## Best Practices
+
+1. **Use named volumes** for automatic persistence
+2. **Back up important threads** before major container changes
+3. **Avoid deleting volumes** during cleanup
+4. **Test persistence** by rebuilding and verifying threads remain
+
+## GitHub Codespaces
+
+GitHub Codespaces automatically persists your VS Code settings and extensions. For Kilo Code threads, the pre-configured dev container includes the necessary volume mounts.
+
+If using a custom Codespace configuration, ensure the mounts from the Required Configuration section are included.

+ 5 - 0
apps/kilocode-docs/lib/nav/customize.ts

@@ -33,10 +33,15 @@ export const CustomizeNav: NavSection[] = [
 				href: "/customize/context/codebase-indexing",
 				children: "Codebase Indexing",
 			},
+			{
+				href: "/customize/context/context-condensing",
+				children: "Context Condensing",
+			},
 			{
 				href: "/customize/context/kilocodeignore",
 				children: ".kilocodeignore",
 			},
+			{ href: "/customize/context/memory-bank", children: "Memory Bank" },
 			{
 				href: "/customize/context/large-projects",
 				children: "Large Projects",

+ 32 - 0
apps/kilocode-docs/lib/nav/gateway.ts

@@ -0,0 +1,32 @@
+import { NavSection } from "../types"
+
+export const GatewayNav: NavSection[] = [
+	{
+		title: "Introduction",
+		links: [
+			{ href: "/gateway", children: "Overview" },
+			{ href: "/gateway/quickstart", children: "Quickstart" },
+		],
+	},
+	{
+		title: "Configuration",
+		links: [
+			{ href: "/gateway/authentication", children: "Authentication" },
+			{ href: "/gateway/models-and-providers", children: "Models & Providers" },
+		],
+	},
+	{
+		title: "Features",
+		links: [
+			{ href: "/gateway/streaming", children: "Streaming" },
+			{ href: "/gateway/usage-and-billing", children: "Usage & Billing" },
+		],
+	},
+	{
+		title: "Reference",
+		links: [
+			{ href: "/gateway/api-reference", children: "API Reference" },
+			{ href: "/gateway/sdks-and-frameworks", children: "SDKs & Frameworks" },
+		],
+	},
+]

+ 2 - 0
apps/kilocode-docs/lib/nav/index.ts

@@ -5,6 +5,7 @@ import { CollaborateNav } from "./collaborate"
 import { ContributingNav } from "./contributing"
 import { CustomizeNav } from "./customize"
 import { DeploySecureNav } from "./deploy-secure"
+import { GatewayNav } from "./gateway"
 import { GettingStartedNav } from "./getting-started"
 import { ToolsNav } from "./tools"
 
@@ -17,5 +18,6 @@ export const Nav = {
 	DeploySecureNav,
 	ContributingNav,
 	AiProvidersNav,
+	GatewayNav,
 	ToolsNav,
 }

+ 9 - 1
apps/kilocode-docs/pages/_app.tsx

@@ -6,8 +6,16 @@ import posthog from "posthog-js"
 import { CopyPageButton, SideNav, TableOfContents, TopNav } from "../components"
 
 import "prismjs"
-// Import other Prism themes here
 import "prismjs/components/prism-bash.min"
+import "prismjs/components/prism-javascript.min"
+import "prismjs/components/prism-typescript.min"
+import "prismjs/components/prism-python.min"
+import "prismjs/components/prism-json.min"
+import "prismjs/components/prism-yaml.min"
+import "prismjs/components/prism-go.min"
+import "prismjs/components/prism-ruby.min"
+import "prismjs/components/prism-markdown.min"
+import "prismjs/components/prism-powershell.min"
 import "prismjs/themes/prism.css"
 
 import "@docsearch/css"

+ 6 - 23
apps/kilocode-docs/pages/ai-providers/kilocode.md

@@ -35,11 +35,13 @@ Kilo Code offers a streamlined registration that connects you directly to fronti
 
 Kilo Code provides access to the latest frontier coding models through its built-in provider. The specific models available are automatically updated and managed by the Kilo Code service, ensuring you always have access to the most capable models for coding tasks.
 
-## BYOK support
+## Kilo Gateway integration
 
-You can also bring your own key (BYOK) to the Kilo Gateway. We currently support the following providers: Anthropic, OpenAI, Google AI Studio, MiniMax, Mistral AI, xAI, and Z.ai.
+Kilo Code routes requests through the Kilo Gateway for model access, usage tracking, and organization controls. For BYOK setup, provider routing, and full model availability, use the Gateway docs as the source of truth:
 
-You can access the BYOK section of the Kilo Gateway [here](https://app.kilo.ai/byok).
+- [Kilo Gateway overview](/docs/gateway)
+- [Models & Providers](/docs/gateway/models-and-providers)
+- [Authentication & BYOK](/docs/gateway/authentication)
 
 ## Configuration in Kilo Code
 
@@ -49,25 +51,6 @@ Once you've completed the registration process, Kilo Code is automatically confi
 2. **No API Key Management:** Your authentication is handled seamlessly through the registration process
 3. **Model Selection:** Access to frontier models is provided automatically through your Kilo Code account
 
-### Provider Routing
-
-Kilo Code can route to many different inference providers. For personal accounts, provider routing behavior can be controlled in the API Provider settings under Provider Routing.
-
-#### Provider Sorting
-
-- Default provider sorting: at time of writing equivalent to prefer providers with lower price
-- Prefer providers with lower price
-- Prefer providers with higher throughput (i.e. more tokens per seconds)
-- Prefer providers with lower latency (i.e. shorter time to first token)
-- A specific provider preference can also be chosen.
-
-#### Data Policy
-
-- Allow prompt training (free only): providers that may train on your prompts or completions are only allowed for free models.
-- Allow prompt training: providers that may train on your prompts or completions are allowed.
-- Deny prompt training: providers that may train on your prompts or completions are not allowed.
-- Zero data retention: only providers with a strict zero data retention policy are allowed. This option is not recommended, as it will disable many popular providers, such as Anthropic and OpenAI.
-
 ## Connected Accounts
 
 With the Kilo Code provider, if you sign up with Google you can also connect other sign in accounts - like GitHub - by:
@@ -77,7 +60,7 @@ With the Kilo Code provider, if you sign up with Google you can also connect oth
 3. Under "Link a New account" select the type of account to link
 4. Complete the OAuth authorization, and you'll see your connected accounts!
 
-<img src="/docs/img/kilo-provider/connected-accounts.png" alt="Connect account screen" width="600" />
+<!-- <img src="/docs/img/kilo-provider/connected-accounts.png" alt="Connect account screen" width="600" /> -->
 
 ## Tips and Notes
 

+ 3 - 3
apps/kilocode-docs/pages/ai-providers/ollama.md

@@ -8,8 +8,8 @@ Kilo Code supports running models locally using Ollama. This provides privacy, o
 
 **Website:** [https://ollama.com/](https://ollama.com/)
 
-<img src="/docs/img/providers/ollama-devstral-snake.png" alt="Vibe coding a Snake game using devstral" width="500" />
-*Vibe coding a Snake game using devstral*
+<!-- <image src="/docs/img/providers/ollama-devstral-snake.png" alt="Vibe coding a Snake game using devstral" width="500" />
+*Vibe coding a Snake game using devstral* -->
 
 ## Managing Expectations
 
@@ -72,7 +72,7 @@ By default, API requests time out after 10 minutes. Local models can be slow, if
 
 ### Configure Kilo Code
 
-- Open the Kilo Code sidebar (<img src="/docs/img/kilo-v1.svg" width="12" /> icon).
+- Open the Kilo Code panel ({% kiloCodeIcon size="1em" /%}).
 - Click the Settings gear icon ({% codicon name="gear" /%}).
 - Select "Ollama" as the API Provider.
 - Select the model configured in the previous step.

+ 1 - 1
apps/kilocode-docs/pages/ai-providers/v0.md

@@ -24,7 +24,7 @@ Setting up v0 in Kilo Code is straightforward:
     - Set the **Model ID**: `v0-1.0-md`
     - Click **Verify** to confirm the connection
 
-<img src="/docs/img/providers/v0-setup.png" alt="v0 configuration in Kilo Code settings" width="600" />
+<!-- <img src="/docs/img/providers/v0-setup.png" alt="v0 configuration in Kilo Code settings" width="600" /> -->
 
 ## Troubleshooting
 

+ 2 - 2
apps/kilocode-docs/pages/ai-providers/virtual-quota-fallback.md

@@ -26,7 +26,7 @@ Before configuring this provider, you must have at least one other API provider
 1.  **Open Kilo Code Settings:** Click the gear icon ({% codicon name="gear" /%}) in the Kilo Code panel.
 2.  **Select Provider:** Choose "Virtual Quota Fallback" from the "API Provider" dropdown. This will open its dedicated configuration panel.
 
-<img src="/docs/img/providers/virtualQuotaSelectDropdown.png" alt="virtuaQuotaFallback dropdown selection in Kilo Code settings" width="600" />
+<!-- <img src="/docs/img/providers/virtualQuotaSelectDropdown.png" alt="virtuaQuotaFallback dropdown selection in Kilo Code settings" width="600" /> -->
 
 3.  **Add a Provider Profile:**
 
@@ -46,7 +46,7 @@ Before configuring this provider, you must have at least one other API provider
 
 6.  **Add More Providers:** Repeat steps 3-5 to build your complete fallback chain. You can add as many profiles as you have configured.
 
-<img src="/docs/img/providers/virtualQuotaFullConfig.png" alt="virtuaQuotaFallback configuration in Kilo Code settings" width="600" />
+<!-- <img src="/docs/img/providers/virtualQuotaFullConfig.png" alt="virtuaQuotaFallback configuration in Kilo Code settings" width="600" /> -->
 
 ## Usage Monitoring
 

+ 1 - 1
apps/kilocode-docs/pages/automate/extending/shell-integration.md

@@ -31,7 +31,7 @@ Shell integration is built into Kilo Code and works automatically in most cases.
 
 ## Terminal Integration Settings
 
-Kilo Code provides several settings to fine-tune shell integration. Access these in the Kilo Code sidebar under Settings → Terminal.
+Kilo Code provides several settings to fine-tune shell integration. Access these in the Kilo Code panel under Settings → Terminal.
 
 ### Basic Settings
 

+ 0 - 12
apps/kilocode-docs/pages/code-with-ai/features/browser-use.md

@@ -37,8 +37,6 @@ For example:
 - `Can you check if my website at https://kilocode.ai is displaying correctly?`
 - `Browse http://localhost:3000, scroll down to the bottom of the page and check if the footer information is displaying correctly.`
 
-{% image src="/docs/img/browser-use/KiloCodeBrowser.png" alt="Browser use example" width="300" /%}
-
 ## How Browser Actions Work
 
 The browser_action tool controls a browser instance that returns screenshots and console logs after each action, allowing you to see the results of interactions.
@@ -78,8 +76,6 @@ To change Browser / Computer Use settings in Kilo:
 
 1. Open Settings by clicking the gear icon {% codicon name="gear" /%} → Browser / Computer Use
 
-    {% image src="/docs/img/browser-use/browser-use.png" alt="Browser settings menu" width="600" /%}
-
 ### Enable/Disable Browser Use
 
 **Purpose**: Master toggle that enables Kilo to interact with websites using a Puppeteer-controlled browser.
@@ -88,8 +84,6 @@ To change this setting:
 
 1. Check or uncheck the "Enable browser tool" checkbox within your Browser / Computer Use settings
 
-    {% image src="/docs/img/browser-use/browser-use-2.png" alt="Enable browser tool setting" width="300" /%}
-
 ### Viewport Size
 
 **Purpose**: Determines the resolution of the browser session Kilo Code uses.
@@ -106,8 +100,6 @@ To change this setting:
     - Mobile (360x640)
 3. Select your desired resolution.
 
-    {% image src="/docs/img/browser-use/browser-use-3.png" alt="Viewport size setting" width="600" /%}
-
 ### Screenshot Quality
 
 **Purpose**: Controls the WebP compression quality of browser screenshots.
@@ -124,8 +116,6 @@ To change this setting:
     - 60-70%: Balanced for most general browsing
     - 80%+: Use when fine visual details are critical
 
-    {% image src="/docs/img/browser-use/browser-use-4.png" alt="Screenshot quality setting" width="600" /%}
-
 ### Remote Browser Connection
 
 **Purpose**: Connect Kilo to an existing Chrome browser instead of using the built-in browser.
@@ -144,8 +134,6 @@ To enable this feature:
 1. Check the "Use remote browser connection" box in Browser / Computer Use settings
 2. Click "Test Connection" to verify
 
-    {% image src="/docs/img/browser-use/browser-use-5.png" alt="Remote browser connection setting" width="600" /%}
-
 #### Common Use Cases
 
 - **DevContainers**: Connect from containerized VS Code to host Chrome browser

+ 74 - 0
apps/kilocode-docs/pages/code-with-ai/platforms/cli.md

@@ -276,6 +276,80 @@ Any directory allowed here inherits the same defaults as the current workspace.
 }
 ```
 
+**Aliases:** `/t` and `/history` can be used as shorthand for `/tasks`
+
+## Configuration
+
+The Kilo CLI is a fork of [OpenCode](https://opencode.ai) and supports the same configuration options. For comprehensive configuration documentation, see the [OpenCode Config documentation](https://opencode.ai/docs/config).
+
+### Config File Location
+
+| Scope      | Path                                |
+| ---------- | ----------------------------------- |
+| **Global** | `~/.config/kilocode/kilocode.json`  |
+| **Project**| `./kilocode.json` (in project root) |
+
+Project-level configuration takes precedence over global settings.
+
+### Key Configuration Options
+
+```json
+{
+  "$schema": "https://opencode.ai/config.json",
+  "model": "anthropic/claude-sonnet-4-20250514",
+  "provider": {
+    "anthropic": {
+      "options": {
+        "apiKey": "{env:ANTHROPIC_API_KEY}"
+      }
+    }
+  }
+}
+```
+
+Common configuration options include:
+
+- **`model`** - Default model to use
+- **`provider`** - Provider-specific settings (API keys, base URLs, custom models)
+- **`mcp`** - MCP server configuration
+- **`permission`** - Tool permission settings (`allow` or `ask`)
+- **`instructions`** - Paths to instruction files (e.g., `["CONTRIBUTING.md", ".cursor/rules/*.md"]`)
+- **`formatter`** - Code formatter configuration
+- **`disabled_providers`** / **`enabled_providers`** - Control which providers are available
+
+### Environment Variables
+
+Use `{env:VARIABLE_NAME}` syntax in config files to reference environment variables:
+
+```json
+{
+  "provider": {
+    "openai": {
+      "options": {
+        "apiKey": "{env:OPENAI_API_KEY}"
+      }
+    }
+  }
+}
+```
+
+For full details on all configuration options including compaction, file watchers, plugins, and experimental features, see the [OpenCode Config documentation](https://opencode.ai/docs/config).
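To make the `{env:VARIABLE_NAME}` behavior concrete, here is an illustrative sketch of the substitution rule described above. It is not the CLI's actual config loader, just a demonstration of how such placeholders resolve against the environment:

```typescript
// Illustrative only: resolves {env:NAME} placeholders as described in the docs above.
function resolveEnvPlaceholders(text: string, env: NodeJS.ProcessEnv = process.env): string {
	return text.replace(/\{env:([A-Za-z0-9_]+)\}/g, (match, name: string) => env[name] ?? match)
}

const rawConfig = `{
  "provider": {
    "openai": { "options": { "apiKey": "{env:OPENAI_API_KEY}" } }
  }
}`

const resolved = JSON.parse(resolveEnvPlaceholders(rawConfig))
console.log(resolved.provider.openai.options.apiKey) // the value of OPENAI_API_KEY, or the placeholder if unset
```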
+
+## Config reference for providers
+
+Kilo gives you the ability to bring your own keys for a number of model providers and AI gateways, like OpenRouter and Vercel AI Gateway. Each provider has unique configuration options and some let you set environment variables.
+
+You can reference the [Provider Configuration Guide](https://github.com/Kilo-Org/kilocode/blob/main/cli/docs/PROVIDER_CONFIGURATION.md) for examples if you want to edit .config files manually. You can also run:
+
+`kilocode config`
+
+to complete configuration with an interactive workflow on the command line.
+
+{% callout type="tip" %}
+You can also use the `/config` slash command during an interactive session, which is equivalent to running `kilocode config`.
+{% /callout %}
+
+## Parallel mode
 ### Available Permissions
 
 Permissions are keyed by tool name, plus a couple of safety guards:

+ 2 - 2
apps/kilocode-docs/pages/collaborate/adoption-dashboard/overview.md

@@ -7,7 +7,7 @@ description: "AI Adoption Dashboard overview"
 
 The AI Adoption Dashboard helps engineering leaders understand how deeply and consistently their teams are using AI across development workflows. It provides a single **AI Adoption Score** (0–100) that quantifies organizational AI maturity, plus detailed breakdowns by dimension.
 
-<img src="/docs/img/ai-adoption/adoption-dashboard.png" alt="Monthly view of the AI Adoption Dashboard"  />
+<!-- <img src="/docs/img/ai-adoption/adoption-dashboard.png" alt="Monthly view of the AI Adoption Dashboard"  /> -->
 
 ## Who Is It For?
 
@@ -33,7 +33,7 @@ The dashboard prominently displays your current AI Adoption Score as a percentag
 
 ### Timeline Visualization
 
-<img src="/docs/img/ai-adoption/stacked-chart.png" alt="Monthly view of the AI Adoption Dashboard" width="300"  />
+<!-- <img src="/docs/img/ai-adoption/stacked-chart.png" alt="Monthly view of the AI Adoption Dashboard" width="300"  /> -->
 
 A stacked bar chart shows your daily adoption scores over time. The chart uses three colors representing the score's dimensions:
 

+ 3 - 3
apps/kilocode-docs/pages/collaborate/adoption-dashboard/understanding-your-score.md

@@ -17,7 +17,7 @@ Your total score is calculated from three weighted dimensions:
 
 This dimension measures the regularity of AI tool usage across your team, normalized per-user and blended across the organization.
 
-<img src="/docs/img/ai-adoption/frequency.png" alt="Frequency view of the AI Adoption Dashboard" width="600" />
+<!-- <img src="/docs/img/ai-adoption/frequency.png" alt="Frequency view of the AI Adoption Dashboard" width="600" /> -->
 
 **Signals measured:**
 
@@ -34,7 +34,7 @@ This dimension measures the regularity of AI tool usage across your team, normal
 
 This dimension captures trust and dependency—whether AI is a side tool or an integral part of how your team ships code.
 
-<img src="/docs/img/ai-adoption/depth.png" alt="Depth view of the AI Adoption Dashboard" width="600" />
+<!-- <img src="/docs/img/ai-adoption/depth.png" alt="Depth view of the AI Adoption Dashboard" width="600" /> -->
 
 **Signals measured:**
 
@@ -52,7 +52,7 @@ This dimension captures trust and dependency—whether AI is a side tool or an i
 
 This dimension captures reach and rollout—how many team members are using AI and how consistently throughout the week.
 
-<img src="/docs/img/ai-adoption/coverage.png" alt="Coverage view of the AI Adoption Dashboard" width="600" />
+<!-- <img src="/docs/img/ai-adoption/coverage.png" alt="Coverage view of the AI Adoption Dashboard" width="600" /> -->
 
 **Signals measured:**
 

+ 1 - 1
apps/kilocode-docs/pages/collaborate/enterprise/audit-logs.md

@@ -16,7 +16,7 @@ Only **Owners** can view and filter through logs.
 Go to **Enterprise Dashboard → Audit Logs** to view a searchable history of all organization events.
 Use filters to narrow down results by action, user, or date range.
 
-<img width="900" height="551" alt="Audit-log-dashboard" src="https://github.com/user-attachments/assets/41fcf43f-4a47-4f47-a3d9-02d20a6427a6" />
+{% image width="900" height="551" alt="Audit-log-dashboard" src="https://github.com/user-attachments/assets/41fcf43f-4a47-4f47-a3d9-02d20a6427a6" /%}
 
 ## Filters
 

+ 1 - 1
apps/kilocode-docs/pages/collaborate/enterprise/model-access-controls.md

@@ -14,7 +14,7 @@ Admins can **enable or disable** specific models, filter by attributes, and enfo
 2. Toggle the checkbox beside any model or provider to enable or disable access.
 3. Click "Save Changes" to apply
 
-<img width="800" alt="Model-Access-Select" src="https://github.com/user-attachments/assets/af71353d-facc-4d4b-a0cd-c7f2cea73e97" />
+{% image width="800" alt="Model-Access-Select" src="https://github.com/user-attachments/assets/af71353d-facc-4d4b-a0cd-c7f2cea73e97" /%}
 
 ## Filtering Models
 

+ 1 - 1
apps/kilocode-docs/pages/collaborate/enterprise/sso.md

@@ -30,7 +30,7 @@ You’ll need:
 ### 1. Open [Organization](https://app.kilo.ai/organizations) Dashboard
 
 Find the Single Sign-On (SSO) Configuration panel, and click "Set up SSO":
-<img width="822" height="288" alt="Set-up-SSO screen" src="https://github.com/user-attachments/assets/b6ca5f83-4533-4d41-bcb1-0038b645c030" />
+{% image width="822" height="288" alt="Set-up-SSO screen" src="https://github.com/user-attachments/assets/b6ca5f83-4533-4d41-bcb1-0038b645c030" /%}
 
 ### 2. Submit the SSO Request Form
 

+ 1 - 3
apps/kilocode-docs/pages/collaborate/teams/dashboard.md

@@ -7,9 +7,7 @@ description: "Manage your team from the Kilo Code dashboard"
 
 The Kilo seats dashboard is the first screen that comes up when you visit [the Kilo Code web app](https://app.kilo.ai). It provides complete visibility into your team's AI usage, costs, and management.
 
-<center>
-<img src="/docs/img/teams/dashboard.png" alt="Invite your team members" width="700" />
-</center>
+{% image src="/docs/img/teams/dashboard.png" alt="Invite your team members" width="700" /%}
 
 ## Dashboard Navigation
 

+ 4 - 4
apps/kilocode-docs/pages/collaborate/teams/getting-started.md

@@ -23,7 +23,7 @@ seats for Kilo in the Teams or Enterprise subscription brings transparent AI cod
     - Note: We recommend starting with your GitHub account rather than a personal Google account, but we can change it later.
 3. Click **Organizations** in the left sidebar and then **Create New Organization**
 
-<img src="/docs/img/teams/create-team.png" alt="Create new organization button" width="600" />
+{% image src="/docs/img/teams/create-team.png" alt="Create new organization button" width="600" /%}
 
 ### Step 2: Subscribe to Teams or Enterprise
 
@@ -31,7 +31,7 @@ seats for Kilo in the Teams or Enterprise subscription brings transparent AI cod
 2. Select your initial seat count and tier (Teams or Enterprise)
 3. Complete checkout process
 
-<img src="/docs/img/teams/subscribe.png" alt="Create your organization and subscribe" width="600" />
+{% image src="/docs/img/teams/subscribe.png" alt="Create your organization and subscribe" width="600" /%}
 
 ### Step 3: Invite Your Team
 
@@ -43,7 +43,7 @@ seats for Kilo in the Teams or Enterprise subscription brings transparent AI cod
     - **Admin** - Team management without billing
     - **Member** - Standard usage access
 
-<img src="/docs/img/teams/invite-member.png" alt="Invite your team members" width="600" />
+{% image src="/docs/img/teams/invite-member.png" alt="Invite your team members" width="600" /%}
 
 ### Step 4: Team Members Install Extension
 
@@ -61,7 +61,7 @@ Team members receive invitation emails with these steps:
 - **Transparent billing** - see exactly what each request costs
 - **Team analytics** - understand usage patterns and optimization opportunities
 
-<img src="/docs/img/teams/usage-details.png" alt="Team usage details page" width="600" />
+{% image src="/docs/img/teams/usage-details.png" alt="Team usage details page" width="600" /%}
 
 ## First Steps for Your Team
 

+ 1 - 1
apps/kilocode-docs/pages/contributing/development-environment.md

@@ -100,7 +100,7 @@ In development mode (NODE_ENV="development"), changing the core code will trigge
 > 2. Kill any npm tasks running in the background (see screenshot below)
 > 3. Start debugging again
 
-<img width="600" alt="Stopping background tasks" src="https://github.com/user-attachments/assets/466fb76e-664d-4066-a3f2-0df4d57dd9a4" />
+{% image src="https://github.com/user-attachments/assets/466fb76e-664d-4066-a3f2-0df4d57dd9a4" alt="Stopping background tasks" width="600" /%}
 
 ### Installing the Built Extension
 

+ 6 - 0
apps/kilocode-docs/pages/customize/context/codebase-indexing.md

@@ -193,6 +193,12 @@ The indexer automatically excludes:
 - **Dependencies**: Requires external services (embedding provider + Qdrant)
 - **Language Coverage**: Limited to Tree-sitter supported languages for optimal parsing
 
+## Troubleshooting
+
+### Embeddings fail or indexing stalls (llama.cpp / Ollama)
+
+If your local embedding server is based on llama.cpp (including Ollama), indexing can fail with errors about `n_ubatch` or `GGML_ASSERT`. Ensure both batch size (`-b`) and micro-batch size (`-ub`) are set to the same value for embedding models, then restart the server. For Ollama, configure `num_batch` in your Modelfile or request options to match the same effective value.
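For the Ollama path, the batch size can also be supplied per request rather than only in a Modelfile. A hedged sketch against a local Ollama server: the `/api/embeddings` endpoint and the `nomic-embed-text` model are common defaults, and passing `num_batch` through `options` is an assumption based on the guidance above, not something Kilo Code does for you.

```typescript
// Illustrative request to a local Ollama server; num_batch here mirrors the
// llama.cpp batch-size advice above and may need tuning for your model.
async function embed(text: string): Promise<number[]> {
	const response = await fetch("http://localhost:11434/api/embeddings", {
		method: "POST",
		headers: { "Content-Type": "application/json" },
		body: JSON.stringify({
			model: "nomic-embed-text",
			prompt: text,
			options: { num_batch: 512 },
		}),
	})
	if (!response.ok) throw new Error(`Ollama returned ${response.status}`)
	const data = (await response.json()) as { embedding: number[] }
	return data.embedding
}

embed("function add(a: number, b: number) { return a + b }").then((v) => console.log(v.length))
```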
+
 ## Using the Search Feature
 
 Once indexed, Kilo Code can use the [`codebase_search`](/docs/automate/tools/codebase-search) tool to find relevant code:

+ 102 - 0
apps/kilocode-docs/pages/customize/context/context-condensing.md

@@ -0,0 +1,102 @@
+---
+title: "Context Condensing"
+description: "Manage conversation context to optimize token usage and maintain long sessions"
+---
+
+# Context Condensing
+
+## Overview
+
+When working on complex tasks, conversations with Kilo Code can grow long and consume a significant portion of the AI model's context window. **Context Condensing** is a feature that intelligently summarizes your conversation history, reducing token usage while preserving the essential information needed to continue your work effectively.
+
+## The Problem: Context Window Limits
+
+Every AI model has a maximum context window - a limit on how much text it can process at once. As your conversation grows with code snippets, file contents, and back-and-forth discussions, you may approach this limit. When this happens, you might experience:
+
+- Slower responses as the model processes more tokens
+- Higher API costs due to increased token usage
+- Eventually hitting the context limit and being unable to continue
+
+## The Solution: Intelligent Condensing
+
+Context Condensing solves this problem by creating a concise summary of your conversation that captures:
+
+- The original task or goal
+- Key decisions made during the session
+- Important code changes and their context
+- Current progress and next steps
+
+This summary replaces the detailed conversation history, freeing up context window space while maintaining continuity in your work.
+
+## How Context Condensing Works
+
+### Automatic Triggering
+
+Kilo Code monitors your context usage and may suggest condensing when you approach the context window limit. You'll see a notification indicating that condensing is recommended.
+
+### Manual Condensing
+
+You can also trigger context condensing manually at any time using:
+
+- **Chat Command**: Type `/condense` in the chat
+- **Settings**: Access condensing options through the Context Condensing settings
+
+### The Condensing Process
+
+When condensing is triggered:
+
+1. **Analysis**: Kilo Code analyzes the entire conversation history
+2. **Summarization**: A summary is generated using the configured API, capturing essential context
+3. **Replacement**: The detailed history is replaced with the condensed summary
+4. **Continuation**: You can continue working with the freed-up context space
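The four steps above can be pictured as a single transformation over the message history. The sketch below is purely illustrative; the helper names and the `keepRecent` cutoff are hypothetical, not Kilo Code's implementation:

```typescript
type ChatMessage = { role: "system" | "user" | "assistant"; content: string }

// Hypothetical condenser: summarize older turns with whatever API profile is
// configured for condensing, then keep only the summary plus the recent turns.
async function condense(
	history: ChatMessage[],
	summarize: (messages: ChatMessage[]) => Promise<string>,
	keepRecent = 4,
): Promise<ChatMessage[]> {
	if (history.length <= keepRecent) return history
	const older = history.slice(0, -keepRecent)
	const recent = history.slice(-keepRecent)
	const summary = await summarize(older)
	return [{ role: "assistant", content: `Summary of earlier conversation:\n${summary}` }, ...recent]
}
```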
+
+## Configuration Options
+
+### API Configuration
+
+Context Condensing uses an AI model to generate summaries. You can configure which API to use for condensing operations:
+
+- Use the same API as your main coding assistant
+- Configure a separate, potentially more cost-effective API for condensing
+
+### Profile-Specific Settings
+
+You can configure context condensing thresholds and behavior on a per-profile basis, allowing different settings for different projects or use cases.
+
+## Best Practices
+
+### When to Condense
+
+- **Long sessions**: If you've been working for an extended period on a complex task
+- **Before major transitions**: When switching to a different aspect of your project
+- **When prompted**: When Kilo Code suggests condensing due to context limits
+
+### Maintaining Context Quality
+
+- **Be specific in your initial task**: A clear task description helps create better summaries
+- **Use Memory Bank**: Combine with [Memory Bank](/docs/customize/context/memory-bank) for persistent project context that doesn't need to be condensed
+- **Review the summary**: After condensing, the summary is visible in your chat history
+
+## Troubleshooting
+
+### Context Condensing Error
+
+If you see a "Context Condensing Error" message:
+
+- Check your API configuration and ensure it's valid
+- Verify you have sufficient credits or API quota
+- Try using a different API for condensing operations
+
+### Summary Quality
+
+If the condensed summary doesn't capture important details:
+
+- Consider condensing earlier, before the conversation becomes too long
+- Use clear, specific language when describing your tasks
+- Important context can be reinforced after condensing by reminding Kilo Code of key details
+
+## Related Features
+
+- [Memory Bank](/docs/customize/context/memory-bank) - Persistent context storage across sessions
+- [Large Projects](/docs/customize/context/large-projects) - Managing context for large codebases
+- [Codebase Indexing](/docs/customize/context/codebase-indexing) - Efficient code search and retrieval

+ 7 - 4
apps/kilocode-docs/pages/customize/custom-modes.md

@@ -62,7 +62,9 @@ Easily share, back up, and template your custom modes. This feature lets you exp
 
 **Exporting a Mode:**
 
-1. Navigate to the Modes view
+Modes are accessed from the Prompts tab (notebook icon), which contains the Modes section.
+
+1. Open the Prompts Tab (click the <Codicon name="notebook" /> icon in the top menu bar)
 2. Select the mode you wish to export
 3. Click the Export Mode button (download icon)
 4. Choose a location to save the `.yaml` file
@@ -70,9 +72,10 @@ Easily share, back up, and template your custom modes. This feature lets you exp
 
 **Importing a Mode:**
 
-1. Click the Import Mode button (upload icon) in the Modes view
-2. Select the mode's YAML file
-3. Choose the import level:
+1. Open the Prompts Tab (click the <Codicon name="notebook" /> icon in the top menu bar)
+2. Click the Import Mode button (upload icon)
+3. Select the mode's YAML file
+4. Choose the import level:
     - **Project:** Available only in current workspace (saved to `.kilocodemodes` file)
     - **Global:** Available in all projects (saved to global settings)
 

+ 2 - 2
apps/kilocode-docs/pages/customize/custom-rules.md

@@ -103,7 +103,7 @@ When both generic rules and mode-specific rules exist, the mode-specific rules a
 
 ### Using the UI Interface
 
-<img src="/docs/img/custom-rules/rules-ui.png" alt="Rules tab in Kilo Code" width="400" />
+{% image src="/docs/img/custom-rules/rules-ui.png" alt="Rules tab in Kilo Code" width="400" /%}
 
 The easiest way to create and manage rules is through the built-in UI:
 
@@ -158,7 +158,7 @@ Files in the list contain sensitive data, they MUST NOT be read
 
 This rule prevents the AI from reading or accessing sensitive files, even if explicitly requested to do so.
 
-<img src="/docs/img/custom-rules/custom-rules.png" alt="Kilo Code ignores request to read sensitive file" width="600" />
+{% image src="/docs/img/custom-rules/custom-rules.png" alt="Kilo Code ignores request to read sensitive file" width="600" /%}
 
 ## Use Cases
 

+ 15 - 10
apps/kilocode-docs/pages/customize/index.md

@@ -1,4 +1,4 @@
----
+---
 title: "Customize"
 description: "Make Kilo Code work your way with custom modes, rules, instructions, and more"
 ---
@@ -13,20 +13,22 @@ Kilo Code is highly customizable. Tailor its behavior to match your workflow, te
 
 Configure how Kilo Code behaves and responds:
 
-- [**Custom Modes**](/docs/customize/custom-modes)  Create specialized modes for different tasks (code review, documentation, testing, etc.)
-- [**Custom Rules**](/docs/customize/custom-rules)  Define rules that apply to specific file types or situations
-- [**Custom Instructions**](/docs/customize/custom-instructions)  Add project-specific guidelines and context
-- [**agents.md**](/docs/customize/agents-md)  Configure agent behavior at the project level
-- [**Workflows**](/docs/customize/workflows)  Automate multi-step processes
-- [**Skills**](/docs/customize/skills)  Extend Kilo's capabilities with reusable skill definitions
-- [**Prompt Engineering**](/docs/customize/prompt-engineering)  Write effective prompts for better results
+- [**Custom Modes**](/docs/customize/custom-modes) - Create specialized modes for different tasks (code review, documentation, testing, etc.)
+- [**Custom Rules**](/docs/customize/custom-rules) - Define rules that apply to specific file types or situations
+- [**Custom Instructions**](/docs/customize/custom-instructions) - Add project-specific guidelines and context
+- [**agents.md**](/docs/customize/agents-md) - Configure agent behavior at the project level
+- [**Workflows**](/docs/customize/workflows) - Automate multi-step processes
+- [**Skills**](/docs/customize/skills) - Extend Kilo's capabilities with reusable skill definitions
+- [**Prompt Engineering**](/docs/customize/prompt-engineering) - Write effective prompts for better results
 
 ## Context & Indexing
 
 Help Kilo understand your codebase better:
 
-- [**Codebase Indexing**](/docs/customize/context/codebase-indexing) — Build a semantic index of your code for better context awareness
-- [**Large Projects**](/docs/customize/context/large-projects) — Best practices for working with monorepos and large codebases
+- [**Codebase Indexing**](/docs/customize/context/codebase-indexing) - Build a semantic index of your code for better context awareness
+- [**Context Condensing**](/docs/customize/context/context-condensing) - Summarize older context to stay within limits
+- [**Memory Bank**](/docs/customize/context/memory-bank) - Store project context, decisions, and important information
+- [**Large Projects**](/docs/customize/context/large-projects) - Best practices for working with monorepos and large codebases
 
 ## Getting Started
 
@@ -47,3 +49,6 @@ New to customization? Here's where to start:
 - Check out [**Code with AI**](/docs/code-with-ai) to learn how to use Kilo effectively
 - Explore [**Automate**](/docs/automate) for CI/CD integration and advanced automation
 - Learn about [**Collaboration**](/docs/collaborate) features for teams
+
+
+

+ 2 - 2
apps/kilocode-docs/pages/deploy-secure/deploy.md

@@ -48,7 +48,7 @@ Enable the **GitHub Integration** before deploying:
 - Choose **GitHub** in the Integration dropdown
 - Select your repository and branch
 
-<img width="600" height="443" alt="DeploySelection" src="https://github.com/user-attachments/assets/e592a7c1-a2dd-42e3-ba5d-d86d9b61001f" />
+{% image width="600" height="443" alt="DeploySelection" src="https://github.com/user-attachments/assets/e592a7c1-a2dd-42e3-ba5d-d86d9b61001f" /%}
 
 ### 3. Click **Deploy**
 
@@ -61,7 +61,7 @@ Kilo Code will:
 
 Once complete, you’ll receive a **deployment URL** you can open or share.
 
-<img width="800" height="824" alt="DeploySuccess" src="https://github.com/user-attachments/assets/4a01ad52-1783-443f-9f9e-bfc2d4b77b43" />
+{% image width="800" height="824" alt="DeploySuccess" src="https://github.com/user-attachments/assets/4a01ad52-1783-443f-9f9e-bfc2d4b77b43" /%}
 
 ---
 

+ 364 - 0
apps/kilocode-docs/pages/gateway/api-reference.md

@@ -0,0 +1,364 @@
+---
+title: "API Reference"
+description: "Complete API reference for the Kilo AI Gateway, including chat completions, FIM completions, and model listing endpoints."
+---
+
+# API Reference
+
+The Kilo AI Gateway provides an OpenAI-compatible API. All endpoints use the base URL:
+
+```
+https://api.kilo.ai/api/gateway
+```
+
+## Chat completions
+
+Create a chat completion. This is the primary endpoint for interacting with AI models.
+
+```
+POST /chat/completions
+```
+
+### Request body
+
+```typescript
+type ChatCompletionRequest = {
+	// Required
+	model: string // Model ID (e.g., "anthropic/claude-sonnet-4.5")
+	messages: Message[] // Array of conversation messages
+
+	// Streaming
+	stream?: boolean // Enable SSE streaming (default: false)
+
+	// Generation parameters
+	max_tokens?: number // Maximum tokens to generate
+	temperature?: number // Sampling temperature (0-2)
+	top_p?: number // Nucleus sampling (0-1)
+	stop?: string | string[] // Stop sequences
+	frequency_penalty?: number // Frequency penalty (-2 to 2)
+	presence_penalty?: number // Presence penalty (-2 to 2)
+
+	// Tool calling
+	tools?: Tool[] // Available tools/functions
+	tool_choice?: ToolChoice // Tool selection strategy
+
+	// Structured output
+	response_format?: ResponseFormat
+
+	// Other
+	user?: string // End-user identifier for safety
+	seed?: number // Deterministic sampling seed
+}
+```
+
+### Message types
+
+```typescript
+type Message =
+	| { role: "system"; content: string }
+	| { role: "user"; content: string | ContentPart[] }
+	| { role: "assistant"; content: string | null; tool_calls?: ToolCall[] }
+	| { role: "tool"; content: string; tool_call_id: string }
+
+type ContentPart = { type: "text"; text: string } | { type: "image_url"; image_url: { url: string; detail?: string } }
+
+type Tool = {
+	type: "function"
+	function: {
+		name: string
+		description?: string
+		parameters: object // JSON Schema
+	}
+}
+
+type ToolChoice = "none" | "auto" | "required" | { type: "function"; function: { name: string } }
+```
+
+### Response (non-streaming)
+
+```typescript
+type ChatCompletionResponse = {
+	id: string
+	object: "chat.completion"
+	created: number
+	model: string
+	choices: Array<{
+		index: number
+		message: {
+			role: "assistant"
+			content: string | null
+			tool_calls?: ToolCall[]
+		}
+		finish_reason: "stop" | "length" | "tool_calls" | "content_filter"
+	}>
+	usage: {
+		prompt_tokens: number
+		completion_tokens: number
+		total_tokens: number
+	}
+}
+```
+
+### Response (streaming)
+
+When `stream: true`, the response is a series of SSE events:
+
+```typescript
+type ChatCompletionChunk = {
+	id: string
+	object: "chat.completion.chunk"
+	created: number
+	model: string
+	choices: Array<{
+		index: number
+		delta: {
+			role?: "assistant"
+			content?: string
+			tool_calls?: ToolCall[]
+		}
+		finish_reason: string | null
+	}>
+	// Only in the final chunk
+	usage?: {
+		prompt_tokens: number
+		completion_tokens: number
+		total_tokens: number
+	}
+}
+```
+
+### Example request
+
+```bash
+curl -X POST "https://api.kilo.ai/api/gateway/chat/completions" \
+  -H "Authorization: Bearer $KILO_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "anthropic/claude-sonnet-4.5",
+    "messages": [
+      {"role": "system", "content": "You are a helpful assistant."},
+      {"role": "user", "content": "What is quantum computing?"}
+    ],
+    "max_tokens": 500,
+    "temperature": 0.7
+  }'
+```
+
+### Example response
+
+```json
+{
+	"id": "gen-abc123",
+	"object": "chat.completion",
+	"created": 1739000000,
+	"model": "anthropic/claude-sonnet-4.5",
+	"choices": [
+		{
+			"index": 0,
+			"message": {
+				"role": "assistant",
+				"content": "Quantum computing is a type of computation that uses quantum mechanics..."
+			},
+			"finish_reason": "stop"
+		}
+	],
+	"usage": {
+		"prompt_tokens": 25,
+		"completion_tokens": 150,
+		"total_tokens": 175
+	}
+}
+```
+
+## Tool calling
+
+The gateway supports function/tool calling with automatic repair for common issues such as duplicate tool calls and orphaned tool results.
+
+### Request with tools
+
+```json
+{
+	"model": "anthropic/claude-sonnet-4.5",
+	"messages": [{ "role": "user", "content": "What's the weather in San Francisco?" }],
+	"tools": [
+		{
+			"type": "function",
+			"function": {
+				"name": "get_weather",
+				"description": "Get the current weather for a location",
+				"parameters": {
+					"type": "object",
+					"properties": {
+						"location": {
+							"type": "string",
+							"description": "City name"
+						}
+					},
+					"required": ["location"]
+				}
+			}
+		}
+	],
+	"tool_choice": "auto"
+}
+```
+
+### Tool call response
+
+```json
+{
+	"choices": [
+		{
+			"message": {
+				"role": "assistant",
+				"content": null,
+				"tool_calls": [
+					{
+						"id": "call_abc123",
+						"type": "function",
+						"function": {
+							"name": "get_weather",
+							"arguments": "{\"location\":\"San Francisco\"}"
+						}
+					}
+				]
+			},
+			"finish_reason": "tool_calls"
+		}
+	]
+}
+```
+
+### Tool call repair
+
+The gateway automatically handles common tool calling issues:
+
+- **Deduplication**: Removes duplicate tool calls with the same ID
+- **Orphan cleanup**: Removes tool result messages without matching tool calls
+- **Missing results**: Inserts placeholder results for tool calls without responses
+- **ID normalization**: Normalizes tool call IDs per provider requirements (Anthropic, Mistral)
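+
+Purely as an illustration of the behavior described above (not the gateway's actual implementation), the deduplication and orphan-cleanup steps can be sketched as a single pass over the message list:
+
+```typescript
+type ToolCall = { id: string; type: "function"; function: { name: string; arguments: string } }
+type Msg =
+	| { role: "system" | "user"; content: string }
+	| { role: "assistant"; content: string | null; tool_calls?: ToolCall[] }
+	| { role: "tool"; content: string; tool_call_id: string }
+
+// Hypothetical sketch: dedupe tool calls by ID and drop orphaned tool results
+function repairToolCalls(messages: Msg[]): Msg[] {
+	const seenIds = new Set<string>()
+	const repaired: Msg[] = []
+	for (const msg of messages) {
+		if (msg.role === "assistant" && msg.tool_calls) {
+			const unique = msg.tool_calls.filter((tc) => {
+				if (seenIds.has(tc.id)) return false // deduplication: keep only the first call with this ID
+				seenIds.add(tc.id)
+				return true
+			})
+			repaired.push({ ...msg, tool_calls: unique })
+		} else if (msg.role === "tool" && !seenIds.has(msg.tool_call_id)) {
+			continue // orphan cleanup: no matching tool call, so drop this result
+		} else {
+			repaired.push(msg)
+		}
+	}
+	return repaired
+}
+```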
+
+## FIM completions
+
+Fill-in-the-middle completions for code generation, powered by Mistral Codestral.
+
+```
+POST /api/fim/completions
+```
+
+### Request body
+
+```typescript
+type FIMRequest = {
+	model: string // Must be a Mistral model (e.g., "mistralai/codestral-2508")
+	prompt: string // Code before the cursor
+	suffix?: string // Code after the cursor
+	max_tokens?: number // Maximum tokens (capped at 1000)
+	temperature?: number
+	stop?: string[]
+	stream?: boolean
+}
+```
+
+### Example request
+
+```bash
+curl -X POST "https://api.kilo.ai/api/fim/completions" \
+  -H "Authorization: Bearer $KILO_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "mistralai/codestral-2508",
+    "prompt": "def fibonacci(n):\n    if n <= 1:\n        return n\n    ",
+    "suffix": "\n\nprint(fibonacci(10))",
+    "max_tokens": 200,
+    "stream": false
+  }'
+```
+
+{% callout type="info" %}
+FIM completions are limited to Mistral models (model IDs starting with `mistralai/`). BYOK is supported with the `codestral` key type.
+{% /callout %}
+
+## List models
+
+Retrieve the list of available models.
+
+```
+GET /models
+```
+
+No authentication required.
+
+### Response
+
+Returns an OpenAI-compatible model list:
+
+```json
+{
+	"data": [
+		{
+			"id": "anthropic/claude-sonnet-4.5",
+			"object": "model",
+			"created": 1739000000,
+			"owned_by": "anthropic",
+			"name": "Claude Sonnet 4.5",
+			"context_length": 200000,
+			"pricing": {
+				"prompt": "0.000003",
+				"completion": "0.000015"
+			}
+		}
+	]
+}
+```
+
+## List providers
+
+Retrieve the list of available providers.
+
+```
+GET /providers
+```
+
+No authentication required.
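+
+The response schema is not documented here, so the following is only a minimal sketch of calling the endpoint and inspecting the raw JSON:
+
+```typescript
+// Fetch the provider list (no API key needed) and print the raw response
+const res = await fetch("https://api.kilo.ai/api/gateway/providers")
+const providers = await res.json()
+console.log(JSON.stringify(providers, null, 2))
+```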
+
+## Error codes
+
+| HTTP Status | Description                                             |
+| ----------- | ------------------------------------------------------- |
+| 400         | Bad request -- invalid parameters or model ID           |
+| 401         | Unauthorized -- invalid or missing API key              |
+| 402         | Insufficient balance -- add credits to continue         |
+| 403         | Forbidden -- model not allowed by organization policy   |
+| 429         | Rate limited -- too many requests                       |
+| 500         | Internal server error                                   |
+| 502         | Provider error -- upstream provider returned an error   |
+| 503         | Service unavailable -- provider temporarily unavailable |
+
+### Error response format
+
+```json
+{
+	"error": {
+		"message": "Human-readable error description",
+		"code": 400
+	}
+}
+```
+
+{% callout type="info" %}
+When the gateway receives a 402 (Payment Required) from an upstream provider, it returns 503 to the client to avoid exposing internal billing details.
+{% /callout %}
+
+### Context length errors
+
+If your request exceeds the model's context window, you'll receive a descriptive error:
+
+```json
+{
+	"error": {
+		"message": "This request exceeds the model's context window of 200000 tokens. Your request contains approximately 250000 tokens.",
+		"code": 400
+	}
+}
+```

+ 121 - 0
apps/kilocode-docs/pages/gateway/authentication.md

@@ -0,0 +1,121 @@
+---
+title: "Authentication"
+description: "Learn how to authenticate with the Kilo AI Gateway using API keys, session tokens, and Bring Your Own Key (BYOK)."
+---
+
+# Authentication
+
+The Kilo AI Gateway supports multiple authentication methods depending on your use case.
+
+## API key authentication
+
+The primary authentication method is a Bearer token passed in the `Authorization` header:
+
+```bash
+Authorization: Bearer <your_api_key>
+```
+
+API keys are JWT tokens tied to your Kilo account. You can generate them from the [Kilo dashboard](https://app.kilo.ai).
+
+### Using your API key
+
+{% tabs %}
+{% tab label="TypeScript" %}
+
+```typescript
+import { createOpenAI } from "@ai-sdk/openai"
+
+const kilo = createOpenAI({
+	baseURL: "https://api.kilo.ai/api/gateway",
+	apiKey: process.env.KILO_API_KEY,
+})
+```
+
+{% /tab %}
+{% tab label="Python" %}
+
+```python
+import os
+from openai import OpenAI
+
+client = OpenAI(
+    api_key=os.getenv("KILO_API_KEY"),
+    base_url="https://api.kilo.ai/api/gateway",
+)
+```
+
+{% /tab %}
+{% tab label="cURL" %}
+
+```bash
+curl -X POST "https://api.kilo.ai/api/gateway/chat/completions" \
+  -H "Authorization: Bearer $KILO_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"model": "anthropic/claude-sonnet-4.5", "messages": [{"role": "user", "content": "Hello"}]}'
+```
+
+{% /tab %}
+{% /tabs %}
+
+## Organization tokens
+
+When making requests on behalf of an organization, include the organization ID in the request header:
+
+```
+X-KiloCode-OrganizationId: your_org_id
+```
+
+Organization tokens are short-lived (they expire after 15 minutes) and enforce the organization's policies, including model allow lists, provider restrictions, and per-user spending limits.
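+
+For example, with the OpenAI SDK you can attach the header to every request via the `defaultHeaders` client option (the organization ID below is a placeholder):
+
+```typescript
+import OpenAI from "openai"
+
+const client = new OpenAI({
+	apiKey: process.env.KILO_API_KEY,
+	baseURL: "https://api.kilo.ai/api/gateway",
+	// Placeholder org ID -- replace with your organization's actual ID
+	defaultHeaders: { "X-KiloCode-OrganizationId": "your_org_id" },
+})
+```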
+
+## Anonymous access
+
+The gateway allows unauthenticated access for free models only. Anonymous requests are identified by IP address and are subject to rate limiting (200 requests per hour per IP).
+
+Free models include models tagged with `:free` in their model ID, such as `minimax/minimax-m2.1:free` and `z-ai/glm-5:free`.
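+
+For instance, a request to a free model can be made without an `Authorization` header at all (subject to the per-IP rate limit):
+
+```typescript
+// Anonymous request to a free model -- no API key required
+const res = await fetch("https://api.kilo.ai/api/gateway/chat/completions", {
+	method: "POST",
+	headers: { "Content-Type": "application/json" },
+	body: JSON.stringify({
+		model: "minimax/minimax-m2.1:free",
+		messages: [{ role: "user", content: "Hello" }],
+	}),
+})
+console.log((await res.json()).choices[0].message.content)
+```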
+
+## Bring Your Own Key (BYOK)
+
+BYOK lets you use your own provider API keys with the Kilo AI Gateway. When a BYOK key is configured, the gateway routes requests through Vercel AI Gateway using your key. You are billed directly by the provider -- Kilo does not add any markup.
+
+### Supported BYOK providers
+
+| Provider         | BYOK Key ID |
+| ---------------- | ----------- |
+| Anthropic        | `anthropic` |
+| OpenAI           | `openai`    |
+| Google AI Studio | `google`    |
+| Mistral          | `mistral`   |
+| MiniMax          | `minimax`   |
+| xAI              | `xai`       |
+| Z.AI             | `zai`       |
+| Codestral (FIM)  | `codestral` |
+
+### How BYOK works
+
+1. Add your provider API key in the Kilo dashboard or through your Kilo Code extension settings
+2. Keys are encrypted at rest using AES encryption
+3. When you make a request for a model from that provider, the gateway automatically uses your key
+4. Usage is tracked but not billed to your Kilo balance (cost is set to $0)
+5. If your BYOK key fails, the request will not automatically fall back to Kilo's keys
+
+### BYOK routing
+
+When a BYOK key is detected, the request is routed through Vercel AI Gateway with your credentials:
+
+```
+Client → Kilo Gateway → Vercel AI Gateway (with your key) → Provider
+```
+
+This provides the benefit of Vercel's reliability infrastructure while using your own billing relationship with the provider.
+
+## Request headers
+
+The gateway accepts the following headers:
+
+| Header                      | Required                | Description                                  |
+| --------------------------- | ----------------------- | -------------------------------------------- |
+| `Authorization`             | Yes (unless free model) | `Bearer <api_key>`                           |
+| `Content-Type`              | Yes                     | `application/json`                           |
+| `X-KiloCode-OrganizationId` | No                      | Organization context for org-scoped requests |
+| `X-KiloCode-TaskId`         | No                      | Task identifier for prompt cache keying      |
+| `X-KiloCode-Version`        | No                      | Client version string                        |
+| `x-kilocode-mode`           | No                      | Mode hint for `kilo/auto` model routing      |

+ 54 - 0
apps/kilocode-docs/pages/gateway/index.md

@@ -0,0 +1,54 @@
+---
+title: "AI Gateway"
+description: "A unified API to access hundreds of AI models through a single endpoint, with built-in usage tracking, BYOK support, and organization controls."
+---
+
+# AI Gateway
+
+The Kilo AI Gateway provides a unified, OpenAI-compatible API to access hundreds of AI models through a single endpoint at `https://api.kilo.ai/api/gateway`. It gives you the ability to track usage, manage costs, bring your own API keys, and enforce organization-level controls.
+
+The gateway works seamlessly with the [Vercel AI SDK](https://ai-sdk.dev), the [OpenAI SDK](/docs/gateway/sdks-and-frameworks#openai-sdk), or any OpenAI-compatible client in any language.
+
+## Key features
+
+- **One key, hundreds of models**: Access models from Anthropic, OpenAI, Google, xAI, Mistral, MiniMax, and more with a single API key
+- **OpenAI-compatible API**: Drop-in replacement for OpenAI's `/chat/completions` endpoint -- switch models by changing a single string
+- **Streaming support**: Full Server-Sent Events (SSE) streaming with time-to-first-token tracking
+- **BYOK (Bring Your Own Key)**: Use your own provider API keys with encrypted-at-rest storage
+- **Usage tracking**: Per-request cost and token tracking with microdollar precision
+- **Organization controls**: Model allow lists, provider restrictions, per-user daily spending limits, and balance management
+- **Tool calling**: Robust function/tool calling with automatic repair for deduplication and orphan cleanup
+- **FIM completions**: Fill-in-the-middle code completions via Mistral Codestral
+
+```typescript
+import { streamText } from "ai"
+import { createOpenAI } from "@ai-sdk/openai"
+
+const kilo = createOpenAI({
+	baseURL: "https://api.kilo.ai/api/gateway",
+	apiKey: process.env.KILO_API_KEY,
+})
+
+const result = streamText({
+	model: kilo("anthropic/claude-sonnet-4.5"),
+	prompt: "Why is the sky blue?",
+})
+```
+
+## Base URL
+
+All gateway API requests use the following base URL:
+
+```
+https://api.kilo.ai/api/gateway
+```
+
+## More resources
+
+- [Quickstart](/docs/gateway/quickstart) -- Get up and running in minutes
+- [Authentication](/docs/gateway/authentication) -- API keys, sessions, and BYOK
+- [Models & Providers](/docs/gateway/models-and-providers) -- Available models and routing behavior
+- [Streaming](/docs/gateway/streaming) -- Real-time SSE streaming
+- [API Reference](/docs/gateway/api-reference) -- Full request/response schemas
+- [Usage & Billing](/docs/gateway/usage-and-billing) -- Cost tracking and organization controls
+- [SDKs & Frameworks](/docs/gateway/sdks-and-frameworks) -- Integration guides for popular SDKs

+ 155 - 0
apps/kilocode-docs/pages/gateway/models-and-providers.md

@@ -0,0 +1,155 @@
+---
+title: "Models & Providers"
+description: "Learn about the AI models and providers available through the Kilo AI Gateway, including model IDs, routing behavior, and provider-specific features."
+---
+
+# Models & Providers
+
+The Kilo AI Gateway provides access to hundreds of AI models from multiple providers through a single unified API. You can switch between models by changing the model ID string -- no code changes required.
+
+## Specifying a model
+
+Models are identified using the format `provider/model-name`. Pass this as the `model` parameter in your request:
+
+```typescript
+const result = streamText({
+	model: kilo("anthropic/claude-sonnet-4.5"),
+	prompt: "Hello!",
+})
+```
+
+Or in a raw API request:
+
+```json
+{
+	"model": "anthropic/claude-sonnet-4.5",
+	"messages": [{ "role": "user", "content": "Hello!" }]
+}
+```
+
+## Available models
+
+You can browse the full list of available models via the models endpoint:
+
+```
+GET https://api.kilo.ai/api/gateway/models
+```
+
+This returns model information including pricing, context window, and supported features. No authentication is required.
+
+### Popular models
+
+| Model ID                        | Provider  | Description                                     |
+| ------------------------------- | --------- | ----------------------------------------------- |
+| `anthropic/claude-opus-4.6`     | Anthropic | Most capable Claude model for complex reasoning |
+| `anthropic/claude-sonnet-4.5`   | Anthropic | Balanced performance and cost                   |
+| `anthropic/claude-haiku-4.5`    | Anthropic | Fast and cost-effective                         |
+| `openai/gpt-5.2`                | OpenAI    | Latest GPT model                                |
+| `google/gemini-3-pro-preview`   | Google    | Advanced reasoning with 1M context              |
+| `google/gemini-3-flash-preview` | Google    | Fast and efficient                              |
+| `x-ai/grok-code-fast-1`         | xAI       | Optimized for code tasks                        |
+| `moonshotai/kimi-k2.5`          | Moonshot  | Strong multilingual model                       |
+
+### Free models
+
+Several models are available at no cost, subject to rate limits:
+
+| Model ID                              | Description               |
+| ------------------------------------- | ------------------------- |
+| `minimax/minimax-m2.1:free`           | MiniMax M2.1              |
+| `z-ai/glm-5:free`                     | Z.AI GLM-5                |
+| `giga-potato`                         | Community model           |
+| `corethink:free`                      | CoreThink reasoning model |
+| `arcee-ai/trinity-large-preview:free` | Arcee Trinity             |
+
+Free models are available to both authenticated and anonymous users. Anonymous users are rate-limited to 200 requests per hour per IP address.
+
+## The `kilo/auto` model
+
+The `kilo/auto` virtual model automatically selects the best model based on the task type. The selection is controlled by the `x-kilocode-mode` request header:
+
+| Mode                                                           | Resolved Model                |
+| -------------------------------------------------------------- | ----------------------------- |
+| `plan`, `general`, `architect`, `orchestrator`, `ask`, `debug` | `anthropic/claude-opus-4.6`   |
+| `build`, `explore`, `code`                                     | `anthropic/claude-sonnet-4.5` |
+| Default (no mode specified)                                    | `anthropic/claude-sonnet-4.5` |
+
+```json
+{
+	"model": "kilo/auto",
+	"messages": [{ "role": "user", "content": "Help me design a database schema" }]
+}
+```
+
+With the mode header:
+
+```bash
+curl -X POST "https://api.kilo.ai/api/gateway/chat/completions" \
+  -H "Authorization: Bearer $KILO_API_KEY" \
+  -H "x-kilocode-mode: plan" \
+  -H "Content-Type: application/json" \
+  -d '{"model": "kilo/auto", "messages": [{"role": "user", "content": "Design a database schema"}]}'
+```
+
+## Providers
+
+The gateway routes requests to the appropriate provider based on the model and your configuration:
+
+| Provider          | Slug         | Description                         |
+| ----------------- | ------------ | ----------------------------------- |
+| OpenRouter        | `openrouter` | Primary gateway for most models     |
+| Vercel AI Gateway | `vercel`     | BYOK routing and select A/B testing |
+| Mistral           | `mistral`    | FIM completions (Codestral)         |
+| xAI               | `x-ai`       | Grok models (direct)                |
+| MiniMax           | `minimax`    | MiniMax models (direct)             |
+| CoreThink         | `corethink`  | CoreThink reasoning model           |
+| Inception         | `inception`  | InceptionLabs models                |
+| Martian           | `martian`    | Optimized xAI models                |
+| StreamLake        | `streamlake` | KAT-Coder models                    |
+
+## Provider routing
+
+The gateway uses the following priority for routing requests:
+
+1. **BYOK check**: If you have a BYOK key for the model's provider, the request is routed through Vercel AI Gateway using your key
+2. **Free model routing**: If the model is a Kilo-hosted free model, it's routed to its designated provider
+3. **Default routing**: All other requests go through OpenRouter
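+
+Purely as an illustration of this priority order (not the gateway's actual code), the decision can be sketched as follows; the BYOK and free-model sets here are hypothetical stand-ins for gateway internals:
+
+```typescript
+// Illustrative stand-ins: providers you have BYOK keys for, and Kilo-hosted free models
+const byokProviders = new Set(["anthropic"])
+const freeModels = new Set(["minimax/minimax-m2.1:free", "z-ai/glm-5:free"])
+
+function routeRequest(modelId: string): "vercel-byok" | "free-model-provider" | "openrouter" {
+	const provider = modelId.split("/")[0]
+	if (byokProviders.has(provider)) return "vercel-byok" // 1. BYOK check
+	if (freeModels.has(modelId)) return "free-model-provider" // 2. Free model routing
+	return "openrouter" // 3. Default routing
+}
+
+console.log(routeRequest("anthropic/claude-sonnet-4.5")) // "vercel-byok"
+console.log(routeRequest("openai/gpt-5.2")) // "openrouter"
+```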
+
+### Preferred inference providers
+
+For models available through multiple providers, the gateway may use a preferred provider for better performance:
+
+| Model Family     | Preferred Provider   |
+| ---------------- | -------------------- |
+| Anthropic models | Amazon Bedrock       |
+| MiniMax models   | MiniMax (direct)     |
+| Mistral models   | Mistral (direct)     |
+| Moonshot models  | Moonshot AI (direct) |
+
+These preferences are sent as hints to OpenRouter, which may override them based on availability and load.
+
+## Listing models
+
+### Models endpoint
+
+```
+GET https://api.kilo.ai/api/gateway/models
+```
+
+Returns an OpenAI-compatible list of all available models with metadata including pricing, context window, and capabilities.
+
+### Providers endpoint
+
+```
+GET https://api.kilo.ai/api/gateway/providers
+```
+
+Returns a list of all available inference providers.
+
+### Models by provider
+
+```
+GET https://api.kilo.ai/api/gateway/models-by-provider
+```
+
+Returns models grouped by their provider, useful for building model selection interfaces.
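+
+As a rough sketch, you could also build a similar grouping yourself from the `/models` endpoint by splitting each model ID on its provider prefix:
+
+```typescript
+// Group the public model list by provider prefix (e.g. "anthropic/...")
+const res = await fetch("https://api.kilo.ai/api/gateway/models")
+const { data } = (await res.json()) as { data: Array<{ id: string; name: string }> }
+
+const byProvider = new Map<string, string[]>()
+for (const model of data) {
+	const provider = model.id.split("/")[0]
+	const list = byProvider.get(provider) ?? []
+	list.push(model.id)
+	byProvider.set(provider, list)
+}
+
+for (const [provider, models] of byProvider) {
+	console.log(`${provider}: ${models.length} models`)
+}
+```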

+ 151 - 0
apps/kilocode-docs/pages/gateway/quickstart.md

@@ -0,0 +1,151 @@
+---
+title: "Quickstart"
+description: "Get started with the Kilo AI Gateway in minutes. Make your first AI model request using the Vercel AI SDK, OpenAI SDK, Python, or cURL."
+---
+
+# Quickstart
+
+This guide walks you through making your first AI model request with the Kilo AI Gateway. While this guide focuses on the [Vercel AI SDK](https://ai-sdk.dev), you can also use the [OpenAI SDK](/docs/gateway/sdks-and-frameworks#openai-sdk), [Python](/docs/gateway/sdks-and-frameworks#python), or [cURL](/docs/gateway/sdks-and-frameworks#curl).
+
+## Prerequisites
+
+You need a Kilo account with API credits. Sign up at [kilo.ai](https://kilo.ai) and add credits from your account dashboard.
+
+## Using the Vercel AI SDK
+
+### 1. Create your project
+
+```bash
+mkdir my-ai-app
+cd my-ai-app
+npm init -y
+```
+
+### 2. Install dependencies
+
+```bash
+npm install ai @ai-sdk/openai dotenv
+```
+
+### 3. Set up your API key
+
+Create a `.env` file and add your Kilo API key:
+
+```bash
+KILO_API_KEY=your_api_key_here
+```
+
+You can get your API key from the [Kilo dashboard](https://app.kilo.ai).
+
+### 4. Create and run your script
+
+Create an `index.mjs` file:
+
+```javascript
+import { streamText } from "ai"
+import { createOpenAI } from "@ai-sdk/openai"
+import "dotenv/config"
+
+const kilo = createOpenAI({
+	baseURL: "https://api.kilo.ai/api/gateway",
+	apiKey: process.env.KILO_API_KEY,
+})
+
+async function main() {
+	const result = streamText({
+		model: kilo("anthropic/claude-sonnet-4.5"),
+		prompt: "Invent a new holiday and describe its traditions.",
+	})
+
+	for await (const textPart of result.textStream) {
+		process.stdout.write(textPart)
+	}
+
+	console.log()
+	console.log("Token usage:", await result.usage)
+	console.log("Finish reason:", await result.finishReason)
+}
+
+main().catch(console.error)
+```
+
+Run the script:
+
+```bash
+node index.mjs
+```
+
+You should see the model's response streamed to your terminal.
+
+## Using the OpenAI SDK
+
+The Kilo AI Gateway is fully OpenAI-compatible, so you can use the OpenAI SDK by pointing it to the Kilo base URL.
+
+{% tabs %}
+{% tab label="TypeScript" %}
+
+```typescript
+import OpenAI from "openai"
+
+const client = new OpenAI({
+	apiKey: process.env.KILO_API_KEY,
+	baseURL: "https://api.kilo.ai/api/gateway",
+})
+
+const response = await client.chat.completions.create({
+	model: "anthropic/claude-sonnet-4.5",
+	messages: [{ role: "user", content: "Why is the sky blue?" }],
+})
+
+console.log(response.choices[0].message.content)
+```
+
+{% /tab %}
+{% tab label="Python" %}
+
+```python
+import os
+from openai import OpenAI
+
+client = OpenAI(
+    api_key=os.getenv("KILO_API_KEY"),
+    base_url="https://api.kilo.ai/api/gateway",
+)
+
+response = client.chat.completions.create(
+    model="anthropic/claude-sonnet-4.5",
+    messages=[
+        {"role": "user", "content": "Why is the sky blue?"}
+    ],
+)
+
+print(response.choices[0].message.content)
+```
+
+{% /tab %}
+{% /tabs %}
+
+## Using cURL
+
+```bash
+curl -X POST "https://api.kilo.ai/api/gateway/chat/completions" \
+  -H "Authorization: Bearer $KILO_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "anthropic/claude-sonnet-4.5",
+    "messages": [
+      {
+        "role": "user",
+        "content": "Why is the sky blue?"
+      }
+    ],
+    "stream": false
+  }'
+```
+
+## Next steps
+
+- [Authentication](/docs/gateway/authentication) -- Learn about API key management and BYOK
+- [Models & Providers](/docs/gateway/models-and-providers) -- Browse available models and understand routing
+- [Streaming](/docs/gateway/streaming) -- Implement real-time streaming responses
+- [API Reference](/docs/gateway/api-reference) -- Full request and response schemas

+ 326 - 0
apps/kilocode-docs/pages/gateway/sdks-and-frameworks.md

@@ -0,0 +1,326 @@
+---
+title: "SDKs & Frameworks"
+description: "Integrate with the Kilo AI Gateway using the Vercel AI SDK, OpenAI SDK, Python, cURL, or any OpenAI-compatible client."
+---
+
+# SDKs & Frameworks
+
+The Kilo AI Gateway is OpenAI-compatible, meaning any SDK or framework that works with the OpenAI API can work with the Kilo Gateway by changing the base URL.
+
+## Vercel AI SDK (Recommended)
+
+The [Vercel AI SDK](https://ai-sdk.dev) provides a high-level TypeScript interface for building AI applications with streaming, tool calling, and structured output support.
+
+### Installation
+
+```bash
+npm install ai @ai-sdk/openai
+```
+
+### Basic usage
+
+```typescript
+import { streamText } from "ai"
+import { createOpenAI } from "@ai-sdk/openai"
+
+const kilo = createOpenAI({
+	baseURL: "https://api.kilo.ai/api/gateway",
+	apiKey: process.env.KILO_API_KEY,
+})
+
+const result = streamText({
+	model: kilo("anthropic/claude-sonnet-4.5"),
+	prompt: "Write a haiku about programming.",
+})
+
+for await (const textPart of result.textStream) {
+	process.stdout.write(textPart)
+}
+```
+
+### With tool calling
+
+```typescript
+import { streamText, tool } from "ai"
+import { createOpenAI } from "@ai-sdk/openai"
+import { z } from "zod"
+
+const kilo = createOpenAI({
+	baseURL: "https://api.kilo.ai/api/gateway",
+	apiKey: process.env.KILO_API_KEY,
+})
+
+const result = streamText({
+	model: kilo("anthropic/claude-sonnet-4.5"),
+	prompt: "What is the weather in San Francisco?",
+	tools: {
+		getWeather: tool({
+			description: "Get the current weather for a location",
+			parameters: z.object({
+				location: z.string().describe("City name"),
+			}),
+			execute: async ({ location }) => {
+				return { temperature: 72, condition: "sunny" }
+			},
+		}),
+	},
+})
+
+for await (const textPart of result.textStream) {
+	process.stdout.write(textPart)
+}
+```
+
+### In a Next.js API route
+
+```typescript
+import { streamText } from "ai"
+import { createOpenAI } from "@ai-sdk/openai"
+
+const kilo = createOpenAI({
+	baseURL: "https://api.kilo.ai/api/gateway",
+	apiKey: process.env.KILO_API_KEY,
+})
+
+export async function POST(request: Request) {
+	const { messages } = await request.json()
+
+	const result = streamText({
+		model: kilo("anthropic/claude-sonnet-4.5"),
+		messages,
+	})
+
+	return result.toDataStreamResponse()
+}
+```
+
+## OpenAI SDK
+
+The official OpenAI SDKs work with the Kilo Gateway by setting the base URL.
+
+### TypeScript / JavaScript
+
+```bash
+npm install openai
+```
+
+```typescript
+import OpenAI from "openai"
+
+const client = new OpenAI({
+	apiKey: process.env.KILO_API_KEY,
+	baseURL: "https://api.kilo.ai/api/gateway",
+})
+
+// Non-streaming
+const response = await client.chat.completions.create({
+	model: "anthropic/claude-sonnet-4.5",
+	messages: [
+		{ role: "system", content: "You are a helpful assistant." },
+		{ role: "user", content: "Explain quantum entanglement simply." },
+	],
+})
+
+console.log(response.choices[0].message.content)
+
+// Streaming
+const stream = await client.chat.completions.create({
+	model: "anthropic/claude-sonnet-4.5",
+	messages: [{ role: "user", content: "Write a poem about the ocean." }],
+	stream: true,
+})
+
+for await (const chunk of stream) {
+	const content = chunk.choices[0]?.delta?.content
+	if (content) process.stdout.write(content)
+}
+```
+
+### Python
+
+```bash
+pip install openai
+```
+
+```python
+import os
+from openai import OpenAI
+
+client = OpenAI(
+    api_key=os.getenv("KILO_API_KEY"),
+    base_url="https://api.kilo.ai/api/gateway",
+)
+
+# Non-streaming
+response = client.chat.completions.create(
+    model="anthropic/claude-sonnet-4.5",
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Explain quantum entanglement simply."},
+    ],
+)
+
+print(response.choices[0].message.content)
+
+# Streaming
+stream = client.chat.completions.create(
+    model="anthropic/claude-sonnet-4.5",
+    messages=[
+        {"role": "user", "content": "Write a poem about the ocean."},
+    ],
+    stream=True,
+)
+
+for chunk in stream:
+    content = chunk.choices[0].delta.content
+    if content:
+        print(content, end="", flush=True)
+```
+
+## cURL
+
+### Non-streaming request
+
+```bash
+curl -X POST "https://api.kilo.ai/api/gateway/chat/completions" \
+  -H "Authorization: Bearer $KILO_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "anthropic/claude-sonnet-4.5",
+    "messages": [
+      {"role": "user", "content": "What is the capital of France?"}
+    ]
+  }'
+```
+
+### Streaming request
+
+```bash
+curl -N -X POST "https://api.kilo.ai/api/gateway/chat/completions" \
+  -H "Authorization: Bearer $KILO_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "anthropic/claude-sonnet-4.5",
+    "messages": [
+      {"role": "user", "content": "Write a short story about AI."}
+    ],
+    "stream": true
+  }'
+```
+
+The `-N` flag disables buffering so you see tokens as they arrive.
+
+## Other languages
+
+Any HTTP client that can send JSON POST requests and set headers can use the gateway. Here are examples in other languages:
+
+### Go
+
+```go
+package main
+
+import (
+    "bytes"
+    "encoding/json"
+    "fmt"
+    "io"
+    "net/http"
+    "os"
+)
+
+func main() {
+    body := map[string]interface{}{
+        "model": "anthropic/claude-sonnet-4.5",
+        "messages": []map[string]string{
+            {"role": "user", "content": "Why is the sky blue?"},
+        },
+    }
+
+    jsonBody, _ := json.Marshal(body)
+
+    req, _ := http.NewRequest("POST",
+        "https://api.kilo.ai/api/gateway/chat/completions",
+        bytes.NewBuffer(jsonBody))
+
+    req.Header.Set("Authorization", "Bearer "+os.Getenv("KILO_API_KEY"))
+    req.Header.Set("Content-Type", "application/json")
+
+    resp, err := http.DefaultClient.Do(req)
+    if err != nil {
+        panic(err)
+    }
+    defer resp.Body.Close()
+
+    respBody, _ := io.ReadAll(resp.Body)
+    fmt.Println(string(respBody))
+}
+```
+
+### Ruby
+
+```ruby
+require 'net/http'
+require 'json'
+
+uri = URI('https://api.kilo.ai/api/gateway/chat/completions')
+http = Net::HTTP.new(uri.host, uri.port)
+http.use_ssl = true
+
+request = Net::HTTP::Post.new(uri)
+request['Authorization'] = "Bearer #{ENV['KILO_API_KEY']}"
+request['Content-Type'] = 'application/json'
+request.body = {
+  model: 'anthropic/claude-sonnet-4.5',
+  messages: [
+    { role: 'user', content: 'Why is the sky blue?' }
+  ]
+}.to_json
+
+response = http.request(request)
+result = JSON.parse(response.body)
+puts result['choices'][0]['message']['content']
+```
+
+## Framework integrations
+
+The Kilo AI Gateway works with any framework that supports OpenAI-compatible APIs:
+
+| Framework                                                             | Integration                               |
+| --------------------------------------------------------------------- | ----------------------------------------- |
+| [Vercel AI SDK](https://ai-sdk.dev)                                   | Use `createOpenAI` with Kilo base URL     |
+| [LangChain](https://langchain.com)                                    | Use `ChatOpenAI` with custom base URL     |
+| [LlamaIndex](https://www.llamaindex.ai)                               | Use OpenAI-compatible configuration       |
+| [Haystack](https://haystack.deepset.ai)                               | Use OpenAI generator with custom URL      |
+| [Semantic Kernel](https://learn.microsoft.com/en-us/semantic-kernel/) | Use OpenAI connector with custom endpoint |
+
+### LangChain example
+
+```python
+import os
+from langchain_openai import ChatOpenAI
+
+llm = ChatOpenAI(
+    model="anthropic/claude-sonnet-4.5",
+    api_key=os.getenv("KILO_API_KEY"),
+    base_url="https://api.kilo.ai/api/gateway",
+)
+
+response = llm.invoke("Explain photosynthesis in simple terms.")
+print(response.content)
+```
+
+### LangChain.js example
+
+```typescript
+import { ChatOpenAI } from "@langchain/openai"
+
+const model = new ChatOpenAI({
+	modelName: "anthropic/claude-sonnet-4.5",
+	openAIApiKey: process.env.KILO_API_KEY,
+	configuration: {
+		baseURL: "https://api.kilo.ai/api/gateway",
+	},
+})
+
+const response = await model.invoke("Explain photosynthesis in simple terms.")
+console.log(response.content)
+```

+ 197 - 0
apps/kilocode-docs/pages/gateway/streaming.md

@@ -0,0 +1,197 @@
+---
+title: "Streaming"
+description: "Learn how to implement real-time streaming responses with the Kilo AI Gateway using Server-Sent Events (SSE)."
+---
+
+# Streaming
+
+The Kilo AI Gateway supports streaming responses from all models using Server-Sent Events (SSE). Streaming allows your application to display tokens as they're generated, providing a more responsive user experience.
+
+## Enabling streaming
+
+Set `stream: true` in your request body to enable streaming:
+
+```json
+{
+	"model": "anthropic/claude-sonnet-4.5",
+	"messages": [{ "role": "user", "content": "Write a short story" }],
+	"stream": true
+}
+```
+
+{% callout type="info" %}
+The gateway automatically injects `stream_options.include_usage = true` on all streaming requests, so you always receive token usage information in the final chunk.
+{% /callout %}
+
+## Streaming with the Vercel AI SDK
+
+The Vercel AI SDK handles SSE parsing and provides a clean streaming interface:
+
+```typescript
+import { streamText } from "ai"
+import { createOpenAI } from "@ai-sdk/openai"
+
+const kilo = createOpenAI({
+	baseURL: "https://api.kilo.ai/api/gateway",
+	apiKey: process.env.KILO_API_KEY,
+})
+
+const result = streamText({
+	model: kilo("anthropic/claude-sonnet-4.5"),
+	prompt: "Write a short story about a robot.",
+})
+
+for await (const textPart of result.textStream) {
+	process.stdout.write(textPart)
+}
+
+// Access usage data after streaming completes
+const usage = await result.usage
+console.log("Tokens used:", usage)
+```
+
+## Streaming with the OpenAI SDK
+
+{% tabs %}
+{% tab label="TypeScript" %}
+
+```typescript
+import OpenAI from "openai"
+
+const client = new OpenAI({
+	apiKey: process.env.KILO_API_KEY,
+	baseURL: "https://api.kilo.ai/api/gateway",
+})
+
+const stream = await client.chat.completions.create({
+	model: "anthropic/claude-sonnet-4.5",
+	messages: [{ role: "user", content: "Write a short story" }],
+	stream: true,
+})
+
+for await (const chunk of stream) {
+	const content = chunk.choices[0]?.delta?.content
+	if (content) {
+		process.stdout.write(content)
+	}
+}
+```
+
+{% /tab %}
+{% tab label="Python" %}
+
+```python
+import os
+from openai import OpenAI
+
+client = OpenAI(
+    api_key=os.getenv("KILO_API_KEY"),
+    base_url="https://api.kilo.ai/api/gateway",
+)
+
+stream = client.chat.completions.create(
+    model="anthropic/claude-sonnet-4.5",
+    messages=[{"role": "user", "content": "Write a short story"}],
+    stream=True,
+)
+
+for chunk in stream:
+    content = chunk.choices[0].delta.content
+    if content:
+        print(content, end="", flush=True)
+```
+
+{% /tab %}
+{% /tabs %}
+
+## Raw SSE format
+
+When streaming, the gateway returns data in SSE format. Each event is a JSON object prefixed with `data: `:
+
+```
+data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1234567890,"model":"anthropic/claude-sonnet-4.5","choices":[{"index":0,"delta":{"role":"assistant","content":"Once"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1234567890,"model":"anthropic/claude-sonnet-4.5","choices":[{"index":0,"delta":{"content":" upon"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1234567890,"model":"anthropic/claude-sonnet-4.5","choices":[{"index":0,"delta":{"content":" a"},"finish_reason":null}]}
+
+data: [DONE]
+```
+
+### Usage in the final chunk
+
+Token usage data is included in the final chunk before `[DONE]`, with an empty `choices` array:
+
+```json
+{
+	"id": "chatcmpl-abc123",
+	"object": "chat.completion.chunk",
+	"usage": {
+		"prompt_tokens": 12,
+		"completion_tokens": 150,
+		"total_tokens": 162
+	},
+	"choices": []
+}
+```
+
+## Stream cancellation
+
+You can cancel a streaming request by aborting the connection. This stops token generation, so you are not billed for tokens that were never generated:
+
+```typescript
+const controller = new AbortController()
+
+const response = await fetch("https://api.kilo.ai/api/gateway/chat/completions", {
+	method: "POST",
+	headers: {
+		Authorization: `Bearer ${process.env.KILO_API_KEY}`,
+		"Content-Type": "application/json",
+	},
+	body: JSON.stringify({
+		model: "anthropic/claude-sonnet-4.5",
+		messages: [{ role: "user", content: "Write a long essay" }],
+		stream: true,
+	}),
+	signal: controller.signal,
+})
+
+// Cancel after 5 seconds
+setTimeout(() => controller.abort(), 5000)
+```
+
+{% callout type="warning" %}
+Stream cancellation behavior depends on the upstream provider. Some providers stop processing immediately, while others may continue processing after disconnection. The gateway handles partial usage tracking for cancelled streams.
+{% /callout %}
+
+## Error handling during streaming
+
+### Errors before streaming starts
+
+If an error occurs before any tokens are sent, the gateway returns a standard JSON error response with the appropriate HTTP status code:
+
+```json
+{
+	"error": {
+		"message": "Insufficient balance",
+		"code": 402
+	}
+}
+```
+
+### Errors during streaming
+
+If an error occurs after tokens have already been sent, the HTTP status (200) cannot be changed. The error appears as an SSE event:
+
+```
+data: {"error":{"message":"Provider disconnected","code":502},"choices":[{"index":0,"delta":{"content":""},"finish_reason":"error"}]}
+```
+
+Check for `finish_reason: "error"` to detect mid-stream errors in your client code.
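+
+For example, with the OpenAI SDK's streaming iterator you might watch for that finish reason while consuming chunks. Note that `"error"` is a gateway-specific value, so it is not part of the SDK's `finish_reason` type union and needs a cast:
+
+```typescript
+import OpenAI from "openai"
+
+const client = new OpenAI({
+	apiKey: process.env.KILO_API_KEY,
+	baseURL: "https://api.kilo.ai/api/gateway",
+})
+
+const stream = await client.chat.completions.create({
+	model: "anthropic/claude-sonnet-4.5",
+	messages: [{ role: "user", content: "Write a short story" }],
+	stream: true,
+})
+
+for await (const chunk of stream) {
+	const choice = chunk.choices[0]
+	// "error" is gateway-specific, hence the cast
+	if ((choice?.finish_reason as string | null) === "error") {
+		console.error("Stream ended with a mid-stream error")
+		break
+	}
+	const content = choice?.delta?.content
+	if (content) process.stdout.write(content)
+}
+```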
+
+## Recommended SSE clients
+
+For parsing SSE streams, we recommend these libraries:
+
+- [eventsource-parser](https://github.com/rexxars/eventsource-parser) -- Lightweight SSE parser
+- [OpenAI SDK](https://www.npmjs.com/package/openai) -- Built-in streaming support
+- [Vercel AI SDK](https://www.npmjs.com/package/ai) -- High-level streaming abstractions

+ 137 - 0
apps/kilocode-docs/pages/gateway/usage-and-billing.md

@@ -0,0 +1,137 @@
+---
+title: "Usage & Billing"
+description: "Understand how the Kilo AI Gateway tracks costs, manages balances, and enforces organization-level spending controls."
+---
+
+# Usage & Billing
+
+The Kilo AI Gateway tracks usage and costs for every request with microdollar precision (1 USD = 1,000,000 microdollars). This enables accurate billing even for very low-cost requests.
+
+## How billing works
+
+Every request to the gateway follows this flow:
+
+1. **Balance check**: Before proxying the request, the gateway verifies you have sufficient balance
+2. **Request execution**: The request is sent to the upstream provider
+3. **Usage tracking**: Token counts and costs are extracted from the response
+4. **Balance update**: Your balance is atomically updated with the request cost
+
+### Cost calculation
+
+Costs are determined by the upstream provider's pricing based on token usage:
+
+- **Input tokens**: Tokens in your prompt (system message, user messages, tool definitions)
+- **Output tokens**: Tokens generated by the model
+- **Cache write tokens**: Tokens written to the provider's prompt cache
+- **Cache hit tokens**: Tokens served from the provider's prompt cache (typically discounted)
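+
+Leaving cache tokens aside, you can make a back-of-the-envelope estimate from the pricing strings returned by the `/models` endpoint, assuming they are per-token USD prices. The exact billed amount is determined upstream, so treat this only as an approximation:
+
+```typescript
+// Estimate the cost of a response from per-token USD prices returned by /models
+// (e.g. prompt: "0.000003", completion: "0.000015" for anthropic/claude-sonnet-4.5)
+function estimateCostUsd(
+	usage: { prompt_tokens: number; completion_tokens: number },
+	pricing: { prompt: string; completion: string },
+): number {
+	return usage.prompt_tokens * Number(pricing.prompt) + usage.completion_tokens * Number(pricing.completion)
+}
+
+// 25 input + 150 output tokens at the example pricing above ≈ $0.002325 (2,325 microdollars)
+console.log(estimateCostUsd({ prompt_tokens: 25, completion_tokens: 150 }, { prompt: "0.000003", completion: "0.000015" }))
+```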
+
+### Free and BYOK requests
+
+- **Free models**: Models tagged with `:free` have zero cost -- usage is tracked but not billed
+- **BYOK requests**: When using your own API key, the cost is set to $0 on Kilo's side. You pay the provider directly based on your agreement with them
+
+## Balance management
+
+### Individual accounts
+
+Your account balance is the difference between total credits purchased and total usage. Check your balance in the [Kilo dashboard](https://app.kilo.ai).
+
+When your balance reaches zero, requests to paid models will return HTTP 402 with a link to add credits:
+
+```json
+{
+	"error": {
+		"message": "Insufficient balance. Please add credits to continue.",
+		"code": 402,
+		"metadata": {
+			"buyCreditsUrl": "https://app.kilo.ai/credits"
+		}
+	}
+}
+```
+
+### Organization accounts
+
+Organizations have their own balance pool that members draw from. Organization billing supports:
+
+- **Shared balance**: All members use a common credit pool
+- **Per-user daily limits**: Cap individual member spending (e.g., $5/day per user)
+- **Auto top-up**: Automatically replenish credits when the balance drops below a threshold
+- **Minimum balance alerts**: Email notifications when the balance drops below a configured amount
+
+## Organization controls
+
+Organizations can enforce policies on gateway usage for their members.
+
+### Model allow lists
+
+Restrict which models organization members can use:
+
+```
+# Examples of allow list entries
+anthropic/claude-sonnet-4.5      # Specific model
+anthropic/*                       # All Anthropic models
+openai/gpt-5.2                   # Specific OpenAI model
+```
+
+The allow list supports exact matches and wildcard patterns. Requests for models not on the list return HTTP 403.
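+
+The matching semantics can be illustrated with the following sketch (an approximation, not the gateway's actual implementation):
+
+```typescript
+// Approximate allow-list check: exact match or "<provider>/*" wildcard
+function isModelAllowed(modelId: string, allowList: string[]): boolean {
+	return allowList.some((entry) => {
+		if (entry.endsWith("/*")) return modelId.startsWith(entry.slice(0, -1)) // keeps the trailing "/"
+		return entry === modelId
+	})
+}
+
+const allowList = ["anthropic/*", "openai/gpt-5.2"]
+console.log(isModelAllowed("anthropic/claude-sonnet-4.5", allowList)) // true
+console.log(isModelAllowed("google/gemini-3-pro-preview", allowList)) // false -> request returns HTTP 403
+```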
+
+### Provider allow lists
+
+Restrict which inference providers can be used for routing. This is passed to the upstream router and affects which backends serve the request.
+
+### Data collection controls
+
+Organizations can set a data collection policy (`allow` or `deny`) that is applied to all requests from their members. Some free models require data collection to be allowed.
+
+### Per-user daily spending limits
+
+Set a maximum daily spend per organization member. When a member reaches their daily limit, subsequent requests return a balance error. The daily limit resets at midnight UTC.
+
+## Rate limiting
+
+### Free model rate limits
+
+All free model requests (both anonymous and authenticated) are rate-limited by IP address:
+
+| Scope              | Limit                 |
+| ------------------ | --------------------- |
+| Free models per IP | 200 requests per hour |
+
+When rate-limited, you receive HTTP 429:
+
+```json
+{
+	"error": {
+		"message": "Rate limit exceeded for free models. Please try again later.",
+		"code": 429
+	}
+}
+```
+
+### Paid model limits
+
+Paid model requests are not rate-limited by the gateway itself, but may be rate-limited by upstream providers. Organization per-user daily spending limits provide an additional layer of cost control.
+
+## Usage data
+
+Usage data is tracked per request and includes:
+
+| Field                 | Description                                |
+| --------------------- | ------------------------------------------ |
+| `model`               | Model ID used                              |
+| `provider`            | Inference provider that served the request |
+| `input_tokens`        | Number of input/prompt tokens              |
+| `output_tokens`       | Number of output/completion tokens         |
+| `cache_write_tokens`  | Tokens written to cache                    |
+| `cache_hit_tokens`    | Tokens served from cache                   |
+| `cost_microdollars`   | Cost in microdollars (1 USD = 1,000,000)   |
+| `time_to_first_token` | Latency to first token (streaming only)    |
+| `is_byok`             | Whether a BYOK key was used                |
+
+## Token counting
+
+Token counts are provided by the upstream model and are based on the model's native tokenizer. The gateway does not re-tokenize content. Usage data is available:
+
+- **Non-streaming**: In the `usage` field of the response body
+- **Streaming**: In the final SSE chunk before `[DONE]`

+ 1 - 1
apps/kilocode-docs/pages/getting-started/faq.md

@@ -101,7 +101,7 @@ Yes, Kilo Code supports running models locally using [Ollama](/docs/providers/ol
 
 ### How do I start a new task?
 
-Open the Kilo Code panel (<img src="/docs/img/kilo-v1.svg" width="12" />) and type your task in the chat box. Be clear and specific about what you want Kilo Code to do. See [The Chat Interface](/docs/basic-usage/the-chat-interface) for best practices.
+Open the Kilo Code panel ({% kiloCodeIcon size="1em" /%}) and type your task in the chat box. Be clear and specific about what you want Kilo Code to do. See [The Chat Interface](/docs/basic-usage/the-chat-interface) for best practices.
 
 ### When should I use chat vs autocomplete?
 

+ 37 - 0
apps/kilocode-docs/pages/getting-started/installing.md

@@ -45,6 +45,43 @@ Get started with Kilo Code by installing it on your preferred platform. Choose y
 {% /tab %}
 {% /tabs %}
 
+## Pre-Release Extension
+
+{% callout type="info" %}
+We're rebuilding Kilo Code from the ground up on the new [Kilo CLI](https://github.com/Kilo-Org/kilo). The pre-release extension is available for users who want to try the latest architecture and provide feedback, and don't mind some missing features and rough edges.
+{% /callout %}
+
+The pre-release extension is a complete rebuild featuring:
+
+- A new Solid.js-based UI
+- Deep integration with the Kilo CLI backend
+- Improved session management and model switching
+
+### Current Status
+
+This is an early pre-release. Core features like chat, markdown rendering, authentication, and model/mode switching are working. Some features from the stable extension are still being implemented.
+
+For the full feature status, see the [feature parity tracking document](https://github.com/Kilo-Org/kilo/blob/main/packages/kilo-vscode/docs/opencode-migration-plan.md).
+
+### Installing the Pre-Release
+
+1. Open VS Code
+2. Go to Extensions (`Ctrl+Shift+X` / `Cmd+Shift+X`)
+3. Search for "Kilo Code"
+4. Click the dropdown arrow next to **Install** and select **Install Pre-Release Version**
+
+### Switching Back to Stable
+
+If you need to return to the stable version:
+
+1. Open Extensions in VS Code
+2. Find Kilo Code
+3. Click the dropdown and select **Switch to Release Version**
+
+### Feedback and Issues
+
+Report issues or provide feedback in the [Kilo-Org/kilo repository](https://github.com/Kilo-Org/kilo/issues).
+
 ## Manual Installations
 
 ### Open VSX Registry

+ 36 - 1
apps/kilocode-docs/pages/index.tsx

@@ -15,6 +15,18 @@ const terminalContent = {
 			<span className="terminal-prompt">$</span> npm install -g @kilocode/cli
 		</>
 	),
+	gateway: (
+		<>
+			<span className="terminal-comment"># Call Kilo Gateway with a quick curl script</span>
+			{"\n"}
+			<span className="terminal-prompt">$</span> export KILO_API_KEY="YOUR_API_KEY"
+			{"\n"}
+			<span className="terminal-prompt">$</span> curl https://api.kilo.ai/api/gateway/chat/completions \{"\n"}
+			-H "Authorization: Bearer $KILO_API_KEY" \{"\n"}
+			-H "Content-Type: application/json" \{"\n"}
+			{`  -d '{"model":"anthropic/claude-sonnet-4.5","messages":[{"role":"user","content":"Say hi from Kilo Gateway"}]}'`}
+		</>
+	),
 	firstTask: (
 		<>
 			<span className="terminal-comment"># Start a new task with Kilo Code</span>
@@ -141,6 +153,24 @@ const categories = [
 			{ title: "Security Reviews", href: "/deploy-secure" },
 		],
 	},
+	{
+		title: "Kilo Gateway",
+		description:
+			"A unified API to access hundreds of AI models through a single endpoint with streaming, BYOK, and usage tracking.",
+		href: "/gateway",
+		icon: (
+			<svg className="category-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
+				<path d="M3 12h18" strokeLinecap="round" strokeLinejoin="round" />
+				<path d="M12 3v18" strokeLinecap="round" strokeLinejoin="round" />
+				<circle cx="12" cy="12" r="4" strokeLinecap="round" strokeLinejoin="round" />
+			</svg>
+		),
+		links: [
+			{ title: "Quickstart", href: "/gateway/quickstart" },
+			{ title: "Models & Providers", href: "/gateway/models-and-providers" },
+			{ title: "API Reference", href: "/gateway/api-reference" },
+		],
+	},
 	{
 		title: "Contributing",
 		description: "Help improve Kilo Code and learn about its architecture",
@@ -163,7 +193,7 @@ const categories = [
 ]
 
 export default function HomePage() {
-	const [activeTab, setActiveTab] = useState<"installation" | "firstTask" | "customRules">("installation")
+	const [activeTab, setActiveTab] = useState<"installation" | "firstTask" | "customRules" | "gateway">("installation")
 
 	return (
 		<div className="homepage">
@@ -281,6 +311,11 @@ export default function HomePage() {
 							onClick={() => setActiveTab("customRules")}>
 							Custom Rules
 						</button>
+						<button
+							className={`terminal-tab ${activeTab === "gateway" ? "active" : ""}`}
+							onClick={() => setActiveTab("gateway")}>
+							Kilo Gateway
+						</button>
 					</div>
 					<div className="terminal-window">
 						<div className="terminal-header">

+ 108 - 17
apps/kilocode-docs/public/globals.css

@@ -4,7 +4,7 @@
 @custom-variant dark (&:where(.dark, .dark *));
 
 :root {
-	--top-nav-height: 105px;
+	--top-nav-height: 141px; /* 105px nav + 36px banner */
 	--border-color: #dce6e9;
 	--bg-color: theme(colors.zinc.50);
 	--bg-secondary: theme(colors.zinc.100);
@@ -195,33 +195,123 @@ a:hover {
 	margin: 0.5em 0;
 }
 
-/* Dark mode code syntax highlighting */
+/* ==========================================================================
+   Code block base styles
+   ========================================================================== */
+
+code[class*="language-"],
+pre[class*="language-"] {
+	font-family: "JetBrains Mono", Consolas, Monaco, "Andale Mono", monospace;
+	font-size: 14px;
+	line-height: 1.6;
+}
+
+/* ==========================================================================
+   Light mode syntax highlighting
+   Coordinated with site palette: zinc/slate base, indigo accent
+   ========================================================================== */
+
+pre[class*="language-"],
+:not(pre) > code[class*="language-"] {
+	background: #f4f4f5; /* zinc-100 */
+}
+
+code[class*="language-"],
+pre[class*="language-"] {
+	color: #3c4257; /* matches --text-color */
+	text-shadow: none;
+}
+
+.token.comment,
+.token.prolog,
+.token.doctype,
+.token.cdata {
+	color: #8891a4; /* slate-muted */
+}
+
+.token.punctuation {
+	color: #64748b; /* slate-500 */
+}
+
+.token.property,
+.token.tag,
+.token.boolean,
+.token.number,
+.token.constant,
+.token.symbol,
+.token.deleted {
+	color: #b45309; /* amber-700, warm for values */
+}
+
+.token.selector,
+.token.attr-name,
+.token.string,
+.token.char,
+.token.builtin,
+.token.inserted {
+	color: #16733e; /* green-700, strings */
+}
+
+.token.operator,
+.token.entity,
+.token.url,
+.language-css .token.string,
+.style .token.string {
+	color: #64748b; /* slate-500, neutral for operators */
+	background: none;
+}
+
+.token.atrule,
+.token.attr-value,
+.token.keyword {
+	color: #4f46e5; /* indigo-600, matches accent */
+}
+
+.token.function,
+.token.class-name {
+	color: #9333ea; /* purple-600 */
+}
+
+.token.regex,
+.token.important,
+.token.variable {
+	color: #b45309; /* amber-700 */
+}
+
+/* ==========================================================================
+   Dark mode syntax highlighting
+   Coordinated with site palette: #1a1a18 base, #f8f674 brand accent
+   ========================================================================== */
+
+.dark pre[class*="language-"],
+.dark :not(pre) > code[class*="language-"] {
+	background: #24241f !important; /* slightly lighter than --bg-color */
+}
+
+.dark code[class*="language-"],
 .dark pre[class*="language-"] {
-	background: #1f2937 !important;
+	color: #c8c8c6; /* warm neutral base text */
 	text-shadow: none;
-	color: #e5e7eb;
 }
 
-.dark pre[class*="language-"] *,
-.dark code[class*="language-"] {
-	color: #e5e7eb;
+.dark pre[class*="language-"] * {
 	text-shadow: none;
 }
 
 /* Ensure inline code also has proper color in dark mode */
 .dark code {
-	color: #e5e7eb;
+	color: #c8c8c6;
 }
 
 .dark .token.comment,
 .dark .token.prolog,
 .dark .token.doctype,
 .dark .token.cdata {
-	color: #6b7280;
+	color: #6b6b66; /* muted, warm gray */
 }
 
 .dark .token.punctuation {
-	color: #9ca3af;
+	color: #8a8a85; /* warm mid-gray */
 }
 
 .dark .token.property,
@@ -231,7 +321,7 @@ a:hover {
 .dark .token.constant,
 .dark .token.symbol,
 .dark .token.deleted {
-	color: #f87171;
+	color: #e8a862; /* warm amber for values */
 }
 
 .dark .token.selector,
@@ -240,7 +330,7 @@ a:hover {
 .dark .token.char,
 .dark .token.builtin,
 .dark .token.inserted {
-	color: #34d399;
+	color: #a8cc7a; /* soft green, strings */
 }
 
 .dark .token.operator,
@@ -248,24 +338,25 @@ a:hover {
 .dark .token.url,
 .dark .language-css .token.string,
 .dark .style .token.string {
-	color: #fbbf24;
+	color: #8a8a85; /* neutral for operators */
+	background: none;
 }
 
 .dark .token.atrule,
 .dark .token.attr-value,
 .dark .token.keyword {
-	color: #60a5fa;
+	color: #d4c96b; /* warm yellow, near brand #f8f674 but less saturated */
 }
 
 .dark .token.function,
 .dark .token.class-name {
-	color: #f472b6;
+	color: #c9a5e8; /* soft purple */
 }
 
 .dark .token.regex,
 .dark .token.important,
 .dark .token.variable {
-	color: #fbbf24;
+	color: #e8a862; /* warm amber */
 }
 
 /* Mobile Responsive Styles */
@@ -278,7 +369,7 @@ a:hover {
 
 @media (max-width: 768px) {
 	:root {
-		--top-nav-height: 60px;
+		--top-nav-height: 116px; /* 60px nav + 56px banner */
 	}
 
 	/* Hide desktop sidenav, show via mobile toggle */

BIN
apps/kilocode-docs/public/img/browser-use/KiloCodeBrowser.png


BIN
apps/kilocode-docs/public/img/browser-use/browser-use-1.png


BIN
apps/kilocode-docs/public/img/browser-use/browser-use-2.png


BIN
apps/kilocode-docs/public/img/browser-use/browser-use-3.png


BIN
apps/kilocode-docs/public/img/browser-use/browser-use-4.png


BIN
apps/kilocode-docs/public/img/browser-use/browser-use-5.png


BIN
apps/kilocode-docs/public/img/browser-use/browser-use.png


File diff not shown because it is too large
+ 0 - 0
apps/kilocode-docs/tsconfig.tsbuildinfo


+ 2 - 1
cli/src/constants/providers/labels.ts

@@ -52,7 +52,8 @@ export const PROVIDER_LABELS: Record<ProviderName, string> = {
 	synthetic: "Synthetic",
 	"sap-ai-core": "SAP AI Core",
 	baseten: "BaseTen",
-	corethink: "Corethink"
+	apertis: "Apertis", // kilocode_change
+	corethink: "Corethink",
 }
 
 /**

+ 4 - 0
cli/src/constants/providers/models.ts

@@ -46,6 +46,7 @@ import {
 	minimaxModels,
 	minimaxDefaultModelId,
 	ovhCloudAiEndpointsDefaultModelId,
+	apertisDefaultModelId,
 	zenmuxDefaultModelId,
 } from "@roo-code/types"
 
@@ -168,6 +169,7 @@ export const PROVIDER_TO_ROUTER_NAME: Record<ProviderName, RouterName | null> =
 	synthetic: null,
 	"sap-ai-core": null,
 	baseten: null,
+	apertis: null, // kilocode_change
 	corethink: null,
 }
 
@@ -223,6 +225,7 @@ export const PROVIDER_MODEL_FIELD: Record<ProviderName, string | null> = {
 	synthetic: null,
 	"sap-ai-core": "sapAiCoreModelId",
 	baseten: null,
+	apertis: "apertisModelId", // kilocode_change
 	corethink: null,
 }
 
@@ -289,6 +292,7 @@ export const DEFAULT_MODEL_IDS: Partial<Record<ProviderName, string>> = {
 	zai: internationalZAiDefaultModelId,
 	roo: rooDefaultModelId,
 	ovhcloud: ovhCloudAiEndpointsDefaultModelId,
+	apertis: apertisDefaultModelId, // kilocode_change
 	zenmux: zenmuxDefaultModelId,
 }
 

+ 2 - 1
cli/src/constants/providers/settings.ts

@@ -1110,7 +1110,8 @@ export const PROVIDER_DEFAULT_MODELS: Record<ProviderName, string> = {
 	synthetic: "synthetic-model",
 	"sap-ai-core": "gpt-4o",
 	baseten: "zai-org/GLM-4.6",
-	corethink: "corethink"
+	apertis: "claude-sonnet-4-20250514", // kilocode_change
+	corethink: "corethink",
 }
 
 /**

+ 2 - 1
cli/src/constants/providers/validation.ts

@@ -53,5 +53,6 @@ export const PROVIDER_REQUIRED_FIELDS: Record<ProviderName, string[]> = {
 	"virtual-quota-fallback": [], // Has array validation
 	minimax: ["minimaxBaseUrl", "minimaxApiKey", "apiModelId"],
 	baseten: ["basetenApiKey", "apiModelId"],
-	corethink: ["corethinkApiKey", "corethinkModelId"]
+	apertis: ["apertisApiKey", "apertisModelId"], // kilocode_change
+	corethink: ["corethinkApiKey", "corethinkModelId"],
 }

+ 231 - 0
docs/plans/2026-01-20-apertis-provider-design.md

@@ -0,0 +1,231 @@
+# Apertis Provider Design for Kilo Code
+
+**Date:** 2026-01-20
+**Status:** Approved
+**Author:** Claude + User
+
+## Overview
+
+Apertis is a unified AI API platform providing access to 450+ models from multiple providers (OpenAI, Anthropic, Google, etc.). This document outlines the design for integrating Apertis as a model provider in Kilo Code.
+
+## API Endpoints
+
+| Endpoint | Format | Authentication | Use Case |
+|----------|--------|----------------|----------|
+| `/v1/chat/completions` | OpenAI Chat | `Authorization: Bearer` | General models (GPT, Gemini, etc.) |
+| `/v1/messages` | Anthropic | `x-api-key` | Claude models |
+| `/v1/responses` | OpenAI Responses | `Authorization: Bearer` | Reasoning models (o1, o3) |
+| `/api/models` | - | None required | Public model list |
+| `/v1/models` | OpenAI | `Authorization: Bearer` | Detailed model info |
+
+**Base URL:** `https://api.apertis.ai` (configurable for enterprise/self-hosted)
+
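+As a rough sketch of how authentication differs per endpoint (the request bodies, the placeholder key, and the `anthropic-version` header are illustrative assumptions, not final client code):
+
+```typescript
+const BASE_URL = "https://api.apertis.ai"
+const apiKey = "<APERTIS_API_KEY>" // placeholder
+
+async function probeEndpoints() {
+  // Public model list: no authentication required.
+  const models = await fetch(`${BASE_URL}/api/models`).then((r) => r.json())
+
+  // OpenAI-compatible endpoint: Bearer token in the Authorization header.
+  const chat = await fetch(`${BASE_URL}/v1/chat/completions`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json", Authorization: `Bearer ${apiKey}` },
+    body: JSON.stringify({ model: "gpt-4o", messages: [{ role: "user", content: "Hello" }] }),
+  })
+
+  // Anthropic-compatible endpoint: x-api-key instead of a Bearer token.
+  // (anthropic-version mirrors Anthropic's native API; whether Apertis requires it is an assumption.)
+  const message = await fetch(`${BASE_URL}/v1/messages`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json", "x-api-key": apiKey, "anthropic-version": "2023-06-01" },
+    body: JSON.stringify({ model: "claude-sonnet-4-20250514", max_tokens: 1024, messages: [{ role: "user", content: "Hello" }] }),
+  })
+
+  return { models, chatStatus: chat.status, messageStatus: message.status }
+}
+```
+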
+## Architecture
+
+### Smart API Router
+
+The ApertisHandler implements intelligent routing based on model ID:
+
+```
+┌─────────────────────────────────────────────────────────┐
+│                  ApertisHandler                          │
+├─────────────────────────────────────────────────────────┤
+│  getApiFormat(modelId) → decides which API to use       │
+│                                                          │
+│  ┌─────────────┐  ┌─────────────┐  ┌─────────────────┐  │
+│  │ /v1/messages│  │/v1/responses│  │/v1/chat/complete│  │
+│  │  (Claude)   │  │ (o1, etc.)  │  │   (other models)│  │
+│  └─────────────┘  └─────────────┘  └─────────────────┘  │
+└─────────────────────────────────────────────────────────┘
+```
+
+**Routing Rules:**
+- `claude-*` → `/v1/messages` (Anthropic format)
+- `o1-*`, `o3-*` or reasoning enabled → `/v1/responses` (Responses API)
+- Others → `/v1/chat/completions` (OpenAI Chat)
+
+## File Structure
+
+### New Files
+
+```
+packages/types/src/
+├── providers/
+│   └── apertis.ts              # Apertis-specific types and constants
+
+src/api/providers/
+├── apertis.ts                  # Main Handler (smart routing)
+├── fetchers/
+│   └── apertis.ts              # Model list fetcher
+
+webview-ui/src/components/settings/providers/
+└── Apertis.tsx                 # Settings UI component
+```
+
+### Modified Files
+
+```
+packages/types/src/provider-settings.ts    # Add Apertis provider definition
+src/api/providers/index.ts                 # Export ApertisHandler
+src/api/index.ts                           # Register buildApiHandler case
+cli/src/constants/providers/models.ts      # Add Apertis mappings
+cli/src/constants/providers/labels.ts      # Add label
+cli/src/constants/providers/settings.ts    # Add settings schema
+webview-ui/src/i18n/locales/*/settings.json # i18n translations
+```
+
+## Provider Settings Schema
+
+```typescript
+const apertisSchema = baseProviderSettingsSchema.extend({
+  // Authentication
+  apertisApiKey: z.string().optional(),
+
+  // Model selection
+  apertisModelId: z.string().optional(),
+
+  // Base URL (default: https://api.apertis.ai)
+  apertisBaseUrl: z.string().optional(),
+
+  // Responses API specific
+  apertisInstructions: z.string().optional(),
+
+  // Reasoning settings
+  apertisReasoningEffort: z.enum(["low", "medium", "high"]).optional(),
+  apertisReasoningSummary: z.enum(["auto", "concise", "detailed"]).optional(),
+})
+```
+
+## Constants
+
+```typescript
+// packages/types/src/providers/apertis.ts
+
+export const APERTIS_DEFAULT_BASE_URL = "https://api.apertis.ai"
+export const apertisDefaultModelId = "claude-sonnet-4-20250514"
+```
+
+## Handler Implementation
+
+```typescript
+// src/api/providers/apertis.ts
+
+export class ApertisHandler extends BaseProvider implements SingleCompletionHandler {
+  private options: ApiHandlerOptions
+  private client: OpenAI
+  private anthropicClient: Anthropic
+
+  constructor(options: ApiHandlerOptions) {
+    super()
+    this.options = options
+    const baseURL = options.apertisBaseUrl || APERTIS_DEFAULT_BASE_URL
+
+    this.client = new OpenAI({
+      baseURL: `${baseURL}/v1`,
+      apiKey: options.apertisApiKey,
+    })
+
+    this.anthropicClient = new Anthropic({
+      baseURL: `${baseURL}/v1`,
+      apiKey: options.apertisApiKey,
+    })
+  }
+
+  private getApiFormat(modelId: string): "messages" | "responses" | "chat" {
+    if (modelId.startsWith("claude-")) return "messages"
+    if (modelId.startsWith("o1-") || modelId.startsWith("o3-")) return "responses"
+    return "chat"
+  }
+
+  async *createMessage(systemPrompt, messages, metadata) {
+    const format = this.getApiFormat(this.getModel().id)
+
+    switch (format) {
+      case "messages":
+        yield* this.createAnthropicMessage(systemPrompt, messages, metadata)
+        break
+      case "responses":
+        yield* this.createResponsesMessage(systemPrompt, messages, metadata)
+        break
+      default:
+        yield* this.createChatMessage(systemPrompt, messages, metadata)
+    }
+  }
+}
+```
+
+## Model Fetcher
+
+```typescript
+// src/api/providers/fetchers/apertis.ts
+
+export async function getApertisModels(options?: {
+  apiKey?: string
+  baseUrl?: string
+}): Promise<ModelRecord> {
+  const baseUrl = options?.baseUrl || APERTIS_DEFAULT_BASE_URL
+
+  // Use public endpoint (no auth required)
+  const response = await fetch(`${baseUrl}/api/models`)
+  const data = await response.json()
+
+  const models: ModelRecord = {}
+
+  for (const modelId of data.data) {
+    models[modelId] = {
+      contextWindow: getContextWindow(modelId),
+      supportsPromptCache: modelId.startsWith("claude-"),
+      supportsImages: supportsVision(modelId),
+    }
+  }
+
+  return models
+}
+```
+
+## UI Settings Component
+
+Key features:
+- API Key input with link to `https://apertis.ai/token`
+- Model picker with dynamic model list
+- Reasoning settings (shown only for o1/o3 models)
+- Advanced settings (collapsible) with Base URL option
+
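+A minimal sketch of the component's shape (plain HTML controls here; the real component would reuse the shared settings widgets, and the prop names are assumptions):
+
+```tsx
+import { useState } from "react"
+
+type ApertisSettings = {
+  apertisApiKey?: string
+  apertisModelId?: string
+  apertisBaseUrl?: string
+}
+
+export function ApertisSettingsForm(props: {
+  value: ApertisSettings
+  onChange: (next: ApertisSettings) => void
+  modelIds: string[] // e.g. the keys returned by getApertisModels()
+}) {
+  const [showAdvanced, setShowAdvanced] = useState(false)
+  const set = (patch: Partial<ApertisSettings>) => props.onChange({ ...props.value, ...patch })
+  const modelId = props.value.apertisModelId ?? ""
+  const isReasoningModel = modelId.startsWith("o1") || modelId.startsWith("o3")
+
+  return (
+    <div>
+      {/* API key, with a pointer to https://apertis.ai/token */}
+      <input
+        type="password"
+        placeholder="Apertis API key"
+        value={props.value.apertisApiKey ?? ""}
+        onChange={(e) => set({ apertisApiKey: e.target.value })}
+      />
+      {/* Dynamic model picker */}
+      <select value={modelId} onChange={(e) => set({ apertisModelId: e.target.value })}>
+        {props.modelIds.map((id) => (
+          <option key={id}>{id}</option>
+        ))}
+      </select>
+      {/* Reasoning controls only make sense for o1/o3 models */}
+      {isReasoningModel && <p>Reasoning effort / summary selectors go here.</p>}
+      {/* Collapsible advanced section with the Base URL override */}
+      <button type="button" onClick={() => setShowAdvanced((s) => !s)}>
+        Advanced
+      </button>
+      {showAdvanced && (
+        <input
+          placeholder="https://api.apertis.ai"
+          value={props.value.apertisBaseUrl ?? ""}
+          onChange={(e) => set({ apertisBaseUrl: e.target.value })}
+        />
+      )}
+    </div>
+  )
+}
+```
+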
+## Special Features Support
+
+| Feature | API | Implementation |
+|---------|-----|----------------|
+| Extended Thinking | Messages API | `thinking.budget_tokens` parameter |
+| Reasoning Effort | Responses API | `reasoning.effort` parameter |
+| Reasoning Summary | Responses API | `reasoning.summary` parameter |
+| Instructions | Responses API | `instructions` parameter |
+| Web Search | Responses API | `tools` with web_search type |
+| Streaming | All APIs | `stream: true` parameter |
+
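+As a rough illustration, request bodies might carry these parameters (field shapes follow Anthropic's Messages API and OpenAI's Responses API; the exact web-search tool type string and whether Apertis passes these fields through unchanged are assumptions):
+
+```typescript
+// Messages API (Claude): extended thinking via a token budget, streamed.
+const messagesBody = {
+  model: "claude-sonnet-4-20250514",
+  max_tokens: 8192,
+  thinking: { type: "enabled", budget_tokens: 4096 },
+  stream: true,
+  messages: [{ role: "user", content: "Refactor this function" }],
+}
+
+// Responses API (o1/o3): instructions, reasoning effort/summary, web search, streamed.
+const responsesBody = {
+  model: "o3-mini",
+  instructions: "You are a coding assistant.",
+  reasoning: { effort: "high", summary: "auto" },
+  tools: [{ type: "web_search" }],
+  stream: true,
+  input: [{ role: "user", content: "Summarize the latest TypeScript release notes" }],
+}
+```
+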
+## Error Handling
+
+- Parse Apertis error responses and convert to user-friendly messages
+- Handle authentication errors (401) with link to token page
+- Handle rate limits (429) with retry suggestion
+- Graceful degradation when a feature is not supported by the selected API format
+
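+A minimal sketch of the status-code mapping (the helper name and message wording are placeholders):
+
+```typescript
+function mapApertisError(status: number, body: { error?: { message?: string } }): string {
+  const detail = body.error?.message ?? "Unknown error"
+  switch (status) {
+    case 401:
+      return `Invalid Apertis API key. Create a new one at https://apertis.ai/token. (${detail})`
+    case 429:
+      return `Apertis rate limit reached. Please wait a moment and retry. (${detail})`
+    default:
+      return `Apertis request failed with status ${status}: ${detail}`
+  }
+}
+```
+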
+## Testing Strategy
+
+1. Unit tests for API format routing logic
+2. Unit tests for authentication header selection
+3. Integration tests for each API format
+4. Mock tests for error handling scenarios
+
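+For the routing logic in particular, the unit tests could look roughly like this (vitest, as elsewhere in the repo; the import path and option fields are illustrative):
+
+```typescript
+import { describe, it, expect } from "vitest"
+import { ApertisHandler } from "../apertis" // illustrative path
+
+describe("ApertisHandler API routing", () => {
+  it.each([
+    ["claude-sonnet-4-20250514", "messages"],
+    ["o3-mini", "responses"],
+    ["gpt-4o", "chat"],
+  ])("routes %s to the %s API", (modelId, expected) => {
+    const handler = new ApertisHandler({ apertisApiKey: "test-key", apertisModelId: modelId })
+    // Element access bypasses the private modifier for test purposes.
+    expect(handler["getApiFormat"](modelId)).toBe(expected)
+  })
+})
+```
+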
+## Implementation Order
+
+1. `packages/types/` - Provider definitions and schema
+2. `src/api/providers/apertis.ts` - Core Handler
+3. `src/api/providers/fetchers/apertis.ts` - Model fetcher
+4. `src/api/index.ts` - Register handler
+5. `webview-ui/` - UI settings component
+6. `cli/` - CLI integration
+7. Tests and documentation
+
+## References
+
+- Apertis API Documentation: https://docs.apertis.ai
+- Apertis Token Page: https://apertis.ai/token
+- Context7 Library: `/apertis-ai/docs`

+ 5 - 5
jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/commands/SetContextCommands.kt

@@ -9,7 +9,7 @@ import com.intellij.openapi.project.Project
  * 
  * The setContext command allows the extension to set context values that can be used
  * to control UI state and feature availability. This is commonly used by features like
- * the GhostProvider (autocomplete) to enable/disable keybindings dynamically.
+ * the AutocompleteProvider (autocomplete) to enable/disable keybindings dynamically.
  * 
  * @param project The current IntelliJ project
  * @param registry The command registry to register commands with
@@ -47,9 +47,9 @@ fun registerSetContextCommands(project: Project, registry: CommandRegistry) {
  * setting context key-value pairs that can be used throughout the plugin to control
  * feature availability and UI state.
  * 
- * Example context keys used by GhostProvider:
- * - kilocode.ghost.enableQuickInlineTaskKeybinding
- * - kilocode.ghost.enableSmartInlineTaskKeybinding
+ * Example context keys used by AutocompleteProvider:
+ * - kilocode.autocomplete.enableQuickInlineTaskKeybinding
+ * - kilocode.autocomplete.enableSmartInlineTaskKeybinding
  */
 class SetContextCommands(val project: Project) {
     private val logger = Logger.getInstance(SetContextCommands::class.java)
@@ -61,7 +61,7 @@ class SetContextCommands(val project: Project) {
      * This method is called when the setContext command is executed from the extension.
      * It stores the key-value pair in the ContextManager for later retrieval.
      * 
-     * @param key The context key to set (e.g., "kilocode.ghost.enableQuickInlineTaskKeybinding")
+     * @param key The context key to set (e.g., "kilocode.autocomplete.enableQuickInlineTaskKeybinding")
      * @param value The value to set (typically Boolean, but can be String, Number, etc.)
      * @return null (void return type)
      */

+ 3 - 3
jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/core/ContextManager.kt

@@ -17,8 +17,8 @@ import java.util.concurrent.ConcurrentHashMap
  * Example usage:
  * ```kotlin
  * val contextManager = project.getService(ContextManager::class.java)
- * contextManager.setContext("kilocode.ghost.enableQuickInlineTaskKeybinding", true)
- * val value = contextManager.getContext("kilocode.ghost.enableQuickInlineTaskKeybinding")
+ * contextManager.setContext("kilocode.autocomplete.enableQuickInlineTaskKeybinding", true)
+ * val value = contextManager.getContext("kilocode.autocomplete.enableQuickInlineTaskKeybinding")
  * ```
  */
 @Service(Service.Level.PROJECT)
@@ -34,7 +34,7 @@ class ContextManager {
      * Sets a context value for the given key.
      * If the value is null, the context key will be removed.
      * 
-     * @param key The context key (e.g., "kilocode.ghost.enableQuickInlineTaskKeybinding")
+     * @param key The context key (e.g., "kilocode.autocomplete.enableQuickInlineTaskKeybinding")
      * @param value The value to set (can be Boolean, String, Number, or any serializable type)
      */
     fun setContext(key: String, value: Any?) {

+ 2 - 2
jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/inline/InlineCompletionConstants.kt

@@ -11,9 +11,9 @@ object InlineCompletionConstants {
 
     /**
      * Command ID registered in the VSCode extension for tracking acceptance events.
-     * This matches the command registered in GhostInlineCompletionProvider.
+     * This matches the command registered in AutocompleteInlineCompletionProvider.
      */
-    const val INLINE_COMPLETION_ACCEPTED_COMMAND = "kilocode.ghost.inline-completion.accepted"
+    const val INLINE_COMPLETION_ACCEPTED_COMMAND = "kilocode.autocomplete.inline-completion.accepted"
 
     /**
      * Default timeout in milliseconds for inline completion requests.

+ 1 - 1
jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/inline/InlineCompletionService.kt

@@ -16,7 +16,7 @@ import java.util.concurrent.atomic.AtomicReference
 
 /**
  * Service responsible for getting inline completions via RPC communication
- * with the VSCode extension's Ghost service. Encapsulates all RPC logic,
+ * with the VSCode extension's Autocomplete service. Encapsulates all RPC logic,
  * error handling, and result processing for inline completion generation.
  */
 class InlineCompletionService {

+ 4 - 4
jetbrains/plugin/src/main/kotlin/ai/kilocode/jetbrains/inline/KiloCodeInlineCompletionProvider.kt

@@ -12,12 +12,12 @@ import com.intellij.openapi.fileEditor.FileDocumentManager
 import com.intellij.openapi.project.Project
 
 /**
- * IntelliJ inline completion provider that bridges to VSCode extension's Ghost service.
+ * IntelliJ inline completion provider that bridges to VSCode extension's Autocomplete service.
  * This provider uses the new InlineCompletionService which sends full file content
- * to the Ghost service via RPC for accurate completions.
+ * to the Autocomplete service via RPC for accurate completions.
  *
  * The provider handles triggering and rendering, while all AI logic (debouncing,
- * caching, context gathering, and telemetry) is handled by the Ghost service.
+ * caching, context gathering, and telemetry) is handled by the Autocomplete service.
  */
 class KiloCodeInlineCompletionProvider(
     private val handle: Int,
@@ -42,7 +42,7 @@ class KiloCodeInlineCompletionProvider(
     override val id: InlineCompletionProviderID = InlineCompletionProviderID("kilocode-inline-completion-$extensionId-$handle")
 
     /**
-     * Gets inline completion suggestions using the Ghost service.
+     * Gets inline completion suggestions using the Autocomplete service.
      * Sends full file content to ensure accurate completions.
      */
     override suspend fun getSuggestion(request: InlineCompletionRequest): InlineCompletionSingleSuggestion {

+ 4 - 4
jetbrains/plugin/src/test/kotlin/ai/kilocode/jetbrains/util/ReflectUtilsStatusBarTest.kt

@@ -107,10 +107,10 @@ class ReflectUtilsStatusBarTest {
 
         val args = listOf(
             1.0,                    // id
-            "ghost-extension",      // extensionId
-            "ghost-status",         // entryId
-            "Ghost Status",         // name
-            "Ghost (5)",            // text
+            "autocomplete-extension",      // extensionId
+            "autocomplete-status",         // entryId
+            "Autocomplete Status",         // name
+            "Autocomplete (5)",            // text
             markdownTooltip,        // tooltip - MarkdownString object
             false,                  // showProgress
             null,                   // command

+ 1 - 1
packages/agent-runtime/src/host/__tests__/VSCode.applyEdit.spec.ts

@@ -4,7 +4,7 @@ import path from "path"
 import { createVSCodeAPIMock, Uri, WorkspaceEdit, Position, Range } from "../VSCode.js"
 
 describe("WorkspaceAPI.applyEdit", () => {
-	const tempDir = path.join(process.cwd(), "packages/agent-runtime/src/host/__tests__/__tmp__")
+	const tempDir = path.join(__dirname, "__tmp__")
 	const filePath = path.join(tempDir, "apply-edit.txt")
 
 	beforeEach(() => {

+ 1 - 9
packages/core/src/message-utils/consolidateTokenUsage.ts

@@ -99,7 +99,6 @@ export function consolidateTokenUsage(messages: ClineMessage[]): TokenUsage {
 	// with only apiProtocol (no token data). We need to skip these placeholders and
 	// find the last message with actual token data to avoid showing 0% context.
 	result.contextTokens = 0
-	let foundValidTokenData = false
 
 	for (let i = messages.length - 1; i >= 0; i--) {
 		const message = messages[i]
@@ -112,21 +111,14 @@ export function consolidateTokenUsage(messages: ClineMessage[]): TokenUsage {
 				const hasTokenData = typeof tokensIn === "number" || typeof tokensOut === "number"
 
 				if (hasTokenData) {
-					// Since tokensIn now stores TOTAL input tokens (including cache tokens),
-					// we no longer need to add cacheWrites and cacheReads separately.
-					// This applies to both Anthropic and OpenAI protocols.
 					result.contextTokens = (tokensIn || 0) + (tokensOut || 0)
-					foundValidTokenData = true
+					break
 				}
 			} catch {
-				// Ignore JSON parse errors
 				continue
 			}
 		} else if (message.type === "say" && message.say === "condense_context") {
 			result.contextTokens = message.contextCondense?.newContextTokens ?? 0
-			foundValidTokenData = true
-		}
-		if (foundValidTokenData) {
 			break
 		}
 	}

+ 5 - 5
packages/types/src/__tests__/kilocode.test.ts

@@ -2,16 +2,16 @@
 
 import { describe, it, expect, vi, afterEach } from "vitest"
 import {
-	ghostServiceSettingsSchema,
+	autocompleteServiceSettingsSchema,
 	getAppUrl,
 	getApiUrl,
 	getKiloUrlFromToken,
 	getExtensionConfigUrl,
 } from "../kilocode/kilocode.js"
 
-describe("ghostServiceSettingsSchema", () => {
+describe("autocompleteServiceSettingsSchema", () => {
 	it("should accept all boolean settings", () => {
-		const result = ghostServiceSettingsSchema.safeParse({
+		const result = autocompleteServiceSettingsSchema.safeParse({
 			enableAutoTrigger: true,
 			enableQuickInlineTaskKeybinding: false,
 			enableSmartInlineTaskKeybinding: true,
@@ -20,7 +20,7 @@ describe("ghostServiceSettingsSchema", () => {
 	})
 
 	it("should accept combined settings", () => {
-		const result = ghostServiceSettingsSchema.safeParse({
+		const result = autocompleteServiceSettingsSchema.safeParse({
 			enableAutoTrigger: true,
 			enableQuickInlineTaskKeybinding: true,
 			enableSmartInlineTaskKeybinding: true,
@@ -29,7 +29,7 @@ describe("ghostServiceSettingsSchema", () => {
 	})
 
 	it("should be optional", () => {
-		const result = ghostServiceSettingsSchema.safeParse({
+		const result = autocompleteServiceSettingsSchema.safeParse({
 			enableAutoTrigger: true,
 		})
 		expect(result.success).toBe(true)

+ 3 - 0
packages/types/src/codebase-index.ts

@@ -41,6 +41,7 @@ export const codebaseIndexConfigSchema = z.object({
 			"vercel-ai-gateway",
 			"bedrock",
 			"openrouter",
+			"voyage", // kilocode_change
 		])
 		.optional(),
 	// kilocode_change start
@@ -93,6 +94,7 @@ export const codebaseIndexModelsSchema = z.object({
 	"vercel-ai-gateway": z.record(z.string(), z.object({ dimension: z.number() })).optional(),
 	openrouter: z.record(z.string(), z.object({ dimension: z.number() })).optional(),
 	bedrock: z.record(z.string(), z.object({ dimension: z.number() })).optional(),
+	voyage: z.record(z.string(), z.object({ dimension: z.number() })).optional(), // kilocode_change
 })
 
 export type CodebaseIndexModels = z.infer<typeof codebaseIndexModelsSchema>
@@ -111,6 +113,7 @@ export const codebaseIndexProviderSchema = z.object({
 	codebaseIndexMistralApiKey: z.string().optional(),
 	codebaseIndexVercelAiGatewayApiKey: z.string().optional(),
 	codebaseIndexOpenRouterApiKey: z.string().optional(),
+	codebaseIndexVoyageApiKey: z.string().optional(), // kilocode_change
 })
 
 export type CodebaseIndexProvider = z.infer<typeof codebaseIndexProviderSchema>

+ 2 - 1
packages/types/src/embedding.ts

@@ -6,7 +6,8 @@ export type EmbedderProvider =
 	| "mistral"
 	| "vercel-ai-gateway"
 	| "bedrock"
-	| "openrouter" // Add other providers as needed.
+	| "openrouter"
+	| "voyage" // kilocode_change // Add other providers as needed.
 
 export interface EmbeddingModelProfile {
 	dimension: number

+ 7 - 2
packages/types/src/global-settings.ts

@@ -14,7 +14,11 @@ import { telemetrySettingsSchema } from "./telemetry.js"
 import { modeConfigSchema } from "./mode.js"
 import { customModePromptsSchema, customSupportPromptsSchema } from "./mode.js"
 import { languagesSchema } from "./vscode.js"
-import { fastApplyModelSchema, ghostServiceSettingsSchema, fastApplyApiProviderSchema } from "./kilocode/kilocode.js"
+import {
+	fastApplyModelSchema,
+	autocompleteServiceSettingsSchema,
+	fastApplyApiProviderSchema,
+} from "./kilocode/kilocode.js"
 
 /**
  * Default delay in milliseconds after writes to allow diagnostics to detect potential problems.
@@ -227,7 +231,7 @@ export const globalSettingsSchema = z.object({
 	dismissedNotificationIds: z.string().array().optional(), // kilocode_change
 	commitMessageApiConfigId: z.string().optional(), // kilocode_change
 	terminalCommandApiConfigId: z.string().optional(), // kilocode_change
-	ghostServiceSettings: ghostServiceSettingsSchema, // kilocode_change
+	ghostServiceSettings: autocompleteServiceSettingsSchema, // kilocode_change
 	hasPerformedOrganizationAutoSwitch: z.boolean().optional(), // kilocode_change
 	includeTaskHistoryInEnhance: z.boolean().optional(),
 	historyPreviewCollapsed: z.boolean().optional(),
@@ -310,6 +314,7 @@ export const SECRET_STATE_KEYS = [
 	"vercelAiGatewayApiKey",
 	"sapAiCoreServiceKey", // kilocode_change
 	"basetenApiKey",
+	"codebaseIndexVoyageApiKey", // kilocode_change
 	"corethinkApiKey",
 ] as const
 

+ 2 - 2
packages/types/src/kilocode/kilocode.ts

@@ -6,7 +6,7 @@ declare global {
 	}
 }
 
-export const ghostServiceSettingsSchema = z
+export const autocompleteServiceSettingsSchema = z
 	.object({
 		enableAutoTrigger: z.boolean().optional(),
 		enableSmartInlineTaskKeybinding: z.boolean().optional(),
@@ -18,7 +18,7 @@ export const ghostServiceSettingsSchema = z
 	})
 	.optional()
 
-export type GhostServiceSettings = z.infer<typeof ghostServiceSettingsSchema>
+export type AutocompleteServiceSettings = z.infer<typeof autocompleteServiceSettingsSchema>
 
 /**
  * Map of provider names to their default autocomplete models.

+ 16 - 0
packages/types/src/provider-settings.ts

@@ -51,6 +51,7 @@ export const dynamicProviders = [
 	"huggingface",
 	"litellm",
 	// kilocode_change start
+	"apertis",
 	"kilocode",
 	"ovhcloud",
 	"gemini",
@@ -162,6 +163,7 @@ export const providerNames = [
 	"kilocode",
 	"minimax",
 	"virtual-quota-fallback",
+	// Note: apertis, synthetic, inception are in dynamicProviders, no need to duplicate here
 	"synthetic",
 	"inception",
 	"zenmux",
@@ -250,6 +252,15 @@ const nanoGptSchema = baseProviderSettingsSchema.extend({
 	nanoGptModelList: nanoGptModelListSchema.optional(),
 })
 
+const apertisSchema = baseProviderSettingsSchema.extend({
+	apertisApiKey: z.string().optional(),
+	apertisModelId: z.string().optional(),
+	apertisBaseUrl: z.string().optional(),
+	apertisInstructions: z.string().optional(),
+	apertisReasoningEffort: z.enum(["low", "medium", "high"]).optional(),
+	apertisReasoningSummary: z.enum(["auto", "concise", "detailed"]).optional(),
+})
+
 export const openRouterProviderDataCollectionSchema = z.enum(["allow", "deny"])
 export const openRouterProviderSortSchema = z.enum(["price", "throughput", "latency"])
 
@@ -614,6 +625,7 @@ export const providerSettingsSchemaDiscriminated = z.discriminatedUnion("apiProv
 	fakeAiSchema.merge(z.object({ apiProvider: z.literal("fake-ai") })),
 	xaiSchema.merge(z.object({ apiProvider: z.literal("xai") })),
 	// kilocode_change start
+	apertisSchema.merge(z.object({ apiProvider: z.literal("apertis") })),
 	kilocodeSchema.merge(z.object({ apiProvider: z.literal("kilocode") })),
 	virtualQuotaFallbackSchema.merge(z.object({ apiProvider: z.literal("virtual-quota-fallback") })),
 	syntheticSchema.merge(z.object({ apiProvider: z.literal("synthetic") })),
@@ -655,6 +667,7 @@ export const providerSettingsSchema = z.object({
 	...lmStudioSchema.shape,
 	...geminiSchema.shape,
 	// kilocode_change start
+	...apertisSchema.shape,
 	...kilocodeSchema.shape,
 	...virtualQuotaFallbackSchema.shape,
 	...syntheticSchema.shape,
@@ -731,6 +744,7 @@ export const modelIdKeys = [
 	"ovhCloudAiEndpointsModelId", // kilocode_change
 	"inceptionLabsModelId", // kilocode_change
 	"sapAiCoreModelId", // kilocode_change
+	"apertisModelId", // kilocode_change
 ] as const satisfies readonly (keyof ProviderSettings)[]
 
 export type ModelIdKey = (typeof modelIdKeys)[number]
@@ -778,6 +792,7 @@ export const modelIdKeysByProvider: Record<TypicalProvider, ModelIdKey> = {
 	ovhcloud: "ovhCloudAiEndpointsModelId",
 	inception: "inceptionLabsModelId",
 	"sap-ai-core": "sapAiCoreModelId",
+	apertis: "apertisModelId",
 	zenmux: "zenmuxModelId", // kilocode_change
 	// kilocode_change end
 	groq: "apiModelId",
@@ -953,6 +968,7 @@ export const MODELS_BY_PROVIDER: Record<
 	inception: { id: "inception", label: "Inception", models: [] },
 	kilocode: { id: "kilocode", label: "Kilocode", models: [] },
 	"virtual-quota-fallback": { id: "virtual-quota-fallback", label: "Virtual Quota Fallback", models: [] },
+	apertis: { id: "apertis", label: "Apertis", models: [] },
 	zenmux: { id: "zenmux", label: "ZenMux", models: [] }, // kilocode_change
 	// kilocode_change end
 	deepinfra: { id: "deepinfra", label: "DeepInfra", models: [] },

+ 28 - 0
packages/types/src/providers/apertis.ts

@@ -0,0 +1,28 @@
+// kilocode_change - new file
+import type { ModelInfo } from "../model.js"
+
+export const APERTIS_DEFAULT_BASE_URL = "https://api.apertis.ai"
+
+export const apertisDefaultModelId = "claude-sonnet-4-20250514"
+
+export const apertisDefaultModelInfo: ModelInfo = {
+	maxTokens: 8192,
+	contextWindow: 200_000,
+	supportsImages: true,
+	supportsPromptCache: true,
+	supportsNativeTools: true,
+	inputPrice: 3.0,
+	outputPrice: 15.0,
+	description: "Claude Sonnet 4 via Apertis - balanced performance and cost",
+}
+
+// Models that support extended thinking (Claude models)
+export const APERTIS_THINKING_MODELS = new Set([
+	"claude-sonnet-4-20250514",
+	"claude-opus-4-5-20251101",
+	"claude-3-5-sonnet-20241022",
+	"claude-3-opus-20240229",
+])
+
+// Models that use Responses API (reasoning models)
+export const APERTIS_RESPONSES_API_MODELS = new Set(["o1-preview", "o1-mini", "o1", "o3-mini", "o3"])

+ 1 - 1
packages/types/src/providers/corethink.ts

@@ -1,7 +1,7 @@
 import type { ModelInfo } from "../model.js"
 
 export const corethinkModels = {
-	"corethink": {
+	corethink: {
 		maxTokens: 8192,
 		contextWindow: 79000,
 		supportsImages: true,

+ 4 - 0
packages/types/src/providers/index.ts

@@ -16,6 +16,7 @@ export * from "./synthetic.js"
 export * from "./inception.js"
 export * from "./minimax.js"
 export * from "./glama.js"
+export * from "./apertis.js"
 export * from "./zenmux.js"
 // kilocode_change end
 export * from "./groq.js"
@@ -57,6 +58,7 @@ import { featherlessDefaultModelId } from "./featherless.js"
 import { fireworksDefaultModelId } from "./fireworks.js"
 import { geminiDefaultModelId } from "./gemini.js"
 import { glamaDefaultModelId } from "./glama.js" // kilocode_change
+import { apertisDefaultModelId } from "./apertis.js" // kilocode_change
 import { zenmuxDefaultModelId } from "./zenmux.js" // kilocode_change
 import { groqDefaultModelId } from "./groq.js"
 import { ioIntelligenceDefaultModelId } from "./io-intelligence.js"
@@ -100,6 +102,8 @@ export function getProviderDefaultModelId(
 		// kilocode_change start
 		case "glama":
 			return glamaDefaultModelId
+		case "apertis":
+			return apertisDefaultModelId
 		// kilocode_change end
 		case "unbound":
 			return unboundDefaultModelId

+ 3 - 0
packages/types/src/vscode-extension-host.ts

@@ -574,6 +574,7 @@ export type ExtensionState = Pick<
 	clineMessages: ClineMessage[]
 	currentTaskItem?: HistoryItem
 	currentTaskTodos?: TodoItem[] // Initial todos for the current task
+	currentTaskCumulativeCost?: number // kilocode_change: cumulative cost including deleted messages
 	apiConfiguration: ProviderSettings
 	uriScheme?: string
 	uiKind?: string // kilocode_change
@@ -1058,6 +1059,7 @@ export interface WebviewMessage {
 			| "vercel-ai-gateway"
 			| "bedrock"
 			| "openrouter"
+			| "voyage" // kilocode_change
 		codebaseIndexVectorStoreProvider?: "lancedb" | "qdrant" // kilocode_change
 		codebaseIndexLancedbVectorStoreDirectory?: string // kilocode_change
 		codebaseIndexEmbedderBaseUrl?: string
@@ -1082,6 +1084,7 @@ export interface WebviewMessage {
 		codebaseIndexMistralApiKey?: string
 		codebaseIndexVercelAiGatewayApiKey?: string
 		codebaseIndexOpenRouterApiKey?: string
+		codebaseIndexVoyageApiKey?: string // kilocode_change
 	}
 	updatedSettings?: RooCodeSettings
 	// kilocode_change start: Review mode

Some files were not shown because too many files changed in this diff