import { afterEach, describe, expect, mock, test } from "bun:test" import { APICallError } from "ai" import { Cause, Effect, Exit, Layer, ManagedRuntime } from "effect" import * as Stream from "effect/Stream" import path from "path" import { Bus } from "../../src/bus" import { Config } from "../../src/config/config" import { Agent } from "../../src/agent/agent" import { LLM } from "../../src/session/llm" import { SessionCompaction } from "../../src/session/compaction" import { Token } from "../../src/util/token" import { Instance } from "../../src/project/instance" import { Log } from "../../src/util/log" import { Permission } from "../../src/permission" import { Plugin } from "../../src/plugin" import { tmpdir } from "../fixture/fixture" import { Session } from "../../src/session" import { MessageV2 } from "../../src/session/message-v2" import { MessageID, PartID, SessionID } from "../../src/session/schema" import { SessionStatus } from "../../src/session/status" import { ModelID, ProviderID } from "../../src/provider/schema" import type { Provider } from "../../src/provider/provider" import * as SessionProcessorModule from "../../src/session/processor" import { Snapshot } from "../../src/snapshot" import { ProviderTest } from "../fake/provider" Log.init({ print: false }) const ref = { providerID: ProviderID.make("test"), modelID: ModelID.make("test-model"), } afterEach(() => { mock.restore() }) function createModel(opts: { context: number output: number input?: number cost?: Provider.Model["cost"] npm?: string }): Provider.Model { return { id: "test-model", providerID: "test", name: "Test", limit: { context: opts.context, input: opts.input, output: opts.output, }, cost: opts.cost ?? { input: 0, output: 0, cache: { read: 0, write: 0 } }, capabilities: { toolcall: true, attachment: false, reasoning: false, temperature: true, input: { text: true, image: false, audio: false, video: false }, output: { text: true, image: false, audio: false, video: false }, }, api: { npm: opts.npm ?? "@ai-sdk/anthropic" }, options: {}, } as Provider.Model } const wide = () => ProviderTest.fake({ model: createModel({ context: 100_000, output: 32_000 }) }) async function user(sessionID: SessionID, text: string) { const msg = await Session.updateMessage({ id: MessageID.ascending(), role: "user", sessionID, agent: "build", model: ref, time: { created: Date.now() }, }) await Session.updatePart({ id: PartID.ascending(), messageID: msg.id, sessionID, type: "text", text, }) return msg } async function assistant(sessionID: SessionID, parentID: MessageID, root: string) { const msg: MessageV2.Assistant = { id: MessageID.ascending(), role: "assistant", sessionID, mode: "build", agent: "build", path: { cwd: root, root }, cost: 0, tokens: { output: 0, input: 0, reasoning: 0, cache: { read: 0, write: 0 }, }, modelID: ref.modelID, providerID: ref.providerID, parentID, time: { created: Date.now() }, finish: "end_turn", } await Session.updateMessage(msg) return msg } async function tool(sessionID: SessionID, messageID: MessageID, tool: string, output: string) { return Session.updatePart({ id: PartID.ascending(), messageID, sessionID, type: "tool", callID: crypto.randomUUID(), tool, state: { status: "completed", input: {}, output, title: "done", metadata: {}, time: { start: Date.now(), end: Date.now() }, }, }) } function fake( input: Parameters[0], result: "continue" | "compact", ) { const msg = input.assistantMessage return { get message() { return msg }, updateToolCall: Effect.fn("TestSessionProcessor.updateToolCall")(() => Effect.succeed(undefined)), completeToolCall: Effect.fn("TestSessionProcessor.completeToolCall")(() => Effect.void), process: Effect.fn("TestSessionProcessor.process")(() => Effect.succeed(result)), } satisfies SessionProcessorModule.SessionProcessor.Handle } function layer(result: "continue" | "compact") { return Layer.succeed( SessionProcessorModule.SessionProcessor.Service, SessionProcessorModule.SessionProcessor.Service.of({ create: Effect.fn("TestSessionProcessor.create")((input) => Effect.succeed(fake(input, result))), }), ) } function runtime(result: "continue" | "compact", plugin = Plugin.defaultLayer, provider = ProviderTest.fake()) { const bus = Bus.layer return ManagedRuntime.make( Layer.mergeAll(SessionCompaction.layer, bus).pipe( Layer.provide(provider.layer), Layer.provide(Session.defaultLayer), Layer.provide(layer(result)), Layer.provide(Agent.defaultLayer), Layer.provide(plugin), Layer.provide(bus), Layer.provide(Config.defaultLayer), ), ) } function llm() { const queue: Array< Stream.Stream | ((input: LLM.StreamInput) => Stream.Stream) > = [] return { push(stream: Stream.Stream | ((input: LLM.StreamInput) => Stream.Stream)) { queue.push(stream) }, layer: Layer.succeed( LLM.Service, LLM.Service.of({ stream: (input) => { const item = queue.shift() ?? Stream.empty const stream = typeof item === "function" ? item(input) : item return stream.pipe(Stream.mapEffect((event) => Effect.succeed(event))) }, }), ), } } function liveRuntime(layer: Layer.Layer, provider = ProviderTest.fake()) { const bus = Bus.layer const status = SessionStatus.layer.pipe(Layer.provide(bus)) const processor = SessionProcessorModule.SessionProcessor.layer return ManagedRuntime.make( Layer.mergeAll(SessionCompaction.layer.pipe(Layer.provide(processor)), processor, bus, status).pipe( Layer.provide(provider.layer), Layer.provide(Session.defaultLayer), Layer.provide(Snapshot.defaultLayer), Layer.provide(layer), Layer.provide(Permission.defaultLayer), Layer.provide(Agent.defaultLayer), Layer.provide(Plugin.defaultLayer), Layer.provide(status), Layer.provide(bus), Layer.provide(Config.defaultLayer), ), ) } function wait(ms = 50) { return new Promise((resolve) => setTimeout(resolve, ms)) } function defer() { let resolve!: () => void const promise = new Promise((done) => { resolve = done }) return { promise, resolve } } function plugin(ready: ReturnType) { return Layer.mock(Plugin.Service)({ trigger: (name: Name, _input: Input, output: Output) => { if (name !== "experimental.session.compacting") return Effect.succeed(output) return Effect.sync(() => ready.resolve()).pipe(Effect.andThen(Effect.never), Effect.as(output)) }, list: () => Effect.succeed([]), init: () => Effect.void, }) } describe("session.compaction.isOverflow", () => { test("returns true when token count exceeds usable context", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const model = createModel({ context: 100_000, output: 32_000 }) const tokens = { input: 75_000, output: 5_000, reasoning: 0, cache: { read: 0, write: 0 } } expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true) }, }) }) test("returns false when token count within usable context", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const model = createModel({ context: 200_000, output: 32_000 }) const tokens = { input: 100_000, output: 10_000, reasoning: 0, cache: { read: 0, write: 0 } } expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false) }, }) }) test("includes cache.read in token count", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const model = createModel({ context: 100_000, output: 32_000 }) const tokens = { input: 60_000, output: 10_000, reasoning: 0, cache: { read: 10_000, write: 0 } } expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true) }, }) }) test("respects input limit for input caps", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const model = createModel({ context: 400_000, input: 272_000, output: 128_000 }) const tokens = { input: 271_000, output: 1_000, reasoning: 0, cache: { read: 2_000, write: 0 } } expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true) }, }) }) test("returns false when input/output are within input caps", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const model = createModel({ context: 400_000, input: 272_000, output: 128_000 }) const tokens = { input: 200_000, output: 20_000, reasoning: 0, cache: { read: 10_000, write: 0 } } expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false) }, }) }) test("returns false when output within limit with input caps", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const model = createModel({ context: 200_000, input: 120_000, output: 10_000 }) const tokens = { input: 50_000, output: 9_999, reasoning: 0, cache: { read: 0, write: 0 } } expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false) }, }) }) // ─── Bug reproduction tests ─────────────────────────────────────────── // These tests demonstrate that when limit.input is set, isOverflow() // does not subtract any headroom for the next model response. This means // compaction only triggers AFTER we've already consumed the full input // budget, leaving zero room for the next API call's output tokens. // // Compare: without limit.input, usable = context - output (reserves space). // With limit.input, usable = limit.input (reserves nothing). // // Related issues: #10634, #8089, #11086, #12621 // Open PRs: #6875, #12924 test("BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { // Simulate Claude with prompt caching: input limit = 200K, output limit = 32K const model = createModel({ context: 200_000, input: 200_000, output: 32_000 }) // We've used 198K tokens total. Only 2K under the input limit. // On the next turn, the full conversation (198K) becomes input, // plus the model needs room to generate output — this WILL overflow. const tokens = { input: 180_000, output: 15_000, reasoning: 0, cache: { read: 3_000, write: 0 } } // count = 180K + 3K + 15K = 198K // usable = limit.input = 200K (no output subtracted!) // 198K > 200K = false → no compaction triggered // WITHOUT limit.input: usable = 200K - 32K = 168K, and 198K > 168K = true ✓ // WITH limit.input: usable = 200K, and 198K > 200K = false ✗ // With 198K used and only 2K headroom, the next turn will overflow. // Compaction MUST trigger here. expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true) }, }) }) test("BUG: without limit.input, same token count correctly triggers compaction", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { // Same model but without limit.input — uses context - output instead const model = createModel({ context: 200_000, output: 32_000 }) // Same token usage as above const tokens = { input: 180_000, output: 15_000, reasoning: 0, cache: { read: 3_000, write: 0 } } // count = 198K // usable = context - output = 200K - 32K = 168K // 198K > 168K = true → compaction correctly triggered const result = await SessionCompaction.isOverflow({ tokens, model }) expect(result).toBe(true) // ← Correct: headroom is reserved }, }) }) test("BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { // Two models with identical context/output limits, differing only in limit.input const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 }) const withoutInputLimit = createModel({ context: 200_000, output: 32_000 }) // 170K total tokens — well above context-output (168K) but below input limit (200K) const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } } const withLimit = await SessionCompaction.isOverflow({ tokens, model: withInputLimit }) const withoutLimit = await SessionCompaction.isOverflow({ tokens, model: withoutInputLimit }) // Both models have identical real capacity — they should agree: expect(withLimit).toBe(true) // should compact (170K leaves no room for 32K output) expect(withoutLimit).toBe(true) // correctly compacts (170K > 168K) }, }) }) test("returns false when model context limit is 0", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const model = createModel({ context: 0, output: 32_000 }) const tokens = { input: 100_000, output: 10_000, reasoning: 0, cache: { read: 0, write: 0 } } expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false) }, }) }) test("returns false when compaction.auto is disabled", async () => { await using tmp = await tmpdir({ init: async (dir) => { await Bun.write( path.join(dir, "opencode.json"), JSON.stringify({ compaction: { auto: false }, }), ) }, }) await Instance.provide({ directory: tmp.path, fn: async () => { const model = createModel({ context: 100_000, output: 32_000 }) const tokens = { input: 75_000, output: 5_000, reasoning: 0, cache: { read: 0, write: 0 } } expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false) }, }) }) }) describe("session.compaction.create", () => { test("creates a compaction user message and part", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const session = await Session.create({}) await SessionCompaction.create({ sessionID: session.id, agent: "build", model: ref, auto: true, overflow: true, }) const msgs = await Session.messages({ sessionID: session.id }) expect(msgs).toHaveLength(1) expect(msgs[0].info.role).toBe("user") expect(msgs[0].parts).toHaveLength(1) expect(msgs[0].parts[0]).toMatchObject({ type: "compaction", auto: true, overflow: true, }) }, }) }) }) describe("session.compaction.prune", () => { test("compacts old completed tool output", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const session = await Session.create({}) const a = await user(session.id, "first") const b = await assistant(session.id, a.id, tmp.path) await tool(session.id, b.id, "bash", "x".repeat(200_000)) await user(session.id, "second") await user(session.id, "third") await SessionCompaction.prune({ sessionID: session.id }) const msgs = await Session.messages({ sessionID: session.id }) const part = msgs.flatMap((msg) => msg.parts).find((part) => part.type === "tool") expect(part?.type).toBe("tool") expect(part?.state.status).toBe("completed") if (part?.type === "tool" && part.state.status === "completed") { expect(part.state.time.compacted).toBeNumber() } }, }) }) test("skips protected skill tool output", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const session = await Session.create({}) const a = await user(session.id, "first") const b = await assistant(session.id, a.id, tmp.path) await tool(session.id, b.id, "skill", "x".repeat(200_000)) await user(session.id, "second") await user(session.id, "third") await SessionCompaction.prune({ sessionID: session.id }) const msgs = await Session.messages({ sessionID: session.id }) const part = msgs.flatMap((msg) => msg.parts).find((part) => part.type === "tool") expect(part?.type).toBe("tool") if (part?.type === "tool" && part.state.status === "completed") { expect(part.state.time.compacted).toBeUndefined() } }, }) }) }) describe("session.compaction.process", () => { test("throws when parent is not a user message", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const session = await Session.create({}) const msg = await user(session.id, "hello") const reply = await assistant(session.id, msg.id, tmp.path) const rt = runtime("continue") try { const msgs = await Session.messages({ sessionID: session.id }) await expect( rt.runPromise( SessionCompaction.Service.use((svc) => svc.process({ parentID: reply.id, messages: msgs, sessionID: session.id, auto: false, }), ), ), ).rejects.toThrow(`Compaction parent must be a user message: ${reply.id}`) } finally { await rt.dispose() } }, }) }) test("publishes compacted event on continue", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const session = await Session.create({}) const msg = await user(session.id, "hello") const msgs = await Session.messages({ sessionID: session.id }) const done = defer() let seen = false const rt = runtime("continue", Plugin.defaultLayer, wide()) let unsub: (() => void) | undefined try { unsub = await rt.runPromise( Bus.Service.use((svc) => svc.subscribeCallback(SessionCompaction.Event.Compacted, (evt) => { if (evt.properties.sessionID !== session.id) return seen = true done.resolve() }), ), ) const result = await rt.runPromise( SessionCompaction.Service.use((svc) => svc.process({ parentID: msg.id, messages: msgs, sessionID: session.id, auto: false, }), ), ) await Promise.race([ done.promise, wait(500).then(() => { throw new Error("timed out waiting for compacted event") }), ]) expect(result).toBe("continue") expect(seen).toBe(true) } finally { unsub?.() await rt.dispose() } }, }) }) test("marks summary message as errored on compact result", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const session = await Session.create({}) const msg = await user(session.id, "hello") const rt = runtime("compact", Plugin.defaultLayer, wide()) try { const msgs = await Session.messages({ sessionID: session.id }) const result = await rt.runPromise( SessionCompaction.Service.use((svc) => svc.process({ parentID: msg.id, messages: msgs, sessionID: session.id, auto: false, }), ), ) const summary = (await Session.messages({ sessionID: session.id })).find( (msg) => msg.info.role === "assistant" && msg.info.summary, ) expect(result).toBe("stop") expect(summary?.info.role).toBe("assistant") if (summary?.info.role === "assistant") { expect(summary.info.finish).toBe("error") expect(JSON.stringify(summary.info.error)).toContain("Session too large to compact") } } finally { await rt.dispose() } }, }) }) test("adds synthetic continue prompt when auto is enabled", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const session = await Session.create({}) const msg = await user(session.id, "hello") const rt = runtime("continue", Plugin.defaultLayer, wide()) try { const msgs = await Session.messages({ sessionID: session.id }) const result = await rt.runPromise( SessionCompaction.Service.use((svc) => svc.process({ parentID: msg.id, messages: msgs, sessionID: session.id, auto: true, }), ), ) const all = await Session.messages({ sessionID: session.id }) const last = all.at(-1) expect(result).toBe("continue") expect(last?.info.role).toBe("user") expect(last?.parts[0]).toMatchObject({ type: "text", synthetic: true, }) if (last?.parts[0]?.type === "text") { expect(last.parts[0].text).toContain("Continue if you have next steps") } } finally { await rt.dispose() } }, }) }) test("replays the prior user turn on overflow when earlier context exists", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const session = await Session.create({}) await user(session.id, "root") const replay = await user(session.id, "image") await Session.updatePart({ id: PartID.ascending(), messageID: replay.id, sessionID: session.id, type: "file", mime: "image/png", filename: "cat.png", url: "https://example.com/cat.png", }) const msg = await user(session.id, "current") const rt = runtime("continue", Plugin.defaultLayer, wide()) try { const msgs = await Session.messages({ sessionID: session.id }) const result = await rt.runPromise( SessionCompaction.Service.use((svc) => svc.process({ parentID: msg.id, messages: msgs, sessionID: session.id, auto: true, overflow: true, }), ), ) const last = (await Session.messages({ sessionID: session.id })).at(-1) expect(result).toBe("continue") expect(last?.info.role).toBe("user") expect(last?.parts.some((part) => part.type === "file")).toBe(false) expect( last?.parts.some((part) => part.type === "text" && part.text.includes("Attached image/png: cat.png")), ).toBe(true) } finally { await rt.dispose() } }, }) }) test("falls back to overflow guidance when no replayable turn exists", async () => { await using tmp = await tmpdir() await Instance.provide({ directory: tmp.path, fn: async () => { const session = await Session.create({}) await user(session.id, "earlier") const msg = await user(session.id, "current") const rt = runtime("continue", Plugin.defaultLayer, wide()) try { const msgs = await Session.messages({ sessionID: session.id }) const result = await rt.runPromise( SessionCompaction.Service.use((svc) => svc.process({ parentID: msg.id, messages: msgs, sessionID: session.id, auto: true, overflow: true, }), ), ) const last = (await Session.messages({ sessionID: session.id })).at(-1) expect(result).toBe("continue") expect(last?.info.role).toBe("user") if (last?.parts[0]?.type === "text") { expect(last.parts[0].text).toContain("previous request exceeded the provider's size limit") } } finally { await rt.dispose() } }, }) }) test("stops quickly when aborted during retry backoff", async () => { const stub = llm() const ready = defer() stub.push( Stream.fromAsyncIterable( { async *[Symbol.asyncIterator]() { yield { type: "start" } as LLM.Event throw new APICallError({ message: "boom", url: "https://example.com/v1/chat/completions", requestBodyValues: {}, statusCode: 503, responseHeaders: { "retry-after-ms": "10000" }, responseBody: '{"error":"boom"}', isRetryable: true, }) }, }, (err) => err, ), ) await using tmp = await tmpdir({ git: true }) await Instance.provide({ directory: tmp.path, fn: async () => { const session = await Session.create({}) const msg = await user(session.id, "hello") const msgs = await Session.messages({ sessionID: session.id }) const abort = new AbortController() const rt = liveRuntime(stub.layer, wide()) let off: (() => void) | undefined let run: Promise<"continue" | "stop"> | undefined try { off = await rt.runPromise( Bus.Service.use((svc) => svc.subscribeCallback(SessionStatus.Event.Status, (evt) => { if (evt.properties.sessionID !== session.id) return if (evt.properties.status.type !== "retry") return ready.resolve() }), ), ) run = rt .runPromiseExit( SessionCompaction.Service.use((svc) => svc.process({ parentID: msg.id, messages: msgs, sessionID: session.id, auto: false, }), ), { signal: abort.signal }, ) .then((exit) => { if (Exit.isFailure(exit)) { if (Cause.hasInterrupts(exit.cause) && abort.signal.aborted) return "stop" throw Cause.squash(exit.cause) } return exit.value }) await Promise.race([ ready.promise, wait(1000).then(() => { throw new Error("timed out waiting for retry status") }), ]) const start = Date.now() abort.abort() const result = await Promise.race([ run.then((value) => ({ kind: "done" as const, value, ms: Date.now() - start })), wait(250).then(() => ({ kind: "timeout" as const })), ]) expect(result.kind).toBe("done") if (result.kind === "done") { expect(result.value).toBe("stop") expect(result.ms).toBeLessThan(250) } } finally { off?.() abort.abort() await rt.dispose() await run?.catch(() => undefined) } }, }) }) test("does not leave a summary assistant when aborted before processor setup", async () => { const ready = defer() await using tmp = await tmpdir({ git: true }) await Instance.provide({ directory: tmp.path, fn: async () => { const session = await Session.create({}) const msg = await user(session.id, "hello") const msgs = await Session.messages({ sessionID: session.id }) const abort = new AbortController() const rt = runtime("continue", plugin(ready), wide()) let run: Promise<"continue" | "stop"> | undefined try { run = rt .runPromiseExit( SessionCompaction.Service.use((svc) => svc.process({ parentID: msg.id, messages: msgs, sessionID: session.id, auto: false, }), ), { signal: abort.signal }, ) .then((exit) => { if (Exit.isFailure(exit)) { if (Cause.hasInterrupts(exit.cause) && abort.signal.aborted) return "stop" throw Cause.squash(exit.cause) } return exit.value }) await Promise.race([ ready.promise, wait(1000).then(() => { throw new Error("timed out waiting for compaction hook") }), ]) abort.abort() expect(await run).toBe("stop") const all = await Session.messages({ sessionID: session.id }) expect(all.some((msg) => msg.info.role === "assistant" && msg.info.summary)).toBe(false) } finally { abort.abort() await rt.dispose() await run?.catch(() => undefined) } }, }) }) test("does not allow tool calls while generating the summary", async () => { const stub = llm() stub.push( Stream.make( { type: "start" } satisfies LLM.Event, { type: "tool-input-start", id: "call-1", toolName: "_noop" } satisfies LLM.Event, { type: "tool-call", toolCallId: "call-1", toolName: "_noop", input: {} } satisfies LLM.Event, { type: "finish-step", finishReason: "tool-calls", rawFinishReason: "tool_calls", response: { id: "res", modelId: "test-model", timestamp: new Date() }, providerMetadata: undefined, usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2, inputTokenDetails: { noCacheTokens: undefined, cacheReadTokens: undefined, cacheWriteTokens: undefined, }, outputTokenDetails: { textTokens: undefined, reasoningTokens: undefined, }, }, } satisfies LLM.Event, { type: "finish", finishReason: "tool-calls", rawFinishReason: "tool_calls", totalUsage: { inputTokens: 1, outputTokens: 1, totalTokens: 2, inputTokenDetails: { noCacheTokens: undefined, cacheReadTokens: undefined, cacheWriteTokens: undefined, }, outputTokenDetails: { textTokens: undefined, reasoningTokens: undefined, }, }, } satisfies LLM.Event, ), ) await using tmp = await tmpdir({ git: true }) await Instance.provide({ directory: tmp.path, fn: async () => { const session = await Session.create({}) const msg = await user(session.id, "hello") const rt = liveRuntime(stub.layer, wide()) try { const msgs = await Session.messages({ sessionID: session.id }) await rt.runPromise( SessionCompaction.Service.use((svc) => svc.process({ parentID: msg.id, messages: msgs, sessionID: session.id, auto: false, }), ), ) const summary = (await Session.messages({ sessionID: session.id })).find( (item) => item.info.role === "assistant" && item.info.summary, ) expect(summary?.info.role).toBe("assistant") expect(summary?.parts.some((part) => part.type === "tool")).toBe(false) } finally { await rt.dispose() } }, }) }) }) describe("util.token.estimate", () => { test("estimates tokens from text (4 chars per token)", () => { const text = "x".repeat(4000) expect(Token.estimate(text)).toBe(1000) }) test("estimates tokens from larger text", () => { const text = "y".repeat(20_000) expect(Token.estimate(text)).toBe(5000) }) test("returns 0 for empty string", () => { expect(Token.estimate("")).toBe(0) }) }) describe("session.getUsage", () => { test("normalizes standard usage to token format", () => { const model = createModel({ context: 100_000, output: 32_000 }) const result = Session.getUsage({ model, usage: { inputTokens: 1000, outputTokens: 500, totalTokens: 1500, }, }) expect(result.tokens.input).toBe(1000) expect(result.tokens.output).toBe(500) expect(result.tokens.reasoning).toBe(0) expect(result.tokens.cache.read).toBe(0) expect(result.tokens.cache.write).toBe(0) }) test("extracts cached tokens to cache.read", () => { const model = createModel({ context: 100_000, output: 32_000 }) const result = Session.getUsage({ model, usage: { inputTokens: 1000, outputTokens: 500, totalTokens: 1500, cachedInputTokens: 200, }, }) expect(result.tokens.input).toBe(800) expect(result.tokens.cache.read).toBe(200) }) test("handles anthropic cache write metadata", () => { const model = createModel({ context: 100_000, output: 32_000 }) const result = Session.getUsage({ model, usage: { inputTokens: 1000, outputTokens: 500, totalTokens: 1500, }, metadata: { anthropic: { cacheCreationInputTokens: 300, }, }, }) expect(result.tokens.cache.write).toBe(300) }) test("subtracts cached tokens for anthropic provider", () => { const model = createModel({ context: 100_000, output: 32_000 }) // AI SDK v6 normalizes inputTokens to include cached tokens for all providers const result = Session.getUsage({ model, usage: { inputTokens: 1000, outputTokens: 500, totalTokens: 1500, cachedInputTokens: 200, }, metadata: { anthropic: {}, }, }) expect(result.tokens.input).toBe(800) expect(result.tokens.cache.read).toBe(200) }) test("separates reasoning tokens from output tokens", () => { const model = createModel({ context: 100_000, output: 32_000 }) const result = Session.getUsage({ model, usage: { inputTokens: 1000, outputTokens: 500, totalTokens: 1500, reasoningTokens: 100, }, }) expect(result.tokens.input).toBe(1000) expect(result.tokens.output).toBe(400) expect(result.tokens.reasoning).toBe(100) expect(result.tokens.total).toBe(1500) }) test("does not double count reasoning tokens in cost", () => { const model = createModel({ context: 100_000, output: 32_000, cost: { input: 0, output: 15, cache: { read: 0, write: 0 }, }, }) const result = Session.getUsage({ model, usage: { inputTokens: 0, outputTokens: 1_000_000, totalTokens: 1_000_000, reasoningTokens: 250_000, }, }) expect(result.tokens.output).toBe(750_000) expect(result.tokens.reasoning).toBe(250_000) expect(result.cost).toBe(15) }) test("handles undefined optional values gracefully", () => { const model = createModel({ context: 100_000, output: 32_000 }) const result = Session.getUsage({ model, usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0, }, }) expect(result.tokens.input).toBe(0) expect(result.tokens.output).toBe(0) expect(result.tokens.reasoning).toBe(0) expect(result.tokens.cache.read).toBe(0) expect(result.tokens.cache.write).toBe(0) expect(Number.isNaN(result.cost)).toBe(false) }) test("calculates cost correctly", () => { const model = createModel({ context: 100_000, output: 32_000, cost: { input: 3, output: 15, cache: { read: 0.3, write: 3.75 }, }, }) const result = Session.getUsage({ model, usage: { inputTokens: 1_000_000, outputTokens: 100_000, totalTokens: 1_100_000, }, }) expect(result.cost).toBe(3 + 1.5) }) // kilocode_change start - Test for OpenRouter provider cost test("uses openrouter provider cost when available", () => { const model = createModel({ context: 100_000, output: 32_000, cost: { input: 3, output: 15, cache: { read: 0.3, write: 3.75 }, }, }) const result = Session.getUsage({ model, usage: { inputTokens: 1_000_000, outputTokens: 100_000, totalTokens: 1_100_000, }, metadata: { openrouter: { usage: { cost: 0.42, // Provider-reported cost should be used instead of calculated }, }, }, }) // Should use the provider cost (0.42) instead of calculated cost (4.5) expect(result.cost).toBe(0.42) }) test("falls back to calculated cost when openrouter cost is not available", () => { const model = createModel({ context: 100_000, output: 32_000, cost: { input: 3, output: 15, cache: { read: 0.3, write: 3.75 }, }, }) const result = Session.getUsage({ model, usage: { inputTokens: 1_000_000, outputTokens: 100_000, totalTokens: 1_100_000, }, metadata: { openrouter: { usage: { // cost is undefined }, }, }, }) // Should fall back to calculated cost expect(result.cost).toBe(3 + 1.5) }) test("falls back to calculated cost when openrouter metadata is empty", () => { const model = createModel({ context: 100_000, output: 32_000, cost: { input: 3, output: 15, cache: { read: 0.3, write: 3.75 }, }, }) const result = Session.getUsage({ model, usage: { inputTokens: 1_000_000, outputTokens: 100_000, totalTokens: 1_100_000, }, metadata: { openrouter: {}, }, }) // Should fall back to calculated cost expect(result.cost).toBe(3 + 1.5) }) test("uses upstreamInferenceCost for Kilo provider", () => { const model = createModel({ context: 100_000, output: 32_000, cost: { input: 3, output: 15, cache: { read: 0.3, write: 3.75 }, }, }) const provider = { id: "kilo" } as Provider.Info const result = Session.getUsage({ model, provider, usage: { inputTokens: 1_000_000, outputTokens: 100_000, totalTokens: 1_100_000, }, metadata: { openrouter: { usage: { cost: 0.01, // OpenRouter 5% fee costDetails: { upstreamInferenceCost: 0.2, // Actual inference cost }, }, }, }, }) // Should use upstreamInferenceCost for Kilo provider (BYOK) expect(result.cost).toBe(0.2) }) test("uses regular cost for OpenRouter provider", () => { const model = createModel({ context: 100_000, output: 32_000, cost: { input: 3, output: 15, cache: { read: 0.3, write: 3.75 }, }, }) const provider = { id: "openrouter" } as Provider.Info const result = Session.getUsage({ model, provider, usage: { inputTokens: 1_000_000, outputTokens: 100_000, totalTokens: 1_100_000, }, metadata: { openrouter: { usage: { cost: 0.5, // Regular OpenRouter cost costDetails: { upstreamInferenceCost: 0.45, }, }, }, }, }) // Should use regular cost for OpenRouter provider expect(result.cost).toBe(0.5) }) test("falls back to regular cost when provider is not specified", () => { const model = createModel({ context: 100_000, output: 32_000, cost: { input: 3, output: 15, cache: { read: 0.3, write: 3.75 }, }, }) const result = Session.getUsage({ model, usage: { inputTokens: 1_000_000, outputTokens: 100_000, totalTokens: 1_100_000, }, metadata: { openrouter: { usage: { cost: 0.3, costDetails: { upstreamInferenceCost: 0.25, }, }, }, }, }) // Should use regular cost when provider is not specified expect(result.cost).toBe(0.3) }) test("uses regular cost when upstreamInferenceCost is missing for Kilo", () => { const model = createModel({ context: 100_000, output: 32_000, cost: { input: 3, output: 15, cache: { read: 0.3, write: 3.75 }, }, }) const provider = { id: "kilo" } as Provider.Info const result = Session.getUsage({ model, provider, usage: { inputTokens: 1_000_000, outputTokens: 100_000, totalTokens: 1_100_000, }, metadata: { openrouter: { usage: { cost: 0.01, // costDetails is missing }, }, }, }) // When upstream cost is missing for Kilo, fall back to regular cost field expect(result.cost).toBe(0.01) }) // kilocode_change end test.each(["@ai-sdk/anthropic", "@ai-sdk/amazon-bedrock", "@ai-sdk/google-vertex/anthropic"])( "computes total from components for %s models", (npm) => { const model = createModel({ context: 100_000, output: 32_000, npm }) // AI SDK v6: inputTokens includes cached tokens for all providers const usage = { inputTokens: 1000, outputTokens: 500, totalTokens: 1500, cachedInputTokens: 200, } if (npm === "@ai-sdk/amazon-bedrock") { const result = Session.getUsage({ model, usage, metadata: { bedrock: { usage: { cacheWriteInputTokens: 300, }, }, }, }) // inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500 expect(result.tokens.input).toBe(500) expect(result.tokens.cache.read).toBe(200) expect(result.tokens.cache.write).toBe(300) // total = adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300) expect(result.tokens.total).toBe(1500) return } const result = Session.getUsage({ model, usage, metadata: { anthropic: { cacheCreationInputTokens: 300, }, }, }) // inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500 expect(result.tokens.input).toBe(500) expect(result.tokens.cache.read).toBe(200) expect(result.tokens.cache.write).toBe(300) // total = adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300) expect(result.tokens.total).toBe(1500) }, ) test("extracts cache write tokens from vertex metadata key", () => { const model = createModel({ context: 100_000, output: 32_000, npm: "@ai-sdk/google-vertex/anthropic" }) const result = Session.getUsage({ model, usage: { inputTokens: 1000, outputTokens: 500, totalTokens: 1500, cachedInputTokens: 200, }, metadata: { vertex: { cacheCreationInputTokens: 300, }, }, }) expect(result.tokens.input).toBe(500) expect(result.tokens.cache.read).toBe(200) expect(result.tokens.cache.write).toBe(300) }) })