// compaction.test.ts

import { afterEach, describe, expect, mock, test } from "bun:test"
import { APICallError } from "ai"
import { Cause, Effect, Exit, Layer, ManagedRuntime } from "effect"
import * as Stream from "effect/Stream"
import path from "path"
import { Bus } from "../../src/bus"
import { Config } from "../../src/config/config"
import { Agent } from "../../src/agent/agent"
import { LLM } from "../../src/session/llm"
import { SessionCompaction } from "../../src/session/compaction"
import { Token } from "../../src/util/token"
import { Instance } from "../../src/project/instance"
import { Log } from "../../src/util/log"
import { Permission } from "../../src/permission"
import { Plugin } from "../../src/plugin"
import { tmpdir } from "../fixture/fixture"
import { Session } from "../../src/session"
import { MessageV2 } from "../../src/session/message-v2"
import { MessageID, PartID, SessionID } from "../../src/session/schema"
import { SessionStatus } from "../../src/session/status"
import { ModelID, ProviderID } from "../../src/provider/schema"
import type { Provider } from "../../src/provider/provider"
import * as SessionProcessorModule from "../../src/session/processor"
import { Snapshot } from "../../src/snapshot"
import { ProviderTest } from "../fake/provider"

Log.init({ print: false })

const ref = {
  providerID: ProviderID.make("test"),
  modelID: ModelID.make("test-model"),
}

afterEach(() => {
  mock.restore()
})

function createModel(opts: {
  context: number
  output: number
  input?: number
  cost?: Provider.Model["cost"]
  npm?: string
}): Provider.Model {
  return {
    id: "test-model",
    providerID: "test",
    name: "Test",
    limit: {
      context: opts.context,
      input: opts.input,
      output: opts.output,
    },
    cost: opts.cost ?? { input: 0, output: 0, cache: { read: 0, write: 0 } },
    capabilities: {
      toolcall: true,
      attachment: false,
      reasoning: false,
      temperature: true,
      input: { text: true, image: false, audio: false, video: false },
      output: { text: true, image: false, audio: false, video: false },
    },
    api: { npm: opts.npm ?? "@ai-sdk/anthropic" },
    options: {},
  } as Provider.Model
}

const wide = () => ProviderTest.fake({ model: createModel({ context: 100_000, output: 32_000 }) })

async function user(sessionID: SessionID, text: string) {
  const msg = await Session.updateMessage({
    id: MessageID.ascending(),
    role: "user",
    sessionID,
    agent: "build",
    model: ref,
    time: { created: Date.now() },
  })
  await Session.updatePart({
    id: PartID.ascending(),
    messageID: msg.id,
    sessionID,
    type: "text",
    text,
  })
  return msg
}

async function assistant(sessionID: SessionID, parentID: MessageID, root: string) {
  const msg: MessageV2.Assistant = {
    id: MessageID.ascending(),
    role: "assistant",
    sessionID,
    mode: "build",
    agent: "build",
    path: { cwd: root, root },
    cost: 0,
    tokens: {
      output: 0,
      input: 0,
      reasoning: 0,
      cache: { read: 0, write: 0 },
    },
    modelID: ref.modelID,
    providerID: ref.providerID,
    parentID,
    time: { created: Date.now() },
    finish: "end_turn",
  }
  await Session.updateMessage(msg)
  return msg
}

async function tool(sessionID: SessionID, messageID: MessageID, tool: string, output: string) {
  return Session.updatePart({
    id: PartID.ascending(),
    messageID,
    sessionID,
    type: "tool",
    callID: crypto.randomUUID(),
    tool,
    state: {
      status: "completed",
      input: {},
      output,
      title: "done",
      metadata: {},
      time: { start: Date.now(), end: Date.now() },
    },
  })
}

function fake(
  input: Parameters<SessionProcessorModule.SessionProcessor.Interface["create"]>[0],
  result: "continue" | "compact",
) {
  const msg = input.assistantMessage
  return {
    get message() {
      return msg
    },
    updateToolCall: Effect.fn("TestSessionProcessor.updateToolCall")(() => Effect.succeed(undefined)),
    completeToolCall: Effect.fn("TestSessionProcessor.completeToolCall")(() => Effect.void),
    process: Effect.fn("TestSessionProcessor.process")(() => Effect.succeed(result)),
  } satisfies SessionProcessorModule.SessionProcessor.Handle
}

function layer(result: "continue" | "compact") {
  return Layer.succeed(
    SessionProcessorModule.SessionProcessor.Service,
    SessionProcessorModule.SessionProcessor.Service.of({
      create: Effect.fn("TestSessionProcessor.create")((input) => Effect.succeed(fake(input, result))),
    }),
  )
}

function runtime(result: "continue" | "compact", plugin = Plugin.defaultLayer, provider = ProviderTest.fake()) {
  const bus = Bus.layer
  return ManagedRuntime.make(
    Layer.mergeAll(SessionCompaction.layer, bus).pipe(
      Layer.provide(provider.layer),
      Layer.provide(Session.defaultLayer),
      Layer.provide(layer(result)),
      Layer.provide(Agent.defaultLayer),
      Layer.provide(plugin),
      Layer.provide(bus),
      Layer.provide(Config.defaultLayer),
    ),
  )
}

function llm() {
  const queue: Array<
    Stream.Stream<LLM.Event, unknown> | ((input: LLM.StreamInput) => Stream.Stream<LLM.Event, unknown>)
  > = []
  return {
    push(stream: Stream.Stream<LLM.Event, unknown> | ((input: LLM.StreamInput) => Stream.Stream<LLM.Event, unknown>)) {
      queue.push(stream)
    },
    layer: Layer.succeed(
      LLM.Service,
      LLM.Service.of({
        stream: (input) => {
          const item = queue.shift() ?? Stream.empty
          const stream = typeof item === "function" ? item(input) : item
          return stream.pipe(Stream.mapEffect((event) => Effect.succeed(event)))
        },
      }),
    ),
  }
}

function liveRuntime(layer: Layer.Layer<LLM.Service>, provider = ProviderTest.fake()) {
  const bus = Bus.layer
  const status = SessionStatus.layer.pipe(Layer.provide(bus))
  const processor = SessionProcessorModule.SessionProcessor.layer
  return ManagedRuntime.make(
    Layer.mergeAll(SessionCompaction.layer.pipe(Layer.provide(processor)), processor, bus, status).pipe(
      Layer.provide(provider.layer),
      Layer.provide(Session.defaultLayer),
      Layer.provide(Snapshot.defaultLayer),
      Layer.provide(layer),
      Layer.provide(Permission.defaultLayer),
      Layer.provide(Agent.defaultLayer),
      Layer.provide(Plugin.defaultLayer),
      Layer.provide(status),
      Layer.provide(bus),
      Layer.provide(Config.defaultLayer),
    ),
  )
}

function wait(ms = 50) {
  return new Promise((resolve) => setTimeout(resolve, ms))
}

function defer() {
  let resolve!: () => void
  const promise = new Promise<void>((done) => {
    resolve = done
  })
  return { promise, resolve }
}

function plugin(ready: ReturnType<typeof defer>) {
  return Layer.mock(Plugin.Service)({
    trigger: <Name extends string, Input, Output>(name: Name, _input: Input, output: Output) => {
      if (name !== "experimental.session.compacting") return Effect.succeed(output)
      return Effect.sync(() => ready.resolve()).pipe(Effect.andThen(Effect.never), Effect.as(output))
    },
    list: () => Effect.succeed([]),
    init: () => Effect.void,
  })
}

describe("session.compaction.isOverflow", () => {
  test("returns true when token count exceeds usable context", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 100_000, output: 32_000 })
        const tokens = { input: 75_000, output: 5_000, reasoning: 0, cache: { read: 0, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true)
      },
    })
  })

  test("returns false when token count within usable context", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 200_000, output: 32_000 })
        const tokens = { input: 100_000, output: 10_000, reasoning: 0, cache: { read: 0, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
      },
    })
  })

  test("includes cache.read in token count", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 100_000, output: 32_000 })
        const tokens = { input: 60_000, output: 10_000, reasoning: 0, cache: { read: 10_000, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true)
      },
    })
  })

  test("respects input limit for input caps", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 400_000, input: 272_000, output: 128_000 })
        const tokens = { input: 271_000, output: 1_000, reasoning: 0, cache: { read: 2_000, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true)
      },
    })
  })

  test("returns false when input/output are within input caps", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 400_000, input: 272_000, output: 128_000 })
        const tokens = { input: 200_000, output: 20_000, reasoning: 0, cache: { read: 10_000, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
      },
    })
  })

  test("returns false when output within limit with input caps", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 200_000, input: 120_000, output: 10_000 })
        const tokens = { input: 50_000, output: 9_999, reasoning: 0, cache: { read: 0, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
      },
    })
  })

  // ─── Bug reproduction tests ───────────────────────────────────────────
  // These tests demonstrate that when limit.input is set, isOverflow()
  // does not subtract any headroom for the next model response. This means
  // compaction only triggers AFTER we've already consumed the full input
  // budget, leaving zero room for the next API call's output tokens.
  //
  // Compare: without limit.input, usable = context - output (reserves space).
  // With limit.input, usable = limit.input (reserves nothing).
  //
  // Related issues: #10634, #8089, #11086, #12621
  // Open PRs: #6875, #12924
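  //
  // A minimal sketch of the two branches being tested, inferred from the
  // comments above (an assumption, not the actual code in
  // ../../src/session/compaction):
  //
  //   const count = tokens.input + tokens.cache.read + tokens.output + tokens.reasoning
  //   const usable =
  //     model.limit.input !== undefined
  //       ? model.limit.input // no headroom reserved for the next response
  //       : model.limit.context - model.limit.output // output headroom reserved
  //   return count > usable
  //
  // A headroom-aware fix would reserve output space in both branches, e.g.
  // usable = (model.limit.input ?? model.limit.context) - model.limit.output.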

  test("BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        // Simulate Claude with prompt caching: input limit = 200K, output limit = 32K
        const model = createModel({ context: 200_000, input: 200_000, output: 32_000 })
        // We've used 198K tokens total. Only 2K under the input limit.
        // On the next turn, the full conversation (198K) becomes input,
        // plus the model needs room to generate output — this WILL overflow.
        const tokens = { input: 180_000, output: 15_000, reasoning: 0, cache: { read: 3_000, write: 0 } }
        // count = 180K + 3K + 15K = 198K
        // usable = limit.input = 200K (no output subtracted!)
        // 198K > 200K = false → no compaction triggered
        // WITHOUT limit.input: usable = 200K - 32K = 168K, and 198K > 168K = true ✓
        // WITH limit.input: usable = 200K, and 198K > 200K = false ✗
        // With 198K used and only 2K headroom, the next turn will overflow.
        // Compaction MUST trigger here.
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true)
      },
    })
  })

  test("BUG: without limit.input, same token count correctly triggers compaction", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        // Same model but without limit.input — uses context - output instead
        const model = createModel({ context: 200_000, output: 32_000 })
        // Same token usage as above
        const tokens = { input: 180_000, output: 15_000, reasoning: 0, cache: { read: 3_000, write: 0 } }
        // count = 198K
        // usable = context - output = 200K - 32K = 168K
        // 198K > 168K = true → compaction correctly triggered
        const result = await SessionCompaction.isOverflow({ tokens, model })
        expect(result).toBe(true) // ← Correct: headroom is reserved
      },
    })
  })

  test("BUG: asymmetry — limit.input model allows 32K more usage before compaction than equivalent model without it", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        // Two models with identical context/output limits, differing only in limit.input
        const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
        const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })
        // 181K total tokens — well above context-output (168K) but below input limit (200K)
        const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }
        const withLimit = await SessionCompaction.isOverflow({ tokens, model: withInputLimit })
        const withoutLimit = await SessionCompaction.isOverflow({ tokens, model: withoutInputLimit })
        // Both models have identical real capacity — they should agree:
        expect(withLimit).toBe(true) // should compact (181K leaves only 19K of room for 32K output)
        expect(withoutLimit).toBe(true) // correctly compacts (181K > 168K)
      },
    })
  })

  test("returns false when model context limit is 0", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 0, output: 32_000 })
        const tokens = { input: 100_000, output: 10_000, reasoning: 0, cache: { read: 0, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
      },
    })
  })

  test("returns false when compaction.auto is disabled", async () => {
    await using tmp = await tmpdir({
      init: async (dir) => {
        await Bun.write(
          path.join(dir, "opencode.json"),
          JSON.stringify({
            compaction: { auto: false },
          }),
        )
      },
    })
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const model = createModel({ context: 100_000, output: 32_000 })
        const tokens = { input: 75_000, output: 5_000, reasoning: 0, cache: { read: 0, write: 0 } }
        expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
      },
    })
  })
})

describe("session.compaction.create", () => {
  test("creates a compaction user message and part", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await Session.create({})
        await SessionCompaction.create({
          sessionID: session.id,
          agent: "build",
          model: ref,
          auto: true,
          overflow: true,
        })
        const msgs = await Session.messages({ sessionID: session.id })
        expect(msgs).toHaveLength(1)
        expect(msgs[0].info.role).toBe("user")
        expect(msgs[0].parts).toHaveLength(1)
        expect(msgs[0].parts[0]).toMatchObject({
          type: "compaction",
          auto: true,
          overflow: true,
        })
      },
    })
  })
})

describe("session.compaction.prune", () => {
  test("compacts old completed tool output", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await Session.create({})
        const a = await user(session.id, "first")
        const b = await assistant(session.id, a.id, tmp.path)
        await tool(session.id, b.id, "bash", "x".repeat(200_000))
        await user(session.id, "second")
        await user(session.id, "third")
        await SessionCompaction.prune({ sessionID: session.id })
        const msgs = await Session.messages({ sessionID: session.id })
        const part = msgs.flatMap((msg) => msg.parts).find((part) => part.type === "tool")
        expect(part?.type).toBe("tool")
        expect(part?.state.status).toBe("completed")
        if (part?.type === "tool" && part.state.status === "completed") {
          expect(part.state.time.compacted).toBeNumber()
        }
      },
    })
  })

  test("skips protected skill tool output", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await Session.create({})
        const a = await user(session.id, "first")
        const b = await assistant(session.id, a.id, tmp.path)
        await tool(session.id, b.id, "skill", "x".repeat(200_000))
        await user(session.id, "second")
        await user(session.id, "third")
        await SessionCompaction.prune({ sessionID: session.id })
        const msgs = await Session.messages({ sessionID: session.id })
        const part = msgs.flatMap((msg) => msg.parts).find((part) => part.type === "tool")
        expect(part?.type).toBe("tool")
        if (part?.type === "tool" && part.state.status === "completed") {
          expect(part.state.time.compacted).toBeUndefined()
        }
      },
    })
  })
})

describe("session.compaction.process", () => {
  test("throws when parent is not a user message", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await Session.create({})
        const msg = await user(session.id, "hello")
        const reply = await assistant(session.id, msg.id, tmp.path)
        const rt = runtime("continue")
        try {
          const msgs = await Session.messages({ sessionID: session.id })
          await expect(
            rt.runPromise(
              SessionCompaction.Service.use((svc) =>
                svc.process({
                  parentID: reply.id,
                  messages: msgs,
                  sessionID: session.id,
                  auto: false,
                }),
              ),
            ),
          ).rejects.toThrow(`Compaction parent must be a user message: ${reply.id}`)
        } finally {
          await rt.dispose()
        }
      },
    })
  })

  test("publishes compacted event on continue", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await Session.create({})
        const msg = await user(session.id, "hello")
        const msgs = await Session.messages({ sessionID: session.id })
        const done = defer()
        let seen = false
        const rt = runtime("continue", Plugin.defaultLayer, wide())
        let unsub: (() => void) | undefined
        try {
          unsub = await rt.runPromise(
            Bus.Service.use((svc) =>
              svc.subscribeCallback(SessionCompaction.Event.Compacted, (evt) => {
                if (evt.properties.sessionID !== session.id) return
                seen = true
                done.resolve()
              }),
            ),
          )
          const result = await rt.runPromise(
            SessionCompaction.Service.use((svc) =>
              svc.process({
                parentID: msg.id,
                messages: msgs,
                sessionID: session.id,
                auto: false,
              }),
            ),
          )
          await Promise.race([
            done.promise,
            wait(500).then(() => {
              throw new Error("timed out waiting for compacted event")
            }),
          ])
          expect(result).toBe("continue")
          expect(seen).toBe(true)
        } finally {
          unsub?.()
          await rt.dispose()
        }
      },
    })
  })

  test("marks summary message as errored on compact result", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await Session.create({})
        const msg = await user(session.id, "hello")
        const rt = runtime("compact", Plugin.defaultLayer, wide())
        try {
          const msgs = await Session.messages({ sessionID: session.id })
          const result = await rt.runPromise(
            SessionCompaction.Service.use((svc) =>
              svc.process({
                parentID: msg.id,
                messages: msgs,
                sessionID: session.id,
                auto: false,
              }),
            ),
          )
          const summary = (await Session.messages({ sessionID: session.id })).find(
            (msg) => msg.info.role === "assistant" && msg.info.summary,
          )
          expect(result).toBe("stop")
          expect(summary?.info.role).toBe("assistant")
          if (summary?.info.role === "assistant") {
            expect(summary.info.finish).toBe("error")
            expect(JSON.stringify(summary.info.error)).toContain("Session too large to compact")
          }
        } finally {
          await rt.dispose()
        }
      },
    })
  })

  test("adds synthetic continue prompt when auto is enabled", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await Session.create({})
        const msg = await user(session.id, "hello")
        const rt = runtime("continue", Plugin.defaultLayer, wide())
        try {
          const msgs = await Session.messages({ sessionID: session.id })
          const result = await rt.runPromise(
            SessionCompaction.Service.use((svc) =>
              svc.process({
                parentID: msg.id,
                messages: msgs,
                sessionID: session.id,
                auto: true,
              }),
            ),
          )
          const all = await Session.messages({ sessionID: session.id })
          const last = all.at(-1)
          expect(result).toBe("continue")
          expect(last?.info.role).toBe("user")
          expect(last?.parts[0]).toMatchObject({
            type: "text",
            synthetic: true,
          })
          if (last?.parts[0]?.type === "text") {
            expect(last.parts[0].text).toContain("Continue if you have next steps")
          }
        } finally {
          await rt.dispose()
        }
      },
    })
  })

  test("replays the prior user turn on overflow when earlier context exists", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await Session.create({})
        await user(session.id, "root")
        const replay = await user(session.id, "image")
        await Session.updatePart({
          id: PartID.ascending(),
          messageID: replay.id,
          sessionID: session.id,
          type: "file",
          mime: "image/png",
          filename: "cat.png",
          url: "https://example.com/cat.png",
        })
        const msg = await user(session.id, "current")
        const rt = runtime("continue", Plugin.defaultLayer, wide())
        try {
          const msgs = await Session.messages({ sessionID: session.id })
          const result = await rt.runPromise(
            SessionCompaction.Service.use((svc) =>
              svc.process({
                parentID: msg.id,
                messages: msgs,
                sessionID: session.id,
                auto: true,
                overflow: true,
              }),
            ),
          )
          const last = (await Session.messages({ sessionID: session.id })).at(-1)
          expect(result).toBe("continue")
          expect(last?.info.role).toBe("user")
          expect(last?.parts.some((part) => part.type === "file")).toBe(false)
          expect(
            last?.parts.some((part) => part.type === "text" && part.text.includes("Attached image/png: cat.png")),
          ).toBe(true)
        } finally {
          await rt.dispose()
        }
      },
    })
  })

  test("falls back to overflow guidance when no replayable turn exists", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await Session.create({})
        await user(session.id, "earlier")
        const msg = await user(session.id, "current")
        const rt = runtime("continue", Plugin.defaultLayer, wide())
        try {
          const msgs = await Session.messages({ sessionID: session.id })
          const result = await rt.runPromise(
            SessionCompaction.Service.use((svc) =>
              svc.process({
                parentID: msg.id,
                messages: msgs,
                sessionID: session.id,
                auto: true,
                overflow: true,
              }),
            ),
          )
          const last = (await Session.messages({ sessionID: session.id })).at(-1)
          expect(result).toBe("continue")
          expect(last?.info.role).toBe("user")
          if (last?.parts[0]?.type === "text") {
            expect(last.parts[0].text).toContain("previous request exceeded the provider's size limit")
          }
        } finally {
          await rt.dispose()
        }
      },
    })
  })

  test("stops quickly when aborted during retry backoff", async () => {
    const stub = llm()
    const ready = defer()
    stub.push(
      Stream.fromAsyncIterable(
        {
          async *[Symbol.asyncIterator]() {
            yield { type: "start" } as LLM.Event
            throw new APICallError({
              message: "boom",
              url: "https://example.com/v1/chat/completions",
              requestBodyValues: {},
              statusCode: 503,
              responseHeaders: { "retry-after-ms": "10000" },
              responseBody: '{"error":"boom"}',
              isRetryable: true,
            })
          },
        },
        (err) => err,
      ),
    )
    await using tmp = await tmpdir({ git: true })
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await Session.create({})
        const msg = await user(session.id, "hello")
        const msgs = await Session.messages({ sessionID: session.id })
        const abort = new AbortController()
        const rt = liveRuntime(stub.layer, wide())
        let off: (() => void) | undefined
        let run: Promise<"continue" | "stop"> | undefined
        try {
          off = await rt.runPromise(
            Bus.Service.use((svc) =>
              svc.subscribeCallback(SessionStatus.Event.Status, (evt) => {
                if (evt.properties.sessionID !== session.id) return
                if (evt.properties.status.type !== "retry") return
                ready.resolve()
              }),
            ),
          )
          run = rt
            .runPromiseExit(
              SessionCompaction.Service.use((svc) =>
                svc.process({
                  parentID: msg.id,
                  messages: msgs,
                  sessionID: session.id,
                  auto: false,
                }),
              ),
              { signal: abort.signal },
            )
            .then((exit) => {
              if (Exit.isFailure(exit)) {
                if (Cause.hasInterrupts(exit.cause) && abort.signal.aborted) return "stop"
                throw Cause.squash(exit.cause)
              }
              return exit.value
            })
          await Promise.race([
            ready.promise,
            wait(1000).then(() => {
              throw new Error("timed out waiting for retry status")
            }),
          ])
          const start = Date.now()
          abort.abort()
          const result = await Promise.race([
            run.then((value) => ({ kind: "done" as const, value, ms: Date.now() - start })),
            wait(250).then(() => ({ kind: "timeout" as const })),
          ])
          expect(result.kind).toBe("done")
          if (result.kind === "done") {
            expect(result.value).toBe("stop")
            expect(result.ms).toBeLessThan(250)
          }
        } finally {
          off?.()
          abort.abort()
          await rt.dispose()
          await run?.catch(() => undefined)
        }
      },
    })
  })

  test("does not leave a summary assistant when aborted before processor setup", async () => {
    const ready = defer()
    await using tmp = await tmpdir({ git: true })
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await Session.create({})
        const msg = await user(session.id, "hello")
        const msgs = await Session.messages({ sessionID: session.id })
        const abort = new AbortController()
        const rt = runtime("continue", plugin(ready), wide())
        let run: Promise<"continue" | "stop"> | undefined
        try {
          run = rt
            .runPromiseExit(
              SessionCompaction.Service.use((svc) =>
                svc.process({
                  parentID: msg.id,
                  messages: msgs,
                  sessionID: session.id,
                  auto: false,
                }),
              ),
              { signal: abort.signal },
            )
            .then((exit) => {
              if (Exit.isFailure(exit)) {
                if (Cause.hasInterrupts(exit.cause) && abort.signal.aborted) return "stop"
                throw Cause.squash(exit.cause)
              }
              return exit.value
            })
          await Promise.race([
            ready.promise,
            wait(1000).then(() => {
              throw new Error("timed out waiting for compaction hook")
            }),
          ])
          abort.abort()
          expect(await run).toBe("stop")
          const all = await Session.messages({ sessionID: session.id })
          expect(all.some((msg) => msg.info.role === "assistant" && msg.info.summary)).toBe(false)
        } finally {
          abort.abort()
          await rt.dispose()
          await run?.catch(() => undefined)
        }
      },
    })
  })

  test("does not allow tool calls while generating the summary", async () => {
    const stub = llm()
    stub.push(
      Stream.make(
        { type: "start" } satisfies LLM.Event,
        { type: "tool-input-start", id: "call-1", toolName: "_noop" } satisfies LLM.Event,
        { type: "tool-call", toolCallId: "call-1", toolName: "_noop", input: {} } satisfies LLM.Event,
        {
          type: "finish-step",
          finishReason: "tool-calls",
          rawFinishReason: "tool_calls",
          response: { id: "res", modelId: "test-model", timestamp: new Date() },
          providerMetadata: undefined,
          usage: {
            inputTokens: 1,
            outputTokens: 1,
            totalTokens: 2,
            inputTokenDetails: {
              noCacheTokens: undefined,
              cacheReadTokens: undefined,
              cacheWriteTokens: undefined,
            },
            outputTokenDetails: {
              textTokens: undefined,
              reasoningTokens: undefined,
            },
          },
        } satisfies LLM.Event,
        {
          type: "finish",
          finishReason: "tool-calls",
          rawFinishReason: "tool_calls",
          totalUsage: {
            inputTokens: 1,
            outputTokens: 1,
            totalTokens: 2,
            inputTokenDetails: {
              noCacheTokens: undefined,
              cacheReadTokens: undefined,
              cacheWriteTokens: undefined,
            },
            outputTokenDetails: {
              textTokens: undefined,
              reasoningTokens: undefined,
            },
          },
        } satisfies LLM.Event,
      ),
    )
    await using tmp = await tmpdir({ git: true })
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await Session.create({})
        const msg = await user(session.id, "hello")
        const rt = liveRuntime(stub.layer, wide())
        try {
          const msgs = await Session.messages({ sessionID: session.id })
          await rt.runPromise(
            SessionCompaction.Service.use((svc) =>
              svc.process({
                parentID: msg.id,
                messages: msgs,
                sessionID: session.id,
                auto: false,
              }),
            ),
          )
          const summary = (await Session.messages({ sessionID: session.id })).find(
            (item) => item.info.role === "assistant" && item.info.summary,
          )
          expect(summary?.info.role).toBe("assistant")
          expect(summary?.parts.some((part) => part.type === "tool")).toBe(false)
        } finally {
          await rt.dispose()
        }
      },
    })
  })
})

describe("util.token.estimate", () => {
  test("estimates tokens from text (4 chars per token)", () => {
    const text = "x".repeat(4000)
    expect(Token.estimate(text)).toBe(1000)
  })

  test("estimates tokens from larger text", () => {
    const text = "y".repeat(20_000)
    expect(Token.estimate(text)).toBe(5000)
  })

  test("returns 0 for empty string", () => {
    expect(Token.estimate("")).toBe(0)
  })
})
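
// The expectations above match a simple length-based heuristic. A sketch of
// what Token.estimate presumably computes (an assumption; the rounding
// behavior is not pinned down by these cases, which all divide evenly):
//
//   const estimate = (text: string) => Math.ceil(text.length / 4)
//
// e.g. "x".repeat(4000) → 1000 tokens, "" → 0.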

describe("session.getUsage", () => {
  test("normalizes standard usage to token format", () => {
    const model = createModel({ context: 100_000, output: 32_000 })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1000,
        outputTokens: 500,
        totalTokens: 1500,
      },
    })
    expect(result.tokens.input).toBe(1000)
    expect(result.tokens.output).toBe(500)
    expect(result.tokens.reasoning).toBe(0)
    expect(result.tokens.cache.read).toBe(0)
    expect(result.tokens.cache.write).toBe(0)
  })

  test("extracts cached tokens to cache.read", () => {
    const model = createModel({ context: 100_000, output: 32_000 })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1000,
        outputTokens: 500,
        totalTokens: 1500,
        cachedInputTokens: 200,
      },
    })
    expect(result.tokens.input).toBe(800)
    expect(result.tokens.cache.read).toBe(200)
  })

  test("handles anthropic cache write metadata", () => {
    const model = createModel({ context: 100_000, output: 32_000 })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1000,
        outputTokens: 500,
        totalTokens: 1500,
      },
      metadata: {
        anthropic: {
          cacheCreationInputTokens: 300,
        },
      },
    })
    expect(result.tokens.cache.write).toBe(300)
  })

  test("subtracts cached tokens for anthropic provider", () => {
    const model = createModel({ context: 100_000, output: 32_000 })
    // AI SDK v6 normalizes inputTokens to include cached tokens for all providers
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1000,
        outputTokens: 500,
        totalTokens: 1500,
        cachedInputTokens: 200,
      },
      metadata: {
        anthropic: {},
      },
    })
    expect(result.tokens.input).toBe(800)
    expect(result.tokens.cache.read).toBe(200)
  })

  test("separates reasoning tokens from output tokens", () => {
    const model = createModel({ context: 100_000, output: 32_000 })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1000,
        outputTokens: 500,
        totalTokens: 1500,
        reasoningTokens: 100,
      },
    })
    expect(result.tokens.input).toBe(1000)
    expect(result.tokens.output).toBe(400)
    expect(result.tokens.reasoning).toBe(100)
    expect(result.tokens.total).toBe(1500)
  })

  test("does not double count reasoning tokens in cost", () => {
    const model = createModel({
      context: 100_000,
      output: 32_000,
      cost: {
        input: 0,
        output: 15,
        cache: { read: 0, write: 0 },
      },
    })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 0,
        outputTokens: 1_000_000,
        totalTokens: 1_000_000,
        reasoningTokens: 250_000,
      },
    })
    expect(result.tokens.output).toBe(750_000)
    expect(result.tokens.reasoning).toBe(250_000)
    expect(result.cost).toBe(15)
  })

  test("handles undefined optional values gracefully", () => {
    const model = createModel({ context: 100_000, output: 32_000 })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 0,
        outputTokens: 0,
        totalTokens: 0,
      },
    })
    expect(result.tokens.input).toBe(0)
    expect(result.tokens.output).toBe(0)
    expect(result.tokens.reasoning).toBe(0)
    expect(result.tokens.cache.read).toBe(0)
    expect(result.tokens.cache.write).toBe(0)
    expect(Number.isNaN(result.cost)).toBe(false)
  })

  test("calculates cost correctly", () => {
    const model = createModel({
      context: 100_000,
      output: 32_000,
      cost: {
        input: 3,
        output: 15,
        cache: { read: 0.3, write: 3.75 },
      },
    })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1_000_000,
        outputTokens: 100_000,
        totalTokens: 1_100_000,
      },
    })
    expect(result.cost).toBe(3 + 1.5)
  })
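
  // Cost arithmetic behind the expectation above (rates are per million tokens):
  //   input:  1_000_000 / 1e6 * $3  = $3.00
  //   output:   100_000 / 1e6 * $15 = $1.50
  //   total                         = $4.50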

  // kilocode_change start - Test for OpenRouter provider cost
  test("uses openrouter provider cost when available", () => {
    const model = createModel({
      context: 100_000,
      output: 32_000,
      cost: {
        input: 3,
        output: 15,
        cache: { read: 0.3, write: 3.75 },
      },
    })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1_000_000,
        outputTokens: 100_000,
        totalTokens: 1_100_000,
      },
      metadata: {
        openrouter: {
          usage: {
            cost: 0.42, // Provider-reported cost should be used instead of calculated
          },
        },
      },
    })
    // Should use the provider cost (0.42) instead of calculated cost (4.5)
    expect(result.cost).toBe(0.42)
  })

  test("falls back to calculated cost when openrouter cost is not available", () => {
    const model = createModel({
      context: 100_000,
      output: 32_000,
      cost: {
        input: 3,
        output: 15,
        cache: { read: 0.3, write: 3.75 },
      },
    })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1_000_000,
        outputTokens: 100_000,
        totalTokens: 1_100_000,
      },
      metadata: {
        openrouter: {
          usage: {
            // cost is undefined
          },
        },
      },
    })
    // Should fall back to calculated cost
    expect(result.cost).toBe(3 + 1.5)
  })

  test("falls back to calculated cost when openrouter metadata is empty", () => {
    const model = createModel({
      context: 100_000,
      output: 32_000,
      cost: {
        input: 3,
        output: 15,
        cache: { read: 0.3, write: 3.75 },
      },
    })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1_000_000,
        outputTokens: 100_000,
        totalTokens: 1_100_000,
      },
      metadata: {
        openrouter: {},
      },
    })
    // Should fall back to calculated cost
    expect(result.cost).toBe(3 + 1.5)
  })

  test("uses upstreamInferenceCost for Kilo provider", () => {
    const model = createModel({
      context: 100_000,
      output: 32_000,
      cost: {
        input: 3,
        output: 15,
        cache: { read: 0.3, write: 3.75 },
      },
    })
    const provider = { id: "kilo" } as Provider.Info
    const result = Session.getUsage({
      model,
      provider,
      usage: {
        inputTokens: 1_000_000,
        outputTokens: 100_000,
        totalTokens: 1_100_000,
      },
      metadata: {
        openrouter: {
          usage: {
            cost: 0.01, // OpenRouter 5% fee
            costDetails: {
              upstreamInferenceCost: 0.2, // Actual inference cost
            },
          },
        },
      },
    })
    // Should use upstreamInferenceCost for Kilo provider (BYOK)
    expect(result.cost).toBe(0.2)
  })

  test("uses regular cost for OpenRouter provider", () => {
    const model = createModel({
      context: 100_000,
      output: 32_000,
      cost: {
        input: 3,
        output: 15,
        cache: { read: 0.3, write: 3.75 },
      },
    })
    const provider = { id: "openrouter" } as Provider.Info
    const result = Session.getUsage({
      model,
      provider,
      usage: {
        inputTokens: 1_000_000,
        outputTokens: 100_000,
        totalTokens: 1_100_000,
      },
      metadata: {
        openrouter: {
          usage: {
            cost: 0.5, // Regular OpenRouter cost
            costDetails: {
              upstreamInferenceCost: 0.45,
            },
          },
        },
      },
    })
    // Should use regular cost for OpenRouter provider
    expect(result.cost).toBe(0.5)
  })

  test("falls back to regular cost when provider is not specified", () => {
    const model = createModel({
      context: 100_000,
      output: 32_000,
      cost: {
        input: 3,
        output: 15,
        cache: { read: 0.3, write: 3.75 },
      },
    })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1_000_000,
        outputTokens: 100_000,
        totalTokens: 1_100_000,
      },
      metadata: {
        openrouter: {
          usage: {
            cost: 0.3,
            costDetails: {
              upstreamInferenceCost: 0.25,
            },
          },
        },
      },
    })
    // Should use regular cost when provider is not specified
    expect(result.cost).toBe(0.3)
  })

  test("uses regular cost when upstreamInferenceCost is missing for Kilo", () => {
    const model = createModel({
      context: 100_000,
      output: 32_000,
      cost: {
        input: 3,
        output: 15,
        cache: { read: 0.3, write: 3.75 },
      },
    })
    const provider = { id: "kilo" } as Provider.Info
    const result = Session.getUsage({
      model,
      provider,
      usage: {
        inputTokens: 1_000_000,
        outputTokens: 100_000,
        totalTokens: 1_100_000,
      },
      metadata: {
        openrouter: {
          usage: {
            cost: 0.01,
            // costDetails is missing
          },
        },
      },
    })
    // When upstream cost is missing for Kilo, fall back to regular cost field
    expect(result.cost).toBe(0.01)
  })
  // kilocode_change end

  test.each(["@ai-sdk/anthropic", "@ai-sdk/amazon-bedrock", "@ai-sdk/google-vertex/anthropic"])(
    "computes total from components for %s models",
    (npm) => {
      const model = createModel({ context: 100_000, output: 32_000, npm })
      // AI SDK v6: inputTokens includes cached tokens for all providers
      const usage = {
        inputTokens: 1000,
        outputTokens: 500,
        totalTokens: 1500,
        cachedInputTokens: 200,
      }
      if (npm === "@ai-sdk/amazon-bedrock") {
        const result = Session.getUsage({
          model,
          usage,
          metadata: {
            bedrock: {
              usage: {
                cacheWriteInputTokens: 300,
              },
            },
          },
        })
        // inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
        expect(result.tokens.input).toBe(500)
        expect(result.tokens.cache.read).toBe(200)
        expect(result.tokens.cache.write).toBe(300)
        // total = adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
        expect(result.tokens.total).toBe(1500)
        return
      }
      const result = Session.getUsage({
        model,
        usage,
        metadata: {
          anthropic: {
            cacheCreationInputTokens: 300,
          },
        },
      })
      // inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
      expect(result.tokens.input).toBe(500)
      expect(result.tokens.cache.read).toBe(200)
      expect(result.tokens.cache.write).toBe(300)
      // total = adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
      expect(result.tokens.total).toBe(1500)
    },
  )

  test("extracts cache write tokens from vertex metadata key", () => {
    const model = createModel({ context: 100_000, output: 32_000, npm: "@ai-sdk/google-vertex/anthropic" })
    const result = Session.getUsage({
      model,
      usage: {
        inputTokens: 1000,
        outputTokens: 500,
        totalTokens: 1500,
        cachedInputTokens: 200,
      },
      metadata: {
        vertex: {
          cacheCreationInputTokens: 300,
        },
      },
    })
    expect(result.tokens.input).toBe(500)
    expect(result.tokens.cache.read).toBe(200)
    expect(result.tokens.cache.write).toBe(300)
  })
})