// compaction.test.ts — tests for session compaction: isOverflow, create, prune, process
import { afterEach, describe, expect, mock, test } from "bun:test"
import { APICallError } from "ai"
import { Cause, Effect, Exit, Layer, ManagedRuntime } from "effect"
import * as Stream from "effect/Stream"
import z from "zod"
import { Bus } from "../../src/bus"
import { Config } from "../../src/config"
import { Agent } from "../../src/agent/agent"
import { LLM } from "../../src/session/llm"
import { SessionCompaction } from "../../src/session/compaction"
// NOTE(review): Token and Log come from the same module via two separate
// import statements — presumably safe to merge; confirm project lint rules.
import { Token } from "../../src/util"
import { Instance } from "../../src/project/instance"
import { Log } from "../../src/util"
import { Permission } from "../../src/permission"
import { Plugin } from "../../src/plugin/plugin"
import { provideTmpdirInstance, tmpdir } from "../fixture/fixture"
import { Session as SessionNs } from "../../src/session"
import { MessageV2 } from "../../src/session/message-v2"
import { MessageID, PartID, SessionID } from "../../src/session/schema"
import { SessionStatus } from "../../src/session/status"
import { SessionSummary } from "../../src/session/summary"
import { ModelID, ProviderID } from "../../src/provider/schema"
import type { Provider } from "../../src/provider"
import * as SessionProcessorModule from "../../src/session/processor"
import { Snapshot } from "../../src/snapshot"
import { ProviderTest } from "../fake/provider"
import { testEffect } from "../lib/effect"
import * as CrossSpawnSpawner from "../../src/effect/cross-spawn-spawner"
// Initialize logging once for the whole file with console printing disabled.
void Log.init({ print: false })
  30. function run<A, E>(fx: Effect.Effect<A, E, SessionNs.Service>) {
  31. return Effect.runPromise(fx.pipe(Effect.provide(SessionNs.defaultLayer)))
  32. }
// Promise-returning convenience wrappers over the Session service, spread on
// top of the SessionNs namespace so tests can call create/messages/update*
// directly without wiring Effect layers at every call site.
const svc = {
  ...SessionNs,
  create(input?: SessionNs.CreateInput) {
    return run(SessionNs.Service.use((svc) => svc.create(input)))
  },
  messages(input: z.output<typeof SessionNs.MessagesInput>) {
    return run(SessionNs.Service.use((svc) => svc.messages(input)))
  },
  updateMessage<T extends MessageV2.Info>(msg: T) {
    return run(SessionNs.Service.use((svc) => svc.updateMessage(msg)))
  },
  updatePart<T extends MessageV2.Part>(part: T) {
    return run(SessionNs.Service.use((svc) => svc.updatePart(part)))
  },
}
// Stub summary service: summarize is a no-op and both diff variants return
// empty arrays, so compaction tests never depend on real summarization.
const summary = Layer.succeed(
  SessionSummary.Service,
  SessionSummary.Service.of({
    summarize: () => Effect.void,
    diff: () => Effect.succeed([]),
    computeDiff: () => Effect.succeed([]),
  }),
)
// Provider/model reference attached to every message created in these tests.
const ref = {
  providerID: ProviderID.make("test"),
  modelID: ModelID.make("test-model"),
}
// Restore all bun:test mocks after every test.
afterEach(() => {
  mock.restore()
})
  63. function createModel(opts: {
  64. context: number
  65. output: number
  66. input?: number
  67. cost?: Provider.Model["cost"]
  68. npm?: string
  69. }): Provider.Model {
  70. return {
  71. id: "test-model",
  72. providerID: "test",
  73. name: "Test",
  74. limit: {
  75. context: opts.context,
  76. input: opts.input,
  77. output: opts.output,
  78. },
  79. cost: opts.cost ?? { input: 0, output: 0, cache: { read: 0, write: 0 } },
  80. capabilities: {
  81. toolcall: true,
  82. attachment: false,
  83. reasoning: false,
  84. temperature: true,
  85. input: { text: true, image: false, audio: false, video: false },
  86. output: { text: true, image: false, audio: false, video: false },
  87. },
  88. api: { npm: opts.npm ?? "@ai-sdk/anthropic" },
  89. options: {},
  90. } as Provider.Model
  91. }
// Provider whose model has a wide context window, so tests don't hit overflow accidentally.
const wide = () => ProviderTest.fake({ model: createModel({ context: 100_000, output: 32_000 }) })
  93. async function user(sessionID: SessionID, text: string) {
  94. const msg = await svc.updateMessage({
  95. id: MessageID.ascending(),
  96. role: "user",
  97. sessionID,
  98. agent: "build",
  99. model: ref,
  100. time: { created: Date.now() },
  101. })
  102. await svc.updatePart({
  103. id: PartID.ascending(),
  104. messageID: msg.id,
  105. sessionID,
  106. type: "text",
  107. text,
  108. })
  109. return msg
  110. }
  111. async function assistant(sessionID: SessionID, parentID: MessageID, root: string) {
  112. const msg: MessageV2.Assistant = {
  113. id: MessageID.ascending(),
  114. role: "assistant",
  115. sessionID,
  116. mode: "build",
  117. agent: "build",
  118. path: { cwd: root, root },
  119. cost: 0,
  120. tokens: {
  121. output: 0,
  122. input: 0,
  123. reasoning: 0,
  124. cache: { read: 0, write: 0 },
  125. },
  126. modelID: ref.modelID,
  127. providerID: ref.providerID,
  128. parentID,
  129. time: { created: Date.now() },
  130. finish: "end_turn",
  131. }
  132. await svc.updateMessage(msg)
  133. return msg
  134. }
// Builds a stub SessionProcessor.Handle whose process() always resolves to
// `result`; the tool-call hooks are no-ops and `message` echoes the assistant
// message the handle was created with.
// NOTE(review): updateToolCall returns Effect.succeed(undefined) while the
// other hooks return Effect.void — presumably equivalent here; confirm the
// Handle interface's success types before unifying.
function fake(
  input: Parameters<SessionProcessorModule.SessionProcessor.Interface["create"]>[0],
  result: "continue" | "compact",
) {
  const msg = input.assistantMessage
  return {
    get message() {
      return msg
    },
    updateToolCall: Effect.fn("TestSessionProcessor.updateToolCall")(() => Effect.succeed(undefined)),
    completeToolCall: Effect.fn("TestSessionProcessor.completeToolCall")(() => Effect.void),
    process: Effect.fn("TestSessionProcessor.process")(() => Effect.succeed(result)),
  } satisfies SessionProcessorModule.SessionProcessor.Handle
}
  149. function layer(result: "continue" | "compact") {
  150. return Layer.succeed(
  151. SessionProcessorModule.SessionProcessor.Service,
  152. SessionProcessorModule.SessionProcessor.Service.of({
  153. create: Effect.fn("TestSessionProcessor.create")((input) => Effect.succeed(fake(input, result))),
  154. }),
  155. )
  156. }
// Managed runtime wiring SessionCompaction against the stub processor that
// always returns `result`. Plugin and provider layers are overridable per test;
// the same memoized bus layer is both merged into the output and provided as a
// dependency so subscribers and producers share one bus.
function runtime(result: "continue" | "compact", plugin = Plugin.defaultLayer, provider = ProviderTest.fake()) {
  const bus = Bus.layer
  return ManagedRuntime.make(
    Layer.mergeAll(SessionCompaction.layer, bus).pipe(
      Layer.provide(provider.layer),
      Layer.provide(SessionNs.defaultLayer),
      Layer.provide(layer(result)),
      Layer.provide(Agent.defaultLayer),
      Layer.provide(plugin),
      Layer.provide(bus),
      Layer.provide(Config.defaultLayer),
    ),
  )
}
// Shared dependency layers used by the `testEffect` environment below.
const deps = Layer.mergeAll(
  ProviderTest.fake().layer,
  layer("continue"),
  Agent.defaultLayer,
  Plugin.defaultLayer,
  Bus.layer,
  Config.defaultLayer,
)
// Full test environment: session service, process spawner, and compaction
// (with its own session + dependency layers) merged together.
const env = Layer.mergeAll(
  SessionNs.defaultLayer,
  CrossSpawnSpawner.defaultLayer,
  SessionCompaction.layer.pipe(Layer.provide(SessionNs.defaultLayer), Layer.provideMerge(deps)),
)
// `it` runs Effect-based tests against the environment above.
const it = testEffect(env)
  185. function llm() {
  186. const queue: Array<
  187. Stream.Stream<LLM.Event, unknown> | ((input: LLM.StreamInput) => Stream.Stream<LLM.Event, unknown>)
  188. > = []
  189. return {
  190. push(stream: Stream.Stream<LLM.Event, unknown> | ((input: LLM.StreamInput) => Stream.Stream<LLM.Event, unknown>)) {
  191. queue.push(stream)
  192. },
  193. layer: Layer.succeed(
  194. LLM.Service,
  195. LLM.Service.of({
  196. stream: (input) => {
  197. const item = queue.shift() ?? Stream.empty
  198. const stream = typeof item === "function" ? item(input) : item
  199. return stream.pipe(Stream.mapEffect((event) => Effect.succeed(event)))
  200. },
  201. }),
  202. ),
  203. }
  204. }
// Managed runtime using the real SessionProcessor (with the stubbed summary
// service) plus a caller-supplied LLM layer — for end-to-end compaction tests.
// As in runtime(), the memoized bus/status layers are shared between the
// merged output and the provided dependencies.
function liveRuntime(layer: Layer.Layer<LLM.Service>, provider = ProviderTest.fake()) {
  const bus = Bus.layer
  const status = SessionStatus.layer.pipe(Layer.provide(bus))
  const processor = SessionProcessorModule.SessionProcessor.layer.pipe(Layer.provide(summary))
  return ManagedRuntime.make(
    Layer.mergeAll(SessionCompaction.layer.pipe(Layer.provide(processor)), processor, bus, status).pipe(
      Layer.provide(provider.layer),
      Layer.provide(SessionNs.defaultLayer),
      Layer.provide(Snapshot.defaultLayer),
      Layer.provide(layer),
      Layer.provide(Permission.defaultLayer),
      Layer.provide(Agent.defaultLayer),
      Layer.provide(Plugin.defaultLayer),
      Layer.provide(status),
      Layer.provide(bus),
      Layer.provide(Config.defaultLayer),
    ),
  )
}
  224. function wait(ms = 50) {
  225. return new Promise((resolve) => setTimeout(resolve, ms))
  226. }
  227. function defer() {
  228. let resolve!: () => void
  229. const promise = new Promise<void>((done) => {
  230. resolve = done
  231. })
  232. return { promise, resolve }
  233. }
// Plugin layer that resolves `ready` when the compacting hook fires and then
// suspends forever, letting tests observe an in-flight (never-finishing)
// compaction. Every other hook passes its output through unchanged.
function plugin(ready: ReturnType<typeof defer>) {
  return Layer.mock(Plugin.Service)({
    trigger: <Name extends string, Input, Output>(name: Name, _input: Input, output: Output) => {
      if (name !== "experimental.session.compacting") return Effect.succeed(output)
      // Effect.never never completes, so Effect.as(output) only satisfies the
      // return type — it is not reached at runtime.
      return Effect.sync(() => ready.resolve()).pipe(Effect.andThen(Effect.never), Effect.as(output))
    },
    list: () => Effect.succeed([]),
    init: () => Effect.void,
  })
}
// Plugin layer that overrides the compaction autocontinue hook, mutating the
// hook's output object in place to force `enabled` on or off.
function autocontinue(enabled: boolean) {
  return Layer.mock(Plugin.Service)({
    trigger: <Name extends string, Input, Output>(name: Name, _input: Input, output: Output) => {
      if (name !== "experimental.compaction.autocontinue") return Effect.succeed(output)
      return Effect.sync(() => {
        ;(output as { enabled: boolean }).enabled = enabled
        return output
      })
    },
    list: () => Effect.succeed([]),
    init: () => Effect.void,
  })
}
// isOverflow(): decides whether the session's token usage has outgrown the
// model's usable context and compaction should be triggered.
describe("session.compaction.isOverflow", () => {
  it.live(
    "returns true when token count exceeds usable context",
    provideTmpdirInstance(() =>
      Effect.gen(function* () {
        const compact = yield* SessionCompaction.Service
        const model = createModel({ context: 100_000, output: 32_000 })
        const tokens = { input: 75_000, output: 5_000, reasoning: 0, cache: { read: 0, write: 0 } }
        expect(yield* compact.isOverflow({ tokens, model })).toBe(true)
      }),
    ),
  )
  it.live(
    "returns false when token count within usable context",
    provideTmpdirInstance(() =>
      Effect.gen(function* () {
        const compact = yield* SessionCompaction.Service
        const model = createModel({ context: 200_000, output: 32_000 })
        const tokens = { input: 100_000, output: 10_000, reasoning: 0, cache: { read: 0, write: 0 } }
        expect(yield* compact.isOverflow({ tokens, model })).toBe(false)
      }),
    ),
  )
  it.live(
    "includes cache.read in token count",
    provideTmpdirInstance(() =>
      Effect.gen(function* () {
        const compact = yield* SessionCompaction.Service
        const model = createModel({ context: 100_000, output: 32_000 })
        const tokens = { input: 60_000, output: 10_000, reasoning: 0, cache: { read: 10_000, write: 0 } }
        expect(yield* compact.isOverflow({ tokens, model })).toBe(true)
      }),
    ),
  )
  it.live(
    "respects input limit for input caps",
    provideTmpdirInstance(() =>
      Effect.gen(function* () {
        const compact = yield* SessionCompaction.Service
        const model = createModel({ context: 400_000, input: 272_000, output: 128_000 })
        const tokens = { input: 271_000, output: 1_000, reasoning: 0, cache: { read: 2_000, write: 0 } }
        expect(yield* compact.isOverflow({ tokens, model })).toBe(true)
      }),
    ),
  )
  it.live(
    "returns false when input/output are within input caps",
    provideTmpdirInstance(() =>
      Effect.gen(function* () {
        const compact = yield* SessionCompaction.Service
        const model = createModel({ context: 400_000, input: 272_000, output: 128_000 })
        const tokens = { input: 200_000, output: 20_000, reasoning: 0, cache: { read: 10_000, write: 0 } }
        expect(yield* compact.isOverflow({ tokens, model })).toBe(false)
      }),
    ),
  )
  it.live(
    "returns false when output within limit with input caps",
    provideTmpdirInstance(() =>
      Effect.gen(function* () {
        const compact = yield* SessionCompaction.Service
        const model = createModel({ context: 200_000, input: 120_000, output: 10_000 })
        const tokens = { input: 50_000, output: 9_999, reasoning: 0, cache: { read: 0, write: 0 } }
        expect(yield* compact.isOverflow({ tokens, model })).toBe(false)
      }),
    ),
  )
  // ─── Bug reproduction tests ───────────────────────────────────────────
  // These tests demonstrate that when limit.input is set, isOverflow()
  // does not subtract any headroom for the next model response. This means
  // compaction only triggers AFTER we've already consumed the full input
  // budget, leaving zero room for the next API call's output tokens.
  //
  // Compare: without limit.input, usable = context - output (reserves space).
  // With limit.input, usable = limit.input (reserves nothing).
  //
  // Related issues: #10634, #8089, #11086, #12621
  // Open PRs: #6875, #12924
  it.live(
    "BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not",
    provideTmpdirInstance(() =>
      Effect.gen(function* () {
        const compact = yield* SessionCompaction.Service
        // Simulate Claude with prompt caching: input limit = 200K, output limit = 32K
        const model = createModel({ context: 200_000, input: 200_000, output: 32_000 })
        // We've used 198K tokens total. Only 2K under the input limit.
        // On the next turn, the full conversation (198K) becomes input,
        // plus the model needs room to generate output — this WILL overflow.
        const tokens = { input: 180_000, output: 15_000, reasoning: 0, cache: { read: 3_000, write: 0 } }
        // count = 180K + 3K + 15K = 198K
        // usable = limit.input = 200K (no output subtracted!)
        // 198K > 200K = false → no compaction triggered
        // WITHOUT limit.input: usable = 200K - 32K = 168K, and 198K > 168K = true ✓
        // WITH limit.input: usable = 200K, and 198K > 200K = false ✗
        // With 198K used and only 2K headroom, the next turn will overflow.
        // Compaction MUST trigger here.
        expect(yield* compact.isOverflow({ tokens, model })).toBe(true)
      }),
    ),
  )
  it.live(
    "BUG: without limit.input, same token count correctly triggers compaction",
    provideTmpdirInstance(() =>
      Effect.gen(function* () {
        const compact = yield* SessionCompaction.Service
        // Same model but without limit.input — uses context - output instead
        const model = createModel({ context: 200_000, output: 32_000 })
        // Same token usage as above
        const tokens = { input: 180_000, output: 15_000, reasoning: 0, cache: { read: 3_000, write: 0 } }
        // count = 198K
        // usable = context - output = 200K - 32K = 168K
        // 198K > 168K = true → compaction correctly triggered
        const result = yield* compact.isOverflow({ tokens, model })
        expect(result).toBe(true) // ← Correct: headroom is reserved
      }),
    ),
  )
  it.live(
    "BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it",
    provideTmpdirInstance(() =>
      Effect.gen(function* () {
        const compact = yield* SessionCompaction.Service
        // Two models with identical context/output limits, differing only in limit.input
        const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
        const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })
        // 170K total tokens — well above context-output (168K) but below input limit (200K)
        const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }
        const withLimit = yield* compact.isOverflow({ tokens, model: withInputLimit })
        const withoutLimit = yield* compact.isOverflow({ tokens, model: withoutInputLimit })
        // Both models have identical real capacity — they should agree:
        expect(withLimit).toBe(true) // should compact (170K leaves no room for 32K output)
        expect(withoutLimit).toBe(true) // correctly compacts (170K > 168K)
      }),
    ),
  )
  it.live(
    "returns false when model context limit is 0",
    provideTmpdirInstance(() =>
      Effect.gen(function* () {
        const compact = yield* SessionCompaction.Service
        const model = createModel({ context: 0, output: 32_000 })
        const tokens = { input: 100_000, output: 10_000, reasoning: 0, cache: { read: 0, write: 0 } }
        expect(yield* compact.isOverflow({ tokens, model })).toBe(false)
      }),
    ),
  )
  it.live(
    "returns false when compaction.auto is disabled",
    provideTmpdirInstance(
      () =>
        Effect.gen(function* () {
          const compact = yield* SessionCompaction.Service
          const model = createModel({ context: 100_000, output: 32_000 })
          const tokens = { input: 75_000, output: 5_000, reasoning: 0, cache: { read: 0, write: 0 } }
          expect(yield* compact.isOverflow({ tokens, model })).toBe(false)
        }),
      {
        // Second argument to provideTmpdirInstance overrides the instance config.
        config: {
          compaction: { auto: false },
        },
      },
    ),
  )
})
// create(): records the compaction request as a user message holding a single
// "compaction" part with the auto/overflow flags.
describe("session.compaction.create", () => {
  it.live(
    "creates a compaction user message and part",
    provideTmpdirInstance(() =>
      Effect.gen(function* () {
        const compact = yield* SessionCompaction.Service
        const ssn = yield* SessionNs.Service
        const info = yield* ssn.create({})
        yield* compact.create({
          sessionID: info.id,
          agent: "build",
          model: ref,
          auto: true,
          overflow: true,
        })
        const msgs = yield* ssn.messages({ sessionID: info.id })
        expect(msgs).toHaveLength(1)
        expect(msgs[0].info.role).toBe("user")
        expect(msgs[0].parts).toHaveLength(1)
        expect(msgs[0].parts[0]).toMatchObject({
          type: "compaction",
          auto: true,
          overflow: true,
        })
      }),
    ),
  )
})
// prune(): marks large, old, completed tool output as compacted — except for
// protected tools (e.g. "skill"), which are left untouched.
describe("session.compaction.prune", () => {
  it.live(
    "compacts old completed tool output",
    provideTmpdirInstance((dir) =>
      Effect.gen(function* () {
        const compact = yield* SessionCompaction.Service
        const ssn = yield* SessionNs.Service
        const info = yield* ssn.create({})
        // First user turn.
        const a = yield* ssn.updateMessage({
          id: MessageID.ascending(),
          role: "user",
          sessionID: info.id,
          agent: "build",
          model: ref,
          time: { created: Date.now() },
        })
        yield* ssn.updatePart({
          id: PartID.ascending(),
          messageID: a.id,
          sessionID: info.id,
          type: "text",
          text: "first",
        })
        // Assistant reply carrying the tool part to be pruned.
        const b: MessageV2.Assistant = {
          id: MessageID.ascending(),
          role: "assistant",
          sessionID: info.id,
          mode: "build",
          agent: "build",
          path: { cwd: dir, root: dir },
          cost: 0,
          tokens: {
            output: 0,
            input: 0,
            reasoning: 0,
            cache: { read: 0, write: 0 },
          },
          modelID: ref.modelID,
          providerID: ref.providerID,
          parentID: a.id,
          time: { created: Date.now() },
          finish: "end_turn",
        }
        yield* ssn.updateMessage(b)
        // 200K chars of output — presumably large enough to qualify for
        // pruning; the actual threshold lives in SessionCompaction.
        yield* ssn.updatePart({
          id: PartID.ascending(),
          messageID: b.id,
          sessionID: info.id,
          type: "tool",
          callID: crypto.randomUUID(),
          tool: "bash",
          state: {
            status: "completed",
            input: {},
            output: "x".repeat(200_000),
            title: "done",
            metadata: {},
            time: { start: Date.now(), end: Date.now() },
          },
        })
        // Two later user turns so the tool output above counts as "old".
        for (const text of ["second", "third"]) {
          const msg = yield* ssn.updateMessage({
            id: MessageID.ascending(),
            role: "user",
            sessionID: info.id,
            agent: "build",
            model: ref,
            time: { created: Date.now() },
          })
          yield* ssn.updatePart({
            id: PartID.ascending(),
            messageID: msg.id,
            sessionID: info.id,
            type: "text",
            text,
          })
        }
        yield* compact.prune({ sessionID: info.id })
        const msgs = yield* ssn.messages({ sessionID: info.id })
        const part = msgs.flatMap((msg) => msg.parts).find((part) => part.type === "tool")
        expect(part?.type).toBe("tool")
        expect(part?.state.status).toBe("completed")
        if (part?.type === "tool" && part.state.status === "completed") {
          // Pruning stamps the compaction time on the tool state.
          expect(part.state.time.compacted).toBeNumber()
        }
      }),
    ),
  )
  it.live(
    "skips protected skill tool output",
    provideTmpdirInstance((dir) =>
      Effect.gen(function* () {
        const compact = yield* SessionCompaction.Service
        const ssn = yield* SessionNs.Service
        const info = yield* ssn.create({})
        const a = yield* ssn.updateMessage({
          id: MessageID.ascending(),
          role: "user",
          sessionID: info.id,
          agent: "build",
          model: ref,
          time: { created: Date.now() },
        })
        yield* ssn.updatePart({
          id: PartID.ascending(),
          messageID: a.id,
          sessionID: info.id,
          type: "text",
          text: "first",
        })
        const b: MessageV2.Assistant = {
          id: MessageID.ascending(),
          role: "assistant",
          sessionID: info.id,
          mode: "build",
          agent: "build",
          path: { cwd: dir, root: dir },
          cost: 0,
          tokens: {
            output: 0,
            input: 0,
            reasoning: 0,
            cache: { read: 0, write: 0 },
          },
          modelID: ref.modelID,
          providerID: ref.providerID,
          parentID: a.id,
          time: { created: Date.now() },
          finish: "end_turn",
        }
        yield* ssn.updateMessage(b)
        // Same oversized output as above, but from the protected "skill" tool.
        yield* ssn.updatePart({
          id: PartID.ascending(),
          messageID: b.id,
          sessionID: info.id,
          type: "tool",
          callID: crypto.randomUUID(),
          tool: "skill",
          state: {
            status: "completed",
            input: {},
            output: "x".repeat(200_000),
            title: "done",
            metadata: {},
            time: { start: Date.now(), end: Date.now() },
          },
        })
        for (const text of ["second", "third"]) {
          const msg = yield* ssn.updateMessage({
            id: MessageID.ascending(),
            role: "user",
            sessionID: info.id,
            agent: "build",
            model: ref,
            time: { created: Date.now() },
          })
          yield* ssn.updatePart({
            id: PartID.ascending(),
            messageID: msg.id,
            sessionID: info.id,
            type: "text",
            text,
          })
        }
        yield* compact.prune({ sessionID: info.id })
        const msgs = yield* ssn.messages({ sessionID: info.id })
        const part = msgs.flatMap((msg) => msg.parts).find((part) => part.type === "tool")
        expect(part?.type).toBe("tool")
        if (part?.type === "tool" && part.state.status === "completed") {
          // Protected tool output must not be stamped as compacted.
          expect(part.state.time.compacted).toBeUndefined()
        }
      }),
    ),
  )
})
  624. describe("session.compaction.process", () => {
  // process() must reject when pointed at an assistant message as the parent.
  test("throws when parent is not a user message", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await svc.create({})
        const msg = await user(session.id, "hello")
        const reply = await assistant(session.id, msg.id, tmp.path)
        const rt = runtime("continue")
        try {
          const msgs = await svc.messages({ sessionID: session.id })
          await expect(
            rt.runPromise(
              SessionCompaction.Service.use((svc) =>
                svc.process({
                  parentID: reply.id,
                  messages: msgs,
                  sessionID: session.id,
                  auto: false,
                }),
              ),
            ),
          ).rejects.toThrow(`Compaction parent must be a user message: ${reply.id}`)
        } finally {
          await rt.dispose()
        }
      },
    })
  })
  // A successful compaction ("continue" from the processor) must publish a
  // Compacted event on the bus for this session.
  test("publishes compacted event on continue", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await svc.create({})
        const msg = await user(session.id, "hello")
        const msgs = await svc.messages({ sessionID: session.id })
        const done = defer()
        let seen = false
        const rt = runtime("continue", Plugin.defaultLayer, wide())
        let unsub: (() => void) | undefined
        try {
          // Subscribe before processing so the event cannot be missed.
          unsub = await rt.runPromise(
            Bus.Service.use((svc) =>
              svc.subscribeCallback(SessionCompaction.Event.Compacted, (evt) => {
                if (evt.properties.sessionID !== session.id) return
                seen = true
                done.resolve()
              }),
            ),
          )
          const result = await rt.runPromise(
            SessionCompaction.Service.use((svc) =>
              svc.process({
                parentID: msg.id,
                messages: msgs,
                sessionID: session.id,
                auto: false,
              }),
            ),
          )
          // Fail fast instead of hanging if the event never arrives.
          await Promise.race([
            done.promise,
            wait(500).then(() => {
              throw new Error("timed out waiting for compacted event")
            }),
          ])
          expect(result).toBe("continue")
          expect(seen).toBe(true)
        } finally {
          unsub?.()
          await rt.dispose()
        }
      },
    })
  })
  // When the processor itself answers "compact" (i.e. even the summary
  // overflows), process() stops and the summary message is marked errored.
  test("marks summary message as errored on compact result", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await svc.create({})
        const msg = await user(session.id, "hello")
        const rt = runtime("compact", Plugin.defaultLayer, wide())
        try {
          const msgs = await svc.messages({ sessionID: session.id })
          const result = await rt.runPromise(
            SessionCompaction.Service.use((svc) =>
              svc.process({
                parentID: msg.id,
                messages: msgs,
                sessionID: session.id,
                auto: false,
              }),
            ),
          )
          const summary = (await svc.messages({ sessionID: session.id })).find(
            (msg) => msg.info.role === "assistant" && msg.info.summary,
          )
          expect(result).toBe("stop")
          expect(summary?.info.role).toBe("assistant")
          if (summary?.info.role === "assistant") {
            expect(summary.info.finish).toBe("error")
            expect(JSON.stringify(summary.info.error)).toContain("Session too large to compact")
          }
        } finally {
          await rt.dispose()
        }
      },
    })
  })
  // Auto-compaction appends a synthetic user "continue" prompt so the
  // conversation resumes without user input.
  test("adds synthetic continue prompt when auto is enabled", async () => {
    await using tmp = await tmpdir()
    await Instance.provide({
      directory: tmp.path,
      fn: async () => {
        const session = await svc.create({})
        const msg = await user(session.id, "hello")
        const rt = runtime("continue", Plugin.defaultLayer, wide())
        try {
          const msgs = await svc.messages({ sessionID: session.id })
          const result = await rt.runPromise(
            SessionCompaction.Service.use((svc) =>
              svc.process({
                parentID: msg.id,
                messages: msgs,
                sessionID: session.id,
                auto: true,
              }),
            ),
          )
          const all = await svc.messages({ sessionID: session.id })
          const last = all.at(-1)
          expect(result).toBe("continue")
          expect(last?.info.role).toBe("user")
          expect(last?.parts[0]).toMatchObject({
            type: "text",
            synthetic: true,
            metadata: { compaction_continue: true },
          })
          if (last?.parts[0]?.type === "text") {
            expect(last.parts[0].text).toContain("Continue if you have next steps")
          }
        } finally {
          await rt.dispose()
        }
      },
    })
  })
  774. test("allows plugins to disable synthetic continue prompt", async () => {
  775. await using tmp = await tmpdir()
  776. await Instance.provide({
  777. directory: tmp.path,
  778. fn: async () => {
  779. const session = await svc.create({})
  780. const msg = await user(session.id, "hello")
  781. const rt = runtime("continue", autocontinue(false), wide())
  782. try {
  783. const msgs = await svc.messages({ sessionID: session.id })
  784. const result = await rt.runPromise(
  785. SessionCompaction.Service.use((svc) =>
  786. svc.process({
  787. parentID: msg.id,
  788. messages: msgs,
  789. sessionID: session.id,
  790. auto: true,
  791. }),
  792. ),
  793. )
  794. const all = await svc.messages({ sessionID: session.id })
  795. const last = all.at(-1)
  796. expect(result).toBe("continue")
  797. expect(last?.info.role).toBe("assistant")
  798. expect(
  799. all.some(
  800. (msg) =>
  801. msg.info.role === "user" &&
  802. msg.parts.some(
  803. (part) =>
  804. part.type === "text" && part.synthetic && part.text.includes("Continue if you have next steps"),
  805. ),
  806. ),
  807. ).toBe(false)
  808. } finally {
  809. await rt.dispose()
  810. }
  811. },
  812. })
  813. })
  814. test("replays the prior user turn on overflow when earlier context exists", async () => {
  815. await using tmp = await tmpdir()
  816. await Instance.provide({
  817. directory: tmp.path,
  818. fn: async () => {
  819. const session = await svc.create({})
  820. await user(session.id, "root")
  821. const replay = await user(session.id, "image")
  822. await svc.updatePart({
  823. id: PartID.ascending(),
  824. messageID: replay.id,
  825. sessionID: session.id,
  826. type: "file",
  827. mime: "image/png",
  828. filename: "cat.png",
  829. url: "https://example.com/cat.png",
  830. })
  831. const msg = await user(session.id, "current")
  832. const rt = runtime("continue", Plugin.defaultLayer, wide())
  833. try {
  834. const msgs = await svc.messages({ sessionID: session.id })
  835. const result = await rt.runPromise(
  836. SessionCompaction.Service.use((svc) =>
  837. svc.process({
  838. parentID: msg.id,
  839. messages: msgs,
  840. sessionID: session.id,
  841. auto: true,
  842. overflow: true,
  843. }),
  844. ),
  845. )
  846. const last = (await svc.messages({ sessionID: session.id })).at(-1)
  847. expect(result).toBe("continue")
  848. expect(last?.info.role).toBe("user")
  849. expect(last?.parts.some((part) => part.type === "file")).toBe(false)
  850. expect(
  851. last?.parts.some((part) => part.type === "text" && part.text.includes("Attached image/png: cat.png")),
  852. ).toBe(true)
  853. } finally {
  854. await rt.dispose()
  855. }
  856. },
  857. })
  858. })
  859. test("falls back to overflow guidance when no replayable turn exists", async () => {
  860. await using tmp = await tmpdir()
  861. await Instance.provide({
  862. directory: tmp.path,
  863. fn: async () => {
  864. const session = await svc.create({})
  865. await user(session.id, "earlier")
  866. const msg = await user(session.id, "current")
  867. const rt = runtime("continue", Plugin.defaultLayer, wide())
  868. try {
  869. const msgs = await svc.messages({ sessionID: session.id })
  870. const result = await rt.runPromise(
  871. SessionCompaction.Service.use((svc) =>
  872. svc.process({
  873. parentID: msg.id,
  874. messages: msgs,
  875. sessionID: session.id,
  876. auto: true,
  877. overflow: true,
  878. }),
  879. ),
  880. )
  881. const last = (await svc.messages({ sessionID: session.id })).at(-1)
  882. expect(result).toBe("continue")
  883. expect(last?.info.role).toBe("user")
  884. if (last?.parts[0]?.type === "text") {
  885. expect(last.parts[0].text).toContain("previous request exceeded the provider's size limit")
  886. }
  887. } finally {
  888. await rt.dispose()
  889. }
  890. },
  891. })
  892. })
// Verifies that aborting while the processor waits out a retry backoff exits
// promptly: the stub requests a 10s retry-after, yet the run must settle in
// well under 250ms once aborted.
test("stops quickly when aborted during retry backoff", async () => {
  const stub = llm()
  const ready = defer()
  // Stub stream: emit "start", then fail with a retryable 503 carrying a long
  // retry-after-ms so the processor enters its backoff wait — the window in
  // which we abort.
  stub.push(
    Stream.fromAsyncIterable(
      {
        async *[Symbol.asyncIterator]() {
          yield { type: "start" } as LLM.Event
          throw new APICallError({
            message: "boom",
            url: "https://example.com/v1/chat/completions",
            requestBodyValues: {},
            statusCode: 503,
            responseHeaders: { "retry-after-ms": "10000" },
            responseBody: '{"error":"boom"}',
            isRetryable: true,
          })
        },
      },
      (err) => err,
    ),
  )
  await using tmp = await tmpdir({ git: true })
  await Instance.provide({
    directory: tmp.path,
    fn: async () => {
      const session = await svc.create({})
      const msg = await user(session.id, "hello")
      const msgs = await svc.messages({ sessionID: session.id })
      const abort = new AbortController()
      const rt = liveRuntime(stub.layer, wide())
      let off: (() => void) | undefined
      let run: Promise<"continue" | "stop"> | undefined
      try {
        // Resolve `ready` once this session reports a "retry" status, i.e.
        // once the processor is actually inside the backoff.
        off = await rt.runPromise(
          Bus.Service.use((svc) =>
            svc.subscribeCallback(SessionStatus.Event.Status, (evt) => {
              if (evt.properties.sessionID !== session.id) return
              if (evt.properties.status.type !== "retry") return
              ready.resolve()
            }),
          ),
        )
        run = rt
          .runPromiseExit(
            SessionCompaction.Service.use((svc) =>
              svc.process({
                parentID: msg.id,
                messages: msgs,
                sessionID: session.id,
                auto: false,
              }),
            ),
            { signal: abort.signal },
          )
          .then((exit) => {
            if (Exit.isFailure(exit)) {
              // An interrupt caused by our own abort maps to a clean "stop";
              // anything else is a genuine failure and is rethrown.
              if (Cause.hasInterrupts(exit.cause) && abort.signal.aborted) return "stop"
              throw Cause.squash(exit.cause)
            }
            return exit.value
          })
        // Wait for the retry status (or fail fast), then abort and time the exit.
        await Promise.race([
          ready.promise,
          wait(1000).then(() => {
            throw new Error("timed out waiting for retry status")
          }),
        ])
        const start = Date.now()
        abort.abort()
        const result = await Promise.race([
          run.then((value) => ({ kind: "done" as const, value, ms: Date.now() - start })),
          wait(250).then(() => ({ kind: "timeout" as const })),
        ])
        expect(result.kind).toBe("done")
        if (result.kind === "done") {
          expect(result.value).toBe("stop")
          expect(result.ms).toBeLessThan(250)
        }
      } finally {
        // Unsubscribe, make sure the run is cancelled, and swallow its
        // rejection so cleanup never masks the test result.
        off?.()
        abort.abort()
        await rt.dispose()
        await run?.catch(() => undefined)
      }
    },
  })
})
// Verifies that an abort landing before processor setup completes leaves no
// partial summary assistant message persisted in the session.
test("does not leave a summary assistant when aborted before processor setup", async () => {
  const ready = defer()
  await using tmp = await tmpdir({ git: true })
  await Instance.provide({
    directory: tmp.path,
    fn: async () => {
      const session = await svc.create({})
      const msg = await user(session.id, "hello")
      const msgs = await svc.messages({ sessionID: session.id })
      const abort = new AbortController()
      // plugin(ready) resolves `ready` from a compaction hook — the signal
      // that we are in the pre-setup window where we want to abort.
      const rt = runtime("continue", plugin(ready), wide())
      let run: Promise<"continue" | "stop"> | undefined
      try {
        run = rt
          .runPromiseExit(
            SessionCompaction.Service.use((svc) =>
              svc.process({
                parentID: msg.id,
                messages: msgs,
                sessionID: session.id,
                auto: false,
              }),
            ),
            { signal: abort.signal },
          )
          .then((exit) => {
            if (Exit.isFailure(exit)) {
              // Our own abort surfaces as an interrupt; translate it to "stop".
              if (Cause.hasInterrupts(exit.cause) && abort.signal.aborted) return "stop"
              throw Cause.squash(exit.cause)
            }
            return exit.value
          })
        // Wait for the compaction hook to fire (or fail fast), then abort.
        await Promise.race([
          ready.promise,
          wait(1000).then(() => {
            throw new Error("timed out waiting for compaction hook")
          }),
        ])
        abort.abort()
        expect(await run).toBe("stop")
        // Nothing summary-shaped should have been written to the session.
        const all = await svc.messages({ sessionID: session.id })
        expect(all.some((msg) => msg.info.role === "assistant" && msg.info.summary)).toBe(false)
      } finally {
        abort.abort()
        await rt.dispose()
        await run?.catch(() => undefined)
      }
    },
  })
})
// Ensures summary generation ignores tool calls: even when the model stream
// emits tool-call events, no tool parts end up on the persisted summary.
test("does not allow tool calls while generating the summary", async () => {
  const stub = llm()
  // Stub a complete turn that finishes with reason "tool-calls" and carries a
  // tool-input-start/tool-call pair for a no-op tool.
  stub.push(
    Stream.make(
      { type: "start" } satisfies LLM.Event,
      { type: "tool-input-start", id: "call-1", toolName: "_noop" } satisfies LLM.Event,
      { type: "tool-call", toolCallId: "call-1", toolName: "_noop", input: {} } satisfies LLM.Event,
      {
        type: "finish-step",
        finishReason: "tool-calls",
        rawFinishReason: "tool_calls",
        response: { id: "res", modelId: "test-model", timestamp: new Date() },
        providerMetadata: undefined,
        usage: {
          inputTokens: 1,
          outputTokens: 1,
          totalTokens: 2,
          inputTokenDetails: {
            noCacheTokens: undefined,
            cacheReadTokens: undefined,
            cacheWriteTokens: undefined,
          },
          outputTokenDetails: {
            textTokens: undefined,
            reasoningTokens: undefined,
          },
        },
      } satisfies LLM.Event,
      {
        type: "finish",
        finishReason: "tool-calls",
        rawFinishReason: "tool_calls",
        totalUsage: {
          inputTokens: 1,
          outputTokens: 1,
          totalTokens: 2,
          inputTokenDetails: {
            noCacheTokens: undefined,
            cacheReadTokens: undefined,
            cacheWriteTokens: undefined,
          },
          outputTokenDetails: {
            textTokens: undefined,
            reasoningTokens: undefined,
          },
        },
      } satisfies LLM.Event,
    ),
  )
  await using tmp = await tmpdir({ git: true })
  await Instance.provide({
    directory: tmp.path,
    fn: async () => {
      const session = await svc.create({})
      const msg = await user(session.id, "hello")
      const rt = liveRuntime(stub.layer, wide())
      try {
        const msgs = await svc.messages({ sessionID: session.id })
        await rt.runPromise(
          SessionCompaction.Service.use((svc) =>
            svc.process({
              parentID: msg.id,
              messages: msgs,
              sessionID: session.id,
              auto: false,
            }),
          ),
        )
        // The summary assistant message must contain no tool parts.
        const summary = (await svc.messages({ sessionID: session.id })).find(
          (item) => item.info.role === "assistant" && item.info.summary,
        )
        expect(summary?.info.role).toBe("assistant")
        expect(summary?.parts.some((part) => part.type === "tool")).toBe(false)
      } finally {
        await rt.dispose()
      }
    },
  })
})
  1110. })
  1111. describe("util.token.estimate", () => {
  1112. test("estimates tokens from text (4 chars per token)", () => {
  1113. const text = "x".repeat(4000)
  1114. expect(Token.estimate(text)).toBe(1000)
  1115. })
  1116. test("estimates tokens from larger text", () => {
  1117. const text = "y".repeat(20_000)
  1118. expect(Token.estimate(text)).toBe(5000)
  1119. })
  1120. test("returns 0 for empty string", () => {
  1121. expect(Token.estimate("")).toBe(0)
  1122. })
  1123. })
  1124. describe("SessionNs.getUsage", () => {
  1125. test("normalizes standard usage to token format", () => {
  1126. const model = createModel({ context: 100_000, output: 32_000 })
  1127. const result = SessionNs.getUsage({
  1128. model,
  1129. usage: {
  1130. inputTokens: 1000,
  1131. outputTokens: 500,
  1132. totalTokens: 1500,
  1133. inputTokenDetails: {
  1134. noCacheTokens: undefined,
  1135. cacheReadTokens: undefined,
  1136. cacheWriteTokens: undefined,
  1137. },
  1138. outputTokenDetails: {
  1139. textTokens: undefined,
  1140. reasoningTokens: undefined,
  1141. },
  1142. },
  1143. })
  1144. expect(result.tokens.input).toBe(1000)
  1145. expect(result.tokens.output).toBe(500)
  1146. expect(result.tokens.reasoning).toBe(0)
  1147. expect(result.tokens.cache.read).toBe(0)
  1148. expect(result.tokens.cache.write).toBe(0)
  1149. })
  1150. test("extracts cached tokens to cache.read", () => {
  1151. const model = createModel({ context: 100_000, output: 32_000 })
  1152. const result = SessionNs.getUsage({
  1153. model,
  1154. usage: {
  1155. inputTokens: 1000,
  1156. outputTokens: 500,
  1157. totalTokens: 1500,
  1158. inputTokenDetails: {
  1159. noCacheTokens: 800,
  1160. cacheReadTokens: 200,
  1161. cacheWriteTokens: undefined,
  1162. },
  1163. outputTokenDetails: {
  1164. textTokens: undefined,
  1165. reasoningTokens: undefined,
  1166. },
  1167. },
  1168. })
  1169. expect(result.tokens.input).toBe(800)
  1170. expect(result.tokens.cache.read).toBe(200)
  1171. })
// cache.write should be populated from the anthropic provider metadata key
// (cacheCreationInputTokens) when the usage details carry no cache-write count.
test("handles anthropic cache write metadata", () => {
  const model = createModel({ context: 100_000, output: 32_000 })
  const result = SessionNs.getUsage({
    model,
    usage: {
      inputTokens: 1000,
      outputTokens: 500,
      totalTokens: 1500,
      inputTokenDetails: {
        noCacheTokens: undefined,
        cacheReadTokens: undefined,
        cacheWriteTokens: undefined,
      },
      outputTokenDetails: {
        textTokens: undefined,
        reasoningTokens: undefined,
      },
    },
    metadata: {
      anthropic: {
        cacheCreationInputTokens: 300,
      },
    },
  })
  expect(result.tokens.cache.write).toBe(300)
})
test("subtracts cached tokens for anthropic provider", () => {
  const model = createModel({ context: 100_000, output: 32_000 })
  // AI SDK v6 normalizes inputTokens to include cached tokens for all providers
  const result = SessionNs.getUsage({
    model,
    usage: {
      inputTokens: 1000,
      outputTokens: 500,
      totalTokens: 1500,
      inputTokenDetails: {
        noCacheTokens: 800,
        cacheReadTokens: 200,
        cacheWriteTokens: undefined,
      },
      outputTokenDetails: {
        textTokens: undefined,
        reasoningTokens: undefined,
      },
    },
    // Empty anthropic metadata object — presence of the key alone is enough.
    metadata: {
      anthropic: {},
    },
  })
  // Cache reads are carved out of input: 1000 - 200 = 800.
  expect(result.tokens.input).toBe(800)
  expect(result.tokens.cache.read).toBe(200)
})
  1224. test("separates reasoning tokens from output tokens", () => {
  1225. const model = createModel({ context: 100_000, output: 32_000 })
  1226. const result = SessionNs.getUsage({
  1227. model,
  1228. usage: {
  1229. inputTokens: 1000,
  1230. outputTokens: 500,
  1231. totalTokens: 1500,
  1232. inputTokenDetails: {
  1233. noCacheTokens: undefined,
  1234. cacheReadTokens: undefined,
  1235. cacheWriteTokens: undefined,
  1236. },
  1237. outputTokenDetails: {
  1238. textTokens: 400,
  1239. reasoningTokens: 100,
  1240. },
  1241. },
  1242. })
  1243. expect(result.tokens.input).toBe(1000)
  1244. expect(result.tokens.output).toBe(400)
  1245. expect(result.tokens.reasoning).toBe(100)
  1246. expect(result.tokens.total).toBe(1500)
  1247. })
// Guards against pricing reasoning tokens twice: outputTokens (1M) already
// includes the 250k reasoning tokens, so cost must come out to exactly
// 1M * $15/M = $15 — not 1.25M * $15/M.
test("does not double count reasoning tokens in cost", () => {
  // Only output cost is non-zero so the assertion isolates output pricing.
  const model = createModel({
    context: 100_000,
    output: 32_000,
    cost: {
      input: 0,
      output: 15,
      cache: { read: 0, write: 0 },
    },
  })
  const result = SessionNs.getUsage({
    model,
    usage: {
      inputTokens: 0,
      outputTokens: 1_000_000,
      totalTokens: 1_000_000,
      inputTokenDetails: {
        noCacheTokens: undefined,
        cacheReadTokens: undefined,
        cacheWriteTokens: undefined,
      },
      outputTokenDetails: {
        textTokens: 750_000,
        reasoningTokens: 250_000,
      },
    },
  })
  expect(result.tokens.output).toBe(750_000)
  expect(result.tokens.reasoning).toBe(250_000)
  expect(result.cost).toBe(15)
})
  1279. test("handles undefined optional values gracefully", () => {
  1280. const model = createModel({ context: 100_000, output: 32_000 })
  1281. const result = SessionNs.getUsage({
  1282. model,
  1283. usage: {
  1284. inputTokens: 0,
  1285. outputTokens: 0,
  1286. totalTokens: 0,
  1287. inputTokenDetails: {
  1288. noCacheTokens: undefined,
  1289. cacheReadTokens: undefined,
  1290. cacheWriteTokens: undefined,
  1291. },
  1292. outputTokenDetails: {
  1293. textTokens: undefined,
  1294. reasoningTokens: undefined,
  1295. },
  1296. },
  1297. })
  1298. expect(result.tokens.input).toBe(0)
  1299. expect(result.tokens.output).toBe(0)
  1300. expect(result.tokens.reasoning).toBe(0)
  1301. expect(result.tokens.cache.read).toBe(0)
  1302. expect(result.tokens.cache.write).toBe(0)
  1303. expect(Number.isNaN(result.cost)).toBe(false)
  1304. })
// Cost rates appear to be per million tokens: 1M input at $3/M plus 100k
// output at $15/M gives the expected $3 + $1.50.
test("calculates cost correctly", () => {
  const model = createModel({
    context: 100_000,
    output: 32_000,
    cost: {
      input: 3,
      output: 15,
      cache: { read: 0.3, write: 3.75 },
    },
  })
  const result = SessionNs.getUsage({
    model,
    usage: {
      inputTokens: 1_000_000,
      outputTokens: 100_000,
      totalTokens: 1_100_000,
      inputTokenDetails: {
        noCacheTokens: undefined,
        cacheReadTokens: undefined,
        cacheWriteTokens: undefined,
      },
      outputTokenDetails: {
        textTokens: undefined,
        reasoningTokens: undefined,
      },
    },
  })
  // input: 1M * 3/M = 3; output: 100k * 15/M = 1.5
  expect(result.cost).toBe(3 + 1.5)
})
// For the anthropic-family providers, cache read/write tokens are subtracted
// from inputTokens and the total is rebuilt from the adjusted components.
// Bedrock reports cache writes under a different metadata shape than the
// anthropic/vertex cacheCreationInputTokens key, hence the branch.
test.each(["@ai-sdk/anthropic", "@ai-sdk/amazon-bedrock", "@ai-sdk/google-vertex/anthropic"])(
  "computes total from components for %s models",
  (npm) => {
    const model = createModel({ context: 100_000, output: 32_000, npm })
    // AI SDK v6: inputTokens includes cached tokens for all providers
    const usage = {
      inputTokens: 1000,
      outputTokens: 500,
      totalTokens: 1500,
      inputTokenDetails: {
        noCacheTokens: 800,
        cacheReadTokens: 200,
        cacheWriteTokens: undefined,
      },
      outputTokenDetails: {
        textTokens: undefined,
        reasoningTokens: undefined,
      },
    }
    if (npm === "@ai-sdk/amazon-bedrock") {
      // Bedrock nests cache writes under metadata.bedrock.usage.cacheWriteInputTokens.
      const result = SessionNs.getUsage({
        model,
        usage,
        metadata: {
          bedrock: {
            usage: {
              cacheWriteInputTokens: 300,
            },
          },
        },
      })
      // inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
      expect(result.tokens.input).toBe(500)
      expect(result.tokens.cache.read).toBe(200)
      expect(result.tokens.cache.write).toBe(300)
      // total = adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
      expect(result.tokens.total).toBe(1500)
      return
    }
    // anthropic and vertex models both read the anthropic metadata key here.
    const result = SessionNs.getUsage({
      model,
      usage,
      metadata: {
        anthropic: {
          cacheCreationInputTokens: 300,
        },
      },
    })
    // inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
    expect(result.tokens.input).toBe(500)
    expect(result.tokens.cache.read).toBe(200)
    expect(result.tokens.cache.write).toBe(300)
    // total = adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
    expect(result.tokens.total).toBe(1500)
  },
)
  1390. test("extracts cache write tokens from vertex metadata key", () => {
  1391. const model = createModel({ context: 100_000, output: 32_000, npm: "@ai-sdk/google-vertex/anthropic" })
  1392. const result = SessionNs.getUsage({
  1393. model,
  1394. usage: {
  1395. inputTokens: 1000,
  1396. outputTokens: 500,
  1397. totalTokens: 1500,
  1398. inputTokenDetails: {
  1399. noCacheTokens: 800,
  1400. cacheReadTokens: 200,
  1401. cacheWriteTokens: undefined,
  1402. },
  1403. outputTokenDetails: {
  1404. textTokens: undefined,
  1405. reasoningTokens: undefined,
  1406. },
  1407. },
  1408. metadata: {
  1409. vertex: {
  1410. cacheCreationInputTokens: 300,
  1411. },
  1412. },
  1413. })
  1414. expect(result.tokens.input).toBe(500)
  1415. expect(result.tokens.cache.read).toBe(200)
  1416. expect(result.tokens.cache.write).toBe(300)
  1417. })
  1418. })