compaction.test.ts 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. import { describe, expect, test } from "bun:test"
  2. import path from "path"
  3. import { SessionCompaction } from "../../src/session/compaction"
  4. import { Token } from "../../src/util/token"
  5. import { Instance } from "../../src/project/instance"
  6. import { Log } from "../../src/util/log"
  7. import { tmpdir } from "../fixture/fixture"
  8. import { Session } from "../../src/session"
  9. import type { Provider } from "../../src/provider/provider"
// Disable log printing so test output stays clean.
Log.init({ print: false })
  11. function createModel(opts: {
  12. context: number
  13. output: number
  14. input?: number
  15. cost?: Provider.Model["cost"]
  16. npm?: string
  17. }): Provider.Model {
  18. return {
  19. id: "test-model",
  20. providerID: "test",
  21. name: "Test",
  22. limit: {
  23. context: opts.context,
  24. input: opts.input,
  25. output: opts.output,
  26. },
  27. cost: opts.cost ?? { input: 0, output: 0, cache: { read: 0, write: 0 } },
  28. capabilities: {
  29. toolcall: true,
  30. attachment: false,
  31. reasoning: false,
  32. temperature: true,
  33. input: { text: true, image: false, audio: false, video: false },
  34. output: { text: true, image: false, audio: false, video: false },
  35. },
  36. api: { npm: opts.npm ?? "@ai-sdk/anthropic" },
  37. options: {},
  38. } as Provider.Model
  39. }
  40. describe("session.compaction.isOverflow", () => {
  41. test("returns true when token count exceeds usable context", async () => {
  42. await using tmp = await tmpdir()
  43. await Instance.provide({
  44. directory: tmp.path,
  45. fn: async () => {
  46. const model = createModel({ context: 100_000, output: 32_000 })
  47. const tokens = { input: 75_000, output: 5_000, reasoning: 0, cache: { read: 0, write: 0 } }
  48. expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true)
  49. },
  50. })
  51. })
  52. test("returns false when token count within usable context", async () => {
  53. await using tmp = await tmpdir()
  54. await Instance.provide({
  55. directory: tmp.path,
  56. fn: async () => {
  57. const model = createModel({ context: 200_000, output: 32_000 })
  58. const tokens = { input: 100_000, output: 10_000, reasoning: 0, cache: { read: 0, write: 0 } }
  59. expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
  60. },
  61. })
  62. })
  63. test("includes cache.read in token count", async () => {
  64. await using tmp = await tmpdir()
  65. await Instance.provide({
  66. directory: tmp.path,
  67. fn: async () => {
  68. const model = createModel({ context: 100_000, output: 32_000 })
  69. const tokens = { input: 60_000, output: 10_000, reasoning: 0, cache: { read: 10_000, write: 0 } }
  70. expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true)
  71. },
  72. })
  73. })
  74. test("respects input limit for input caps", async () => {
  75. await using tmp = await tmpdir()
  76. await Instance.provide({
  77. directory: tmp.path,
  78. fn: async () => {
  79. const model = createModel({ context: 400_000, input: 272_000, output: 128_000 })
  80. const tokens = { input: 271_000, output: 1_000, reasoning: 0, cache: { read: 2_000, write: 0 } }
  81. expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true)
  82. },
  83. })
  84. })
  85. test("returns false when input/output are within input caps", async () => {
  86. await using tmp = await tmpdir()
  87. await Instance.provide({
  88. directory: tmp.path,
  89. fn: async () => {
  90. const model = createModel({ context: 400_000, input: 272_000, output: 128_000 })
  91. const tokens = { input: 200_000, output: 20_000, reasoning: 0, cache: { read: 10_000, write: 0 } }
  92. expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
  93. },
  94. })
  95. })
  96. test("returns false when output within limit with input caps", async () => {
  97. await using tmp = await tmpdir()
  98. await Instance.provide({
  99. directory: tmp.path,
  100. fn: async () => {
  101. const model = createModel({ context: 200_000, input: 120_000, output: 10_000 })
  102. const tokens = { input: 50_000, output: 9_999, reasoning: 0, cache: { read: 0, write: 0 } }
  103. expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
  104. },
  105. })
  106. })
  107. // ─── Bug reproduction tests ───────────────────────────────────────────
  108. // These tests demonstrate that when limit.input is set, isOverflow()
  109. // does not subtract any headroom for the next model response. This means
  110. // compaction only triggers AFTER we've already consumed the full input
  111. // budget, leaving zero room for the next API call's output tokens.
  112. //
  113. // Compare: without limit.input, usable = context - output (reserves space).
  114. // With limit.input, usable = limit.input (reserves nothing).
  115. //
  116. // Related issues: #10634, #8089, #11086, #12621
  117. // Open PRs: #6875, #12924
  118. test("BUG: no headroom when limit.input is set — compaction should trigger near boundary but does not", async () => {
  119. await using tmp = await tmpdir()
  120. await Instance.provide({
  121. directory: tmp.path,
  122. fn: async () => {
  123. // Simulate Claude with prompt caching: input limit = 200K, output limit = 32K
  124. const model = createModel({ context: 200_000, input: 200_000, output: 32_000 })
  125. // We've used 198K tokens total. Only 2K under the input limit.
  126. // On the next turn, the full conversation (198K) becomes input,
  127. // plus the model needs room to generate output — this WILL overflow.
  128. const tokens = { input: 180_000, output: 15_000, reasoning: 0, cache: { read: 3_000, write: 0 } }
  129. // count = 180K + 3K + 15K = 198K
  130. // usable = limit.input = 200K (no output subtracted!)
  131. // 198K > 200K = false → no compaction triggered
  132. // WITHOUT limit.input: usable = 200K - 32K = 168K, and 198K > 168K = true ✓
  133. // WITH limit.input: usable = 200K, and 198K > 200K = false ✗
  134. // With 198K used and only 2K headroom, the next turn will overflow.
  135. // Compaction MUST trigger here.
  136. expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true)
  137. },
  138. })
  139. })
  140. test("BUG: without limit.input, same token count correctly triggers compaction", async () => {
  141. await using tmp = await tmpdir()
  142. await Instance.provide({
  143. directory: tmp.path,
  144. fn: async () => {
  145. // Same model but without limit.input — uses context - output instead
  146. const model = createModel({ context: 200_000, output: 32_000 })
  147. // Same token usage as above
  148. const tokens = { input: 180_000, output: 15_000, reasoning: 0, cache: { read: 3_000, write: 0 } }
  149. // count = 198K
  150. // usable = context - output = 200K - 32K = 168K
  151. // 198K > 168K = true → compaction correctly triggered
  152. const result = await SessionCompaction.isOverflow({ tokens, model })
  153. expect(result).toBe(true) // ← Correct: headroom is reserved
  154. },
  155. })
  156. })
  157. test("BUG: asymmetry — limit.input model allows 30K more usage before compaction than equivalent model without it", async () => {
  158. await using tmp = await tmpdir()
  159. await Instance.provide({
  160. directory: tmp.path,
  161. fn: async () => {
  162. // Two models with identical context/output limits, differing only in limit.input
  163. const withInputLimit = createModel({ context: 200_000, input: 200_000, output: 32_000 })
  164. const withoutInputLimit = createModel({ context: 200_000, output: 32_000 })
  165. // 170K total tokens — well above context-output (168K) but below input limit (200K)
  166. const tokens = { input: 166_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } }
  167. const withLimit = await SessionCompaction.isOverflow({ tokens, model: withInputLimit })
  168. const withoutLimit = await SessionCompaction.isOverflow({ tokens, model: withoutInputLimit })
  169. // Both models have identical real capacity — they should agree:
  170. expect(withLimit).toBe(true) // should compact (170K leaves no room for 32K output)
  171. expect(withoutLimit).toBe(true) // correctly compacts (170K > 168K)
  172. },
  173. })
  174. })
  175. test("returns false when model context limit is 0", async () => {
  176. await using tmp = await tmpdir()
  177. await Instance.provide({
  178. directory: tmp.path,
  179. fn: async () => {
  180. const model = createModel({ context: 0, output: 32_000 })
  181. const tokens = { input: 100_000, output: 10_000, reasoning: 0, cache: { read: 0, write: 0 } }
  182. expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
  183. },
  184. })
  185. })
  186. test("returns false when compaction.auto is disabled", async () => {
  187. await using tmp = await tmpdir({
  188. init: async (dir) => {
  189. await Bun.write(
  190. path.join(dir, "opencode.json"),
  191. JSON.stringify({
  192. compaction: { auto: false },
  193. }),
  194. )
  195. },
  196. })
  197. await Instance.provide({
  198. directory: tmp.path,
  199. fn: async () => {
  200. const model = createModel({ context: 100_000, output: 32_000 })
  201. const tokens = { input: 75_000, output: 5_000, reasoning: 0, cache: { read: 0, write: 0 } }
  202. expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(false)
  203. },
  204. })
  205. })
  206. })
  207. describe("util.token.estimate", () => {
  208. test("estimates tokens from text (4 chars per token)", () => {
  209. const text = "x".repeat(4000)
  210. expect(Token.estimate(text)).toBe(1000)
  211. })
  212. test("estimates tokens from larger text", () => {
  213. const text = "y".repeat(20_000)
  214. expect(Token.estimate(text)).toBe(5000)
  215. })
  216. test("returns 0 for empty string", () => {
  217. expect(Token.estimate("")).toBe(0)
  218. })
  219. })
  220. describe("session.getUsage", () => {
  221. test("normalizes standard usage to token format", () => {
  222. const model = createModel({ context: 100_000, output: 32_000 })
  223. const result = Session.getUsage({
  224. model,
  225. usage: {
  226. inputTokens: 1000,
  227. outputTokens: 500,
  228. totalTokens: 1500,
  229. },
  230. })
  231. expect(result.tokens.input).toBe(1000)
  232. expect(result.tokens.output).toBe(500)
  233. expect(result.tokens.reasoning).toBe(0)
  234. expect(result.tokens.cache.read).toBe(0)
  235. expect(result.tokens.cache.write).toBe(0)
  236. })
  237. test("extracts cached tokens to cache.read", () => {
  238. const model = createModel({ context: 100_000, output: 32_000 })
  239. const result = Session.getUsage({
  240. model,
  241. usage: {
  242. inputTokens: 1000,
  243. outputTokens: 500,
  244. totalTokens: 1500,
  245. cachedInputTokens: 200,
  246. },
  247. })
  248. expect(result.tokens.input).toBe(800)
  249. expect(result.tokens.cache.read).toBe(200)
  250. })
  251. test("handles anthropic cache write metadata", () => {
  252. const model = createModel({ context: 100_000, output: 32_000 })
  253. const result = Session.getUsage({
  254. model,
  255. usage: {
  256. inputTokens: 1000,
  257. outputTokens: 500,
  258. totalTokens: 1500,
  259. },
  260. metadata: {
  261. anthropic: {
  262. cacheCreationInputTokens: 300,
  263. },
  264. },
  265. })
  266. expect(result.tokens.cache.write).toBe(300)
  267. })
  268. test("does not subtract cached tokens for anthropic provider", () => {
  269. const model = createModel({ context: 100_000, output: 32_000 })
  270. const result = Session.getUsage({
  271. model,
  272. usage: {
  273. inputTokens: 1000,
  274. outputTokens: 500,
  275. totalTokens: 1500,
  276. cachedInputTokens: 200,
  277. },
  278. metadata: {
  279. anthropic: {},
  280. },
  281. })
  282. expect(result.tokens.input).toBe(1000)
  283. expect(result.tokens.cache.read).toBe(200)
  284. })
  285. test("handles reasoning tokens", () => {
  286. const model = createModel({ context: 100_000, output: 32_000 })
  287. const result = Session.getUsage({
  288. model,
  289. usage: {
  290. inputTokens: 1000,
  291. outputTokens: 500,
  292. totalTokens: 1500,
  293. reasoningTokens: 100,
  294. },
  295. })
  296. expect(result.tokens.reasoning).toBe(100)
  297. })
  298. test("handles undefined optional values gracefully", () => {
  299. const model = createModel({ context: 100_000, output: 32_000 })
  300. const result = Session.getUsage({
  301. model,
  302. usage: {
  303. inputTokens: 0,
  304. outputTokens: 0,
  305. totalTokens: 0,
  306. },
  307. })
  308. expect(result.tokens.input).toBe(0)
  309. expect(result.tokens.output).toBe(0)
  310. expect(result.tokens.reasoning).toBe(0)
  311. expect(result.tokens.cache.read).toBe(0)
  312. expect(result.tokens.cache.write).toBe(0)
  313. expect(Number.isNaN(result.cost)).toBe(false)
  314. })
  315. test("calculates cost correctly", () => {
  316. const model = createModel({
  317. context: 100_000,
  318. output: 32_000,
  319. cost: {
  320. input: 3,
  321. output: 15,
  322. cache: { read: 0.3, write: 3.75 },
  323. },
  324. })
  325. const result = Session.getUsage({
  326. model,
  327. usage: {
  328. inputTokens: 1_000_000,
  329. outputTokens: 100_000,
  330. totalTokens: 1_100_000,
  331. },
  332. })
  333. expect(result.cost).toBe(3 + 1.5)
  334. })
  335. test.each(["@ai-sdk/anthropic", "@ai-sdk/amazon-bedrock", "@ai-sdk/google-vertex/anthropic"])(
  336. "computes total from components for %s models",
  337. (npm) => {
  338. const model = createModel({ context: 100_000, output: 32_000, npm })
  339. const usage = {
  340. inputTokens: 1000,
  341. outputTokens: 500,
  342. // These providers typically report total as input + output only,
  343. // excluding cache read/write.
  344. totalTokens: 1500,
  345. cachedInputTokens: 200,
  346. }
  347. if (npm === "@ai-sdk/amazon-bedrock") {
  348. const result = Session.getUsage({
  349. model,
  350. usage,
  351. metadata: {
  352. bedrock: {
  353. usage: {
  354. cacheWriteInputTokens: 300,
  355. },
  356. },
  357. },
  358. })
  359. expect(result.tokens.input).toBe(1000)
  360. expect(result.tokens.cache.read).toBe(200)
  361. expect(result.tokens.cache.write).toBe(300)
  362. expect(result.tokens.total).toBe(2000)
  363. return
  364. }
  365. const result = Session.getUsage({
  366. model,
  367. usage,
  368. metadata: {
  369. anthropic: {
  370. cacheCreationInputTokens: 300,
  371. },
  372. },
  373. })
  374. expect(result.tokens.input).toBe(1000)
  375. expect(result.tokens.cache.read).toBe(200)
  376. expect(result.tokens.cache.write).toBe(300)
  377. expect(result.tokens.total).toBe(2000)
  378. },
  379. )
  380. })