task.test.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. import { afterEach, describe, expect } from "bun:test"
  2. import { Effect, Layer } from "effect"
  3. import { Agent } from "../../src/agent/agent"
  4. import { Config } from "../../src/config"
  5. import * as CrossSpawnSpawner from "../../src/effect/cross-spawn-spawner"
  6. import { Instance } from "../../src/project/instance"
  7. import { Session } from "../../src/session"
  8. import { MessageV2 } from "../../src/session"
  9. import type { SessionPrompt } from "../../src/session"
  10. import { MessageID, PartID } from "../../src/session/schema"
  11. import { ModelID, ProviderID } from "../../src/provider/schema"
  12. import { TaskTool, type TaskPromptOps } from "../../src/tool/task"
  13. import { Truncate } from "../../src/tool/truncate"
  14. import { ToolRegistry } from "../../src/tool/registry"
  15. import { provideTmpdirInstance } from "../fixture/fixture"
  16. import { testEffect } from "../lib/effect"
  // After every test, dispose all project instances that the test may have created.
  afterEach(async function disposeInstances() {
    await Instance.disposeAll()
  })
  // Provider/model reference reused by the seeded messages and the stubbed replies below.
  const testProviderID = ProviderID.make("test")
  const testModelID = ModelID.make("test-model")
  const ref = { providerID: testProviderID, modelID: testModelID }
  // Default layers for every service these tests resolve, merged into one layer
  // and handed to `testEffect` to produce the `it` test runner.
  const taskToolTestLayer = Layer.mergeAll(
    Agent.defaultLayer,
    Config.defaultLayer,
    CrossSpawnSpawner.defaultLayer,
    Session.defaultLayer,
    Truncate.defaultLayer,
    ToolRegistry.defaultLayer,
  )
  const it = testEffect(taskToolTestLayer)
  /**
   * Creates a session and stores two messages in it: a user message, then an
   * assistant message whose `parentID` points at that user message.
   *
   * @param title Title given to the created session (defaults to "Pinned").
   * @returns The created session as `chat` and the stored assistant message as `assistant`.
   */
  const seed = Effect.fn("TaskToolTest.seed")(function* (title = "Pinned") {
    const sessions = yield* Session.Service
    const chat = yield* sessions.create({ title })
    const sessionID = chat.id

    // The user message is written first so its id is generated before the assistant's.
    const userMessage = yield* sessions.updateMessage({
      id: MessageID.ascending(),
      role: "user",
      sessionID,
      agent: "build",
      model: ref,
      time: { created: Date.now() },
    })

    const { modelID, providerID } = ref
    const assistant: MessageV2.Assistant = {
      id: MessageID.ascending(),
      role: "assistant",
      parentID: userMessage.id,
      sessionID,
      mode: "build",
      agent: "build",
      cost: 0,
      path: { cwd: "/tmp", root: "/tmp" },
      tokens: { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } },
      modelID,
      providerID,
      time: { created: Date.now() },
    }
    yield* sessions.updateMessage(assistant)

    return { chat, assistant }
  })
  /**
   * Builds a `TaskPromptOps` stub for the tests.
   *
   * - `cancel` does nothing.
   * - `resolvePromptParts` wraps the template in a single text part.
   * - `prompt` calls `opts.onPrompt` (when given) with the prompt input, then
   *   returns a canned assistant reply whose text is `opts.text` or "done".
   *
   * @param opts Optional observer callback and reply text.
   */
  function stubOps(opts?: { onPrompt?: (input: SessionPrompt.PromptInput) => void; text?: string }): TaskPromptOps {
    const ops: TaskPromptOps = {
      cancel() {},
      resolvePromptParts: (template) => {
        return Effect.succeed([{ type: "text" as const, text: template }])
      },
      prompt: (input) => {
        return Effect.sync(() => {
          // Let the test observe the prompt before the canned reply is produced.
          if (opts?.onPrompt != null) {
            opts.onPrompt(input)
          }
          const replyText = opts?.text ?? "done"
          return reply(input, replyText)
        })
      },
    }
    return ops
  }
  /**
   * Fabricates an assistant message with a single text part, addressed to the
   * session named in the prompt input.
   *
   * Agent, model and provider come from the input when present; otherwise the
   * agent falls back to "general" and the model/provider fall back to `ref`.
   *
   * @param input Prompt input the reply answers.
   * @param text Text placed in the reply's only part.
   * @returns The message info together with its parts.
   */
  function reply(input: SessionPrompt.PromptInput, text: string): MessageV2.WithParts {
    const messageID = MessageID.ascending()
    const agentName = input.agent ?? "general"

    const info: MessageV2.Assistant = {
      id: messageID,
      role: "assistant",
      parentID: input.messageID ?? MessageID.ascending(),
      sessionID: input.sessionID,
      mode: agentName,
      agent: agentName,
      cost: 0,
      path: { cwd: "/tmp", root: "/tmp" },
      tokens: { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } },
      modelID: input.model?.modelID ?? ref.modelID,
      providerID: input.model?.providerID ?? ref.providerID,
      time: { created: Date.now() },
      finish: "stop",
    }

    const parts: MessageV2.WithParts["parts"] = [
      {
        id: PartID.ascending(),
        messageID,
        sessionID: input.sessionID,
        type: "text",
        text,
      },
    ]

    return { info, parts }
  }
  102. describe("tool.task", () => {
  // Reads the task tool description twice for the "build" agent and checks that
  // (a) both reads are identical and (b) the alpha, explore, general and zebra
  // entries appear in that order, even though the config declares zebra first.
  it.live("description sorts subagents by name and is stable across calls", () =>
    provideTmpdirInstance(
      () =>
        Effect.gen(function* () {
          const agents = yield* Agent.Service
          const caller = yield* agents.get("build")
          const registry = yield* ToolRegistry.Service

          const describeTaskTool = Effect.fnUntraced(function* () {
            const available = yield* registry.tools({ ...ref, agent: caller })
            const taskTool = available.find((candidate) => candidate.id === TaskTool.id)
            return taskTool?.description ?? ""
          })

          const first = yield* describeTaskTool()
          const second = yield* describeTaskTool()
          expect(first).toBe(second)

          // Each marker must be found, and must sit after the previous one.
          const markers = ["- alpha: Alpha agent", "- explore:", "- general:", "- zebra: Zebra agent"]
          const offsets = markers.map((marker) => first.indexOf(marker))
          let previous = -1
          for (const offset of offsets) {
            expect(offset).toBeGreaterThan(previous)
            previous = offset
          }
        }),
      {
        config: {
          agent: {
            zebra: {
              description: "Zebra agent",
              mode: "subagent",
            },
            alpha: {
              description: "Alpha agent",
              mode: "subagent",
            },
          },
        },
      },
    ),
  )
  // With the `task` permission set to allow "*" but deny "zebra", the task tool
  // description for the "build" agent must list alpha and must not list zebra.
  it.live("description hides denied subagents for the caller", () =>
    provideTmpdirInstance(
      () =>
        Effect.gen(function* () {
          const agents = yield* Agent.Service
          const caller = yield* agents.get("build")
          const registry = yield* ToolRegistry.Service

          const available = yield* registry.tools({ ...ref, agent: caller })
          const taskTool = available.find((candidate) => candidate.id === TaskTool.id)
          const listing = taskTool?.description ?? ""

          expect(listing).toContain("- alpha: Alpha agent")
          expect(listing).not.toContain("- zebra: Zebra agent")
        }),
      {
        config: {
          permission: {
            task: {
              "*": "allow",
              zebra: "deny",
            },
          },
          agent: {
            zebra: {
              description: "Zebra agent",
              mode: "subagent",
            },
            alpha: {
              description: "Alpha agent",
              mode: "subagent",
            },
          },
        },
      },
    ),
  )
  // Passing the id of an existing child session as `task_id` must reuse that
  // session: the parent still has exactly one child, and the result metadata,
  // the output text and the prompt input all reference the existing child id.
  it.live("execute resumes an existing task session from task_id", () =>
    provideTmpdirInstance(() =>
      Effect.gen(function* () {
        const sessions = yield* Session.Service
        const { chat, assistant } = yield* seed()
        const existing = yield* sessions.create({ parentID: chat.id, title: "Existing child" })
        const taskTool = yield* TaskTool
        const definition = yield* taskTool.init()

        // Every prompt input handed to the stub is recorded; the last one is inspected below.
        const prompts: SessionPrompt.PromptInput[] = []
        const promptOps = stubOps({
          text: "resumed",
          onPrompt: (input) => {
            prompts.push(input)
          },
        })

        const request = {
          description: "inspect bug",
          prompt: "look into the cache key path",
          subagent_type: "general",
          task_id: existing.id,
        }
        const result = yield* definition.execute(request, {
          sessionID: chat.id,
          messageID: assistant.id,
          agent: "build",
          abort: new AbortController().signal,
          extra: { promptOps },
          messages: [],
          metadata: () => Effect.void,
          ask: () => Effect.void,
        })

        const children = yield* sessions.children(chat.id)
        expect(children).toHaveLength(1)
        expect(children[0]?.id).toBe(existing.id)
        expect(result.metadata.sessionId).toBe(existing.id)
        expect(result.output).toContain(`task_id: ${existing.id}`)
        const lastPrompt = prompts[prompts.length - 1]
        expect(lastPrompt?.sessionID).toBe(existing.id)
      }),
    ),
  )
  // Runs the task tool twice with the same arguments: once with only
  // `promptOps` in `extra`, and once with `bypassAgentCheck: true` added.
  // Across both runs `ask` must have been invoked exactly once, with a `task`
  // permission request for the "general" subagent.
  it.live("execute asks by default and skips checks when bypassed", () =>
    provideTmpdirInstance(() =>
      Effect.gen(function* () {
        const { chat, assistant } = yield* seed()
        const tool = yield* TaskTool
        const def = yield* tool.init()
        // Every input passed to `ask` is recorded here for the assertions below.
        const calls: unknown[] = []
        const promptOps = stubOps()
        // The only extra ever supplied is the bypass flag, so the parameter is
        // typed precisely rather than as an unchecked `Record<string, any>`.
        const exec = (extra?: { bypassAgentCheck?: boolean }) =>
          def.execute(
            {
              description: "inspect bug",
              prompt: "look into the cache key path",
              subagent_type: "general",
            },
            {
              sessionID: chat.id,
              messageID: assistant.id,
              agent: "build",
              abort: new AbortController().signal,
              extra: { promptOps, ...extra },
              messages: [],
              metadata: () => Effect.void,
              ask: (input) =>
                Effect.sync(() => {
                  calls.push(input)
                }),
            },
          )
        yield* exec()
        yield* exec({ bypassAgentCheck: true })
        expect(calls).toHaveLength(1)
        expect(calls[0]).toEqual({
          permission: "task",
          patterns: ["general"],
          always: ["*"],
          metadata: {
            description: "inspect bug",
            subagent_type: "general",
          },
        })
      }),
    ),
  )
  // Passing a `task_id` that matches no session ("ses_missing") must create a
  // new child: the parent ends up with exactly one child whose id is the one
  // reported in the result metadata, the output and the prompt input, and that
  // id is not "ses_missing".
  it.live("execute creates a child when task_id does not exist", () =>
    provideTmpdirInstance(() =>
      Effect.gen(function* () {
        const sessions = yield* Session.Service
        const { chat, assistant } = yield* seed()
        const taskTool = yield* TaskTool
        const definition = yield* taskTool.init()

        // Every prompt input handed to the stub is recorded; the last one is inspected below.
        const prompts: SessionPrompt.PromptInput[] = []
        const promptOps = stubOps({
          text: "created",
          onPrompt: (input) => {
            prompts.push(input)
          },
        })

        const request = {
          description: "inspect bug",
          prompt: "look into the cache key path",
          subagent_type: "general",
          task_id: "ses_missing",
        }
        const result = yield* definition.execute(request, {
          sessionID: chat.id,
          messageID: assistant.id,
          agent: "build",
          abort: new AbortController().signal,
          extra: { promptOps },
          messages: [],
          metadata: () => Effect.void,
          ask: () => Effect.void,
        })

        const children = yield* sessions.children(chat.id)
        expect(children).toHaveLength(1)
        expect(children[0]?.id).toBe(result.metadata.sessionId)
        expect(result.metadata.sessionId).not.toBe("ses_missing")
        expect(result.output).toContain(`task_id: ${result.metadata.sessionId}`)
        const lastPrompt = prompts[prompts.length - 1]
        expect(lastPrompt?.sessionID).toBe(result.metadata.sessionId)
      }),
    ),
  )
  // Config: a "reviewer" subagent with `task` allowed, and
  // `experimental.primary_tools` set to bash and read. After running the task
  // tool for that subagent, the test checks the created child session's parent,
  // its stored permission rules, and the `tools` map passed to the prompt.
  it.live("execute shapes child permissions for task, todowrite, and primary tools", () =>
    provideTmpdirInstance(
      () =>
        Effect.gen(function* () {
          const sessions = yield* Session.Service
          const { chat, assistant } = yield* seed()
          const taskTool = yield* TaskTool
          const definition = yield* taskTool.init()

          // Every prompt input handed to the stub is recorded; the last one is inspected below.
          const prompts: SessionPrompt.PromptInput[] = []
          const promptOps = stubOps({
            onPrompt: (input) => {
              prompts.push(input)
            },
          })

          const request = {
            description: "inspect bug",
            prompt: "look into the cache key path",
            subagent_type: "reviewer",
          }
          const result = yield* definition.execute(request, {
            sessionID: chat.id,
            messageID: assistant.id,
            agent: "build",
            abort: new AbortController().signal,
            extra: { promptOps },
            messages: [],
            metadata: () => Effect.void,
            ask: () => Effect.void,
          })

          const child = yield* sessions.get(result.metadata.sessionId)
          expect(child.parentID).toBe(chat.id)

          // Expected rules all use the "*" pattern; only the permission name and action vary.
          const rule = (permission: string, action: "allow" | "deny") => ({ permission, pattern: "*", action })
          expect(child.permission).toEqual([rule("todowrite", "deny"), rule("bash", "allow"), rule("read", "allow")])

          const lastPrompt = prompts[prompts.length - 1]
          expect(lastPrompt?.tools).toEqual({
            todowrite: false,
            bash: false,
            read: false,
          })
        }),
      {
        config: {
          agent: {
            reviewer: {
              mode: "subagent",
              permission: {
                task: "allow",
              },
            },
          },
          experimental: {
            primary_tools: ["bash", "read"],
          },
        },
      },
    ),
  )
  362. })