langfuse-trace.test.ts 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997
  1. import { describe, expect, test, vi, beforeEach, afterEach } from "vitest";
  2. // Mock the langfuse modules at the top level
  3. const mockStartObservation = vi.fn();
  4. const mockPropagateAttributes = vi.fn();
  5. const mockSpanEnd = vi.fn();
  6. const mockGenerationEnd = vi.fn();
  7. const mockGenerationUpdate = vi.fn();
  8. const mockGuardSpanEnd = vi.fn();
  9. const mockEventEnd = vi.fn();
  10. const mockGeneration: any = {
  11. update: (...args: unknown[]) => {
  12. mockGenerationUpdate(...args);
  13. return mockGeneration;
  14. },
  15. end: mockGenerationEnd,
  16. };
  17. const mockGuardSpan: any = {
  18. end: mockGuardSpanEnd,
  19. };
  20. const mockEventObs: any = {
  21. end: mockEventEnd,
  22. };
  23. const mockUpdateTrace = vi.fn();
  24. const mockRootSpan = {
  25. startObservation: vi.fn(),
  26. updateTrace: mockUpdateTrace,
  27. end: mockSpanEnd,
  28. };
  29. // Default: route by observation name
  30. function setupDefaultStartObservation() {
  31. mockRootSpan.startObservation.mockImplementation((name: string) => {
  32. if (name === "guard-pipeline") return mockGuardSpan;
  33. if (name === "provider-attempt") return mockEventObs;
  34. return mockGeneration; // "llm-call"
  35. });
  36. }
  37. vi.mock("@langfuse/tracing", () => ({
  38. startObservation: (...args: unknown[]) => {
  39. mockStartObservation(...args);
  40. return mockRootSpan;
  41. },
  42. propagateAttributes: async (attrs: unknown, fn: () => Promise<void>) => {
  43. mockPropagateAttributes(attrs);
  44. await fn();
  45. },
  46. }));
  47. vi.mock("@/lib/logger", () => ({
  48. logger: {
  49. info: vi.fn(),
  50. warn: vi.fn(),
  51. error: vi.fn(),
  52. debug: vi.fn(),
  53. },
  54. }));
  55. let langfuseEnabled = true;
  56. vi.mock("@/lib/langfuse/index", () => ({
  57. isLangfuseEnabled: () => langfuseEnabled,
  58. }));
  59. function createMockSession(overrides: Record<string, unknown> = {}) {
  60. const startTime = (overrides.startTime as number) ?? Date.now() - 500;
  61. return {
  62. startTime,
  63. method: "POST",
  64. headers: new Headers({
  65. "content-type": "application/json",
  66. "x-api-key": "test-mock-key-not-real",
  67. "user-agent": "claude-code/1.0",
  68. }),
  69. request: {
  70. message: {
  71. model: "claude-sonnet-4-20250514",
  72. messages: [{ role: "user", content: "Hello" }],
  73. stream: true,
  74. max_tokens: 4096,
  75. tools: [{ name: "tool1" }],
  76. },
  77. model: "claude-sonnet-4-20250514",
  78. },
  79. originalFormat: "claude",
  80. userAgent: "claude-code/1.0",
  81. sessionId: "sess_abc12345_def67890",
  82. provider: {
  83. id: 1,
  84. name: "anthropic-main",
  85. providerType: "claude",
  86. },
  87. messageContext: {
  88. id: 42,
  89. user: { id: 7, name: "testuser" },
  90. key: { name: "default-key" },
  91. },
  92. ttfbMs: 200,
  93. forwardStartTime: startTime + 5,
  94. forwardedRequestBody: null,
  95. getEndpoint: () => "/v1/messages",
  96. getRequestSequence: () => 3,
  97. getMessagesLength: () => 1,
  98. getCurrentModel: () => "claude-sonnet-4-20250514",
  99. getOriginalModel: () => "claude-sonnet-4-20250514",
  100. isModelRedirected: () => false,
  101. getProviderChain: () => [
  102. {
  103. id: 1,
  104. name: "anthropic-main",
  105. providerType: "claude",
  106. reason: "initial_selection",
  107. timestamp: startTime + 2,
  108. },
  109. ],
  110. getSpecialSettings: () => null,
  111. getCacheTtlResolved: () => null,
  112. getContext1mApplied: () => false,
  113. getGroupCostMultiplier: () => 1,
  114. ...overrides,
  115. } as any;
  116. }
  117. describe("traceProxyRequest", () => {
  118. beforeEach(() => {
  119. vi.clearAllMocks();
  120. langfuseEnabled = true;
  121. setupDefaultStartObservation();
  122. });
  123. test("should not trace when Langfuse is disabled", async () => {
  124. langfuseEnabled = false;
  125. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  126. await traceProxyRequest({
  127. session: createMockSession(),
  128. responseHeaders: new Headers(),
  129. durationMs: 500,
  130. statusCode: 200,
  131. isStreaming: false,
  132. });
  133. expect(mockStartObservation).not.toHaveBeenCalled();
  134. });
  135. test("should trace when Langfuse is enabled with actual bodies", async () => {
  136. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  137. const responseBody = { content: "Hi there" };
  138. await traceProxyRequest({
  139. session: createMockSession(),
  140. responseHeaders: new Headers({ "content-type": "application/json" }),
  141. durationMs: 500,
  142. statusCode: 200,
  143. isStreaming: false,
  144. responseText: JSON.stringify(responseBody),
  145. });
  146. // Root span should have actual request body as input (not summary)
  147. const rootCall = mockStartObservation.mock.calls[0];
  148. expect(rootCall[0]).toBe("proxy-request");
  149. // Input should be the actual request message (since forwardedRequestBody is null)
  150. expect(rootCall[1].input).toEqual(
  151. expect.objectContaining({
  152. model: "claude-sonnet-4-20250514",
  153. messages: expect.any(Array),
  154. })
  155. );
  156. // Output should be actual response body
  157. expect(rootCall[1].output).toEqual(responseBody);
  158. // Should have level
  159. expect(rootCall[1].level).toBe("DEFAULT");
  160. // Should have metadata with former summaries
  161. expect(rootCall[1].metadata).toEqual(
  162. expect.objectContaining({
  163. endpoint: "/v1/messages",
  164. method: "POST",
  165. statusCode: 200,
  166. durationMs: 500,
  167. })
  168. );
  169. // Should have child observations
  170. const callNames = mockRootSpan.startObservation.mock.calls.map((c: unknown[]) => c[0]);
  171. expect(callNames).toContain("guard-pipeline");
  172. expect(callNames).toContain("llm-call");
  173. expect(mockSpanEnd).toHaveBeenCalledWith(expect.any(Date));
  174. expect(mockGenerationEnd).toHaveBeenCalledWith(expect.any(Date));
  175. });
  176. test("should use actual request messages as generation input", async () => {
  177. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  178. const session = createMockSession();
  179. await traceProxyRequest({
  180. session,
  181. responseHeaders: new Headers(),
  182. durationMs: 500,
  183. statusCode: 200,
  184. isStreaming: false,
  185. responseText: '{"content": "response"}',
  186. });
  187. // Find the llm-call invocation
  188. const llmCall = mockRootSpan.startObservation.mock.calls.find(
  189. (c: unknown[]) => c[0] === "llm-call"
  190. );
  191. expect(llmCall).toBeDefined();
  192. expect(llmCall[1].input).toEqual(session.request.message);
  193. });
  194. test("should use actual response body as generation output", async () => {
  195. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  196. const responseBody = { content: [{ type: "text", text: "Hello!" }] };
  197. await traceProxyRequest({
  198. session: createMockSession(),
  199. responseHeaders: new Headers(),
  200. durationMs: 500,
  201. statusCode: 200,
  202. isStreaming: false,
  203. responseText: JSON.stringify(responseBody),
  204. });
  205. const llmCall = mockRootSpan.startObservation.mock.calls.find(
  206. (c: unknown[]) => c[0] === "llm-call"
  207. );
  208. expect(llmCall[1].output).toEqual(responseBody);
  209. });
  210. test("should pass raw headers without redaction", async () => {
  211. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  212. await traceProxyRequest({
  213. session: createMockSession(),
  214. responseHeaders: new Headers({ "x-api-key": "secret-mock" }),
  215. durationMs: 500,
  216. statusCode: 200,
  217. isStreaming: false,
  218. });
  219. const llmCall = mockRootSpan.startObservation.mock.calls.find(
  220. (c: unknown[]) => c[0] === "llm-call"
  221. );
  222. const metadata = llmCall[1].metadata;
  223. expect(metadata.requestHeaders["x-api-key"]).toBe("test-mock-key-not-real");
  224. expect(metadata.requestHeaders["content-type"]).toBe("application/json");
  225. expect(metadata.responseHeaders["x-api-key"]).toBe("secret-mock");
  226. });
  227. test("should include provider name and model in tags", async () => {
  228. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  229. await traceProxyRequest({
  230. session: createMockSession(),
  231. responseHeaders: new Headers(),
  232. durationMs: 500,
  233. statusCode: 200,
  234. isStreaming: false,
  235. });
  236. expect(mockPropagateAttributes).toHaveBeenCalledWith(
  237. expect.objectContaining({
  238. userId: "testuser",
  239. sessionId: "sess_abc12345_def67890",
  240. tags: expect.arrayContaining([
  241. "claude",
  242. "anthropic-main",
  243. "claude-sonnet-4-20250514",
  244. "2xx",
  245. ]),
  246. })
  247. );
  248. });
  249. test("should include usage details when provided", async () => {
  250. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  251. await traceProxyRequest({
  252. session: createMockSession(),
  253. responseHeaders: new Headers(),
  254. durationMs: 500,
  255. statusCode: 200,
  256. isStreaming: false,
  257. usageMetrics: {
  258. input_tokens: 100,
  259. output_tokens: 50,
  260. cache_read_input_tokens: 20,
  261. },
  262. costUsd: "0.0015",
  263. });
  264. const llmCall = mockRootSpan.startObservation.mock.calls.find(
  265. (c: unknown[]) => c[0] === "llm-call"
  266. );
  267. expect(llmCall[1].usageDetails).toEqual({
  268. input: 100,
  269. output: 50,
  270. cache_read_input_tokens: 20,
  271. });
  272. expect(llmCall[1].costDetails).toEqual({
  273. total: 0.0015,
  274. });
  275. });
  276. test("should include providerChain, specialSettings, and model in metadata", async () => {
  277. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  278. const providerChain = [
  279. {
  280. id: 1,
  281. name: "anthropic-main",
  282. providerType: "claude",
  283. reason: "initial_selection",
  284. timestamp: Date.now(),
  285. },
  286. ];
  287. await traceProxyRequest({
  288. session: createMockSession({
  289. getSpecialSettings: () => ({ maxThinking: 8192 }),
  290. getProviderChain: () => providerChain,
  291. }),
  292. responseHeaders: new Headers(),
  293. durationMs: 500,
  294. statusCode: 200,
  295. isStreaming: false,
  296. });
  297. const llmCall = mockRootSpan.startObservation.mock.calls.find(
  298. (c: unknown[]) => c[0] === "llm-call"
  299. );
  300. const metadata = llmCall[1].metadata;
  301. expect(metadata.providerChain).toEqual(providerChain);
  302. expect(metadata.specialSettings).toEqual({ maxThinking: 8192 });
  303. expect(metadata.model).toBe("claude-sonnet-4-20250514");
  304. expect(metadata.originalModel).toBe("claude-sonnet-4-20250514");
  305. expect(metadata.providerName).toBe("anthropic-main");
  306. expect(metadata.requestSummary).toEqual(
  307. expect.objectContaining({
  308. model: "claude-sonnet-4-20250514",
  309. messageCount: 1,
  310. })
  311. );
  312. });
  313. test("should handle model redirect metadata", async () => {
  314. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  315. await traceProxyRequest({
  316. session: createMockSession({
  317. isModelRedirected: () => true,
  318. getOriginalModel: () => "claude-sonnet-4-20250514",
  319. getCurrentModel: () => "glm-4",
  320. request: {
  321. message: { model: "glm-4", messages: [] },
  322. model: "glm-4",
  323. },
  324. }),
  325. responseHeaders: new Headers(),
  326. durationMs: 500,
  327. statusCode: 200,
  328. isStreaming: false,
  329. });
  330. const llmCall = mockRootSpan.startObservation.mock.calls.find(
  331. (c: unknown[]) => c[0] === "llm-call"
  332. );
  333. expect(llmCall[1].metadata.modelRedirected).toBe(true);
  334. expect(llmCall[1].metadata.originalModel).toBe("claude-sonnet-4-20250514");
  335. });
  336. test("should set completionStartTime from ttfbMs", async () => {
  337. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  338. const startTime = Date.now() - 500;
  339. await traceProxyRequest({
  340. session: createMockSession({ startTime, ttfbMs: 200 }),
  341. responseHeaders: new Headers(),
  342. durationMs: 500,
  343. statusCode: 200,
  344. isStreaming: false,
  345. });
  346. expect(mockGenerationUpdate).toHaveBeenCalledWith({
  347. completionStartTime: new Date(startTime + 200),
  348. });
  349. });
  350. test("should pass correct startTime and endTime to observations", async () => {
  351. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  352. const startTime = 1700000000000;
  353. const durationMs = 5000;
  354. await traceProxyRequest({
  355. session: createMockSession({ startTime, forwardStartTime: startTime + 5 }),
  356. responseHeaders: new Headers(),
  357. durationMs,
  358. statusCode: 200,
  359. isStreaming: false,
  360. });
  361. const expectedStart = new Date(startTime);
  362. const expectedEnd = new Date(startTime + durationMs);
  363. const expectedForwardStart = new Date(startTime + 5);
  364. // Root span gets startTime in options (3rd arg)
  365. expect(mockStartObservation).toHaveBeenCalledWith("proxy-request", expect.any(Object), {
  366. startTime: expectedStart,
  367. });
  368. // Generation gets forwardStartTime in options (3rd arg)
  369. const llmCall = mockRootSpan.startObservation.mock.calls.find(
  370. (c: unknown[]) => c[0] === "llm-call"
  371. );
  372. expect(llmCall[2]).toEqual({
  373. asType: "generation",
  374. startTime: expectedForwardStart,
  375. });
  376. // Both end() calls receive the computed endTime
  377. expect(mockGenerationEnd).toHaveBeenCalledWith(expectedEnd);
  378. expect(mockSpanEnd).toHaveBeenCalledWith(expectedEnd);
  379. });
  380. test("should handle errors gracefully without throwing", async () => {
  381. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  382. // Make startObservation throw
  383. mockStartObservation.mockImplementationOnce(() => {
  384. throw new Error("SDK error");
  385. });
  386. await expect(
  387. traceProxyRequest({
  388. session: createMockSession(),
  389. responseHeaders: new Headers(),
  390. durationMs: 500,
  391. statusCode: 200,
  392. isStreaming: false,
  393. })
  394. ).resolves.toBeUndefined();
  395. });
  396. test("should include correct tags for error responses", async () => {
  397. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  398. await traceProxyRequest({
  399. session: createMockSession(),
  400. responseHeaders: new Headers(),
  401. durationMs: 500,
  402. statusCode: 502,
  403. isStreaming: false,
  404. errorMessage: "upstream error",
  405. });
  406. expect(mockPropagateAttributes).toHaveBeenCalledWith(
  407. expect.objectContaining({
  408. tags: expect.arrayContaining(["5xx"]),
  409. })
  410. );
  411. });
  412. test("should pass large input/output without truncation", async () => {
  413. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  414. // Generate a large response text
  415. const largeContent = "x".repeat(200_000);
  416. await traceProxyRequest({
  417. session: createMockSession(),
  418. responseHeaders: new Headers(),
  419. durationMs: 500,
  420. statusCode: 200,
  421. isStreaming: false,
  422. responseText: largeContent,
  423. });
  424. const llmCall = mockRootSpan.startObservation.mock.calls.find(
  425. (c: unknown[]) => c[0] === "llm-call"
  426. );
  427. const output = llmCall[1].output as string;
  428. // Should be the full content, no truncation
  429. expect(output).toBe(largeContent);
  430. expect(output).not.toContain("...[truncated]");
  431. });
  432. test("should show streaming output with sseEventCount when no responseText", async () => {
  433. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  434. await traceProxyRequest({
  435. session: createMockSession(),
  436. responseHeaders: new Headers(),
  437. durationMs: 500,
  438. statusCode: 200,
  439. isStreaming: true,
  440. sseEventCount: 42,
  441. });
  442. const llmCall = mockRootSpan.startObservation.mock.calls.find(
  443. (c: unknown[]) => c[0] === "llm-call"
  444. );
  445. expect(llmCall[1].output).toEqual({
  446. streaming: true,
  447. sseEventCount: 42,
  448. });
  449. });
  450. test("should include costUsd in root span metadata", async () => {
  451. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  452. await traceProxyRequest({
  453. session: createMockSession(),
  454. responseHeaders: new Headers(),
  455. durationMs: 500,
  456. statusCode: 200,
  457. isStreaming: false,
  458. costUsd: "0.05",
  459. });
  460. const rootCall = mockStartObservation.mock.calls[0];
  461. expect(rootCall[1].metadata).toEqual(
  462. expect.objectContaining({
  463. costUsd: "0.05",
  464. })
  465. );
  466. });
  467. test("should set trace-level input/output via updateTrace with actual bodies", async () => {
  468. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  469. const responseBody = { result: "ok" };
  470. await traceProxyRequest({
  471. session: createMockSession(),
  472. responseHeaders: new Headers(),
  473. durationMs: 500,
  474. statusCode: 200,
  475. isStreaming: false,
  476. responseText: JSON.stringify(responseBody),
  477. costUsd: "0.05",
  478. });
  479. expect(mockUpdateTrace).toHaveBeenCalledWith({
  480. input: expect.objectContaining({
  481. model: "claude-sonnet-4-20250514",
  482. messages: expect.any(Array),
  483. }),
  484. output: responseBody,
  485. });
  486. });
  487. // --- New tests for multi-span hierarchy ---
  488. test("should create guard-pipeline span with correct timing", async () => {
  489. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  490. const startTime = 1700000000000;
  491. const forwardStartTime = startTime + 8; // 8ms guard pipeline
  492. await traceProxyRequest({
  493. session: createMockSession({ startTime, forwardStartTime }),
  494. responseHeaders: new Headers(),
  495. durationMs: 500,
  496. statusCode: 200,
  497. isStreaming: false,
  498. });
  499. const guardCall = mockRootSpan.startObservation.mock.calls.find(
  500. (c: unknown[]) => c[0] === "guard-pipeline"
  501. );
  502. expect(guardCall).toBeDefined();
  503. expect(guardCall[1]).toEqual({
  504. output: { durationMs: 8, passed: true },
  505. });
  506. expect(guardCall[2]).toEqual({ startTime: new Date(startTime) });
  507. // Guard span should end at forwardStartTime
  508. expect(mockGuardSpanEnd).toHaveBeenCalledWith(new Date(forwardStartTime));
  509. });
  510. test("should skip guard-pipeline span when forwardStartTime is null", async () => {
  511. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  512. await traceProxyRequest({
  513. session: createMockSession({ forwardStartTime: null }),
  514. responseHeaders: new Headers(),
  515. durationMs: 500,
  516. statusCode: 200,
  517. isStreaming: false,
  518. });
  519. const guardCall = mockRootSpan.startObservation.mock.calls.find(
  520. (c: unknown[]) => c[0] === "guard-pipeline"
  521. );
  522. expect(guardCall).toBeUndefined();
  523. expect(mockGuardSpanEnd).not.toHaveBeenCalled();
  524. });
  525. test("should create provider-attempt events for failed chain items", async () => {
  526. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  527. const startTime = 1700000000000;
  528. const failTimestamp = startTime + 100;
  529. await traceProxyRequest({
  530. session: createMockSession({
  531. startTime,
  532. getProviderChain: () => [
  533. {
  534. id: 1,
  535. name: "provider-a",
  536. providerType: "claude",
  537. reason: "retry_failed",
  538. errorMessage: "502 Bad Gateway",
  539. statusCode: 502,
  540. attemptNumber: 1,
  541. timestamp: failTimestamp,
  542. },
  543. {
  544. id: 2,
  545. name: "provider-b",
  546. providerType: "claude",
  547. reason: "system_error",
  548. errorMessage: "ECONNREFUSED",
  549. timestamp: failTimestamp + 50,
  550. },
  551. {
  552. id: 3,
  553. name: "provider-c",
  554. providerType: "claude",
  555. reason: "request_success",
  556. timestamp: failTimestamp + 200,
  557. },
  558. ],
  559. }),
  560. responseHeaders: new Headers(),
  561. durationMs: 500,
  562. statusCode: 200,
  563. isStreaming: false,
  564. });
  565. const eventCalls = mockRootSpan.startObservation.mock.calls.filter(
  566. (c: unknown[]) => c[0] === "provider-attempt"
  567. );
  568. // 2 failed items (retry_failed + system_error), success is skipped
  569. expect(eventCalls).toHaveLength(2);
  570. // First event: retry_failed -> WARNING level
  571. expect(eventCalls[0][1]).toEqual(
  572. expect.objectContaining({
  573. level: "WARNING",
  574. input: expect.objectContaining({
  575. providerId: 1,
  576. providerName: "provider-a",
  577. attempt: 1,
  578. }),
  579. output: expect.objectContaining({
  580. reason: "retry_failed",
  581. errorMessage: "502 Bad Gateway",
  582. statusCode: 502,
  583. }),
  584. })
  585. );
  586. expect(eventCalls[0][2]).toEqual({
  587. asType: "event",
  588. startTime: new Date(failTimestamp),
  589. });
  590. // Second event: system_error -> ERROR level
  591. expect(eventCalls[1][1].level).toBe("ERROR");
  592. expect(eventCalls[1][1].output.reason).toBe("system_error");
  593. });
  594. test("should set generation startTime to forwardStartTime", async () => {
  595. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  596. const startTime = 1700000000000;
  597. const forwardStartTime = startTime + 10;
  598. await traceProxyRequest({
  599. session: createMockSession({ startTime, forwardStartTime }),
  600. responseHeaders: new Headers(),
  601. durationMs: 500,
  602. statusCode: 200,
  603. isStreaming: false,
  604. });
  605. const llmCall = mockRootSpan.startObservation.mock.calls.find(
  606. (c: unknown[]) => c[0] === "llm-call"
  607. );
  608. expect(llmCall[2]).toEqual({
  609. asType: "generation",
  610. startTime: new Date(forwardStartTime),
  611. });
  612. });
  613. test("should fall back to requestStartTime when forwardStartTime is null", async () => {
  614. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  615. const startTime = 1700000000000;
  616. await traceProxyRequest({
  617. session: createMockSession({ startTime, forwardStartTime: null }),
  618. responseHeaders: new Headers(),
  619. durationMs: 500,
  620. statusCode: 200,
  621. isStreaming: false,
  622. });
  623. const llmCall = mockRootSpan.startObservation.mock.calls.find(
  624. (c: unknown[]) => c[0] === "llm-call"
  625. );
  626. expect(llmCall[2]).toEqual({
  627. asType: "generation",
  628. startTime: new Date(startTime),
  629. });
  630. });
  631. test("should include timingBreakdown in root span metadata and generation metadata", async () => {
  632. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  633. const startTime = 1700000000000;
  634. const forwardStartTime = startTime + 5;
  635. await traceProxyRequest({
  636. session: createMockSession({
  637. startTime,
  638. forwardStartTime,
  639. ttfbMs: 105,
  640. getProviderChain: () => [
  641. { id: 1, name: "p1", reason: "retry_failed", timestamp: startTime + 50 },
  642. { id: 2, name: "p2", reason: "request_success", timestamp: startTime + 100 },
  643. ],
  644. }),
  645. responseHeaders: new Headers(),
  646. durationMs: 500,
  647. statusCode: 200,
  648. isStreaming: false,
  649. });
  650. const expectedTimingBreakdown = {
  651. guardPipelineMs: 5,
  652. upstreamTotalMs: 495,
  653. ttfbFromForwardMs: 100, // ttfbMs(105) - guardPipelineMs(5)
  654. tokenGenerationMs: 395, // durationMs(500) - ttfbMs(105)
  655. failedAttempts: 1, // only retry_failed is non-success
  656. providersAttempted: 2, // 2 unique provider ids
  657. };
  658. // Root span metadata should have timingBreakdown
  659. const rootCall = mockStartObservation.mock.calls[0];
  660. expect(rootCall[1].metadata.timingBreakdown).toEqual(expectedTimingBreakdown);
  661. // Generation metadata should also have timingBreakdown
  662. const llmCall = mockRootSpan.startObservation.mock.calls.find(
  663. (c: unknown[]) => c[0] === "llm-call"
  664. );
  665. expect(llmCall[1].metadata.timingBreakdown).toEqual(expectedTimingBreakdown);
  666. });
  667. test("should not create provider-attempt events when all providers succeeded", async () => {
  668. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  669. await traceProxyRequest({
  670. session: createMockSession({
  671. getProviderChain: () => [
  672. { id: 1, name: "p1", reason: "initial_selection", timestamp: Date.now() },
  673. { id: 1, name: "p1", reason: "request_success", timestamp: Date.now() },
  674. ],
  675. }),
  676. responseHeaders: new Headers(),
  677. durationMs: 500,
  678. statusCode: 200,
  679. isStreaming: false,
  680. });
  681. const eventCalls = mockRootSpan.startObservation.mock.calls.filter(
  682. (c: unknown[]) => c[0] === "provider-attempt"
  683. );
  684. expect(eventCalls).toHaveLength(0);
  685. });
  686. // --- New tests for input/output, level, and cost breakdown ---
  687. test("should use forwardedRequestBody as trace input when available", async () => {
  688. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  689. const forwardedBody = JSON.stringify({
  690. model: "claude-sonnet-4-20250514",
  691. messages: [{ role: "user", content: "Preprocessed Hello" }],
  692. stream: true,
  693. });
  694. await traceProxyRequest({
  695. session: createMockSession({
  696. forwardedRequestBody: forwardedBody,
  697. }),
  698. responseHeaders: new Headers(),
  699. durationMs: 500,
  700. statusCode: 200,
  701. isStreaming: false,
  702. responseText: '{"ok": true}',
  703. });
  704. // Root span input should be the forwarded body (parsed JSON)
  705. const rootCall = mockStartObservation.mock.calls[0];
  706. expect(rootCall[1].input).toEqual(JSON.parse(forwardedBody));
  707. // updateTrace should also use forwarded body
  708. expect(mockUpdateTrace).toHaveBeenCalledWith({
  709. input: JSON.parse(forwardedBody),
  710. output: { ok: true },
  711. });
  712. });
  713. test("should set root span level to DEFAULT for successful request", async () => {
  714. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  715. await traceProxyRequest({
  716. session: createMockSession(),
  717. responseHeaders: new Headers(),
  718. durationMs: 500,
  719. statusCode: 200,
  720. isStreaming: false,
  721. });
  722. const rootCall = mockStartObservation.mock.calls[0];
  723. expect(rootCall[1].level).toBe("DEFAULT");
  724. });
  725. test("should set root span level to WARNING when retries occurred", async () => {
  726. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  727. const startTime = Date.now() - 500;
  728. await traceProxyRequest({
  729. session: createMockSession({
  730. startTime,
  731. getProviderChain: () => [
  732. { id: 1, name: "p1", reason: "retry_failed", timestamp: startTime + 50 },
  733. { id: 2, name: "p2", reason: "request_success", timestamp: startTime + 200 },
  734. ],
  735. }),
  736. responseHeaders: new Headers(),
  737. durationMs: 500,
  738. statusCode: 200,
  739. isStreaming: false,
  740. });
  741. const rootCall = mockStartObservation.mock.calls[0];
  742. expect(rootCall[1].level).toBe("WARNING");
  743. });
  744. test("should set root span level to ERROR for non-200 status", async () => {
  745. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  746. await traceProxyRequest({
  747. session: createMockSession(),
  748. responseHeaders: new Headers(),
  749. durationMs: 500,
  750. statusCode: 502,
  751. isStreaming: false,
  752. });
  753. const rootCall = mockStartObservation.mock.calls[0];
  754. expect(rootCall[1].level).toBe("ERROR");
  755. });
  756. test("should set root span level to ERROR for 499 client abort", async () => {
  757. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  758. await traceProxyRequest({
  759. session: createMockSession(),
  760. responseHeaders: new Headers(),
  761. durationMs: 500,
  762. statusCode: 499,
  763. isStreaming: false,
  764. });
  765. const rootCall = mockStartObservation.mock.calls[0];
  766. expect(rootCall[1].level).toBe("ERROR");
  767. });
  768. test("should include cost breakdown in costDetails when provided", async () => {
  769. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  770. const costBreakdown = {
  771. input: 0.001,
  772. output: 0.002,
  773. cache_creation: 0.0005,
  774. cache_read: 0.0001,
  775. total: 0.0036,
  776. };
  777. await traceProxyRequest({
  778. session: createMockSession(),
  779. responseHeaders: new Headers(),
  780. durationMs: 500,
  781. statusCode: 200,
  782. isStreaming: false,
  783. costUsd: "0.0036",
  784. costBreakdown,
  785. });
  786. const llmCall = mockRootSpan.startObservation.mock.calls.find(
  787. (c: unknown[]) => c[0] === "llm-call"
  788. );
  789. expect(llmCall[1].costDetails).toEqual(costBreakdown);
  790. });
  791. test("should fallback to total-only costDetails when no breakdown", async () => {
  792. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  793. await traceProxyRequest({
  794. session: createMockSession(),
  795. responseHeaders: new Headers(),
  796. durationMs: 500,
  797. statusCode: 200,
  798. isStreaming: false,
  799. costUsd: "0.05",
  800. });
  801. const llmCall = mockRootSpan.startObservation.mock.calls.find(
  802. (c: unknown[]) => c[0] === "llm-call"
  803. );
  804. expect(llmCall[1].costDetails).toEqual({ total: 0.05 });
  805. });
  806. test("should include former summaries in root span metadata", async () => {
  807. const { traceProxyRequest } = await import("@/lib/langfuse/trace-proxy-request");
  808. await traceProxyRequest({
  809. session: createMockSession(),
  810. responseHeaders: new Headers(),
  811. durationMs: 500,
  812. statusCode: 200,
  813. isStreaming: false,
  814. costUsd: "0.05",
  815. });
  816. const rootCall = mockStartObservation.mock.calls[0];
  817. const metadata = rootCall[1].metadata;
  818. // Former input summary fields
  819. expect(metadata.endpoint).toBe("/v1/messages");
  820. expect(metadata.method).toBe("POST");
  821. expect(metadata.model).toBe("claude-sonnet-4-20250514");
  822. expect(metadata.clientFormat).toBe("claude");
  823. expect(metadata.providerName).toBe("anthropic-main");
  824. // Former output summary fields
  825. expect(metadata.statusCode).toBe(200);
  826. expect(metadata.durationMs).toBe(500);
  827. expect(metadata.costUsd).toBe("0.05");
  828. expect(metadata.timingBreakdown).toBeDefined();
  829. });
  830. });
  831. describe("isLangfuseEnabled", () => {
  832. const originalPublicKey = process.env.LANGFUSE_PUBLIC_KEY;
  833. const originalSecretKey = process.env.LANGFUSE_SECRET_KEY;
  834. afterEach(() => {
  835. // Restore env
  836. if (originalPublicKey !== undefined) {
  837. process.env.LANGFUSE_PUBLIC_KEY = originalPublicKey;
  838. } else {
  839. delete process.env.LANGFUSE_PUBLIC_KEY;
  840. }
  841. if (originalSecretKey !== undefined) {
  842. process.env.LANGFUSE_SECRET_KEY = originalSecretKey;
  843. } else {
  844. delete process.env.LANGFUSE_SECRET_KEY;
  845. }
  846. });
  847. test("should return false when env vars are not set", () => {
  848. delete process.env.LANGFUSE_PUBLIC_KEY;
  849. delete process.env.LANGFUSE_SECRET_KEY;
  850. // Direct function test (not using the mock)
  851. const isEnabled = !!(process.env.LANGFUSE_PUBLIC_KEY && process.env.LANGFUSE_SECRET_KEY);
  852. expect(isEnabled).toBe(false);
  853. });
  854. test("should return true when both keys are set", () => {
  855. process.env.LANGFUSE_PUBLIC_KEY = "pk-lf-test-mock";
  856. process.env.LANGFUSE_SECRET_KEY = "test-mock-not-real";
  857. const isEnabled = !!(process.env.LANGFUSE_PUBLIC_KEY && process.env.LANGFUSE_SECRET_KEY);
  858. expect(isEnabled).toBe(true);
  859. });
  860. });