operations.ts 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. import { DatabaseClient } from './client';
  2. import {
  3. SystemPrompt,
  4. ProcessingFunctions,
  5. FileRecord,
  6. BenchmarkRun,
  7. Case,
  8. Result,
  9. CreateSystemPromptInput,
  10. CreateProcessingFunctionsInput,
  11. CreateFileInput,
  12. CreateBenchmarkRunInput,
  13. CreateCaseInput,
  14. CreateResultInput
  15. } from './types';
  16. const db = DatabaseClient.getInstance();
  17. // System Prompts Operations
  18. export async function upsertSystemPrompt(input: CreateSystemPromptInput): Promise<string> {
  19. const hash = DatabaseClient.generateHash(input.content);
  20. const stmt = db.getDatabase().prepare(`
  21. INSERT OR IGNORE INTO system_prompts (hash, name, content)
  22. VALUES (?, ?, ?)
  23. `);
  24. stmt.run(hash, input.name, input.content);
  25. return hash;
  26. }
  27. export async function getSystemPromptByHash(hash: string): Promise<SystemPrompt | null> {
  28. const stmt = db.getDatabase().prepare(`
  29. SELECT * FROM system_prompts WHERE hash = ?
  30. `);
  31. const result = stmt.get(hash) as SystemPrompt | undefined;
  32. return result || null;
  33. }
  34. // Processing Functions Operations
  35. export async function upsertProcessingFunctions(input: CreateProcessingFunctionsInput): Promise<string> {
  36. const hash = DatabaseClient.generateHash(input.parsing_function + input.diff_edit_function);
  37. const stmt = db.getDatabase().prepare(`
  38. INSERT OR IGNORE INTO processing_functions (hash, name, parsing_function, diff_edit_function)
  39. VALUES (?, ?, ?, ?)
  40. `);
  41. stmt.run(hash, input.name, input.parsing_function, input.diff_edit_function);
  42. return hash;
  43. }
  44. export async function getProcessingFunctionsByHash(hash: string): Promise<ProcessingFunctions | null> {
  45. const stmt = db.getDatabase().prepare(`
  46. SELECT * FROM processing_functions WHERE hash = ?
  47. `);
  48. const result = stmt.get(hash) as ProcessingFunctions | undefined;
  49. return result || null;
  50. }
  51. // Files Operations
  52. export async function upsertFile(input: CreateFileInput): Promise<string> {
  53. const hash = DatabaseClient.generateHash(input.content);
  54. const stmt = db.getDatabase().prepare(`
  55. INSERT OR IGNORE INTO files (hash, filepath, content, tokens)
  56. VALUES (?, ?, ?, ?)
  57. `);
  58. stmt.run(hash, input.filepath, input.content, input.tokens || null);
  59. return hash;
  60. }
  61. export async function getFileByHash(hash: string): Promise<FileRecord | null> {
  62. const stmt = db.getDatabase().prepare(`
  63. SELECT * FROM files WHERE hash = ?
  64. `);
  65. const result = stmt.get(hash) as FileRecord | undefined;
  66. return result || null;
  67. }
  68. // Benchmark Runs Operations
  69. export async function createBenchmarkRun(input: CreateBenchmarkRunInput): Promise<string> {
  70. const runId = DatabaseClient.generateId();
  71. const stmt = db.getDatabase().prepare(`
  72. INSERT INTO runs (run_id, description, system_prompt_hash)
  73. VALUES (?, ?, ?)
  74. `);
  75. stmt.run(runId, input.description || null, input.system_prompt_hash);
  76. return runId;
  77. }
  78. export async function getBenchmarkRun(runId: string): Promise<BenchmarkRun | null> {
  79. const stmt = db.getDatabase().prepare(`
  80. SELECT * FROM runs WHERE run_id = ?
  81. `);
  82. const result = stmt.get(runId) as BenchmarkRun | undefined;
  83. return result || null;
  84. }
  85. export async function getAllBenchmarkRuns(): Promise<BenchmarkRun[]> {
  86. const stmt = db.getDatabase().prepare(`
  87. SELECT * FROM runs ORDER BY created_at DESC
  88. `);
  89. return stmt.all() as BenchmarkRun[];
  90. }
  91. // Cases Operations
  92. export async function createCase(input: CreateCaseInput): Promise<string> {
  93. const caseId = DatabaseClient.generateId();
  94. const stmt = db.getDatabase().prepare(`
  95. INSERT INTO cases (case_id, run_id, description, system_prompt_hash, task_id, tokens_in_context, file_hash)
  96. VALUES (?, ?, ?, ?, ?, ?, ?)
  97. `);
  98. stmt.run(
  99. caseId,
  100. input.run_id,
  101. input.description,
  102. input.system_prompt_hash,
  103. input.task_id,
  104. input.tokens_in_context,
  105. input.file_hash || null
  106. );
  107. return caseId;
  108. }
  109. export async function getCasesByRun(runId: string): Promise<Case[]> {
  110. const stmt = db.getDatabase().prepare(`
  111. SELECT * FROM cases WHERE run_id = ? ORDER BY created_at
  112. `);
  113. return stmt.all(runId) as Case[];
  114. }
  115. export async function getCaseById(caseId: string): Promise<Case | null> {
  116. const stmt = db.getDatabase().prepare(`
  117. SELECT * FROM cases WHERE case_id = ?
  118. `);
  119. const result = stmt.get(caseId) as Case | undefined;
  120. return result || null;
  121. }
  122. // Results Operations
  123. export async function insertResult(input: CreateResultInput): Promise<string> {
  124. const resultId = DatabaseClient.generateId();
  125. const stmt = db.getDatabase().prepare(`
  126. INSERT INTO results (
  127. result_id, run_id, case_id, model_id, processing_functions_hash,
  128. succeeded, error_enum, num_edits, num_lines_deleted, num_lines_added,
  129. time_to_first_token_ms, time_to_first_edit_ms, time_round_trip_ms,
  130. cost_usd, completion_tokens, raw_model_output, file_edited_hash,
  131. parsed_tool_call_json
  132. ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  133. `);
  134. stmt.run(
  135. resultId,
  136. input.run_id,
  137. input.case_id,
  138. input.model_id,
  139. input.processing_functions_hash,
  140. input.succeeded ? 1 : 0, // Convert boolean to integer
  141. input.error_enum || null,
  142. input.num_edits || null,
  143. input.num_lines_deleted || null,
  144. input.num_lines_added || null,
  145. input.time_to_first_token_ms || null,
  146. input.time_to_first_edit_ms || null,
  147. input.time_round_trip_ms || null,
  148. input.cost_usd || null,
  149. input.completion_tokens || null,
  150. input.raw_model_output || null,
  151. input.file_edited_hash || null,
  152. input.parsed_tool_call_json || null
  153. );
  154. return resultId;
  155. }
  156. export async function getResultsByRun(runId: string): Promise<Result[]> {
  157. const stmt = db.getDatabase().prepare(`
  158. SELECT * FROM results WHERE run_id = ? ORDER BY created_at
  159. `);
  160. return stmt.all(runId) as Result[];
  161. }
  162. export async function getResultsByCase(caseId: string): Promise<Result[]> {
  163. const stmt = db.getDatabase().prepare(`
  164. SELECT * FROM results WHERE case_id = ? ORDER BY created_at
  165. `);
  166. return stmt.all(caseId) as Result[];
  167. }
  168. export async function getResultById(resultId: string): Promise<Result | null> {
  169. const stmt = db.getDatabase().prepare(`
  170. SELECT * FROM results WHERE result_id = ?
  171. `);
  172. const result = stmt.get(resultId) as Result | undefined;
  173. return result || null;
  174. }
  175. // Batch operations for performance
  176. export async function insertResultsBatch(inputs: CreateResultInput[]): Promise<string[]> {
  177. const stmt = db.getDatabase().prepare(`
  178. INSERT INTO results (
  179. result_id, run_id, case_id, model_id, processing_functions_hash,
  180. succeeded, error_enum, num_edits, num_lines_deleted, num_lines_added,
  181. time_to_first_token_ms, time_to_first_edit_ms, time_round_trip_ms,
  182. cost_usd, completion_tokens, raw_model_output, file_edited_hash,
  183. parsed_tool_call_json
  184. ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  185. `);
  186. return db.transaction(() => {
  187. const resultIds: string[] = [];
  188. for (const input of inputs) {
  189. const resultId = DatabaseClient.generateId();
  190. stmt.run(
  191. resultId,
  192. input.run_id,
  193. input.case_id,
  194. input.model_id,
  195. input.processing_functions_hash,
  196. input.succeeded ? 1 : 0, // Convert boolean to integer
  197. input.error_enum || null,
  198. input.num_edits || null,
  199. input.num_lines_deleted || null,
  200. input.num_lines_added || null,
  201. input.time_to_first_token_ms || null,
  202. input.time_to_first_edit_ms || null,
  203. input.time_round_trip_ms || null,
  204. input.cost_usd || null,
  205. input.completion_tokens || null,
  206. input.raw_model_output || null,
  207. input.file_edited_hash || null,
  208. input.parsed_tool_call_json || null
  209. );
  210. resultIds.push(resultId);
  211. }
  212. return resultIds;
  213. });
  214. }
  215. export async function createCasesBatch(inputs: CreateCaseInput[]): Promise<string[]> {
  216. const stmt = db.getDatabase().prepare(`
  217. INSERT INTO cases (case_id, run_id, description, system_prompt_hash, task_id, tokens_in_context)
  218. VALUES (?, ?, ?, ?, ?, ?)
  219. `);
  220. return db.transaction(() => {
  221. const caseIds: string[] = [];
  222. for (const input of inputs) {
  223. const caseId = DatabaseClient.generateId();
  224. stmt.run(
  225. caseId,
  226. input.run_id,
  227. input.description,
  228. input.system_prompt_hash,
  229. input.task_id,
  230. input.tokens_in_context
  231. );
  232. caseIds.push(caseId);
  233. }
  234. return caseIds;
  235. });
  236. }
  237. // Utility functions
  238. export async function getRunStats(runId: string): Promise<{
  239. total_cases: number;
  240. total_results: number;
  241. success_rate: number;
  242. avg_cost: number;
  243. avg_latency: number;
  244. }> {
  245. const stmt = db.getDatabase().prepare(`
  246. SELECT
  247. COUNT(DISTINCT c.case_id) as total_cases,
  248. COUNT(r.result_id) as total_results,
  249. AVG(CASE WHEN r.succeeded THEN 1.0 ELSE 0.0 END) as success_rate,
  250. AVG(r.cost_usd) as avg_cost,
  251. AVG(r.time_round_trip_ms) as avg_latency
  252. FROM cases c
  253. LEFT JOIN results r ON c.case_id = r.case_id
  254. WHERE c.run_id = ?
  255. `);
  256. const result = stmt.get(runId) as any;
  257. return {
  258. total_cases: result.total_cases || 0,
  259. total_results: result.total_results || 0,
  260. success_rate: result.success_rate || 0,
  261. avg_cost: result.avg_cost || 0,
  262. avg_latency: result.avg_latency || 0
  263. };
  264. }
  265. // Count valid attempts for a specific case and model
  266. export async function getValidAttemptCount(caseId: string, modelId: string): Promise<number> {
  267. const stmt = db.getDatabase().prepare(`
  268. SELECT COUNT(*) as count
  269. FROM results
  270. WHERE case_id = ?
  271. AND model_id = ?
  272. AND error_enum NOT IN (1, 6, 7) -- Exclude: no_tool_calls, wrong_tool_call, wrong_file_edited
  273. `);
  274. const result = stmt.get(caseId, modelId) as { count: number };
  275. return result.count;
  276. }
  277. // Get valid results for a specific case and model (for analysis)
  278. export async function getValidResults(caseId: string, modelId: string, limit?: number): Promise<Result[]> {
  279. const limitClause = limit ? `LIMIT ${limit}` : '';
  280. const stmt = db.getDatabase().prepare(`
  281. SELECT * FROM results
  282. WHERE case_id = ?
  283. AND model_id = ?
  284. AND error_enum NOT IN (1, 6, 7) -- Only valid attempts
  285. ORDER BY created_at
  286. ${limitClause}
  287. `);
  288. return stmt.all(caseId, modelId) as Result[];
  289. }