// types.ts
  // Database type definitions for the diff-edits evaluation system.
  /**
   * A system prompt record as stored in the database.
   *
   * NOTE(review): `hash` looks like the record's identifier (other records in
   * this file carry a `system_prompt_hash` field) — confirm how it is derived.
   */
  export interface SystemPrompt {
    /** Hash identifying this prompt record. */
    hash: string;
    /** Name given to the prompt. */
    name: string;
    /** Full prompt text. */
    content: string;
    /** Creation timestamp as a string; the exact format is not specified here. */
    created_at: string;
  }
  /**
   * A stored pair of processing functions (parsing + diff-edit application).
   *
   * NOTE(review): both function fields are plain strings; whether they hold a
   * function name or source text is not visible from this file — confirm.
   */
  export interface ProcessingFunctions {
    /** Hash identifying this record. */
    hash: string;
    /** Name given to this pair of functions. */
    name: string;
    /** The parsing function, as a string. */
    parsing_function: string;
    /** The diff-edit function, as a string. */
    diff_edit_function: string;
    /** Creation timestamp as a string; the exact format is not specified here. */
    created_at: string;
  }
  /**
   * A file record as stored in the database.
   */
  export interface FileRecord {
    /** Hash identifying this file record. */
    hash: string;
    /** Path of the file. */
    filepath: string;
    /** File contents. */
    content: string;
    /** Optional token count for the file; the tokenizer is not specified here. */
    tokens?: number;
    /** Creation timestamp as a string; the exact format is not specified here. */
    created_at: string;
  }
  /**
   * A benchmark run record as stored in the database.
   */
  export interface BenchmarkRun {
    /** Identifier of the run. */
    run_id: string;
    /** Creation timestamp as a string; the exact format is not specified here. */
    created_at: string;
    /** Optional free-text description of the run. */
    description?: string;
    /** Hash of the system prompt associated with this run. */
    system_prompt_hash: string;
  }
  /**
   * A single evaluation case record belonging to a benchmark run.
   */
  export interface Case {
    /** Identifier of the case. */
    case_id: string;
    /** Identifier of the run this case belongs to. */
    run_id: string;
    /** Creation timestamp as a string; the exact format is not specified here. */
    created_at: string;
    /** Description of the case. */
    description: string;
    /** Hash of the system prompt associated with this case. */
    system_prompt_hash: string;
    /** Identifier of the task this case is associated with. */
    task_id: string;
    /** Number of tokens in context for this case. */
    tokens_in_context: number;
    /** Optional hash of an associated file record. */
    file_hash?: string;
  }
  /**
   * The stored outcome of running one case against one model.
   *
   * Only the identifiers, `succeeded` and `created_at` are required; every
   * metric and payload field is optional.
   */
  export interface Result {
    /** Identifier of this result. */
    result_id: string;
    /** Identifier of the run this result belongs to. */
    run_id: string;
    /** Identifier of the case this result belongs to. */
    case_id: string;
    /** Identifier of the model that produced the output. */
    model_id: string;
    /** Hash of the processing-functions record used. */
    processing_functions_hash: string;
    /** Whether the attempt succeeded. */
    succeeded: boolean;
    /** Numeric error code; the meaning of each value is defined outside this file. */
    error_enum?: number;
    /** Number of edits. */
    num_edits?: number;
    /** Number of lines deleted. */
    num_lines_deleted?: number;
    /** Number of lines added. */
    num_lines_added?: number;
    /** Time to first token (milliseconds, per the field name). */
    time_to_first_token_ms?: number;
    /** Time to first edit (milliseconds, per the field name). */
    time_to_first_edit_ms?: number;
    /** Round-trip time (milliseconds, per the field name). */
    time_round_trip_ms?: number;
    /** Cost (US dollars, per the field name). */
    cost_usd?: number;
    /** Number of completion tokens. */
    completion_tokens?: number;
    /** Raw model output. */
    raw_model_output?: string;
    /** Hash of the edited file. */
    file_edited_hash?: string;
    /** Parsed tool call, stored as a JSON string. */
    parsed_tool_call_json?: string;
    /** Creation timestamp as a string; the exact format is not specified here. */
    created_at: string;
  }
  // Input types for creating records
  /**
   * Input for creating a system prompt record.
   */
  export interface CreateSystemPromptInput {
    /** Name to give the prompt. */
    name: string;
    /** Full prompt text. */
    content: string;
  }
  /**
   * Input for creating a processing-functions record.
   */
  export interface CreateProcessingFunctionsInput {
    /** Name to give this pair of functions. */
    name: string;
    /** The parsing function, as a string. */
    parsing_function: string;
    /** The diff-edit function, as a string. */
    diff_edit_function: string;
  }
  /**
   * Input for creating a file record.
   */
  export interface CreateFileInput {
    /** Path of the file. */
    filepath: string;
    /** File contents. */
    content: string;
    /** Optional token count for the file. */
    tokens?: number;
  }
  /**
   * Input for creating a benchmark run record.
   */
  export interface CreateBenchmarkRunInput {
    /** Optional free-text description of the run. */
    description?: string;
    /** Hash of the system prompt to associate with the run. */
    system_prompt_hash: string;
  }
  /**
   * Input for creating a case record.
   */
  export interface CreateCaseInput {
    /** Identifier of the run the case belongs to. */
    run_id: string;
    /** Description of the case. */
    description: string;
    /** Hash of the system prompt associated with the case. */
    system_prompt_hash: string;
    /** Identifier of the task the case is associated with. */
    task_id: string;
    /** Number of tokens in context for the case. */
    tokens_in_context: number;
    /** Optional hash of an associated file record. */
    file_hash?: string;
  }
  /**
   * Input for creating a result record.
   *
   * Only the identifiers and `succeeded` are required; every metric and
   * payload field is optional.
   */
  export interface CreateResultInput {
    /** Identifier of the run the result belongs to. */
    run_id: string;
    /** Identifier of the case the result belongs to. */
    case_id: string;
    /** Identifier of the model that produced the output. */
    model_id: string;
    /** Hash of the processing-functions record used. */
    processing_functions_hash: string;
    /** Whether the attempt succeeded. */
    succeeded: boolean;
    /** Numeric error code; the meaning of each value is defined outside this file. */
    error_enum?: number;
    /** Number of edits. */
    num_edits?: number;
    /** Number of lines deleted. */
    num_lines_deleted?: number;
    /** Number of lines added. */
    num_lines_added?: number;
    /** Time to first token (milliseconds, per the field name). */
    time_to_first_token_ms?: number;
    /** Time to first edit (milliseconds, per the field name). */
    time_to_first_edit_ms?: number;
    /** Round-trip time (milliseconds, per the field name). */
    time_round_trip_ms?: number;
    /** Cost (US dollars, per the field name). */
    cost_usd?: number;
    /** Number of completion tokens. */
    completion_tokens?: number;
    /** Raw model output. */
    raw_model_output?: string;
    /** Hash of the edited file. */
    file_edited_hash?: string;
    /** Parsed tool call, stored as a JSON string. */
    parsed_tool_call_json?: string;
  }
  // Analysis result types
  /**
   * Analysis result: success statistics for one model.
   *
   * NOTE(review): `success_rate` is presumably derived from `successful_runs`
   * and `total_runs`; whether it is a fraction or a percentage is not
   * specified here — confirm against the query that produces it.
   */
  export interface ModelSuccessRate {
    /** Identifier of the model. */
    model_id: string;
    /** Total number of runs counted. */
    total_runs: number;
    /** Number of successful runs. */
    successful_runs: number;
    /** Success rate for the model. */
    success_rate: number;
  }
  /**
   * Analysis result: average latency figures for one model.
   *
   * Field names carry an `_ms` suffix, so values are presumably milliseconds.
   */
  export interface ModelLatency {
    /** Identifier of the model. */
    model_id: string;
    /** Average time to first token. */
    avg_time_to_first_token_ms: number;
    /** Average time to first edit. */
    avg_time_to_first_edit_ms: number;
    /** Average round-trip time. */
    avg_time_round_trip_ms: number;
  }
  /**
   * Analysis result: cost figures for one model within one run.
   */
  export interface CostAnalysis {
    /** Identifier of the run. */
    run_id: string;
    /** Identifier of the model. */
    model_id: string;
    /** Total cost (US dollars, per the field name). */
    total_cost_usd: number;
    /** Average cost per case; presumably the same currency as `total_cost_usd`. */
    avg_cost_per_case: number;
    /** Total number of completion tokens. */
    total_completion_tokens: number;
  }
  /**
   * Analysis result: how often one error code occurred.
   *
   * NOTE(review): the scale of `percentage` (0–1 vs 0–100) and what it is a
   * share of are not specified here — confirm against the producing query.
   */
  export interface ErrorDistribution {
    /** Numeric error code; the meaning of each value is defined outside this file. */
    error_enum: number;
    /** Number of occurrences of this error code. */
    count: number;
    /** Share of occurrences attributed to this error code. */
    percentage: number;
  }
  /**
   * Analysis result: one failed case for one model.
   */
  export interface FailedCase {
    /** Identifier of the case. */
    case_id: string;
    /** Identifier of the model. */
    model_id: string;
    /** Numeric error code; the meaning of each value is defined outside this file. */
    error_enum: number;
    /** Description text (presumably the case's description — confirm). */
    description: string;
    /** Raw model output, when available. */
    raw_model_output?: string;
  }
  /**
   * Analysis result: performance figures for one model on one date.
   */
  export interface PerformanceTrend {
    /** Date as a string; the exact format is not specified here. */
    date: string;
    /** Identifier of the model. */
    model_id: string;
    /** Success rate; scale (fraction vs percentage) is not specified here. */
    success_rate: number;
    /** Average latency (milliseconds, per the field name). */
    avg_latency_ms: number;
    /** Average cost (US dollars, per the field name). */
    avg_cost_usd: number;
  }
  /**
   * Analysis result: summary figures for one model, for comparing models.
   */
  export interface ModelComparison {
    /** Identifier of the model. */
    model_id: string;
    /** Success rate; scale (fraction vs percentage) is not specified here. */
    success_rate: number;
    /** Average latency (milliseconds, per the field name). */
    avg_latency_ms: number;
    /** Average cost (US dollars, per the field name). */
    avg_cost_usd: number;
    /** Total number of runs counted. */
    total_runs: number;
  }