Browse Source

Merge pull request #2820 from Kilo-Org/mark/run-benchmarks

Slight tweak to benchmark/evals
Mark IJbema 4 months ago
parent
commit
05fa357fe2
1 changed file with 12 additions and 1 deletion
  1. 12 1
      src/test-llm-autocompletion/runner.ts

+ 12 - 1
src/test-llm-autocompletion/runner.ts

@@ -45,6 +45,17 @@ class TestRunner {
 				actualValue = "(no changes parsed)"
 			}
 
+			// Auto-reject if no changes were parsed
+			if (actualValue === "(no changes parsed)") {
+				return {
+					testCase,
+					isApproved: false,
+					completion,
+					actualValue,
+					llmRequestDuration,
+				}
+			}
+
 			const approvalResult = await checkApproval(testCase.category, testCase.name, testCase.input, actualValue)
 
 			return {
@@ -196,7 +207,7 @@ class TestRunner {
 			process.exit(1)
 		}
 
-		const numRuns = 5
+		const numRuns = 10
 
 		console.log(`\n🧪 Running Single Test: ${testName} (${numRuns} times)\n`)
 		console.log("Category:", testCase.category)