Explorar el Código

refactor: update edit and search strategies to use configurable confidence thresholds

- Modified applyContextMatching and applyDMP functions to accept a confidenceThreshold parameter, enhancing flexibility in edit strategies.
- Updated validateEditResult and related functions to utilize the new confidenceThreshold, improving consistency across validation processes.
- Adjusted findExactMatch, findSimilarityMatch, findLevenshteinMatch, and findAnchorMatch functions to incorporate confidenceThreshold, ensuring adaptive behavior based on user settings.
Daniel Riccio hace 1 año
padre
commit
f9a453a44f

+ 10 - 10
src/core/diff/strategies/new-unified/edit-strategies.ts

@@ -30,7 +30,7 @@ function inferIndentation(line: string, contextLines: string[], previousIndent:
 }
 
 // Context matching edit strategy
-export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number): EditResult {
+export function applyContextMatching(hunk: Hunk, content: string[], matchPosition: number, confidenceThreshold: number): EditResult {
   if (matchPosition === -1) {
     return { confidence: 0, result: content, strategy: 'context' };
   }
@@ -71,7 +71,7 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio
     newResult.slice(matchPosition, matchPosition + windowSize).join('\n')
   )
 
-  const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + windowSize).join('\n'), 'context');
+  const confidence = validateEditResult(hunk, newResult.slice(matchPosition, matchPosition + windowSize).join('\n'), confidenceThreshold);
 
   return { 
     confidence: similarity * confidence,
@@ -81,7 +81,7 @@ export function applyContextMatching(hunk: Hunk, content: string[], matchPositio
 }
 
 // DMP edit strategy
-export function applyDMP(hunk: Hunk, content: string[], matchPosition: number): EditResult {
+export function applyDMP(hunk: Hunk, content: string[], matchPosition: number, confidenceThreshold: number): EditResult {
   if (matchPosition === -1) {
     return { confidence: 0, result: content, strategy: 'dmp' };
   }
@@ -123,7 +123,7 @@ export function applyDMP(hunk: Hunk, content: string[], matchPosition: number):
   
   // Calculate confidence
   const similarity = getDMPSimilarity(beforeText, targetText);
-  const confidence = validateEditResult(hunk, patchedText, 'dmp');
+  const confidence = validateEditResult(hunk, patchedText, confidenceThreshold);
   
   return {
     confidence: similarity * confidence,
@@ -254,25 +254,25 @@ export async function applyEdit(
 	content: string[], 
 	matchPosition: number, 
 	confidence: number,
-	minConfidence: number = 0.9
+	confidenceThreshold: number = 0.97
 ): Promise<EditResult> {
 	// Don't attempt regular edits if confidence is too low
-	if (confidence < minConfidence) {
-		console.log(`Search confidence (${confidence}) below minimum threshold (${minConfidence}), trying git fallback...`);
+	if (confidence < confidenceThreshold) {
+		console.log(`Search confidence (${confidence}) below minimum threshold (${confidenceThreshold}), trying git fallback...`);
 		return applyGitFallback(hunk, content);
 	}
 
 	// Try each strategy in sequence until one succeeds
 	const strategies = [
-		{ name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition) },
-		{ name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition) },
+		{ name: 'dmp', apply: () => applyDMP(hunk, content, matchPosition, confidenceThreshold) },
+		{ name: 'context', apply: () => applyContextMatching(hunk, content, matchPosition, confidenceThreshold) },
 		{ name: 'git-fallback', apply: () => applyGitFallback(hunk, content) }
 	];
 
 	// Try strategies sequentially until one succeeds
 	for (const strategy of strategies) {
 		const result = await strategy.apply();
-		if (result.confidence >= minConfidence) {
+		if (result.confidence >= confidenceThreshold) {
 			return result;
 		}
 	}

+ 22 - 33
src/core/diff/strategies/new-unified/search-strategies.ts

@@ -9,16 +9,13 @@ export type SearchResult = {
 	strategy: string
 }
 
-//TODO: this should be configurable
-const MIN_CONFIDENCE = 0.97
-const MIN_CONFIDENCE_LARGE_FILE = 0.9
 const LARGE_FILE_THRESHOLD = 1000 // lines
 const UNIQUE_CONTENT_BOOST = 0.05
 const DEFAULT_OVERLAP_SIZE = 3 // lines of overlap between windows
 const MAX_WINDOW_SIZE = 500 // maximum lines in a window
 
 // Helper function to calculate adaptive confidence threshold based on file size
-function getAdaptiveThreshold(contentLength: number, baseThreshold: number = 0.97): number {
+function getAdaptiveThreshold(contentLength: number, baseThreshold: number): number {
 	if (contentLength <= LARGE_FILE_THRESHOLD) {
 		return baseThreshold
 	}
@@ -69,11 +66,7 @@ export function getDMPSimilarity(original: string, modified: string): number {
 }
 
 // Helper function to validate edit results using hunk information
-// Returns a confidence reduction value between 0 and 1
-// Example: If similarity is 0.8 and MIN_CONFIDENCE is 0.95,
-// returns 0.1 (0.5 * (1 - 0.8)) to reduce confidence proportionally but with less impact.
-// If similarity >= MIN_CONFIDENCE, returns 0 (no reduction).
-export function validateEditResult(hunk: Hunk, result: string, strategy: string): number {
+export function validateEditResult(hunk: Hunk, result: string, confidenceThreshold: number): number {
 	const hunkDeepCopy: Hunk = JSON.parse(JSON.stringify(hunk))
 
 	const originalSkeleton = hunkDeepCopy.changes
@@ -90,26 +83,20 @@ export function validateEditResult(hunk: Hunk, result: string, strategy: string)
 	const expectedSimilarity = evaluateSimilarity(expectedSkeleton, result)
 
 	if (originalSimilarity > 0.97 && expectedSimilarity !== 1) {
-		if (originalSimilarity === 1) {
-			if (originalSimilarity > 0.97) {
 				if (originalSimilarity === 1) {
 					return 0.5
-				} else {
-					return 0.8
-				}
-			}
 		} else {
 			return 0.8
 		}
 	}
 
-	const multiplier = expectedSimilarity < MIN_CONFIDENCE ? 0.96 + 0.04 * expectedSimilarity : 1
+	const multiplier = expectedSimilarity < confidenceThreshold ? expectedSimilarity : 1
 
 	return multiplier
 }
 
 // Helper function to validate context lines against original content
-function validateContextLines(searchStr: string, content: string, baseThreshold: number = 0.97): number {
+function validateContextLines(searchStr: string, content: string, confidenceThreshold: number): number {
 	// Extract just the context lines from the search string
 	const contextLines = searchStr.split("\n").filter((line) => !line.startsWith("-")) // Exclude removed lines
 
@@ -117,7 +104,7 @@ function validateContextLines(searchStr: string, content: string, baseThreshold:
 	const similarity = evaluateSimilarity(contextLines.join("\n"), content)
 
 	// Get adaptive threshold based on content size
-	const threshold = getAdaptiveThreshold(content.split("\n").length, baseThreshold)
+	const threshold = getAdaptiveThreshold(content.split("\n").length, confidenceThreshold)
 
 	// Calculate uniqueness boost
 	const uniquenessScore = evaluateContentUniqueness(searchStr, content.split("\n"))
@@ -207,8 +194,7 @@ function combineOverlappingMatches(
 	return combinedMatches
 }
 
-// Modified search functions to use sliding windows
-export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findExactMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult {
 	const searchLines = searchStr.split("\n")
 	const windows = createOverlappingWindows(content.slice(startIndex), searchLines.length)
 	const matches: (SearchResult & { windowIndex: number })[] = []
@@ -226,7 +212,7 @@ export function findExactMatch(searchStr: string, content: string[], startIndex:
 				.join("\n")
 
 			const similarity = getDMPSimilarity(searchStr, matchedContent)
-			const contextSimilarity = validateContextLines(searchStr, matchedContent)
+			const contextSimilarity = validateContextLines(searchStr, matchedContent, confidenceThreshold)
 			const confidence = Math.min(similarity, contextSimilarity)
 
 			matches.push({
@@ -243,7 +229,7 @@ export function findExactMatch(searchStr: string, content: string[], startIndex:
 }
 
 // String similarity strategy
-export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0, minScore: number = 0.8): SearchResult {
+export function findSimilarityMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult {
 	const searchLines = searchStr.split("\n")
 	let bestScore = 0
 	let bestIndex = -1
@@ -251,9 +237,9 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI
 	for (let i = startIndex; i < content.length - searchLines.length + 1; i++) {
 		const windowStr = content.slice(i, i + searchLines.length).join("\n")
 		const score = compareTwoStrings(searchStr, windowStr)
-		if (score > bestScore && score >= minScore) {
+		if (score > bestScore && score >= confidenceThreshold) {
 			const similarity = getDMPSimilarity(searchStr, windowStr)
-			const contextSimilarity = validateContextLines(searchStr, windowStr, minScore)
+			const contextSimilarity = validateContextLines(searchStr, windowStr, confidenceThreshold)
 			const adjustedScore = Math.min(similarity, contextSimilarity) * score
 
 			if (adjustedScore > bestScore) {
@@ -271,7 +257,7 @@ export function findSimilarityMatch(searchStr: string, content: string[], startI
 }
 
 // Levenshtein strategy
-export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findLevenshteinMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult {
 	const searchLines = searchStr.split("\n")
 	const candidates = []
 
@@ -283,7 +269,7 @@ export function findLevenshteinMatch(searchStr: string, content: string[], start
 		const closestMatch = closest(searchStr, candidates)
 		const index = startIndex + candidates.indexOf(closestMatch)
 		const similarity = getDMPSimilarity(searchStr, closestMatch)
-		const contextSimilarity = validateContextLines(searchStr, closestMatch)
+		const contextSimilarity = validateContextLines(searchStr, closestMatch, confidenceThreshold)
 		const confidence = Math.min(similarity, contextSimilarity)
 		return {
 			index,
@@ -355,7 +341,7 @@ function validateAnchorPositions(
 }
 
 // Anchor-based search strategy
-export function findAnchorMatch(searchStr: string, content: string[], startIndex: number = 0): SearchResult {
+export function findAnchorMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult {
 	const searchLines = searchStr.split("\n")
 	const anchors = identifyAnchors(searchStr, content.slice(startIndex))
 
@@ -370,7 +356,7 @@ export function findAnchorMatch(searchStr: string, content: string[], startIndex
 		const matchPosition = startIndex + offset
 		const matchedContent = content.slice(matchPosition, matchPosition + searchLines.length).join("\n")
 		const similarity = getDMPSimilarity(searchStr, matchedContent)
-		const contextSimilarity = validateContextLines(searchStr, matchedContent)
+		const contextSimilarity = validateContextLines(searchStr, matchedContent, confidenceThreshold)
 		const confidence = Math.min(similarity, contextSimilarity) * (1 + anchors[0].weight * 0.1) // Boost confidence based on anchor weight
 
 		return {
@@ -384,15 +370,18 @@ export function findAnchorMatch(searchStr: string, content: string[], startIndex
 }
 
 // Main search function that tries all strategies
-export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0, minConfidence: number = 0.97): SearchResult {
-	const strategies = [findExactMatch, findAnchorMatch, findSimilarityMatch, findLevenshteinMatch]
+export function findBestMatch(searchStr: string, content: string[], startIndex: number = 0, confidenceThreshold: number = 0.97): SearchResult {
+	const strategies = [
+		findExactMatch,
+		findAnchorMatch,
+		findSimilarityMatch,
+		findLevenshteinMatch
+	]
 
 	let bestResult: SearchResult = { index: -1, confidence: 0, strategy: "none" }
 
 	for (const strategy of strategies) {
-		const result = strategy === findSimilarityMatch 
-			? strategy(searchStr, content, startIndex, minConfidence)
-			: strategy(searchStr, content, startIndex)
+		const result = strategy(searchStr, content, startIndex, confidenceThreshold)
 		if (result.confidence > bestResult.confidence) {
 			bestResult = result
 		}