Browse Source

feat: add batch limiting to code indexer (#5891)

feat: add batch limiting to code indexer to control memory usage

- Add MAX_PENDING_BATCHES constant (20) to limit concurrent batches
- Implement backpressure mechanism to pause file parsing when limit reached
- Prevent memory overflow during large codebase indexing
Daniel 7 months ago
parent
commit
cc369da2d5

+ 1 - 0
src/services/code-index/constants/index.ts

@@ -20,6 +20,7 @@ export const BATCH_SEGMENT_THRESHOLD = 60 // Number of code segments to batch fo
 export const MAX_BATCH_RETRIES = 3
 export const INITIAL_RETRY_DELAY_MS = 500
 export const PARSING_CONCURRENCY = 10
+export const MAX_PENDING_BATCHES = 20 // Maximum number of batches to accumulate before waiting
 
 /**OpenAI Embedder */
 export const MAX_BATCH_TOKENS = 100000

+ 16 - 0
src/services/code-index/processors/scanner.ts

@@ -23,6 +23,7 @@ import {
 	INITIAL_RETRY_DELAY_MS,
 	PARSING_CONCURRENCY,
 	BATCH_PROCESSING_CONCURRENCY,
+	MAX_PENDING_BATCHES,
 } from "../constants"
 import { isPathInIgnoredDirectory } from "../../glob/ignore-utils"
 import { TelemetryService } from "@roo-code/telemetry"
@@ -98,6 +99,7 @@ export class DirectoryScanner implements IDirectoryScanner {
 		let currentBatchTexts: string[] = []
 		let currentBatchFileInfos: { filePath: string; fileHash: string; isNew: boolean }[] = []
 		const activeBatchPromises = new Set<Promise<void>>()
+		let pendingBatchCount = 0
 
 		// Initialize block counter
 		let totalBlockCount = 0
@@ -152,6 +154,12 @@ export class DirectoryScanner implements IDirectoryScanner {
 
 									// Check if batch threshold is met
 									if (currentBatchBlocks.length >= BATCH_SEGMENT_THRESHOLD) {
+										// Wait if we've reached the maximum pending batches
+										while (pendingBatchCount >= MAX_PENDING_BATCHES) {
+											// Wait for at least one batch to complete
+											await Promise.race(activeBatchPromises)
+										}
+
 										// Copy current batch data and clear accumulators
 										const batchBlocks = [...currentBatchBlocks]
 										const batchTexts = [...currentBatchTexts]
@@ -160,6 +168,9 @@ export class DirectoryScanner implements IDirectoryScanner {
 										currentBatchTexts = []
 										currentBatchFileInfos = []
 
+										// Increment pending batch count
+										pendingBatchCount++
+
 										// Queue batch processing
 										const batchPromise = batchLimiter(() =>
 											this.processBatch(
@@ -176,6 +187,7 @@ export class DirectoryScanner implements IDirectoryScanner {
 										// Clean up completed promises to prevent memory accumulation
 										batchPromise.finally(() => {
 											activeBatchPromises.delete(batchPromise)
+											pendingBatchCount--
 										})
 									}
 								} finally {
@@ -238,6 +250,9 @@ export class DirectoryScanner implements IDirectoryScanner {
 				currentBatchTexts = []
 				currentBatchFileInfos = []
 
+				// Increment pending batch count for final batch
+				pendingBatchCount++
+
 				// Queue final batch processing
 				const batchPromise = batchLimiter(() =>
 					this.processBatch(batchBlocks, batchTexts, batchFileInfos, scanWorkspace, onError, onBlocksIndexed),
@@ -247,6 +262,7 @@ export class DirectoryScanner implements IDirectoryScanner {
 				// Clean up completed promises to prevent memory accumulation
 				batchPromise.finally(() => {
 					activeBatchPromises.delete(batchPromise)
+					pendingBatchCount--
 				})
 			} finally {
 				release()