Browse Source

Fix: Enhanced codebase index recovery and reuse ('Start Indexing' button now reuses existing Qdrant index) (#8588)

Co-authored-by: daniel-lxs <[email protected]>
Seth Miller 2 months ago
parent
commit
f9d6fe7985

+ 19 - 11
src/core/webview/webviewMessageHandler.ts

@@ -2720,18 +2720,26 @@ export const webviewMessageHandler = async (
 					return
 				}
 				if (manager.isFeatureEnabled && manager.isFeatureConfigured) {
-					if (!manager.isInitialized) {
-						await manager.initialize(provider.contextProxy)
-					}
-
-					// startIndexing now handles error recovery internally
-					manager.startIndexing()
-
-					// If startIndexing recovered from error, we need to reinitialize
-					if (!manager.isInitialized) {
-						await manager.initialize(provider.contextProxy)
-						// Try starting again after initialization
+					// Mimic extension startup behavior: initialize first, which will
+					// check if Qdrant container is active and reuse existing collection
+					await manager.initialize(provider.contextProxy)
+
+					// Only call startIndexing if we're in a state that requires it
+					// (e.g., Standby or Error). If already Indexed or Indexing, the
+					// initialize() call above will have already started the watcher.
+					const currentState = manager.state
+					if (currentState === "Standby" || currentState === "Error") {
+						// startIndexing now handles error recovery internally
 						manager.startIndexing()
+
+						// If startIndexing recovered from error, we need to reinitialize
+						if (!manager.isInitialized) {
+							await manager.initialize(provider.contextProxy)
+							// Try starting again after initialization
+							if (manager.state === "Standby" || manager.state === "Error") {
+								manager.startIndexing()
+							}
+						}
 					}
 				}
 			} catch (error) {

+ 160 - 0
src/services/code-index/__tests__/orchestrator.spec.ts

@@ -0,0 +1,160 @@
+import { describe, it, expect, beforeEach, vi } from "vitest"
+import { CodeIndexOrchestrator } from "../orchestrator"
+
+// Stub the vscode module; one workspace folder is exposed so startIndexing passes its workspace check
+vi.mock("vscode", () => {
+	const nodePath = require("path")
+	// Rooted at the platform separator, i.e. "/test/workspace" on POSIX
+	const fsPath = nodePath.join(nodePath.sep, "test", "workspace")
+
+	// File-system watcher double: every event hook returns a disposable
+	const watcher = {
+		onDidCreate: vi.fn().mockReturnValue({ dispose: vi.fn() }),
+		onDidChange: vi.fn().mockReturnValue({ dispose: vi.fn() }),
+		onDidDelete: vi.fn().mockReturnValue({ dispose: vi.fn() }),
+		dispose: vi.fn(),
+	}
+
+	const workspace = {
+		workspaceFolders: [{ uri: { fsPath }, name: "test", index: 0 }],
+		createFileSystemWatcher: vi.fn().mockReturnValue(watcher),
+	}
+
+	return {
+		window: { activeTextEditor: null },
+		workspace,
+		// RelativePattern double simply echoes back its two constructor arguments
+		RelativePattern: vi.fn().mockImplementation((base: string, pattern: string) => ({ base, pattern })),
+	}
+})
+
+// Replace TelemetryService with a double whose captureEvent is a mock function
+vi.mock("@roo-code/telemetry", () => {
+	const instance = { captureEvent: vi.fn() }
+
+	return {
+		TelemetryService: { instance },
+	}
+})
+
+// Mock the i18n translator used by the orchestrator; vi.mock paths resolve relative to this spec file
+vi.mock("../../../i18n", () => ({
+	t: (key: string, params?: any) => {
+		if (key === "embeddings:orchestrator.failedDuringInitialScan" && params?.errorMessage) {
+			return `Failed during initial scan: ${params.errorMessage}`
+		}
+		return key
+	},
+}))
+
+describe("CodeIndexOrchestrator - error path cleanup gating", () => {
+	// Pins when startIndexing's error path clears the collection and the cache file:
+	// never when vectorStore.initialize() rejects, once each when a later step rejects.
+
+	const workspacePath = "/test/workspace"
+
+	// Collaborator doubles; each is rebuilt in beforeEach
+	let configManager: any
+	let stateManager: any
+	let cacheManager: any
+	let vectorStore: any
+	let scanner: any
+	let fileWatcher: any
+
+	/** Creates an event-subscription stub that returns a disposable. */
+	const subscriptionStub = () => vi.fn().mockReturnValue({ dispose: vi.fn() })
+
+	/** Constructs the orchestrator under test from the current doubles. */
+	const buildOrchestrator = () =>
+		new CodeIndexOrchestrator(
+			configManager,
+			stateManager,
+			workspacePath,
+			cacheManager,
+			vectorStore,
+			scanner,
+			fileWatcher,
+		)
+
+	/** Asserts setSystemState was called and that its final call targeted "Error". */
+	const expectEndedInErrorState = () => {
+		expect(stateManager.setSystemState).toHaveBeenCalled()
+		// Only the last recorded transition is inspected
+		const { calls } = stateManager.setSystemState.mock
+		const [finalState] = calls[calls.length - 1]
+		expect(finalState).toBe("Error")
+	}
+
+	beforeEach(() => {
+		vi.clearAllMocks()
+
+		configManager = { isFeatureConfigured: true }
+
+		// State manager double: setSystemState stores the new state, the getter reads it back
+		let trackedState = "Standby"
+		stateManager = {
+			get state() {
+				return trackedState
+			},
+			setSystemState: vi.fn().mockImplementation((nextState: string, _msg: string) => {
+				trackedState = nextState
+			}),
+			reportFileQueueProgress: vi.fn(),
+			reportBlockIndexingProgress: vi.fn(),
+		}
+
+		cacheManager = { clearCacheFile: vi.fn().mockResolvedValue(undefined) }
+
+		vectorStore = {
+			initialize: vi.fn(),
+			hasIndexedData: vi.fn(),
+			markIndexingIncomplete: vi.fn(),
+			markIndexingComplete: vi.fn(),
+			clearCollection: vi.fn().mockResolvedValue(undefined),
+		}
+
+		scanner = { scanDirectory: vi.fn() }
+
+		fileWatcher = {
+			initialize: vi.fn().mockResolvedValue(undefined),
+			onDidStartBatchProcessing: subscriptionStub(),
+			onBatchProgressUpdate: subscriptionStub(),
+			onDidFinishBatchProcessing: subscriptionStub(),
+			dispose: vi.fn(),
+		}
+	})
+
+	it("should not call clearCollection() or clear cache when initialize() fails (indexing not started)", async () => {
+		// Arrange: the vector store rejects in initialize()
+		vectorStore.initialize.mockRejectedValue(new Error("Qdrant unreachable"))
+		const orchestrator = buildOrchestrator()
+
+		// Act
+		await orchestrator.startIndexing()
+
+		// Assert: neither the collection nor the cache file was cleared
+		expect(vectorStore.clearCollection).not.toHaveBeenCalled()
+		expect(cacheManager.clearCacheFile).not.toHaveBeenCalled()
+
+		// Assert: the run finished in the Error state
+		expectEndedInErrorState()
+	})
+
+	it("should call clearCollection() and clear cache when an error occurs after initialize() succeeds (indexing started)", async () => {
+		// Arrange: initialize() resolves, then markIndexingIncomplete() rejects
+		vectorStore.initialize.mockResolvedValue(false) // existing collection
+		vectorStore.hasIndexedData.mockResolvedValue(false) // force full scan path
+		vectorStore.markIndexingIncomplete.mockRejectedValue(new Error("mark incomplete failure"))
+		const orchestrator = buildOrchestrator()
+
+		// Act
+		await orchestrator.startIndexing()
+
+		// Assert: both cleanup steps ran exactly once
+		expect(vectorStore.clearCollection).toHaveBeenCalledTimes(1)
+		expect(cacheManager.clearCacheFile).toHaveBeenCalledTimes(1)
+
+		// Assert: the run finished in the Error state
+		expectEndedInErrorState()
+	})
+})

+ 18 - 0
src/services/code-index/interfaces/vector-store.ts

@@ -62,6 +62,24 @@ export interface IVectorStore {
 	 * @returns Promise resolving to boolean indicating if the collection exists
 	 */
 	collectionExists(): Promise<boolean>
+
+	/**
+	 * Checks if the collection exists and has indexed points (a stored completion marker, if present, must also report complete)
+	 * @returns Promise resolving to boolean indicating if the collection exists and has points
+	 */
+	hasIndexedData(): Promise<boolean>
+
+	/**
+	 * Marks the indexing process as complete by storing metadata
+	 * Should be called after a successful full workspace scan or incremental scan
+	 */
+	markIndexingComplete(): Promise<void>
+
+	/**
+	 * Marks the indexing process as incomplete by storing metadata
+	 * Should be called at the start of indexing to indicate work in progress
+	 */
+	markIndexingIncomplete(): Promise<void>
 }
 
 export interface VectorStoreSearchResult {

+ 161 - 68
src/services/code-index/orchestrator.ts

@@ -123,86 +123,164 @@ export class CodeIndexOrchestrator {
 		this._isProcessing = true
 		this.stateManager.setSystemState("Indexing", "Initializing services...")
 
+		// Track whether we successfully connected to Qdrant and started indexing
+		// This helps us decide whether to preserve cache on error
+		let indexingStarted = false
+
 		try {
 			const collectionCreated = await this.vectorStore.initialize()
 
+			// Successfully connected to Qdrant
+			indexingStarted = true
+
 			if (collectionCreated) {
 				await this.cacheManager.clearCacheFile()
 			}
 
-			this.stateManager.setSystemState("Indexing", "Services ready. Starting workspace scan...")
+			// Check if the collection already has indexed data
+			// If it does, we can skip the full scan and run an incremental scan before starting the watcher
+			const hasExistingData = await this.vectorStore.hasIndexedData()
 
-			let cumulativeBlocksIndexed = 0
-			let cumulativeBlocksFoundSoFar = 0
-			let batchErrors: Error[] = []
+			if (hasExistingData && !collectionCreated) {
+				// Collection exists with data - run incremental scan to catch any new/changed files
+				// This handles files added while workspace was closed or Qdrant was inactive
+				console.log(
+					"[CodeIndexOrchestrator] Collection already has indexed data. Running incremental scan for new/changed files...",
+				)
+				this.stateManager.setSystemState("Indexing", "Checking for new or modified files...")
 
-			const handleFileParsed = (fileBlockCount: number) => {
-				cumulativeBlocksFoundSoFar += fileBlockCount
-				this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
-			}
+				// Mark as incomplete at the start of incremental scan
+				await this.vectorStore.markIndexingIncomplete()
 
-			const handleBlocksIndexed = (indexedCount: number) => {
-				cumulativeBlocksIndexed += indexedCount
-				this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
-			}
+				let cumulativeBlocksIndexed = 0
+				let cumulativeBlocksFoundSoFar = 0
+				let batchErrors: Error[] = []
 
-			const result = await this.scanner.scanDirectory(
-				this.workspacePath,
-				(batchError: Error) => {
-					console.error(
-						`[CodeIndexOrchestrator] Error during initial scan batch: ${batchError.message}`,
-						batchError,
-					)
-					batchErrors.push(batchError)
-				},
-				handleBlocksIndexed,
-				handleFileParsed,
-			)
+				const handleFileParsed = (fileBlockCount: number) => {
+					cumulativeBlocksFoundSoFar += fileBlockCount
+					this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
+				}
 
-			if (!result) {
-				throw new Error("Scan failed, is scanner initialized?")
-			}
+				const handleBlocksIndexed = (indexedCount: number) => {
+					cumulativeBlocksIndexed += indexedCount
+					this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
+				}
 
-			const { stats } = result
+				// Run incremental scan - scanner will skip unchanged files using cache
+				const result = await this.scanner.scanDirectory(
+					this.workspacePath,
+					(batchError: Error) => {
+						console.error(
+							`[CodeIndexOrchestrator] Error during incremental scan batch: ${batchError.message}`,
+							batchError,
+						)
+						batchErrors.push(batchError)
+					},
+					handleBlocksIndexed,
+					handleFileParsed,
+				)
 
-			// Check if any blocks were actually indexed successfully
-			// If no blocks were indexed but blocks were found, it means all batches failed
-			if (cumulativeBlocksIndexed === 0 && cumulativeBlocksFoundSoFar > 0) {
-				if (batchErrors.length > 0) {
-					// Use the first batch error as it's likely representative of the main issue
-					const firstError = batchErrors[0]
-					throw new Error(`Indexing failed: ${firstError.message}`)
+				if (!result) {
+					throw new Error("Incremental scan failed, is scanner initialized?")
+				}
+
+				// If new files were found and indexed, log the results
+				if (cumulativeBlocksFoundSoFar > 0) {
+					console.log(
+						`[CodeIndexOrchestrator] Incremental scan completed: ${cumulativeBlocksIndexed} blocks indexed from new/changed files`,
+					)
 				} else {
-					throw new Error(t("embeddings:orchestrator.indexingFailedNoBlocks"))
+					console.log("[CodeIndexOrchestrator] No new or changed files found")
 				}
-			}
 
-			// Check for partial failures - if a significant portion of blocks failed
-			const failureRate = (cumulativeBlocksFoundSoFar - cumulativeBlocksIndexed) / cumulativeBlocksFoundSoFar
-			if (batchErrors.length > 0 && failureRate > 0.1) {
-				// More than 10% of blocks failed to index
-				const firstError = batchErrors[0]
-				throw new Error(
-					`Indexing partially failed: Only ${cumulativeBlocksIndexed} of ${cumulativeBlocksFoundSoFar} blocks were indexed. ${firstError.message}`,
+				await this._startWatcher()
+
+				// Mark indexing as complete after successful incremental scan
+				await this.vectorStore.markIndexingComplete()
+
+				this.stateManager.setSystemState("Indexed", t("embeddings:orchestrator.fileWatcherStarted"))
+			} else {
+				// No existing data or collection was just created - do a full scan
+				this.stateManager.setSystemState("Indexing", "Services ready. Starting workspace scan...")
+
+				// Mark as incomplete at the start of full scan
+				await this.vectorStore.markIndexingIncomplete()
+
+				let cumulativeBlocksIndexed = 0
+				let cumulativeBlocksFoundSoFar = 0
+				let batchErrors: Error[] = []
+
+				const handleFileParsed = (fileBlockCount: number) => {
+					cumulativeBlocksFoundSoFar += fileBlockCount
+					this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
+				}
+
+				const handleBlocksIndexed = (indexedCount: number) => {
+					cumulativeBlocksIndexed += indexedCount
+					this.stateManager.reportBlockIndexingProgress(cumulativeBlocksIndexed, cumulativeBlocksFoundSoFar)
+				}
+
+				const result = await this.scanner.scanDirectory(
+					this.workspacePath,
+					(batchError: Error) => {
+						console.error(
+							`[CodeIndexOrchestrator] Error during initial scan batch: ${batchError.message}`,
+							batchError,
+						)
+						batchErrors.push(batchError)
+					},
+					handleBlocksIndexed,
+					handleFileParsed,
 				)
-			}
 
-			// CRITICAL: If there were ANY batch errors and NO blocks were successfully indexed,
-			// this is a complete failure regardless of the failure rate calculation
-			if (batchErrors.length > 0 && cumulativeBlocksIndexed === 0) {
-				const firstError = batchErrors[0]
-				throw new Error(`Indexing failed completely: ${firstError.message}`)
-			}
+				if (!result) {
+					throw new Error("Scan failed, is scanner initialized?")
+				}
 
-			// Final sanity check: If we found blocks but indexed none and somehow no errors were reported,
-			// this is still a failure
-			if (cumulativeBlocksFoundSoFar > 0 && cumulativeBlocksIndexed === 0) {
-				throw new Error(t("embeddings:orchestrator.indexingFailedCritical"))
-			}
+				const { stats } = result
+
+				// Check if any blocks were actually indexed successfully
+				// If no blocks were indexed but blocks were found, it means all batches failed
+				if (cumulativeBlocksIndexed === 0 && cumulativeBlocksFoundSoFar > 0) {
+					if (batchErrors.length > 0) {
+						// Use the first batch error as it's likely representative of the main issue
+						const firstError = batchErrors[0]
+						throw new Error(`Indexing failed: ${firstError.message}`)
+					} else {
+						throw new Error(t("embeddings:orchestrator.indexingFailedNoBlocks"))
+					}
+				}
+
+				// Check for partial failures - if a significant portion of blocks failed
+				const failureRate = (cumulativeBlocksFoundSoFar - cumulativeBlocksIndexed) / cumulativeBlocksFoundSoFar
+				if (batchErrors.length > 0 && failureRate > 0.1) {
+					// More than 10% of blocks failed to index
+					const firstError = batchErrors[0]
+					throw new Error(
+						`Indexing partially failed: Only ${cumulativeBlocksIndexed} of ${cumulativeBlocksFoundSoFar} blocks were indexed. ${firstError.message}`,
+					)
+				}
+
+				// CRITICAL: If there were ANY batch errors and NO blocks were successfully indexed,
+				// this is a complete failure regardless of the failure rate calculation
+				if (batchErrors.length > 0 && cumulativeBlocksIndexed === 0) {
+					const firstError = batchErrors[0]
+					throw new Error(`Indexing failed completely: ${firstError.message}`)
+				}
 
-			await this._startWatcher()
+				// Final sanity check: If we found blocks but indexed none and somehow no errors were reported,
+				// this is still a failure
+				if (cumulativeBlocksFoundSoFar > 0 && cumulativeBlocksIndexed === 0) {
+					throw new Error(t("embeddings:orchestrator.indexingFailedCritical"))
+				}
+
+				await this._startWatcher()
 
-			this.stateManager.setSystemState("Indexed", t("embeddings:orchestrator.fileWatcherStarted"))
+				// Mark indexing as complete after successful full scan
+				await this.vectorStore.markIndexingComplete()
+
+				this.stateManager.setSystemState("Indexed", t("embeddings:orchestrator.fileWatcherStarted"))
+			}
 		} catch (error: any) {
 			console.error("[CodeIndexOrchestrator] Error during indexing:", error)
 			TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
@@ -210,18 +288,33 @@ export class CodeIndexOrchestrator {
 				stack: error instanceof Error ? error.stack : undefined,
 				location: "startIndexing",
 			})
-			try {
-				await this.vectorStore.clearCollection()
-			} catch (cleanupError) {
-				console.error("[CodeIndexOrchestrator] Failed to clean up after error:", cleanupError)
-				TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
-					error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError),
-					stack: cleanupError instanceof Error ? cleanupError.stack : undefined,
-					location: "startIndexing.cleanup",
-				})
+			if (indexingStarted) {
+				try {
+					await this.vectorStore.clearCollection()
+				} catch (cleanupError) {
+					console.error("[CodeIndexOrchestrator] Failed to clean up after error:", cleanupError)
+					TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
+						error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError),
+						stack: cleanupError instanceof Error ? cleanupError.stack : undefined,
+						location: "startIndexing.cleanup",
+					})
+				}
 			}
 
-			await this.cacheManager.clearCacheFile()
+			// Only clear cache if indexing had started (Qdrant connection succeeded)
+			// If we never connected to Qdrant, preserve cache for incremental scan when it comes back
+			if (indexingStarted) {
+				// Indexing started but failed mid-way - clear cache to avoid cache-Qdrant mismatch
+				await this.cacheManager.clearCacheFile()
+				console.log(
+					"[CodeIndexOrchestrator] Indexing failed after starting. Clearing cache to avoid inconsistency.",
+				)
+			} else {
+				// Never connected to Qdrant - preserve cache for future incremental scan
+				console.log(
+					"[CodeIndexOrchestrator] Failed to connect to Qdrant. Preserving cache for future incremental scan.",
+				)
+			}
 
 			this.stateManager.setSystemState(
 				"Error",

+ 84 - 75
src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts

@@ -1260,9 +1260,9 @@ describe("QdrantVectorStore", () => {
 			const results = await vectorStore.search(queryVector)
 
 			expect(mockQdrantClientInstance.query).toHaveBeenCalledTimes(1)
-			expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+			const callArgs = mockQdrantClientInstance.query.mock.calls[0][1]
+			expect(callArgs).toMatchObject({
 				query: queryVector,
-				filter: undefined,
 				score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 				limit: DEFAULT_MAX_SEARCH_RESULTS,
 				params: {
@@ -1273,6 +1273,9 @@ describe("QdrantVectorStore", () => {
 					include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
 				},
 			})
+			expect(callArgs.filter).toEqual({
+				must_not: [{ key: "type", match: { value: "metadata" } }],
+			})
 
 			expect(results).toEqual(mockQdrantResults.points)
 		})
@@ -1300,29 +1303,20 @@ describe("QdrantVectorStore", () => {
 
 			const results = await vectorStore.search(queryVector, directoryPrefix)
 
-			expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+			const callArgs2 = mockQdrantClientInstance.query.mock.calls[0][1]
+			expect(callArgs2).toMatchObject({
 				query: queryVector,
-				filter: {
-					must: [
-						{
-							key: "pathSegments.0",
-							match: { value: "src" },
-						},
-						{
-							key: "pathSegments.1",
-							match: { value: "components" },
-						},
-					],
-				},
 				score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 				limit: DEFAULT_MAX_SEARCH_RESULTS,
-				params: {
-					hnsw_ef: 128,
-					exact: false,
-				},
-				with_payload: {
-					include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
-				},
+				params: { hnsw_ef: 128, exact: false },
+				with_payload: { include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"] },
+			})
+			expect(callArgs2.filter).toEqual({
+				must: [
+					{ key: "pathSegments.0", match: { value: "src" } },
+					{ key: "pathSegments.1", match: { value: "components" } },
+				],
+				must_not: [{ key: "type", match: { value: "metadata" } }],
 			})
 
 			expect(results).toEqual(mockQdrantResults.points)
@@ -1337,9 +1331,9 @@ describe("QdrantVectorStore", () => {
 
 			await vectorStore.search(queryVector, undefined, customMinScore)
 
-			expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+			const callArgs3 = mockQdrantClientInstance.query.mock.calls[0][1]
+			expect(callArgs3).toMatchObject({
 				query: queryVector,
-				filter: undefined,
 				score_threshold: customMinScore,
 				limit: DEFAULT_MAX_SEARCH_RESULTS,
 				params: {
@@ -1350,6 +1344,9 @@ describe("QdrantVectorStore", () => {
 					include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
 				},
 			})
+			expect(callArgs3.filter).toEqual({
+				must_not: [{ key: "type", match: { value: "metadata" } }],
+			})
 		})
 
 		it("should use custom maxResults when provided", async () => {
@@ -1361,9 +1358,9 @@ describe("QdrantVectorStore", () => {
 
 			await vectorStore.search(queryVector, undefined, undefined, customMaxResults)
 
-			expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+			const callArgs4 = mockQdrantClientInstance.query.mock.calls[0][1]
+			expect(callArgs4).toMatchObject({
 				query: queryVector,
-				filter: undefined,
 				score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 				limit: customMaxResults,
 				params: {
@@ -1374,6 +1371,9 @@ describe("QdrantVectorStore", () => {
 					include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
 				},
 			})
+			expect(callArgs4.filter).toEqual({
+				must_not: [{ key: "type", match: { value: "metadata" } }],
+			})
 		})
 
 		it("should filter out results with invalid payloads", async () => {
@@ -1489,28 +1489,9 @@ describe("QdrantVectorStore", () => {
 
 			await vectorStore.search(queryVector, directoryPrefix)
 
-			expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+			const callArgs5 = mockQdrantClientInstance.query.mock.calls[0][1]
+			expect(callArgs5).toMatchObject({
 				query: queryVector,
-				filter: {
-					must: [
-						{
-							key: "pathSegments.0",
-							match: { value: "src" },
-						},
-						{
-							key: "pathSegments.1",
-							match: { value: "components" },
-						},
-						{
-							key: "pathSegments.2",
-							match: { value: "ui" },
-						},
-						{
-							key: "pathSegments.3",
-							match: { value: "forms" },
-						},
-					],
-				},
 				score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 				limit: DEFAULT_MAX_SEARCH_RESULTS,
 				params: {
@@ -1521,6 +1502,15 @@ describe("QdrantVectorStore", () => {
 					include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
 				},
 			})
+			expect(callArgs5.filter).toEqual({
+				must: [
+					{ key: "pathSegments.0", match: { value: "src" } },
+					{ key: "pathSegments.1", match: { value: "components" } },
+					{ key: "pathSegments.2", match: { value: "ui" } },
+					{ key: "pathSegments.3", match: { value: "forms" } },
+				],
+				must_not: [{ key: "type", match: { value: "metadata" } }],
+			})
 		})
 
 		it("should handle error scenarios when qdrantClient.query fails", async () => {
@@ -1573,9 +1563,9 @@ describe("QdrantVectorStore", () => {
 
 				const results = await vectorStore.search(queryVector, directoryPrefix)
 
-				expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+				const callArgs7 = mockQdrantClientInstance.query.mock.calls[0][1]
+				expect(callArgs7).toMatchObject({
 					query: queryVector,
-					filter: undefined, // Should be undefined for current directory
 					score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 					limit: DEFAULT_MAX_SEARCH_RESULTS,
 					params: {
@@ -1586,6 +1576,9 @@ describe("QdrantVectorStore", () => {
 						include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
 					},
 				})
+				expect(callArgs7.filter).toEqual({
+					must_not: [{ key: "type", match: { value: "metadata" } }],
+				})
 
 				expect(results).toEqual(mockQdrantResults.points)
 			})
@@ -1599,9 +1592,9 @@ describe("QdrantVectorStore", () => {
 
 				await vectorStore.search(queryVector, directoryPrefix)
 
-				expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+				const callArgs6 = mockQdrantClientInstance.query.mock.calls[0][1]
+				expect(callArgs6).toMatchObject({
 					query: queryVector,
-					filter: undefined, // Should be undefined for current directory
 					score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 					limit: DEFAULT_MAX_SEARCH_RESULTS,
 					params: {
@@ -1612,6 +1605,9 @@ describe("QdrantVectorStore", () => {
 						include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
 					},
 				})
+				expect(callArgs6.filter).toEqual({
+					must_not: [{ key: "type", match: { value: "metadata" } }],
+				})
 			})
 
 			it("should not apply filter when directoryPrefix is empty string", async () => {
@@ -1623,9 +1619,9 @@ describe("QdrantVectorStore", () => {
 
 				await vectorStore.search(queryVector, directoryPrefix)
 
-				expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+				const callArgs8 = mockQdrantClientInstance.query.mock.calls[0][1]
+				expect(callArgs8).toMatchObject({
 					query: queryVector,
-					filter: undefined, // Should be undefined for empty string
 					score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 					limit: DEFAULT_MAX_SEARCH_RESULTS,
 					params: {
@@ -1636,6 +1632,9 @@ describe("QdrantVectorStore", () => {
 						include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
 					},
 				})
+				expect(callArgs8.filter).toEqual({
+					must_not: [{ key: "type", match: { value: "metadata" } }],
+				})
 			})
 
 			it("should not apply filter when directoryPrefix is '.\\' (Windows style)", async () => {
@@ -1647,9 +1646,9 @@ describe("QdrantVectorStore", () => {
 
 				await vectorStore.search(queryVector, directoryPrefix)
 
-				expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+				const callArgs9 = mockQdrantClientInstance.query.mock.calls[0][1]
+				expect(callArgs9).toMatchObject({
 					query: queryVector,
-					filter: undefined, // Should be undefined for Windows current directory
 					score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 					limit: DEFAULT_MAX_SEARCH_RESULTS,
 					params: {
@@ -1660,6 +1659,9 @@ describe("QdrantVectorStore", () => {
 						include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
 					},
 				})
+				expect(callArgs9.filter).toEqual({
+					must_not: [{ key: "type", match: { value: "metadata" } }],
+				})
 			})
 
 			it("should not apply filter when directoryPrefix has trailing slashes", async () => {
@@ -1671,9 +1673,9 @@ describe("QdrantVectorStore", () => {
 
 				await vectorStore.search(queryVector, directoryPrefix)
 
-				expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+				const callArgs10 = mockQdrantClientInstance.query.mock.calls[0][1]
+				expect(callArgs10).toMatchObject({
 					query: queryVector,
-					filter: undefined, // Should be undefined after normalizing trailing slashes
 					score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 					limit: DEFAULT_MAX_SEARCH_RESULTS,
 					params: {
@@ -1684,6 +1686,9 @@ describe("QdrantVectorStore", () => {
 						include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
 					},
 				})
+				expect(callArgs10.filter).toEqual({
+					must_not: [{ key: "type", match: { value: "metadata" } }],
+				})
 			})
 
 			it("should still apply filter for relative paths like './src'", async () => {
@@ -1695,16 +1700,9 @@ describe("QdrantVectorStore", () => {
 
 				await vectorStore.search(queryVector, directoryPrefix)
 
-				expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+				const callArgs11 = mockQdrantClientInstance.query.mock.calls[0][1]
+				expect(callArgs11).toMatchObject({
 					query: queryVector,
-					filter: {
-						must: [
-							{
-								key: "pathSegments.0",
-								match: { value: "src" },
-							},
-						],
-					}, // Should normalize "./src" to "src"
 					score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 					limit: DEFAULT_MAX_SEARCH_RESULTS,
 					params: {
@@ -1715,6 +1713,15 @@ describe("QdrantVectorStore", () => {
 						include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
 					},
 				})
+				expect(callArgs11.filter).toEqual({
+					must: [
+						{
+							key: "pathSegments.0",
+							match: { value: "src" },
+						},
+					],
+					must_not: [{ key: "type", match: { value: "metadata" } }],
+				}) // Should normalize "./src" to "src"
 			})
 
 			it("should still apply filter for regular directory paths", async () => {
@@ -1726,16 +1733,9 @@ describe("QdrantVectorStore", () => {
 
 				await vectorStore.search(queryVector, directoryPrefix)
 
-				expect(mockQdrantClientInstance.query).toHaveBeenCalledWith(expectedCollectionName, {
+				const callArgs12 = mockQdrantClientInstance.query.mock.calls[0][1]
+				expect(callArgs12).toMatchObject({
 					query: queryVector,
-					filter: {
-						must: [
-							{
-								key: "pathSegments.0",
-								match: { value: "src" },
-							},
-						],
-					}, // Should still create filter for regular paths
 					score_threshold: DEFAULT_SEARCH_MIN_SCORE,
 					limit: DEFAULT_MAX_SEARCH_RESULTS,
 					params: {
@@ -1746,6 +1746,15 @@ describe("QdrantVectorStore", () => {
 						include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
 					},
 				})
+				expect(callArgs12.filter).toEqual({
+					must: [
+						{
+							key: "pathSegments.0",
+							match: { value: "src" },
+						},
+					],
+					must_not: [{ key: "type", match: { value: "metadata" } }],
+				}) // Should still create filter for regular paths
 			})
 		})
 	})

+ 120 - 4
src/services/code-index/vector-store/qdrant-client.ts

@@ -1,10 +1,10 @@
 import { QdrantClient, Schemas } from "@qdrant/js-client-rest"
 import { createHash } from "crypto"
 import * as path from "path"
-import { getWorkspacePath } from "../../../utils/path"
+import { v5 as uuidv5 } from "uuid"
 import { IVectorStore } from "../interfaces/vector-store"
 import { Payload, VectorStoreSearchResult } from "../interfaces"
-import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE } from "../constants"
+import { DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE, QDRANT_CODE_BLOCK_NAMESPACE } from "../constants"
 import { t } from "../../../i18n"
 
 /**
@@ -386,7 +386,12 @@ export class QdrantVectorStore implements IVectorStore {
 		maxResults?: number,
 	): Promise<VectorStoreSearchResult[]> {
 		try {
-			let filter = undefined
+			let filter:
+				| {
+						must: Array<{ key: string; match: { value: string } }>
+						must_not?: Array<{ key: string; match: { value: string } }>
+				  }
+				| undefined = undefined
 
 			if (directoryPrefix) {
 				// Check if the path represents current directory
@@ -412,9 +417,18 @@ export class QdrantVectorStore implements IVectorStore {
 				}
 			}
 
+			// Always exclude metadata points at query-time to avoid wasting top-k
+			const metadataExclusion = {
+				must_not: [{ key: "type", match: { value: "metadata" } }],
+			}
+
+			const mergedFilter = filter
+				? { ...filter, must_not: [...(filter.must_not || []), ...metadataExclusion.must_not] }
+				: metadataExclusion
+
 			const searchRequest = {
 				query: queryVector,
-				filter,
+				filter: mergedFilter,
 				score_threshold: minScore ?? DEFAULT_SEARCH_MIN_SCORE,
 				limit: maxResults ?? DEFAULT_MAX_SEARCH_RESULTS,
 				params: {
@@ -548,4 +562,106 @@ export class QdrantVectorStore implements IVectorStore {
 		const collectionInfo = await this.getCollectionInfo()
 		return collectionInfo !== null
 	}
+
+	/**
+	 * Checks whether the collection exists, has points, and - when an indexing completion marker is stored - is marked as fully indexed
+	 * @returns Promise resolving to true when the index is considered complete; false if the collection is missing, empty, marked incomplete, or the check throws
+	 */
+	async hasIndexedData(): Promise<boolean> {
+		try {
+			const collectionInfo = await this.getCollectionInfo()
+			if (!collectionInfo) {
+				return false
+			}
+			// An existing but empty collection (points_count of 0 or absent) counts as not indexed
+			const pointsCount = collectionInfo.points_count ?? 0
+			if (pointsCount === 0) {
+				return false
+			}
+
+			// Look up the indexing completion marker, a metadata point stored under a fixed id
+			// The id is a deterministic uuidv5 of a constant string, the same id markIndexingComplete/markIndexingIncomplete write to
+			const metadataId = uuidv5("__indexing_metadata__", QDRANT_CODE_BLOCK_NAMESPACE)
+			const metadataPoints = await this.client.retrieve(this.collectionName, {
+				ids: [metadataId],
+			})
+
+			// If the marker exists, only an explicit indexing_complete === true counts as complete
+			if (metadataPoints.length > 0) {
+				return metadataPoints[0].payload?.indexing_complete === true
+			}
+
+			// Backward compatibility: no marker exists (old index or pre-marker version)
+			// Fall back to the old logic and assume complete if the collection has points (pointsCount is already non-zero here)
+			console.log(
+				"[QdrantVectorStore] No indexing metadata marker found. Using backward compatibility mode (checking points_count > 0).",
+			)
+			return pointsCount > 0
+		} catch (error) {
+			console.warn("[QdrantVectorStore] Failed to check if collection has data:", error)
+			return false
+		}
+	}
+
+	/**
+	 * Marks the indexing process as complete by upserting a metadata point (indexing_complete: true, completed_at: current time) under a fixed id
+	 * Should be called after a successful full workspace scan or incremental scan; logs and rethrows if the upsert fails
+	 */
+	async markIndexingComplete(): Promise<void> {
+		try {
+			// The marker is stored at a deterministic UUID, so every call targets the same point
+			// uuidv5 derives that UUID from a constant string and QDRANT_CODE_BLOCK_NAMESPACE (same id hasIndexedData retrieves)
+			const metadataId = uuidv5("__indexing_metadata__", QDRANT_CODE_BLOCK_NAMESPACE)
+
+			await this.client.upsert(this.collectionName, {
+				points: [
+					{
+						id: metadataId,
+						vector: new Array(this.vectorSize).fill(0), // all-zero placeholder vector of length this.vectorSize
+						payload: {
+							type: "metadata", // the query-time search filter excludes points with this type
+							indexing_complete: true,
+							completed_at: Date.now(),
+						},
+					},
+				],
+				wait: true,
+			})
+			console.log("[QdrantVectorStore] Marked indexing as complete")
+		} catch (error) {
+			console.error("[QdrantVectorStore] Failed to mark indexing as complete:", error)
+			throw error
+		}
+	}
+
+	/**
+	 * Marks the indexing process as incomplete by upserting a metadata point (indexing_complete: false, started_at: current time) under a fixed id
+	 * Should be called at the start of indexing to indicate work in progress; logs and rethrows if the upsert fails
+	 */
+	async markIndexingIncomplete(): Promise<void> {
+		try {
+			// The marker is stored at a deterministic UUID, so every call targets the same point
+			// uuidv5 derives that UUID from a constant string and QDRANT_CODE_BLOCK_NAMESPACE (same id hasIndexedData retrieves)
+			const metadataId = uuidv5("__indexing_metadata__", QDRANT_CODE_BLOCK_NAMESPACE)
+
+			await this.client.upsert(this.collectionName, {
+				points: [
+					{
+						id: metadataId,
+						vector: new Array(this.vectorSize).fill(0), // all-zero placeholder vector of length this.vectorSize
+						payload: {
+							type: "metadata", // the query-time search filter excludes points with this type
+							indexing_complete: false,
+							started_at: Date.now(),
+						},
+					},
+				],
+				wait: true,
+			})
+			console.log("[QdrantVectorStore] Marked indexing as incomplete (in progress)")
+		} catch (error) {
+			console.error("[QdrantVectorStore] Failed to mark indexing as incomplete:", error)
+			throw error
+		}
+	}
 }