Преглед на файлове

fix: resolve workspace path inconsistency in code indexing for multi-workspace scenarios (#4397) (#5403)

Co-authored-by: Daniel Riccio <[email protected]>
Hannes Rudolph преди 5 месеца
родител
ревизия
7d31966978

+ 32 - 19
src/services/code-index/__tests__/manager.spec.ts

@@ -1,18 +1,31 @@
-import { CodeIndexManager } from "../manager"
+// Mock vscode module
+vi.mock("vscode", () => ({
+	workspace: {
+		workspaceFolders: [
+			{
+				uri: { fsPath: "/test/workspace" },
+				name: "test",
+				index: 0,
+			},
+		],
+	},
+}))
 
 // Mock only the essential dependencies
-vitest.mock("../../../utils/path", () => ({
-	getWorkspacePath: vitest.fn(() => "/test/workspace"),
+vi.mock("../../../utils/path", () => ({
+	getWorkspacePath: vi.fn(() => "/test/workspace"),
 }))
 
-vitest.mock("../state-manager", () => ({
-	CodeIndexStateManager: vitest.fn().mockImplementation(() => ({
-		onProgressUpdate: vitest.fn(),
-		getCurrentStatus: vitest.fn(),
-		dispose: vitest.fn(),
+vi.mock("../state-manager", () => ({
+	CodeIndexStateManager: vi.fn().mockImplementation(() => ({
+		onProgressUpdate: vi.fn(),
+		getCurrentStatus: vi.fn(),
+		dispose: vi.fn(),
 	})),
 }))
 
+import { CodeIndexManager } from "../manager"
+
 describe("CodeIndexManager - handleSettingsChange regression", () => {
 	let mockContext: any
 	let manager: CodeIndexManager
@@ -27,7 +40,7 @@ describe("CodeIndexManager - handleSettingsChange regression", () => {
 			globalState: {} as any,
 			extensionUri: {} as any,
 			extensionPath: "/test/extension",
-			asAbsolutePath: vitest.fn(),
+			asAbsolutePath: vi.fn(),
 			storageUri: {} as any,
 			storagePath: "/test/storage",
 			globalStorageUri: {} as any,
@@ -58,13 +71,13 @@ describe("CodeIndexManager - handleSettingsChange regression", () => {
 
 			// Mock a minimal config manager that simulates first-time configuration
 			const mockConfigManager = {
-				loadConfiguration: vitest.fn().mockResolvedValue({ requiresRestart: true }),
+				loadConfiguration: vi.fn().mockResolvedValue({ requiresRestart: true }),
 			}
 			;(manager as any)._configManager = mockConfigManager
 
 			// Mock the feature state to simulate valid configuration that would normally trigger restart
-			vitest.spyOn(manager, "isFeatureEnabled", "get").mockReturnValue(true)
-			vitest.spyOn(manager, "isFeatureConfigured", "get").mockReturnValue(true)
+			vi.spyOn(manager, "isFeatureEnabled", "get").mockReturnValue(true)
+			vi.spyOn(manager, "isFeatureConfigured", "get").mockReturnValue(true)
 
 			// The key test: this should NOT throw "CodeIndexManager not initialized" error
 			await expect(manager.handleSettingsChange()).resolves.not.toThrow()
@@ -76,10 +89,10 @@ describe("CodeIndexManager - handleSettingsChange regression", () => {
 		it("should work normally when manager is initialized", async () => {
 			// Mock a complete config manager with all required properties
 			const mockConfigManager = {
-				loadConfiguration: vitest.fn().mockResolvedValue({ requiresRestart: true }),
+				loadConfiguration: vi.fn().mockResolvedValue({ requiresRestart: true }),
 				isFeatureConfigured: true,
 				isFeatureEnabled: true,
-				getConfig: vitest.fn().mockReturnValue({
+				getConfig: vi.fn().mockReturnValue({
 					isEnabled: true,
 					isConfigured: true,
 					embedderProvider: "openai",
@@ -93,7 +106,7 @@ describe("CodeIndexManager - handleSettingsChange regression", () => {
 			;(manager as any)._configManager = mockConfigManager
 
 			// Simulate an initialized manager by setting the required properties
-			;(manager as any)._orchestrator = { stopWatcher: vitest.fn() }
+			;(manager as any)._orchestrator = { stopWatcher: vi.fn() }
 			;(manager as any)._searchService = {}
 			;(manager as any)._cacheManager = {}
 
@@ -101,12 +114,12 @@ describe("CodeIndexManager - handleSettingsChange regression", () => {
 			expect(manager.isInitialized).toBe(true)
 
 			// Mock the methods that would be called during restart
-			const recreateServicesSpy = vitest.spyOn(manager as any, "_recreateServices").mockImplementation(() => {})
-			const startIndexingSpy = vitest.spyOn(manager, "startIndexing").mockResolvedValue()
+			const recreateServicesSpy = vi.spyOn(manager as any, "_recreateServices").mockImplementation(() => {})
+			const startIndexingSpy = vi.spyOn(manager, "startIndexing").mockResolvedValue()
 
 			// Mock the feature state
-			vitest.spyOn(manager, "isFeatureEnabled", "get").mockReturnValue(true)
-			vitest.spyOn(manager, "isFeatureConfigured", "get").mockReturnValue(true)
+			vi.spyOn(manager, "isFeatureEnabled", "get").mockReturnValue(true)
+			vi.spyOn(manager, "isFeatureConfigured", "get").mockReturnValue(true)
 
 			await manager.handleSettingsChange()
 

+ 8 - 3
src/services/code-index/manager.ts

@@ -26,12 +26,17 @@ export class CodeIndexManager {
 	private _cacheManager: CacheManager | undefined
 
 	public static getInstance(context: vscode.ExtensionContext): CodeIndexManager | undefined {
-		const workspacePath = getWorkspacePath() // Assumes single workspace for now
-
-		if (!workspacePath) {
+		// Use first workspace folder consistently
+		const workspaceFolders = vscode.workspace.workspaceFolders
+		if (!workspaceFolders || workspaceFolders.length === 0) {
 			return undefined
 		}
 
+		// Always use the first workspace folder for consistency across all indexing operations.
+		// This ensures that the same workspace context is used throughout the indexing pipeline,
+		// preventing path resolution errors in multi-workspace scenarios.
+		const workspacePath = workspaceFolders[0].uri.fsPath
+
 		if (!CodeIndexManager.instances.has(workspacePath)) {
 			CodeIndexManager.instances.set(workspacePath, new CodeIndexManager(workspacePath, context))
 		}

+ 3 - 3
src/services/code-index/processors/file-watcher.ts

@@ -464,7 +464,7 @@ export class FileWatcher implements IFileWatcher {
 			}
 
 			// Check if file should be ignored
-			const relativeFilePath = generateRelativeFilePath(filePath)
+			const relativeFilePath = generateRelativeFilePath(filePath, this.workspacePath)
 			if (
 				!this.ignoreController.validateAccess(filePath) ||
 				(this.ignoreInstance && this.ignoreInstance.ignores(relativeFilePath))
@@ -512,7 +512,7 @@ export class FileWatcher implements IFileWatcher {
 				const { embeddings } = await this.embedder.createEmbeddings(texts)
 
 				pointsToUpsert = blocks.map((block, index) => {
-					const normalizedAbsolutePath = generateNormalizedAbsolutePath(block.file_path)
+					const normalizedAbsolutePath = generateNormalizedAbsolutePath(block.file_path, this.workspacePath)
 					const stableName = `${normalizedAbsolutePath}:${block.start_line}`
 					const pointId = uuidv5(stableName, QDRANT_CODE_BLOCK_NAMESPACE)
 
@@ -520,7 +520,7 @@ export class FileWatcher implements IFileWatcher {
 						id: pointId,
 						vector: embeddings[index],
 						payload: {
-							filePath: generateRelativeFilePath(normalizedAbsolutePath),
+							filePath: generateRelativeFilePath(normalizedAbsolutePath, this.workspacePath),
 							codeChunk: block.content,
 							startLine: block.start_line,
 							endLine: block.end_line,

+ 34 - 14
src/services/code-index/processors/scanner.ts

@@ -4,6 +4,7 @@ import { RooIgnoreController } from "../../../core/ignore/RooIgnoreController"
 import { stat } from "fs/promises"
 import * as path from "path"
 import { generateNormalizedAbsolutePath, generateRelativeFilePath } from "../shared/get-relative-path"
+import { getWorkspacePathForContext } from "../../../utils/path"
 import { scannerExtensions } from "../shared/supported-extensions"
 import * as vscode from "vscode"
 import { CodeBlock, ICodeParser, IEmbedder, IVectorStore, IDirectoryScanner } from "../interfaces"
@@ -49,6 +50,9 @@ export class DirectoryScanner implements IDirectoryScanner {
 		onFileParsed?: (fileBlockCount: number) => void,
 	): Promise<{ codeBlocks: CodeBlock[]; stats: { processed: number; skipped: number }; totalBlockCount: number }> {
 		const directoryPath = directory
+		// Capture workspace context at scan start
+		const scanWorkspace = getWorkspacePathForContext(directoryPath)
+
 		// Get all files recursively (handles .gitignore automatically)
 		const [allPaths, _] = await listFiles(directoryPath, true, MAX_LIST_FILES_LIMIT)
 
@@ -66,7 +70,7 @@ export class DirectoryScanner implements IDirectoryScanner {
 		// Filter by supported extensions, ignore patterns, and excluded directories
 		const supportedPaths = allowedPaths.filter((filePath) => {
 			const ext = path.extname(filePath).toLowerCase()
-			const relativeFilePath = generateRelativeFilePath(filePath)
+			const relativeFilePath = generateRelativeFilePath(filePath, scanWorkspace)
 
 			// Check if file is in an ignored directory using the shared helper
 			if (isPathInIgnoredDirectory(filePath)) {
@@ -169,6 +173,7 @@ export class DirectoryScanner implements IDirectoryScanner {
 												batchBlocks,
 												batchTexts,
 												batchFileInfos,
+												scanWorkspace,
 												onError,
 												onBlocksIndexed,
 											),
@@ -185,12 +190,15 @@ export class DirectoryScanner implements IDirectoryScanner {
 						await this.cacheManager.updateHash(filePath, currentFileHash)
 					}
 				} catch (error) {
-					console.error(`Error processing file ${filePath}:`, error)
+					console.error(`Error processing file ${filePath} in workspace ${scanWorkspace}:`, error)
 					if (onError) {
 						onError(
 							error instanceof Error
-								? error
-								: new Error(t("embeddings:scanner.unknownErrorProcessingFile", { filePath })),
+								? new Error(`${error.message} (Workspace: ${scanWorkspace}, File: ${filePath})`)
+								: new Error(
+										t("embeddings:scanner.unknownErrorProcessingFile", { filePath }) +
+											` (Workspace: ${scanWorkspace})`,
+									),
 						)
 					}
 				}
@@ -214,7 +222,7 @@ export class DirectoryScanner implements IDirectoryScanner {
 
 				// Queue final batch processing
 				const batchPromise = batchLimiter(() =>
-					this.processBatch(batchBlocks, batchTexts, batchFileInfos, onError, onBlocksIndexed),
+					this.processBatch(batchBlocks, batchTexts, batchFileInfos, scanWorkspace, onError, onBlocksIndexed),
 				)
 				activeBatchPromises.push(batchPromise)
 			} finally {
@@ -235,15 +243,20 @@ export class DirectoryScanner implements IDirectoryScanner {
 						await this.qdrantClient.deletePointsByFilePath(cachedFilePath)
 						await this.cacheManager.deleteHash(cachedFilePath)
 					} catch (error) {
-						console.error(`[DirectoryScanner] Failed to delete points for ${cachedFilePath}:`, error)
+						console.error(
+							`[DirectoryScanner] Failed to delete points for ${cachedFilePath} in workspace ${scanWorkspace}:`,
+							error,
+						)
 						if (onError) {
 							onError(
 								error instanceof Error
-									? error
+									? new Error(
+											`${error.message} (Workspace: ${scanWorkspace}, File: ${cachedFilePath})`,
+										)
 									: new Error(
 											t("embeddings:scanner.unknownErrorDeletingPoints", {
 												filePath: cachedFilePath,
-											}),
+											}) + ` (Workspace: ${scanWorkspace})`,
 										),
 							)
 						}
@@ -267,6 +280,7 @@ export class DirectoryScanner implements IDirectoryScanner {
 		batchBlocks: CodeBlock[],
 		batchTexts: string[],
 		batchFileInfos: { filePath: string; fileHash: string; isNew: boolean }[],
+		scanWorkspace: string,
 		onError?: (error: Error) => void,
 		onBlocksIndexed?: (indexedCount: number) => void,
 	): Promise<void> {
@@ -292,11 +306,14 @@ export class DirectoryScanner implements IDirectoryScanner {
 						await this.qdrantClient.deletePointsByMultipleFilePaths(uniqueFilePaths)
 					} catch (deleteError) {
 						console.error(
-							`[DirectoryScanner] Failed to delete points for ${uniqueFilePaths.length} files before upsert:`,
+							`[DirectoryScanner] Failed to delete points for ${uniqueFilePaths.length} files before upsert in workspace ${scanWorkspace}:`,
 							deleteError,
 						)
-						// Re-throw the error to stop processing this batch attempt
-						throw deleteError
+						// Re-throw the error with workspace context
+						throw new Error(
+							`Failed to delete points for ${uniqueFilePaths.length} files. Workspace: ${scanWorkspace}. ${deleteError instanceof Error ? deleteError.message : String(deleteError)}`,
+							{ cause: deleteError },
+						)
 					}
 				}
 				// --- End Deletion Step ---
@@ -306,7 +323,7 @@ export class DirectoryScanner implements IDirectoryScanner {
 
 				// Prepare points for Qdrant
 				const points = batchBlocks.map((block, index) => {
-					const normalizedAbsolutePath = generateNormalizedAbsolutePath(block.file_path)
+					const normalizedAbsolutePath = generateNormalizedAbsolutePath(block.file_path, scanWorkspace)
 
 					// Use segmentHash for unique ID generation to handle multiple segments from same line
 					const pointId = uuidv5(block.segmentHash, QDRANT_CODE_BLOCK_NAMESPACE)
@@ -315,7 +332,7 @@ export class DirectoryScanner implements IDirectoryScanner {
 						id: pointId,
 						vector: embeddings[index],
 						payload: {
-							filePath: generateRelativeFilePath(normalizedAbsolutePath),
+							filePath: generateRelativeFilePath(normalizedAbsolutePath, scanWorkspace),
 							codeChunk: block.content,
 							startLine: block.start_line,
 							endLine: block.end_line,
@@ -335,7 +352,10 @@ export class DirectoryScanner implements IDirectoryScanner {
 				success = true
 			} catch (error) {
 				lastError = error as Error
-				console.error(`[DirectoryScanner] Error processing batch (attempt ${attempts}):`, error)
+				console.error(
+					`[DirectoryScanner] Error processing batch (attempt ${attempts}) in workspace ${scanWorkspace}:`,
+					error,
+				)
 
 				if (attempts < MAX_BATCH_RETRIES) {
 					const delay = INITIAL_RETRY_DELAY_MS * Math.pow(2, attempts - 1)

+ 64 - 0
src/services/code-index/shared/__tests__/get-relative-path.spec.ts

@@ -0,0 +1,64 @@
+import { describe, it, expect } from "vitest"
+import path from "path"
+import { generateNormalizedAbsolutePath, generateRelativeFilePath } from "../get-relative-path"
+
+describe("get-relative-path", () => {
+	describe("generateNormalizedAbsolutePath", () => {
+		it("should use provided workspace root", () => {
+			const filePath = "src/file.ts"
+			const workspaceRoot = path.join(path.sep, "custom", "workspace")
+			const result = generateNormalizedAbsolutePath(filePath, workspaceRoot)
+			// On Windows, path.resolve adds the drive letter, so we need to use path.resolve for the expected value
+			expect(result).toBe(path.resolve(workspaceRoot, filePath))
+		})
+
+		it("should handle absolute paths", () => {
+			const filePath = path.join(path.sep, "absolute", "path", "file.ts")
+			const workspaceRoot = path.join(path.sep, "custom", "workspace")
+			const result = generateNormalizedAbsolutePath(filePath, workspaceRoot)
+			// When an absolute path is provided, it should be resolved to include drive letter on Windows
+			expect(result).toBe(path.resolve(filePath))
+		})
+
+		it("should normalize paths with . and .. segments", () => {
+			const filePath = "./src/../src/file.ts"
+			const workspaceRoot = path.join(path.sep, "custom", "workspace")
+			const result = generateNormalizedAbsolutePath(filePath, workspaceRoot)
+			// Use path.resolve to get the expected normalized absolute path
+			expect(result).toBe(path.resolve(workspaceRoot, "src", "file.ts"))
+		})
+	})
+
+	describe("generateRelativeFilePath", () => {
+		it("should use provided workspace root", () => {
+			const workspaceRoot = path.join(path.sep, "custom", "workspace")
+			const absolutePath = path.join(workspaceRoot, "src", "file.ts")
+			const result = generateRelativeFilePath(absolutePath, workspaceRoot)
+			expect(result).toBe(path.join("src", "file.ts"))
+		})
+
+		it("should handle paths outside workspace", () => {
+			const absolutePath = path.join(path.sep, "outside", "workspace", "file.ts")
+			const workspaceRoot = path.join(path.sep, "custom", "workspace")
+			const result = generateRelativeFilePath(absolutePath, workspaceRoot)
+			// The result will have .. segments to navigate outside
+			expect(result).toContain("..")
+		})
+
+		it("should handle same path as workspace", () => {
+			const workspaceRoot = path.join(path.sep, "custom", "workspace")
+			const absolutePath = workspaceRoot
+			const result = generateRelativeFilePath(absolutePath, workspaceRoot)
+			expect(result).toBe(".")
+		})
+
+		it("should handle multi-workspace scenarios", () => {
+			// Simulate the error scenario from the issue
+			const workspaceRoot = path.join(path.sep, "Users", "test", "project")
+			const absolutePath = path.join(path.sep, "Users", "test", "admin", ".prettierrc.json")
+			const result = generateRelativeFilePath(absolutePath, workspaceRoot)
+			// Should generate a valid relative path, not throw an error
+			expect(result).toBe(path.join("..", "admin", ".prettierrc.json"))
+		})
+	})
+})

+ 4 - 7
src/services/code-index/shared/get-relative-path.ts

@@ -1,16 +1,14 @@
 import path from "path"
-import { getWorkspacePath } from "../../../utils/path"
 
 /**
  * Generates a normalized absolute path from a given file path and workspace root.
  * Handles path resolution and normalization to ensure consistent absolute paths.
  *
  * @param filePath - The file path to normalize (can be relative or absolute)
- * @param workspaceRoot - The root directory of the workspace
+ * @param workspaceRoot - The root directory of the workspace (required)
  * @returns The normalized absolute path
  */
-export function generateNormalizedAbsolutePath(filePath: string): string {
-	const workspaceRoot = getWorkspacePath()
+export function generateNormalizedAbsolutePath(filePath: string, workspaceRoot: string): string {
 	// Resolve the path to make it absolute if it's relative
 	const resolvedPath = path.resolve(workspaceRoot, filePath)
 	// Normalize to handle any . or .. segments and duplicate slashes
@@ -22,11 +20,10 @@ export function generateNormalizedAbsolutePath(filePath: string): string {
  * Ensures consistent relative path generation across different platforms.
  *
  * @param normalizedAbsolutePath - The normalized absolute path to convert
- * @param workspaceRoot - The root directory of the workspace
+ * @param workspaceRoot - The root directory of the workspace (required)
  * @returns The relative path from workspaceRoot to the file
  */
-export function generateRelativeFilePath(normalizedAbsolutePath: string): string {
-	const workspaceRoot = getWorkspacePath()
+export function generateRelativeFilePath(normalizedAbsolutePath: string, workspaceRoot: string): string {
 	// Generate the relative path
 	const relativePath = path.relative(workspaceRoot, normalizedAbsolutePath)
 	// Normalize to ensure consistent path separators

+ 15 - 0
src/utils/path.ts

@@ -115,3 +115,18 @@ export const getWorkspacePath = (defaultCwdPath = "") => {
 	}
 	return cwdPath
 }
+
+export const getWorkspacePathForContext = (contextPath?: string): string => {
+	// If context path provided, find its workspace
+	if (contextPath) {
+		const workspaceFolder = vscode.workspace.getWorkspaceFolder(vscode.Uri.file(contextPath))
+		if (workspaceFolder) {
+			return workspaceFolder.uri.fsPath
+		}
+		// Debug logging when falling back
+		console.debug(`[CodeIndex] No workspace found for context path: ${contextPath}, falling back to default`)
+	}
+
+	// Fall back to current behavior
+	return getWorkspacePath()
+}