Browse Source

fix: resolve vector dimension mismatch error when switching embedding models (#5616) (#5617)

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
Co-authored-by: Daniel Riccio <[email protected]>
Co-authored-by: Daniel <[email protected]>
Roomote Bot 7 months ago
parent
commit
f71275ef55

+ 10 - 0
src/core/webview/ClineProvider.ts

@@ -1542,6 +1542,10 @@ export class ClineProvider
 				codebaseIndexEmbedderProvider: codebaseIndexConfig?.codebaseIndexEmbedderProvider ?? "openai",
 				codebaseIndexEmbedderBaseUrl: codebaseIndexConfig?.codebaseIndexEmbedderBaseUrl ?? "",
 				codebaseIndexEmbedderModelId: codebaseIndexConfig?.codebaseIndexEmbedderModelId ?? "",
+				codebaseIndexEmbedderModelDimension: codebaseIndexConfig?.codebaseIndexEmbedderModelDimension ?? 1536,
+				codebaseIndexOpenAiCompatibleBaseUrl: codebaseIndexConfig?.codebaseIndexOpenAiCompatibleBaseUrl,
+				codebaseIndexSearchMaxResults: codebaseIndexConfig?.codebaseIndexSearchMaxResults,
+				codebaseIndexSearchMinScore: codebaseIndexConfig?.codebaseIndexSearchMinScore,
 			},
 			mdmCompliant: this.checkMdmCompliance(),
 			profileThresholds: profileThresholds ?? {},
@@ -1703,6 +1707,12 @@ export class ClineProvider
 					stateValues.codebaseIndexConfig?.codebaseIndexEmbedderProvider ?? "openai",
 				codebaseIndexEmbedderBaseUrl: stateValues.codebaseIndexConfig?.codebaseIndexEmbedderBaseUrl ?? "",
 				codebaseIndexEmbedderModelId: stateValues.codebaseIndexConfig?.codebaseIndexEmbedderModelId ?? "",
+				codebaseIndexEmbedderModelDimension:
+					stateValues.codebaseIndexConfig?.codebaseIndexEmbedderModelDimension,
+				codebaseIndexOpenAiCompatibleBaseUrl:
+					stateValues.codebaseIndexConfig?.codebaseIndexOpenAiCompatibleBaseUrl,
+				codebaseIndexSearchMaxResults: stateValues.codebaseIndexConfig?.codebaseIndexSearchMaxResults,
+				codebaseIndexSearchMinScore: stateValues.codebaseIndexConfig?.codebaseIndexSearchMinScore,
 			},
 			profileThresholds: stateValues.profileThresholds ?? {},
 		}

+ 201 - 20
src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts

@@ -10,7 +10,16 @@ vitest.mock("@qdrant/js-client-rest")
 vitest.mock("crypto")
 vitest.mock("../../../../utils/path")
 vitest.mock("../../../../i18n", () => ({
-	t: (key: string) => key, // Just return the key for testing
+	t: (key: string, params?: any) => {
+		// Mock translation function that includes parameters for testing
+		if (key === "embeddings:vectorStore.vectorDimensionMismatch" && params?.errorMessage) {
+			return `Failed to update vector index for new model. Please try clearing the index and starting again. Details: ${params.errorMessage}`
+		}
+		if (key === "embeddings:vectorStore.qdrantConnectionFailed" && params?.qdrantUrl && params?.errorMessage) {
+			return `Failed to connect to Qdrant vector database. Please ensure Qdrant is running and accessible at ${params.qdrantUrl}. Error: ${params.errorMessage}`
+		}
+		return key // Just return the key for other cases
+	},
 }))
 vitest.mock("path", () => ({
 	...vitest.importActual("path"),
@@ -564,16 +573,22 @@ describe("QdrantVectorStore", () => {
 		})
 		it("should recreate collection if it exists but vectorSize mismatches and return true", async () => {
 			const differentVectorSize = 768
-			// Mock getCollection to return existing collection info with different vector size
-			mockQdrantClientInstance.getCollection.mockResolvedValue({
-				config: {
-					params: {
-						vectors: {
-							size: differentVectorSize, // Mismatching vector size
+			// Mock getCollection to return existing collection info with different vector size first,
+			// then return 404 to confirm deletion
+			mockQdrantClientInstance.getCollection
+				.mockResolvedValueOnce({
+					config: {
+						params: {
+							vectors: {
+								size: differentVectorSize, // Mismatching vector size
+							},
 						},
 					},
-				},
-			} as any)
+				} as any)
+				.mockRejectedValueOnce({
+					response: { status: 404 },
+					message: "Not found",
+				})
 			mockQdrantClientInstance.deleteCollection.mockResolvedValue(true as any)
 			mockQdrantClientInstance.createCollection.mockResolvedValue(true as any)
 			mockQdrantClientInstance.createPayloadIndex.mockResolvedValue({} as any)
@@ -582,7 +597,7 @@ describe("QdrantVectorStore", () => {
 			const result = await vectorStore.initialize()
 
 			expect(result).toBe(true)
-			expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(1)
+			expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(2) // Once to check, once to verify deletion
 			expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledWith(expectedCollectionName)
 			expect(mockQdrantClientInstance.deleteCollection).toHaveBeenCalledTimes(1)
 			expect(mockQdrantClientInstance.deleteCollection).toHaveBeenCalledWith(expectedCollectionName)
@@ -703,7 +718,7 @@ describe("QdrantVectorStore", () => {
 			}
 
 			expect(caughtError).toBeDefined()
-			expect(caughtError.message).toContain("embeddings:vectorStore.vectorDimensionMismatch")
+			expect(caughtError.message).toContain("Failed to update vector index for new model")
 			expect(caughtError.cause).toBe(deleteError)
 
 			expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(1)
@@ -719,15 +734,21 @@ describe("QdrantVectorStore", () => {
 
 		it("should throw vectorDimensionMismatch error when createCollection fails during recreation", async () => {
 			const differentVectorSize = 768
-			mockQdrantClientInstance.getCollection.mockResolvedValue({
-				config: {
-					params: {
-						vectors: {
-							size: differentVectorSize,
+			mockQdrantClientInstance.getCollection
+				.mockResolvedValueOnce({
+					config: {
+						params: {
+							vectors: {
+								size: differentVectorSize,
+							},
 						},
 					},
-				},
-			} as any)
+				} as any)
+				// Second call should return 404 to confirm deletion
+				.mockRejectedValueOnce({
+					response: { status: 404 },
+					message: "Not found",
+				})
 
 			// Delete succeeds but create fails
 			mockQdrantClientInstance.deleteCollection.mockResolvedValue(true as any)
@@ -745,10 +766,10 @@ describe("QdrantVectorStore", () => {
 			}
 
 			expect(caughtError).toBeDefined()
-			expect(caughtError.message).toContain("embeddings:vectorStore.vectorDimensionMismatch")
+			expect(caughtError.message).toContain("Failed to update vector index for new model")
 			expect(caughtError.cause).toBe(createError)
 
-			expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(1)
+			expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(2)
 			expect(mockQdrantClientInstance.deleteCollection).toHaveBeenCalledTimes(1)
 			expect(mockQdrantClientInstance.createCollection).toHaveBeenCalledTimes(1)
 			expect(mockQdrantClientInstance.createPayloadIndex).not.toHaveBeenCalled()
@@ -758,6 +779,166 @@ describe("QdrantVectorStore", () => {
 			;(console.error as any).mockRestore()
 			;(console.warn as any).mockRestore()
 		})
+
+		it("should verify collection deletion before proceeding with recreation", async () => {
+			const differentVectorSize = 768
+			mockQdrantClientInstance.getCollection
+				.mockResolvedValueOnce({
+					config: {
+						params: {
+							vectors: {
+								size: differentVectorSize,
+							},
+						},
+					},
+				} as any)
+				// Second call should return 404 to confirm deletion
+				.mockRejectedValueOnce({
+					response: { status: 404 },
+					message: "Not found",
+				})
+
+			mockQdrantClientInstance.deleteCollection.mockResolvedValue(true as any)
+			mockQdrantClientInstance.createCollection.mockResolvedValue(true as any)
+			mockQdrantClientInstance.createPayloadIndex.mockResolvedValue({} as any)
+			vitest.spyOn(console, "warn").mockImplementation(() => {})
+
+			const result = await vectorStore.initialize()
+
+			expect(result).toBe(true)
+			// Should call getCollection twice: once to check existing, once to verify deletion
+			expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(2)
+			expect(mockQdrantClientInstance.deleteCollection).toHaveBeenCalledTimes(1)
+			expect(mockQdrantClientInstance.createCollection).toHaveBeenCalledTimes(1)
+			expect(mockQdrantClientInstance.createPayloadIndex).toHaveBeenCalledTimes(5)
+			;(console.warn as any).mockRestore()
+		})
+
+		it("should throw error if collection still exists after deletion attempt", async () => {
+			const differentVectorSize = 768
+			mockQdrantClientInstance.getCollection
+				.mockResolvedValueOnce({
+					config: {
+						params: {
+							vectors: {
+								size: differentVectorSize,
+							},
+						},
+					},
+				} as any)
+				// Second call should still return the collection (deletion failed)
+				.mockResolvedValueOnce({
+					config: {
+						params: {
+							vectors: {
+								size: differentVectorSize,
+							},
+						},
+					},
+				} as any)
+
+			mockQdrantClientInstance.deleteCollection.mockResolvedValue(true as any)
+			vitest.spyOn(console, "error").mockImplementation(() => {})
+			vitest.spyOn(console, "warn").mockImplementation(() => {})
+
+			let caughtError: any
+			try {
+				await vectorStore.initialize()
+			} catch (error: any) {
+				caughtError = error
+			}
+
+			expect(caughtError).toBeDefined()
+			expect(caughtError.message).toContain("Failed to update vector index for new model")
+			// The error message should contain the contextual error details
+			expect(caughtError.message).toContain("Deleted existing collection but failed verification step")
+
+			expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(2)
+			expect(mockQdrantClientInstance.deleteCollection).toHaveBeenCalledTimes(1)
+			expect(mockQdrantClientInstance.createCollection).not.toHaveBeenCalled()
+			expect(mockQdrantClientInstance.createPayloadIndex).not.toHaveBeenCalled()
+			;(console.error as any).mockRestore()
+			;(console.warn as any).mockRestore()
+		})
+
+		it("should handle dimension mismatch scenario from 2048 to 768 dimensions", async () => {
+			// Simulate the exact scenario from the issue: switching from 2048 to 768 dimensions
+			const oldVectorSize = 2048
+			const newVectorSize = 768
+
+			// Create a new vector store with the new dimension
+			const newVectorStore = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, newVectorSize, mockApiKey)
+
+			mockQdrantClientInstance.getCollection
+				.mockResolvedValueOnce({
+					config: {
+						params: {
+							vectors: {
+								size: oldVectorSize, // Existing collection has 2048 dimensions
+							},
+						},
+					},
+				} as any)
+				// Second call should return 404 to confirm deletion
+				.mockRejectedValueOnce({
+					response: { status: 404 },
+					message: "Not found",
+				})
+
+			mockQdrantClientInstance.deleteCollection.mockResolvedValue(true as any)
+			mockQdrantClientInstance.createCollection.mockResolvedValue(true as any)
+			mockQdrantClientInstance.createPayloadIndex.mockResolvedValue({} as any)
+			vitest.spyOn(console, "warn").mockImplementation(() => {})
+
+			const result = await newVectorStore.initialize()
+
+			expect(result).toBe(true)
+			expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(2)
+			expect(mockQdrantClientInstance.deleteCollection).toHaveBeenCalledTimes(1)
+			expect(mockQdrantClientInstance.createCollection).toHaveBeenCalledWith(expectedCollectionName, {
+				vectors: {
+					size: newVectorSize, // Should create with new 768 dimensions
+					distance: "Cosine",
+				},
+			})
+			expect(mockQdrantClientInstance.createPayloadIndex).toHaveBeenCalledTimes(5)
+			;(console.warn as any).mockRestore()
+		})
+
+		it("should provide detailed error context for different failure scenarios", async () => {
+			const differentVectorSize = 768
+			mockQdrantClientInstance.getCollection.mockResolvedValue({
+				config: {
+					params: {
+						vectors: {
+							size: differentVectorSize,
+						},
+					},
+				},
+			} as any)
+
+			// Test deletion failure with specific error message
+			const deleteError = new Error("Qdrant server unavailable")
+			mockQdrantClientInstance.deleteCollection.mockRejectedValue(deleteError)
+			vitest.spyOn(console, "error").mockImplementation(() => {})
+			vitest.spyOn(console, "warn").mockImplementation(() => {})
+
+			let caughtError: any
+			try {
+				await vectorStore.initialize()
+			} catch (error: any) {
+				caughtError = error
+			}
+
+			expect(caughtError).toBeDefined()
+			expect(caughtError.message).toContain("Failed to update vector index for new model")
+			// The error message should contain the contextual error details
+			expect(caughtError.message).toContain("Failed to delete existing collection with vector size")
+			expect(caughtError.message).toContain("Qdrant server unavailable")
+			expect(caughtError.cause).toBe(deleteError)
+			;(console.error as any).mockRestore()
+			;(console.warn as any).mockRestore()
+		})
 	})
 
 	it("should return true when collection exists", async () => {

+ 113 - 45
src/services/code-index/vector-store/qdrant-client.ts

@@ -160,58 +160,32 @@ export class QdrantVectorStore implements IVectorStore {
 				created = true
 			} else {
 				// Collection exists, check vector size
-				const existingVectorSize = collectionInfo.config?.params?.vectors?.size
+				const vectorsConfig = collectionInfo.config?.params?.vectors
+				let existingVectorSize: number
+
+				if (typeof vectorsConfig === "number") {
+					existingVectorSize = vectorsConfig
+				} else if (
+					vectorsConfig &&
+					typeof vectorsConfig === "object" &&
+					"size" in vectorsConfig &&
+					typeof vectorsConfig.size === "number"
+				) {
+					existingVectorSize = vectorsConfig.size
+				} else {
+					existingVectorSize = 0 // Fallback for unknown configuration
+				}
+
 				if (existingVectorSize === this.vectorSize) {
 					created = false // Exists and correct
 				} else {
-					// Exists but wrong vector size, recreate
-					try {
-						console.warn(
-							`[QdrantVectorStore] Collection ${this.collectionName} exists with vector size ${existingVectorSize}, but expected ${this.vectorSize}. Recreating collection.`,
-						)
-						await this.client.deleteCollection(this.collectionName)
-						await this.client.createCollection(this.collectionName, {
-							vectors: {
-								size: this.vectorSize,
-								distance: this.DISTANCE_METRIC,
-							},
-						})
-						created = true
-					} catch (recreationError) {
-						const errorMessage =
-							recreationError instanceof Error ? recreationError.message : String(recreationError)
-						console.error(
-							`[QdrantVectorStore] CRITICAL: Failed to recreate collection ${this.collectionName} for new vector size. Error: ${errorMessage}`,
-						)
-						const dimensionMismatchError = new Error(
-							t("embeddings:vectorStore.vectorDimensionMismatch", {
-								errorMessage,
-							}),
-						)
-						// Use error.cause to preserve the original error context
-						dimensionMismatchError.cause = recreationError
-						throw dimensionMismatchError
-					}
+					// Exists but wrong vector size, recreate with enhanced error handling
+					created = await this._recreateCollectionWithNewDimension(existingVectorSize)
 				}
 			}
 
 			// Create payload indexes
-			for (let i = 0; i <= 4; i++) {
-				try {
-					await this.client.createPayloadIndex(this.collectionName, {
-						field_name: `pathSegments.${i}`,
-						field_schema: "keyword",
-					})
-				} catch (indexError: any) {
-					const errorMessage = (indexError?.message || "").toLowerCase()
-					if (!errorMessage.includes("already exists")) {
-						console.warn(
-							`[QdrantVectorStore] Could not create payload index for pathSegments.${i} on ${this.collectionName}. Details:`,
-							indexError?.message || indexError,
-						)
-					}
-				}
-			}
+			await this._createPayloadIndexes()
 			return created
 		} catch (error: any) {
 			const errorMessage = error?.message || error
@@ -232,6 +206,100 @@ export class QdrantVectorStore implements IVectorStore {
 		}
 	}
 
+	/**
+	 * Recreates the collection with a new vector dimension, handling failures gracefully.
+	 * @param existingVectorSize The current vector size of the existing collection
+	 * @returns Promise resolving to boolean indicating if a new collection was created
+	 */
+	private async _recreateCollectionWithNewDimension(existingVectorSize: number): Promise<boolean> {
+		console.warn(
+			`[QdrantVectorStore] Collection ${this.collectionName} exists with vector size ${existingVectorSize}, but expected ${this.vectorSize}. Recreating collection.`,
+		)
+
+		let deletionSucceeded = false
+		let recreationAttempted = false
+
+		try {
+			// Step 1: Attempt to delete the existing collection
+			console.log(`[QdrantVectorStore] Deleting existing collection ${this.collectionName}...`)
+			await this.client.deleteCollection(this.collectionName)
+			deletionSucceeded = true
+			console.log(`[QdrantVectorStore] Successfully deleted collection ${this.collectionName}`)
+
+			// Step 2: Wait a brief moment to ensure deletion is processed
+			await new Promise((resolve) => setTimeout(resolve, 100))
+
+			// Step 3: Verify the collection is actually deleted
+			const verificationInfo = await this.getCollectionInfo()
+			if (verificationInfo !== null) {
+				throw new Error("Collection still exists after deletion attempt")
+			}
+
+			// Step 4: Create the new collection with correct dimensions
+			console.log(
+				`[QdrantVectorStore] Creating new collection ${this.collectionName} with vector size ${this.vectorSize}...`,
+			)
+			recreationAttempted = true
+			await this.client.createCollection(this.collectionName, {
+				vectors: {
+					size: this.vectorSize,
+					distance: this.DISTANCE_METRIC,
+				},
+			})
+			console.log(`[QdrantVectorStore] Successfully created new collection ${this.collectionName}`)
+			return true
+		} catch (recreationError) {
+			const errorMessage = recreationError instanceof Error ? recreationError.message : String(recreationError)
+
+			// Provide detailed error context based on what stage failed
+			let contextualErrorMessage: string
+			if (!deletionSucceeded) {
+				contextualErrorMessage = `Failed to delete existing collection with vector size ${existingVectorSize}. ${errorMessage}`
+			} else if (!recreationAttempted) {
+				contextualErrorMessage = `Deleted existing collection but failed verification step. ${errorMessage}`
+			} else {
+				contextualErrorMessage = `Deleted existing collection but failed to create new collection with vector size ${this.vectorSize}. ${errorMessage}`
+			}
+
+			console.error(
+				`[QdrantVectorStore] CRITICAL: Failed to recreate collection ${this.collectionName} for dimension change (${existingVectorSize} -> ${this.vectorSize}). ${contextualErrorMessage}`,
+			)
+
+			// Create a comprehensive error message for the user
+			const dimensionMismatchError = new Error(
+				t("embeddings:vectorStore.vectorDimensionMismatch", {
+					errorMessage: contextualErrorMessage,
+				}),
+			)
+
+			// Preserve the original error context
+			dimensionMismatchError.cause = recreationError
+			throw dimensionMismatchError
+		}
+	}
+
+	/**
+	 * Creates payload indexes for the collection, handling errors gracefully.
+	 */
+	private async _createPayloadIndexes(): Promise<void> {
+		for (let i = 0; i <= 4; i++) {
+			try {
+				await this.client.createPayloadIndex(this.collectionName, {
+					field_name: `pathSegments.${i}`,
+					field_schema: "keyword",
+				})
+			} catch (indexError: any) {
+				const errorMessage = (indexError?.message || "").toLowerCase()
+				if (!errorMessage.includes("already exists")) {
+					console.warn(
+						`[QdrantVectorStore] Could not create payload index for pathSegments.${i} on ${this.collectionName}. Details:`,
+						indexError?.message || indexError,
+					)
+				}
+			}
+		}
+	}
+
 	/**
 	 * Upserts points into the vector store
 	 * @param points Array of points to upsert