Browse Source

Manually specify openai-compat format and parse it (#4463)

* Manually specify openai-compat format and parse it

* fixup! Manually specify openai-compat format and parse it

* Expect base64 in embedding test arguments

* fixup! Manually specify openai-compat format and parse it

Remove debug logs

* fixup! Manually specify openai-compat format and parse it

Improve comment

* fixup! Manually specify openai-compat format and parse it

* Add tests to exercise base64 decode of embeddings

* Add tests to verify OpenAI base64 handling and the package's broken array-parsing behavior

* feat: improve typing

* refactor: switch from jest to vitest for mocking in tests

---------

Co-authored-by: Dixie Flatline <dflatline>
Co-authored-by: Daniel Riccio <[email protected]>
Dixie Flatline 8 months ago
parent
commit
c481827cbb

+ 180 - 3
src/services/code-index/embedders/__tests__/openai-compatible.spec.ts

@@ -1,4 +1,4 @@
-import { vitest, describe, it, expect, beforeEach, afterEach } from "vitest"
+import { vitest, describe, it, expect, beforeEach, afterEach, vi } from "vitest"
 import type { MockedClass, MockedFunction } from "vitest"
 import { OpenAI } from "openai"
 import { OpenAICompatibleEmbedder } from "../openai-compatible"
@@ -110,6 +110,7 @@ describe("OpenAICompatibleEmbedder", () => {
 			expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
 				input: testTexts,
 				model: testModelId,
+				encoding_format: "base64",
 			})
 			expect(result).toEqual({
 				embeddings: [[0.1, 0.2, 0.3]],
@@ -130,6 +131,7 @@ describe("OpenAICompatibleEmbedder", () => {
 			expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
 				input: testTexts,
 				model: testModelId,
+				encoding_format: "base64",
 			})
 			expect(result).toEqual({
 				embeddings: [
@@ -154,6 +156,7 @@ describe("OpenAICompatibleEmbedder", () => {
 			expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
 				input: testTexts,
 				model: customModel,
+				encoding_format: "base64",
 			})
 		})
 
@@ -173,6 +176,97 @@ describe("OpenAICompatibleEmbedder", () => {
 			})
 		})
 
+		/**
+		 * Test base64 conversion logic
+		 */
+		describe("base64 conversion", () => {
+			it("should convert base64 encoded embeddings to float arrays", async () => {
+				const testTexts = ["Hello world"]
+
+				// Create a Float32Array with test values that can be exactly represented in Float32
+				const testEmbedding = new Float32Array([0.25, 0.5, 0.75, 1.0])
+
+				// Convert to base64 string (simulating what OpenAI API returns)
+				const buffer = Buffer.from(testEmbedding.buffer)
+				const base64String = buffer.toString("base64")
+
+				const mockResponse = {
+					data: [{ embedding: base64String }], // Base64 string instead of array
+					usage: { prompt_tokens: 10, total_tokens: 15 },
+				}
+				mockEmbeddingsCreate.mockResolvedValue(mockResponse)
+
+				const result = await embedder.createEmbeddings(testTexts)
+
+				expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
+					input: testTexts,
+					model: testModelId,
+					encoding_format: "base64",
+				})
+
+				// Verify the base64 string was converted back to the original float array
+				expect(result).toEqual({
+					embeddings: [[0.25, 0.5, 0.75, 1.0]],
+					usage: { promptTokens: 10, totalTokens: 15 },
+				})
+			})
+
+			it("should handle multiple base64 encoded embeddings", async () => {
+				const testTexts = ["Hello world", "Goodbye world"]
+
+				// Create test embeddings with values that can be exactly represented in Float32
+				const embedding1 = new Float32Array([0.25, 0.5, 0.75])
+				const embedding2 = new Float32Array([1.0, 1.25, 1.5])
+
+				// Convert to base64 strings
+				const base64String1 = Buffer.from(embedding1.buffer).toString("base64")
+				const base64String2 = Buffer.from(embedding2.buffer).toString("base64")
+
+				const mockResponse = {
+					data: [{ embedding: base64String1 }, { embedding: base64String2 }],
+					usage: { prompt_tokens: 20, total_tokens: 30 },
+				}
+				mockEmbeddingsCreate.mockResolvedValue(mockResponse)
+
+				const result = await embedder.createEmbeddings(testTexts)
+
+				expect(result).toEqual({
+					embeddings: [
+						[0.25, 0.5, 0.75],
+						[1.0, 1.25, 1.5],
+					],
+					usage: { promptTokens: 20, totalTokens: 30 },
+				})
+			})
+
+			it("should handle mixed base64 and array embeddings", async () => {
+				const testTexts = ["Hello world", "Goodbye world"]
+
+				// Create one base64 embedding and one regular array (edge case)
+				const embedding1 = new Float32Array([0.25, 0.5, 0.75])
+				const base64String1 = Buffer.from(embedding1.buffer).toString("base64")
+
+				const mockResponse = {
+					data: [
+						{ embedding: base64String1 }, // Base64 string
+						{ embedding: [1.0, 1.25, 1.5] }, // Regular array
+					],
+					usage: { prompt_tokens: 20, total_tokens: 30 },
+				}
+				mockEmbeddingsCreate.mockResolvedValue(mockResponse)
+
+				const result = await embedder.createEmbeddings(testTexts)
+
+				expect(result).toEqual({
+					embeddings: [
+						[0.25, 0.5, 0.75],
+						[1.0, 1.25, 1.5],
+					],
+					usage: { promptTokens: 20, totalTokens: 30 },
+				})
+			})
+		})
+
 		/**
 		 * Test batching logic when texts exceed token limits
 		 */
@@ -249,11 +343,15 @@ describe("OpenAICompatibleEmbedder", () => {
 				const testTexts = ["Hello world"]
 				const rateLimitError = { status: 429, message: "Rate limit exceeded" }
 
+				// Create base64 encoded embedding for successful response
+				const testEmbedding = new Float32Array([0.25, 0.5, 0.75])
+				const base64String = Buffer.from(testEmbedding.buffer).toString("base64")
+
 				mockEmbeddingsCreate
 					.mockRejectedValueOnce(rateLimitError)
 					.mockRejectedValueOnce(rateLimitError)
 					.mockResolvedValueOnce({
-						data: [{ embedding: [0.1, 0.2, 0.3] }],
+						data: [{ embedding: base64String }],
 						usage: { prompt_tokens: 10, total_tokens: 15 },
 					})
 
@@ -268,7 +366,7 @@ describe("OpenAICompatibleEmbedder", () => {
 				expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(3)
 				expect(console.warn).toHaveBeenCalledWith(expect.stringContaining("Rate limit hit, retrying in"))
 				expect(result).toEqual({
-					embeddings: [[0.1, 0.2, 0.3]],
+					embeddings: [[0.25, 0.5, 0.75]],
 					usage: { promptTokens: 10, totalTokens: 15 },
 				})
 			})
@@ -360,5 +458,84 @@ describe("OpenAICompatibleEmbedder", () => {
 				await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow()
 			})
 		})
+
+		/**
+		 * Test to confirm OpenAI package bug with base64 encoding
+		 * This test verifies that when we request encoding_format: "base64",
+		 * the OpenAI package returns unparsed base64 strings as expected.
+		 * This is the behavior we rely on in our workaround.
+		 */
+		describe("OpenAI package base64 behavior verification", () => {
+			it("should return unparsed base64 when encoding_format is base64", async () => {
+				const testTexts = ["Hello world"]
+
+				// Create a real OpenAI instance to test the actual package behavior
+				const realOpenAI = new ((await vi.importActual("openai")) as any).OpenAI({
+					baseURL: testBaseUrl,
+					apiKey: testApiKey,
+				})
+
+				// Create test embedding data as base64 using values that can be exactly represented in Float32
+				const testEmbedding = new Float32Array([0.25, 0.5, 0.75, 1.0])
+				const buffer = Buffer.from(testEmbedding.buffer)
+				const base64String = buffer.toString("base64")
+
+				// Mock the raw API response that would come from OpenAI
+				const mockApiResponse = {
+					data: [
+						{
+							object: "embedding",
+							embedding: base64String, // Raw base64 string from API
+							index: 0,
+						},
+					],
+					model: "text-embedding-3-small",
+					object: "list",
+					usage: {
+						prompt_tokens: 2,
+						total_tokens: 2,
+					},
+				}
+
+				// Mock the methodRequest method which is called by post()
+				const mockMethodRequest = vi.fn()
+				const mockAPIPromise = {
+					then: vi.fn().mockImplementation((callback) => {
+						return Promise.resolve(callback(mockApiResponse))
+					}),
+					catch: vi.fn(),
+					finally: vi.fn(),
+				}
+				mockMethodRequest.mockReturnValue(mockAPIPromise)
+
+				// Replace the methodRequest method on the client
+				;(realOpenAI as any).post = vi.fn().mockImplementation((path, opts) => {
+					return mockMethodRequest("post", path, opts)
+				})
+
+				// Call the embeddings.create method with base64 encoding
+				const response = await realOpenAI.embeddings.create({
+					input: testTexts,
+					model: "text-embedding-3-small",
+					encoding_format: "base64",
+				})
+
+				// Verify that the response contains the raw base64 string
+				// This confirms the OpenAI package doesn't parse base64 when explicitly requested
+				expect(response.data[0].embedding).toBe(base64String)
+				expect(typeof response.data[0].embedding).toBe("string")
+
+				// Verify we can manually convert it back to the original float array
+				const returnedBuffer = Buffer.from(response.data[0].embedding as string, "base64")
+				const returnedFloat32Array = new Float32Array(
+					returnedBuffer.buffer,
+					returnedBuffer.byteOffset,
+					returnedBuffer.byteLength / 4,
+				)
+				const returnedArray = Array.from(returnedFloat32Array)
+
+				expect(returnedArray).toEqual([0.25, 0.5, 0.75, 1.0])
+			})
+		})
 	})
 })

+ 40 - 2
src/services/code-index/embedders/openai-compatible.ts

@@ -8,6 +8,19 @@ import {
 } from "../constants"
 import { getDefaultModelId } from "../../../shared/embeddingModels"
 
+interface EmbeddingItem {
+	embedding: string | number[]
+	[key: string]: any
+}
+
+interface OpenAIEmbeddingResponse {
+	data: EmbeddingItem[]
+	usage?: {
+		prompt_tokens?: number
+		total_tokens?: number
+	}
+}
+
 /**
  * OpenAI Compatible implementation of the embedder interface with batching and rate limiting.
  * This embedder allows using any OpenAI-compatible API endpoint by specifying a custom baseURL.
@@ -108,13 +121,38 @@ export class OpenAICompatibleEmbedder implements IEmbedder {
 	): Promise<{ embeddings: number[][]; usage: { promptTokens: number; totalTokens: number } }> {
 		for (let attempts = 0; attempts < MAX_RETRIES; attempts++) {
 			try {
-				const response = await this.embeddingsClient.embeddings.create({
+				const response = (await this.embeddingsClient.embeddings.create({
 					input: batchTexts,
 					model: model,
+					// OpenAI package (as of v4.78.1) has a parsing issue that truncates embedding dimensions to 256
+					// when processing numeric arrays, which breaks compatibility with models using larger dimensions.
+					// By requesting base64 encoding, we bypass the package's parser and handle decoding ourselves.
+					encoding_format: "base64",
+				})) as OpenAIEmbeddingResponse
+
+				// Convert base64 embeddings to float32 arrays
+				const processedEmbeddings = response.data.map((item: EmbeddingItem) => {
+					if (typeof item.embedding === "string") {
+						const buffer = Buffer.from(item.embedding, "base64")
+
+						// Create Float32Array view over the buffer
+						const float32Array = new Float32Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 4)
+
+						return {
+							...item,
+							embedding: Array.from(float32Array),
+						}
+					}
+					return item
 				})
 
+				// Replace the original data with processed embeddings
+				response.data = processedEmbeddings
+
+				const embeddings = response.data.map((item) => item.embedding as number[])
+
 				return {
-					embeddings: response.data.map((item) => item.embedding),
+					embeddings: embeddings,
 					usage: {
 						promptTokens: response.usage?.prompt_tokens || 0,
 						totalTokens: response.usage?.total_tokens || 0,