|
|
@@ -1,4 +1,4 @@
|
|
|
-import { vitest, describe, it, expect, beforeEach, afterEach } from "vitest"
|
|
|
+import { vitest, describe, it, expect, beforeEach, afterEach, vi } from "vitest"
|
|
|
import type { MockedClass, MockedFunction } from "vitest"
|
|
|
import { OpenAI } from "openai"
|
|
|
import { OpenAICompatibleEmbedder } from "../openai-compatible"
|
|
|
@@ -110,6 +110,7 @@ describe("OpenAICompatibleEmbedder", () => {
|
|
|
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
|
|
|
input: testTexts,
|
|
|
model: testModelId,
|
|
|
+ encoding_format: "base64",
|
|
|
})
|
|
|
expect(result).toEqual({
|
|
|
embeddings: [[0.1, 0.2, 0.3]],
|
|
|
@@ -130,6 +131,7 @@ describe("OpenAICompatibleEmbedder", () => {
|
|
|
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
|
|
|
input: testTexts,
|
|
|
model: testModelId,
|
|
|
+ encoding_format: "base64",
|
|
|
})
|
|
|
expect(result).toEqual({
|
|
|
embeddings: [
|
|
|
@@ -154,6 +156,7 @@ describe("OpenAICompatibleEmbedder", () => {
|
|
|
expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
|
|
|
input: testTexts,
|
|
|
model: customModel,
|
|
|
+ encoding_format: "base64",
|
|
|
})
|
|
|
})
|
|
|
|
|
|
@@ -173,6 +176,97 @@ describe("OpenAICompatibleEmbedder", () => {
|
|
|
})
|
|
|
})
|
|
|
|
|
|
+ /**
|
|
|
+ * Test base64 conversion logic
|
|
|
+ */
|
|
|
+ describe("base64 conversion", () => {
|
|
|
+ it("should convert base64 encoded embeddings to float arrays", async () => {
|
|
|
+ const testTexts = ["Hello world"]
|
|
|
+
|
|
|
+ // Create a Float32Array with test values that can be exactly represented in Float32
|
|
|
+ const testEmbedding = new Float32Array([0.25, 0.5, 0.75, 1.0])
|
|
|
+
|
|
|
+ // Convert to base64 string (simulating what OpenAI API returns)
|
|
|
+ const buffer = Buffer.from(testEmbedding.buffer)
|
|
|
+ const base64String = buffer.toString("base64")
|
|
|
+
|
|
|
+ const mockResponse = {
|
|
|
+ data: [{ embedding: base64String }], // Base64 string instead of array
|
|
|
+ usage: { prompt_tokens: 10, total_tokens: 15 },
|
|
|
+ }
|
|
|
+ mockEmbeddingsCreate.mockResolvedValue(mockResponse)
|
|
|
+
|
|
|
+ const result = await embedder.createEmbeddings(testTexts)
|
|
|
+
|
|
|
+ expect(mockEmbeddingsCreate).toHaveBeenCalledWith({
|
|
|
+ input: testTexts,
|
|
|
+ model: testModelId,
|
|
|
+ encoding_format: "base64",
|
|
|
+ })
|
|
|
+
|
|
|
+ // Verify the base64 string was converted back to the original float array
|
|
|
+ expect(result).toEqual({
|
|
|
+ embeddings: [[0.25, 0.5, 0.75, 1.0]],
|
|
|
+ usage: { promptTokens: 10, totalTokens: 15 },
|
|
|
+ })
|
|
|
+ })
|
|
|
+
|
|
|
+ it("should handle multiple base64 encoded embeddings", async () => {
|
|
|
+ const testTexts = ["Hello world", "Goodbye world"]
|
|
|
+
|
|
|
+ // Create test embeddings with values that can be exactly represented in Float32
|
|
|
+ const embedding1 = new Float32Array([0.25, 0.5, 0.75])
|
|
|
+ const embedding2 = new Float32Array([1.0, 1.25, 1.5])
|
|
|
+
|
|
|
+ // Convert to base64 strings
|
|
|
+ const base64String1 = Buffer.from(embedding1.buffer).toString("base64")
|
|
|
+ const base64String2 = Buffer.from(embedding2.buffer).toString("base64")
|
|
|
+
|
|
|
+ const mockResponse = {
|
|
|
+ data: [{ embedding: base64String1 }, { embedding: base64String2 }],
|
|
|
+ usage: { prompt_tokens: 20, total_tokens: 30 },
|
|
|
+ }
|
|
|
+ mockEmbeddingsCreate.mockResolvedValue(mockResponse)
|
|
|
+
|
|
|
+ const result = await embedder.createEmbeddings(testTexts)
|
|
|
+
|
|
|
+ expect(result).toEqual({
|
|
|
+ embeddings: [
|
|
|
+ [0.25, 0.5, 0.75],
|
|
|
+ [1.0, 1.25, 1.5],
|
|
|
+ ],
|
|
|
+ usage: { promptTokens: 20, totalTokens: 30 },
|
|
|
+ })
|
|
|
+ })
|
|
|
+
|
|
|
+ it("should handle mixed base64 and array embeddings", async () => {
|
|
|
+ const testTexts = ["Hello world", "Goodbye world"]
|
|
|
+
|
|
|
+ // Create one base64 embedding and one regular array (edge case)
|
|
|
+ const embedding1 = new Float32Array([0.25, 0.5, 0.75])
|
|
|
+ const base64String1 = Buffer.from(embedding1.buffer).toString("base64")
|
|
|
+
|
|
|
+ const mockResponse = {
|
|
|
+ data: [
|
|
|
+ { embedding: base64String1 }, // Base64 string
|
|
|
+ { embedding: [1.0, 1.25, 1.5] }, // Regular array
|
|
|
+ ],
|
|
|
+ usage: { prompt_tokens: 20, total_tokens: 30 },
|
|
|
+ }
|
|
|
+ mockEmbeddingsCreate.mockResolvedValue(mockResponse)
|
|
|
+
|
|
|
+ const result = await embedder.createEmbeddings(testTexts)
|
|
|
+
|
|
|
+ expect(result).toEqual({
|
|
|
+ embeddings: [
|
|
|
+ [0.25, 0.5, 0.75],
|
|
|
+ [1.0, 1.25, 1.5],
|
|
|
+ ],
|
|
|
+ usage: { promptTokens: 20, totalTokens: 30 },
|
|
|
+ })
|
|
|
+ })
|
|
|
+ })
|
|
|
+
|
|
|
/**
|
|
|
* Test batching logic when texts exceed token limits
|
|
|
*/
|
|
|
@@ -249,11 +343,15 @@ describe("OpenAICompatibleEmbedder", () => {
|
|
|
const testTexts = ["Hello world"]
|
|
|
const rateLimitError = { status: 429, message: "Rate limit exceeded" }
|
|
|
|
|
|
+ // Create base64 encoded embedding for successful response
|
|
|
+ const testEmbedding = new Float32Array([0.25, 0.5, 0.75])
|
|
|
+ const base64String = Buffer.from(testEmbedding.buffer).toString("base64")
|
|
|
+
|
|
|
mockEmbeddingsCreate
|
|
|
.mockRejectedValueOnce(rateLimitError)
|
|
|
.mockRejectedValueOnce(rateLimitError)
|
|
|
.mockResolvedValueOnce({
|
|
|
- data: [{ embedding: [0.1, 0.2, 0.3] }],
|
|
|
+ data: [{ embedding: base64String }],
|
|
|
usage: { prompt_tokens: 10, total_tokens: 15 },
|
|
|
})
|
|
|
|
|
|
@@ -268,7 +366,7 @@ describe("OpenAICompatibleEmbedder", () => {
|
|
|
expect(mockEmbeddingsCreate).toHaveBeenCalledTimes(3)
|
|
|
expect(console.warn).toHaveBeenCalledWith(expect.stringContaining("Rate limit hit, retrying in"))
|
|
|
expect(result).toEqual({
|
|
|
- embeddings: [[0.1, 0.2, 0.3]],
|
|
|
+ embeddings: [[0.25, 0.5, 0.75]],
|
|
|
usage: { promptTokens: 10, totalTokens: 15 },
|
|
|
})
|
|
|
})
|
|
|
@@ -360,5 +458,84 @@ describe("OpenAICompatibleEmbedder", () => {
|
|
|
await expect(embedder.createEmbeddings(testTexts)).rejects.toThrow()
|
|
|
})
|
|
|
})
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Test to confirm OpenAI package bug with base64 encoding
|
|
|
+ * This test verifies that when we request encoding_format: "base64",
|
|
|
+ * the OpenAI package returns unparsed base64 strings as expected.
|
|
|
+ * This is the behavior we rely on in our workaround.
|
|
|
+ */
|
|
|
+ describe("OpenAI package base64 behavior verification", () => {
|
|
|
+ it("should return unparsed base64 when encoding_format is base64", async () => {
|
|
|
+ const testTexts = ["Hello world"]
|
|
|
+
|
|
|
+ // Create a real OpenAI instance to test the actual package behavior
|
|
|
+ const realOpenAI = new ((await vi.importActual("openai")) as any).OpenAI({
|
|
|
+ baseURL: testBaseUrl,
|
|
|
+ apiKey: testApiKey,
|
|
|
+ })
|
|
|
+
|
|
|
+ // Create test embedding data as base64 using values that can be exactly represented in Float32
|
|
|
+ const testEmbedding = new Float32Array([0.25, 0.5, 0.75, 1.0])
|
|
|
+ const buffer = Buffer.from(testEmbedding.buffer)
|
|
|
+ const base64String = buffer.toString("base64")
|
|
|
+
|
|
|
+ // Mock the raw API response that would come from OpenAI
|
|
|
+ const mockApiResponse = {
|
|
|
+ data: [
|
|
|
+ {
|
|
|
+ object: "embedding",
|
|
|
+ embedding: base64String, // Raw base64 string from API
|
|
|
+ index: 0,
|
|
|
+ },
|
|
|
+ ],
|
|
|
+ model: "text-embedding-3-small",
|
|
|
+ object: "list",
|
|
|
+ usage: {
|
|
|
+ prompt_tokens: 2,
|
|
|
+ total_tokens: 2,
|
|
|
+ },
|
|
|
+ }
|
|
|
+
|
|
|
+ // Stub that stands in for the request call; the replaced post() below delegates to it
|
|
|
+ const mockMethodRequest = vi.fn()
|
|
|
+ const mockAPIPromise = {
|
|
|
+ then: vi.fn().mockImplementation((callback) => {
|
|
|
+ return Promise.resolve(callback(mockApiResponse))
|
|
|
+ }),
|
|
|
+ catch: vi.fn(),
|
|
|
+ finally: vi.fn(),
|
|
|
+ }
|
|
|
+ mockMethodRequest.mockReturnValue(mockAPIPromise)
|
|
|
+
|
|
|
+ // Replace the post method on the client so requests are routed to mockMethodRequest
|
|
|
+ ;(realOpenAI as any).post = vi.fn().mockImplementation((path, opts) => {
|
|
|
+ return mockMethodRequest("post", path, opts)
|
|
|
+ })
|
|
|
+
|
|
|
+ // Call the embeddings.create method with base64 encoding
|
|
|
+ const response = await realOpenAI.embeddings.create({
|
|
|
+ input: testTexts,
|
|
|
+ model: "text-embedding-3-small",
|
|
|
+ encoding_format: "base64",
|
|
|
+ })
|
|
|
+
|
|
|
+ // Verify that the response contains the raw base64 string
|
|
|
+ // This confirms the OpenAI package doesn't parse base64 when explicitly requested
|
|
|
+ expect(response.data[0].embedding).toBe(base64String)
|
|
|
+ expect(typeof response.data[0].embedding).toBe("string")
|
|
|
+
|
|
|
+ // Verify we can manually convert it back to the original float array
|
|
|
+ const returnedBuffer = Buffer.from(response.data[0].embedding as string, "base64")
|
|
|
+ const returnedFloat32Array = new Float32Array(
|
|
|
+ returnedBuffer.buffer,
|
|
|
+ returnedBuffer.byteOffset,
|
|
|
+ returnedBuffer.byteLength / 4,
|
|
|
+ )
|
|
|
+ const returnedArray = Array.from(returnedFloat32Array)
|
|
|
+
|
|
|
+ expect(returnedArray).toEqual([0.25, 0.5, 0.75, 1.0])
|
|
|
+ })
|
|
|
+ })
|
|
|
})
|
|
|
})
|