
Merge pull request #1312 from RooVetGit/count_tokens

Infrastructure to support calling token count APIs, starting with Anthropic
Matt Rubens 10 months ago
parent
commit
773e5560db

+ 10 - 0
src/api/index.ts

@@ -27,6 +27,16 @@ export interface SingleCompletionHandler {
 export interface ApiHandler {
 	createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream
 	getModel(): { id: string; info: ModelInfo }
+
+	/**
+	 * Counts tokens for content blocks
+	 * All providers extend BaseProvider which provides a default tiktoken implementation,
+	 * but they can override this to use their native token counting endpoints
+	 *
+	 * @param content The content to count tokens for
+	 * @returns A promise resolving to the token count
+	 */
+	countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number>
 }
 
 export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
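
To illustrate the new interface method, here is a minimal usage sketch (the helper function and import paths are illustrative, not part of this PR): any handler returned by buildApiHandler now exposes countTokens, with Anthropic using its native endpoint and every other provider falling back to the tiktoken default inherited from BaseProvider.

import { Anthropic } from "@anthropic-ai/sdk"
import type { ApiConfiguration } from "../src/shared/api" // assumed export location
import { buildApiHandler } from "../src/api" // illustrative path

// Illustrative helper: count tokens for a prompt with whichever provider
// the configuration selects.
async function countPromptTokens(configuration: ApiConfiguration): Promise<number> {
	const handler = buildApiHandler(configuration)
	const content: Array<Anthropic.Messages.ContentBlockParam> = [
		{ type: "text", text: "How many tokens is this?" },
	]
	// AnthropicHandler overrides countTokens with its native endpoint; other
	// providers use the tiktoken-based default from BaseProvider.
	return handler.countTokens(content)
}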

+ 35 - 3
src/api/providers/anthropic.ts

@@ -9,16 +9,17 @@ import {
 	ModelInfo,
 } from "../../shared/api"
 import { ApiStream } from "../transform/stream"
+import { BaseProvider } from "./base-provider"
 import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "./constants"
-import { ApiHandler, SingleCompletionHandler, getModelParams } from "../index"
+import { SingleCompletionHandler, getModelParams } from "../index"
 
-export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
+export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler {
 	private options: ApiHandlerOptions
 	private client: Anthropic
 
 	constructor(options: ApiHandlerOptions) {
+		super()
 		this.options = options
-
 		this.client = new Anthropic({
 			apiKey: this.options.apiKey,
 			baseURL: this.options.anthropicBaseUrl || undefined,
@@ -212,4 +213,35 @@ export class AnthropicHandler implements ApiHandler, SingleCompletionHandler {
 		const content = message.content.find(({ type }) => type === "text")
 		return content?.type === "text" ? content.text : ""
 	}
+
+	/**
+	 * Counts tokens for the given content using Anthropic's API
+	 *
+	 * @param content The content blocks to count tokens for
+	 * @returns A promise resolving to the token count
+	 */
+	override async countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
+		try {
+			// Use the current model
+			const actualModelId = this.getModel().id
+
+			const response = await this.client.messages.countTokens({
+				model: actualModelId,
+				messages: [
+					{
+						role: "user",
+						content: content,
+					},
+				],
+			})
+
+			return response.input_tokens
+		} catch (error) {
+			// Log error but fallback to tiktoken estimation
+			console.warn("Anthropic token counting failed, using fallback", error)
+
+			// Use the base provider's implementation as fallback
+			return super.countTokens(content)
+		}
+	}
 }

+ 64 - 0
src/api/providers/base-provider.ts

@@ -0,0 +1,64 @@
+import { Anthropic } from "@anthropic-ai/sdk"
+import { ApiHandler } from ".."
+import { ModelInfo } from "../../shared/api"
+import { ApiStream } from "../transform/stream"
+import { Tiktoken } from "js-tiktoken/lite"
+import o200kBase from "js-tiktoken/ranks/o200k_base"
+
+// Reuse the fudge factor used in the original code
+const TOKEN_FUDGE_FACTOR = 1.5
+
+/**
+ * Base class for API providers that implements common functionality
+ */
+export abstract class BaseProvider implements ApiHandler {
+	// Cache the Tiktoken encoder instance since it's stateless
+	private encoder: Tiktoken | null = null
+	abstract createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream
+	abstract getModel(): { id: string; info: ModelInfo }
+
+	/**
+	 * Default token counting implementation using tiktoken
+	 * Providers can override this to use their native token counting endpoints
+	 *
+	 * Uses a cached Tiktoken encoder instance for performance since it's stateless.
+	 * The encoder is created lazily on first use and reused for subsequent calls.
+	 *
+	 * @param content The content to count tokens for
+	 * @returns A promise resolving to the token count
+	 */
+	async countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
+		if (!content || content.length === 0) return 0
+
+		let totalTokens = 0
+
+		// Lazily create and cache the encoder if it doesn't exist
+		if (!this.encoder) {
+			this.encoder = new Tiktoken(o200kBase)
+		}
+
+		// Process each content block using the cached encoder
+		for (const block of content) {
+			if (block.type === "text") {
+				// Use tiktoken for text token counting
+				const text = block.text || ""
+				if (text.length > 0) {
+					const tokens = this.encoder.encode(text)
+					totalTokens += tokens.length
+				}
+			} else if (block.type === "image") {
+				// For images, calculate based on data size
+				const imageSource = block.source
+				if (imageSource && typeof imageSource === "object" && "data" in imageSource) {
+					const base64Data = imageSource.data as string
+					totalTokens += Math.ceil(Math.sqrt(base64Data.length))
+				} else {
+					totalTokens += 300 // Conservative estimate for unknown images
+				}
+			}
+		}
+
+		// Add a fudge factor to account for the fact that tiktoken is not always accurate
+		return Math.ceil(totalTokens * TOKEN_FUDGE_FACTOR)
+	}
+}
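
As a concrete check of the fallback estimate above, the image arithmetic works out as follows (the numbers mirror the sliding-window test added later in this PR): a 1000-character base64 payload yields ceil(sqrt(1000)) = 32 raw tokens, and the 1.5 fudge factor brings that to ceil(32 * 1.5) = 48.

// Worked example of the image estimate, assuming a single image block
// whose base64 data is "X".repeat(1000), as in the test below.
const base64Length = 1000
const rawTokens = Math.ceil(Math.sqrt(base64Length)) // 32
const estimated = Math.ceil(rawTokens * 1.5) // 48 — matches the test expectation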

+ 7 - 5
src/api/providers/bedrock.ts

@@ -6,10 +6,11 @@ import {
 } from "@aws-sdk/client-bedrock-runtime"
 import { fromIni } from "@aws-sdk/credential-providers"
 import { Anthropic } from "@anthropic-ai/sdk"
-import { ApiHandler, SingleCompletionHandler } from "../"
+import { SingleCompletionHandler } from "../"
 import { ApiHandlerOptions, BedrockModelId, ModelInfo, bedrockDefaultModelId, bedrockModels } from "../../shared/api"
 import { ApiStream } from "../transform/stream"
 import { convertToBedrockConverseMessages } from "../transform/bedrock-converse-format"
+import { BaseProvider } from "./base-provider"
 
 const BEDROCK_DEFAULT_TEMPERATURE = 0.3
 
@@ -46,11 +47,12 @@ export interface StreamEvent {
 	}
 }
 
-export class AwsBedrockHandler implements ApiHandler, SingleCompletionHandler {
-	private options: ApiHandlerOptions
+export class AwsBedrockHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
 	private client: BedrockRuntimeClient
 
 	constructor(options: ApiHandlerOptions) {
+		super()
 		this.options = options
 
 		const clientConfig: BedrockRuntimeClientConfig = {
@@ -74,7 +76,7 @@ export class AwsBedrockHandler implements ApiHandler, SingleCompletionHandler {
 		this.client = new BedrockRuntimeClient(clientConfig)
 	}
 
-	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const modelConfig = this.getModel()
 
 		// Handle cross-region inference
@@ -205,7 +207,7 @@ export class AwsBedrockHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 
-	getModel(): { id: BedrockModelId | string; info: ModelInfo } {
+	override getModel(): { id: BedrockModelId | string; info: ModelInfo } {
 		const modelId = this.options.apiModelId
 		if (modelId) {
 			// For tests, allow any model ID

+ 7 - 5
src/api/providers/gemini.ts

@@ -1,22 +1,24 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { GoogleGenerativeAI } from "@google/generative-ai"
-import { ApiHandler, SingleCompletionHandler } from "../"
+import { SingleCompletionHandler } from "../"
 import { ApiHandlerOptions, geminiDefaultModelId, GeminiModelId, geminiModels, ModelInfo } from "../../shared/api"
 import { convertAnthropicMessageToGemini } from "../transform/gemini-format"
 import { ApiStream } from "../transform/stream"
+import { BaseProvider } from "./base-provider"
 
 const GEMINI_DEFAULT_TEMPERATURE = 0
 
-export class GeminiHandler implements ApiHandler, SingleCompletionHandler {
-	private options: ApiHandlerOptions
+export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
 	private client: GoogleGenerativeAI
 
 	constructor(options: ApiHandlerOptions) {
+		super()
 		this.options = options
 		this.client = new GoogleGenerativeAI(options.geminiApiKey ?? "not-provided")
 	}
 
-	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const model = this.client.getGenerativeModel({
 			model: this.getModel().id,
 			systemInstruction: systemPrompt,
@@ -44,7 +46,7 @@ export class GeminiHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 
-	getModel(): { id: GeminiModelId; info: ModelInfo } {
+	override getModel(): { id: GeminiModelId; info: ModelInfo } {
 		const modelId = this.options.apiModelId
 		if (modelId && modelId in geminiModels) {
 			const id = modelId as GeminiModelId

+ 21 - 19
src/api/providers/glama.ts

@@ -6,22 +6,39 @@ import { ApiHandlerOptions, ModelInfo, glamaDefaultModelId, glamaDefaultModelInf
 import { parseApiPrice } from "../../utils/cost"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStream } from "../transform/stream"
-import { ApiHandler, SingleCompletionHandler } from "../"
+import { SingleCompletionHandler } from "../"
+import { BaseProvider } from "./base-provider"
 
 const GLAMA_DEFAULT_TEMPERATURE = 0
 
-export class GlamaHandler implements ApiHandler, SingleCompletionHandler {
-	private options: ApiHandlerOptions
+export class GlamaHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
 	private client: OpenAI
 
 	constructor(options: ApiHandlerOptions) {
+		super()
 		this.options = options
 		const baseURL = "https://glama.ai/api/gateway/openai/v1"
 		const apiKey = this.options.glamaApiKey ?? "not-provided"
 		this.client = new OpenAI({ baseURL, apiKey })
 	}
 
-	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	private supportsTemperature(): boolean {
+		return !this.getModel().id.startsWith("openai/o3-mini")
+	}
+
+	override getModel(): { id: string; info: ModelInfo } {
+		const modelId = this.options.glamaModelId
+		const modelInfo = this.options.glamaModelInfo
+
+		if (modelId && modelInfo) {
+			return { id: modelId, info: modelInfo }
+		}
+
+		return { id: glamaDefaultModelId, info: glamaDefaultModelInfo }
+	}
+
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		// Convert Anthropic messages to OpenAI format
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
@@ -152,21 +169,6 @@ export class GlamaHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 
-	private supportsTemperature(): boolean {
-		return !this.getModel().id.startsWith("openai/o3-mini")
-	}
-
-	getModel(): { id: string; info: ModelInfo } {
-		const modelId = this.options.glamaModelId
-		const modelInfo = this.options.glamaModelInfo
-
-		if (modelId && modelInfo) {
-			return { id: modelId, info: modelInfo }
-		}
-
-		return { id: glamaDefaultModelId, info: glamaDefaultModelInfo }
-	}
-
 	async completePrompt(prompt: string): Promise<string> {
 		try {
 			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {

+ 7 - 5
src/api/providers/lmstudio.ts

@@ -2,18 +2,20 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
 import axios from "axios"
 
-import { ApiHandler, SingleCompletionHandler } from "../"
+import { SingleCompletionHandler } from "../"
 import { ApiHandlerOptions, ModelInfo, openAiModelInfoSaneDefaults } from "../../shared/api"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStream } from "../transform/stream"
+import { BaseProvider } from "./base-provider"
 
 const LMSTUDIO_DEFAULT_TEMPERATURE = 0
 
-export class LmStudioHandler implements ApiHandler, SingleCompletionHandler {
-	private options: ApiHandlerOptions
+export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
 	private client: OpenAI
 
 	constructor(options: ApiHandlerOptions) {
+		super()
 		this.options = options
 		this.client = new OpenAI({
 			baseURL: (this.options.lmStudioBaseUrl || "http://localhost:1234") + "/v1",
@@ -21,7 +23,7 @@ export class LmStudioHandler implements ApiHandler, SingleCompletionHandler {
 		})
 	}
 
-	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
 			...convertToOpenAiMessages(messages),
@@ -51,7 +53,7 @@ export class LmStudioHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 
-	getModel(): { id: string; info: ModelInfo } {
+	override getModel(): { id: string; info: ModelInfo } {
 		return {
 			id: this.options.lmStudioModelId || "",
 			info: openAiModelInfoSaneDefaults,

+ 7 - 5
src/api/providers/mistral.ts

@@ -1,6 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { Mistral } from "@mistralai/mistralai"
-import { ApiHandler } from "../"
+import { SingleCompletionHandler } from "../"
 import {
 	ApiHandlerOptions,
 	mistralDefaultModelId,
@@ -13,14 +13,16 @@ import {
 } from "../../shared/api"
 import { convertToMistralMessages } from "../transform/mistral-format"
 import { ApiStream } from "../transform/stream"
+import { BaseProvider } from "./base-provider"
 
 const MISTRAL_DEFAULT_TEMPERATURE = 0
 
-export class MistralHandler implements ApiHandler {
-	private options: ApiHandlerOptions
+export class MistralHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
 	private client: Mistral
 
 	constructor(options: ApiHandlerOptions) {
+		super()
 		if (!options.mistralApiKey) {
 			throw new Error("Mistral API key is required")
 		}
@@ -48,7 +50,7 @@ export class MistralHandler implements ApiHandler {
 		return "https://api.mistral.ai"
 	}
 
-	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const response = await this.client.chat.stream({
 			model: this.options.apiModelId || mistralDefaultModelId,
 			messages: [{ role: "system", content: systemPrompt }, ...convertToMistralMessages(messages)],
@@ -81,7 +83,7 @@ export class MistralHandler implements ApiHandler {
 		}
 	}
 
-	getModel(): { id: MistralModelId; info: ModelInfo } {
+	override getModel(): { id: MistralModelId; info: ModelInfo } {
 		const modelId = this.options.apiModelId
 		if (modelId && modelId in mistralModels) {
 			const id = modelId as MistralModelId

+ 7 - 5
src/api/providers/ollama.ts

@@ -2,19 +2,21 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
 import axios from "axios"
 
-import { ApiHandler, SingleCompletionHandler } from "../"
+import { SingleCompletionHandler } from "../"
 import { ApiHandlerOptions, ModelInfo, openAiModelInfoSaneDefaults } from "../../shared/api"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { convertToR1Format } from "../transform/r1-format"
 import { ApiStream } from "../transform/stream"
 import { DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants"
 import { XmlMatcher } from "../../utils/xml-matcher"
+import { BaseProvider } from "./base-provider"
 
-export class OllamaHandler implements ApiHandler, SingleCompletionHandler {
-	private options: ApiHandlerOptions
+export class OllamaHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
 	private client: OpenAI
 
 	constructor(options: ApiHandlerOptions) {
+		super()
 		this.options = options
 		this.client = new OpenAI({
 			baseURL: (this.options.ollamaBaseUrl || "http://localhost:11434") + "/v1",
@@ -22,7 +24,7 @@ export class OllamaHandler implements ApiHandler, SingleCompletionHandler {
 		})
 	}
 
-	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const modelId = this.getModel().id
 		const useR1Format = modelId.toLowerCase().includes("deepseek-r1")
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
@@ -58,7 +60,7 @@ export class OllamaHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 
-	getModel(): { id: string; info: ModelInfo } {
+	override getModel(): { id: string; info: ModelInfo } {
 		return {
 			id: this.options.ollamaModelId || "",
 			info: openAiModelInfoSaneDefaults,

+ 7 - 5
src/api/providers/openai-native.ts

@@ -1,6 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
-import { ApiHandler, SingleCompletionHandler } from "../"
+import { SingleCompletionHandler } from "../"
 import {
 	ApiHandlerOptions,
 	ModelInfo,
@@ -10,20 +10,22 @@ import {
 } from "../../shared/api"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStream } from "../transform/stream"
+import { BaseProvider } from "./base-provider"
 
 const OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0
 
-export class OpenAiNativeHandler implements ApiHandler, SingleCompletionHandler {
-	private options: ApiHandlerOptions
+export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
 	private client: OpenAI
 
 	constructor(options: ApiHandlerOptions) {
+		super()
 		this.options = options
 		const apiKey = this.options.openAiNativeApiKey ?? "not-provided"
 		this.client = new OpenAI({ apiKey })
 	}
 
-	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const modelId = this.getModel().id
 
 		if (modelId.startsWith("o1")) {
@@ -133,7 +135,7 @@ export class OpenAiNativeHandler implements ApiHandler, SingleCompletionHandler
 		}
 	}
 
-	getModel(): { id: OpenAiNativeModelId; info: ModelInfo } {
+	override getModel(): { id: OpenAiNativeModelId; info: ModelInfo } {
 		const modelId = this.options.apiModelId
 		if (modelId && modelId in openAiNativeModels) {
 			const id = modelId as OpenAiNativeModelId

+ 7 - 5
src/api/providers/openai.ts

@@ -8,22 +8,24 @@ import {
 	ModelInfo,
 	openAiModelInfoSaneDefaults,
 } from "../../shared/api"
-import { ApiHandler, SingleCompletionHandler } from "../index"
+import { SingleCompletionHandler } from "../index"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { convertToR1Format } from "../transform/r1-format"
 import { convertToSimpleMessages } from "../transform/simple-format"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
-import { DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants"
+import { BaseProvider } from "./base-provider"
 
+const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.6
 export interface OpenAiHandlerOptions extends ApiHandlerOptions {
 	defaultHeaders?: Record<string, string>
 }
 
-export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
+export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: OpenAiHandlerOptions
 	private client: OpenAI
 
 	constructor(options: OpenAiHandlerOptions) {
+		super()
 		this.options = options
 
 		const baseURL = this.options.openAiBaseUrl ?? "https://api.openai.com/v1"
@@ -51,7 +53,7 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 
-	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const modelInfo = this.getModel().info
 		const modelUrl = this.options.openAiBaseUrl ?? ""
 		const modelId = this.options.openAiModelId ?? ""
@@ -139,7 +141,7 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 
-	getModel(): { id: string; info: ModelInfo } {
+	override getModel(): { id: string; info: ModelInfo } {
 		return {
 			id: this.options.openAiModelId ?? "",
 			info: this.options.openAiCustomModelInfo ?? openAiModelInfoSaneDefaults,

+ 8 - 5
src/api/providers/openrouter.ts

@@ -9,8 +9,10 @@ import { parseApiPrice } from "../../utils/cost"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStreamChunk, ApiStreamUsageChunk } from "../transform/stream"
 import { convertToR1Format } from "../transform/r1-format"
+
 import { DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants"
-import { ApiHandler, getModelParams, SingleCompletionHandler } from ".."
+import { getModelParams, SingleCompletionHandler } from ".."
+import { BaseProvider } from "./base-provider"
 
 // Add custom interface for OpenRouter params.
 type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
@@ -24,11 +26,12 @@ interface OpenRouterApiStreamUsageChunk extends ApiStreamUsageChunk {
 	fullResponseText: string
 }
 
-export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
-	private options: ApiHandlerOptions
+export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
 	private client: OpenAI
 
 	constructor(options: ApiHandlerOptions) {
+		super()
 		this.options = options
 
 		const baseURL = this.options.openRouterBaseUrl || "https://openrouter.ai/api/v1"
@@ -42,7 +45,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 		this.client = new OpenAI({ baseURL, apiKey, defaultHeaders })
 	}
 
-	async *createMessage(
+	override async *createMessage(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): AsyncGenerator<ApiStreamChunk> {
@@ -191,7 +194,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 
-	getModel() {
+	override getModel() {
 		const modelId = this.options.openRouterModelId
 		const modelInfo = this.options.openRouterModelInfo
 

+ 7 - 5
src/api/providers/unbound.ts

@@ -5,25 +5,27 @@ import OpenAI from "openai"
 import { ApiHandlerOptions, ModelInfo, unboundDefaultModelId, unboundDefaultModelInfo } from "../../shared/api"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
-import { ApiHandler, SingleCompletionHandler } from "../"
+import { SingleCompletionHandler } from "../"
+import { BaseProvider } from "./base-provider"
 
 interface UnboundUsage extends OpenAI.CompletionUsage {
 	cache_creation_input_tokens?: number
 	cache_read_input_tokens?: number
 }
 
-export class UnboundHandler implements ApiHandler, SingleCompletionHandler {
-	private options: ApiHandlerOptions
+export class UnboundHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
 	private client: OpenAI
 
 	constructor(options: ApiHandlerOptions) {
+		super()
 		this.options = options
 		const baseURL = "https://api.getunbound.ai/v1"
 		const apiKey = this.options.unboundApiKey ?? "not-provided"
 		this.client = new OpenAI({ baseURL, apiKey })
 	}
 
-	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		// Convert Anthropic messages to OpenAI format
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
@@ -131,7 +133,7 @@ export class UnboundHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 
-	getModel(): { id: string; info: ModelInfo } {
+	override getModel(): { id: string; info: ModelInfo } {
 		const modelId = this.options.unboundModelId
 		const modelInfo = this.options.unboundModelInfo
 		if (modelId && modelInfo) {

+ 9 - 4
src/api/providers/vertex.ts

@@ -1,13 +1,16 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
 import { Stream as AnthropicStream } from "@anthropic-ai/sdk/streaming"
+
 import { VertexAI } from "@google-cloud/vertexai"
 
 import { ApiHandlerOptions, ModelInfo, vertexDefaultModelId, VertexModelId, vertexModels } from "../../shared/api"
 import { ApiStream } from "../transform/stream"
 import { convertAnthropicMessageToVertexGemini } from "../transform/vertex-gemini-format"
+import { BaseProvider } from "./base-provider"
+
 import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "./constants"
-import { ApiHandler, getModelParams, SingleCompletionHandler } from "../"
+import { getModelParams, SingleCompletionHandler } from "../"
 
 // Types for Vertex SDK
 
@@ -94,17 +97,19 @@ interface VertexMessageStreamEvent {
 				thinking: string
 		  }
 }
+
 // https://docs.anthropic.com/en/api/claude-on-vertex-ai
-export class VertexHandler implements ApiHandler, SingleCompletionHandler {
+export class VertexHandler extends BaseProvider implements SingleCompletionHandler {
 	MODEL_CLAUDE = "claude"
 	MODEL_GEMINI = "gemini"
 
-	private options: ApiHandlerOptions
+	protected options: ApiHandlerOptions
 	private anthropicClient: AnthropicVertex
 	private geminiClient: VertexAI
 	private modelType: string
 
 	constructor(options: ApiHandlerOptions) {
+		super()
 		this.options = options
 
 		if (this.options.apiModelId?.startsWith(this.MODEL_CLAUDE)) {
@@ -329,7 +334,7 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 
-	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		switch (this.modelType) {
 			case this.MODEL_CLAUDE: {
 				yield* this.createClaudeMessage(systemPrompt, messages)

+ 39 - 11
src/api/providers/vscode-lm.ts

@@ -1,18 +1,19 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import * as vscode from "vscode"
 
-import { ApiHandler, SingleCompletionHandler } from "../"
+import { SingleCompletionHandler } from "../"
 import { calculateApiCost } from "../../utils/cost"
 import { ApiStream } from "../transform/stream"
 import { convertToVsCodeLmMessages } from "../transform/vscode-lm-format"
 import { SELECTOR_SEPARATOR, stringifyVsCodeLmModelSelector } from "../../shared/vsCodeSelectorUtils"
 import { ApiHandlerOptions, ModelInfo, openAiModelInfoSaneDefaults } from "../../shared/api"
+import { BaseProvider } from "./base-provider"
 
 /**
  * Handles interaction with VS Code's Language Model API for chat-based operations.
- * This handler implements the ApiHandler interface to provide VS Code LM specific functionality.
+ * This handler extends BaseProvider to provide VS Code LM specific functionality.
  *
- * @implements {ApiHandler}
+ * @extends {BaseProvider}
  *
  * @remarks
  * The handler manages a VS Code language model chat client and provides methods to:
@@ -35,13 +36,14 @@ import { ApiHandlerOptions, ModelInfo, openAiModelInfoSaneDefaults } from "../..
  * }
  * ```
  */
-export class VsCodeLmHandler implements ApiHandler, SingleCompletionHandler {
-	private options: ApiHandlerOptions
+export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
 	private client: vscode.LanguageModelChat | null
 	private disposable: vscode.Disposable | null
 	private currentRequestCancellation: vscode.CancellationTokenSource | null
 
 	constructor(options: ApiHandlerOptions) {
+		super()
 		this.options = options
 		this.client = null
 		this.disposable = null
@@ -145,7 +147,33 @@ export class VsCodeLmHandler implements ApiHandler, SingleCompletionHandler {
 		}
 	}
 
-	private async countTokens(text: string | vscode.LanguageModelChatMessage): Promise<number> {
+	/**
+	 * Implements the ApiHandler countTokens interface method
+	 * Provides token counting for Anthropic content blocks
+	 *
+	 * @param content The content blocks to count tokens for
+	 * @returns A promise resolving to the token count
+	 */
+	override async countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
+		// Convert Anthropic content blocks to a string for VSCode LM token counting
+		let textContent = ""
+
+		for (const block of content) {
+			if (block.type === "text") {
+				textContent += block.text || ""
+			} else if (block.type === "image") {
+				// VSCode LM doesn't support images directly, so we'll just use a placeholder
+				textContent += "[IMAGE]"
+			}
+		}
+
+		return this.internalCountTokens(textContent)
+	}
+
+	/**
+	 * Private implementation of token counting used internally by VsCodeLmHandler
+	 */
+	private async internalCountTokens(text: string | vscode.LanguageModelChatMessage): Promise<number> {
 		// Check for required dependencies
 		if (!this.client) {
 			console.warn("Roo Code <Language Model API>: No client available for token counting")
@@ -216,9 +244,9 @@ export class VsCodeLmHandler implements ApiHandler, SingleCompletionHandler {
 		systemPrompt: string,
 		vsCodeLmMessages: vscode.LanguageModelChatMessage[],
 	): Promise<number> {
-		const systemTokens: number = await this.countTokens(systemPrompt)
+		const systemTokens: number = await this.internalCountTokens(systemPrompt)
 
-		const messageTokens: number[] = await Promise.all(vsCodeLmMessages.map((msg) => this.countTokens(msg)))
+		const messageTokens: number[] = await Promise.all(vsCodeLmMessages.map((msg) => this.internalCountTokens(msg)))
 
 		return systemTokens + messageTokens.reduce((sum: number, tokens: number): number => sum + tokens, 0)
 	}
@@ -319,7 +347,7 @@ export class VsCodeLmHandler implements ApiHandler, SingleCompletionHandler {
 		return content
 	}
 
-	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
+	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		// Ensure clean state before starting a new request
 		this.ensureCleanState()
 		const client: vscode.LanguageModelChat = await this.getClient()
@@ -427,7 +455,7 @@ export class VsCodeLmHandler implements ApiHandler, SingleCompletionHandler {
 			}
 
 			// Count tokens in the accumulated text after stream completion
-			const totalOutputTokens: number = await this.countTokens(accumulatedText)
+			const totalOutputTokens: number = await this.internalCountTokens(accumulatedText)
 
 			// Report final usage after stream completion
 			yield {
@@ -467,7 +495,7 @@ export class VsCodeLmHandler implements ApiHandler, SingleCompletionHandler {
 	}
 
 	// Return model information based on the current client state
-	getModel(): { id: string; info: ModelInfo } {
+	override getModel(): { id: string; info: ModelInfo } {
 		if (this.client) {
 			// Validate client properties
 			const requiredProps = {

+ 2 - 2
src/core/Cline.ts

@@ -990,12 +990,12 @@ export class Cline {
 				? this.apiConfiguration.modelMaxTokens || modelInfo.maxTokens
 				: modelInfo.maxTokens
 			const contextWindow = modelInfo.contextWindow
-
-			const trimmedMessages = truncateConversationIfNeeded({
+			const trimmedMessages = await truncateConversationIfNeeded({
 				messages: this.apiConversationHistory,
 				totalTokens,
 				maxTokens,
 				contextWindow,
+				apiHandler: this.api,
 			})
 
 			if (trimmedMessages !== this.apiConversationHistory) {
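
The call site above implies the following shape for the updated sliding-window helpers. This is a sketch inferred from the call sites in this PR (the actual src/core/sliding-window/index.ts is not shown here): both functions are now async and take the active ApiHandler so token counting can be delegated to the provider.

import { Anthropic } from "@anthropic-ai/sdk"
import type { ApiHandler } from "../api"

// Inferred signatures; names and option fields match the call sites in this PR.
interface TruncateOptions {
	messages: Anthropic.Messages.MessageParam[]
	totalTokens: number
	contextWindow: number
	maxTokens?: number
	apiHandler: ApiHandler
}

declare function truncateConversationIfNeeded(options: TruncateOptions): Promise<Anthropic.Messages.MessageParam[]>

declare function estimateTokenCount(
	content: Array<Anthropic.Messages.ContentBlockParam>,
	apiHandler: ApiHandler,
): Promise<number>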

+ 276 - 235
src/core/sliding-window/__tests__/sliding-window.test.ts

@@ -3,12 +3,35 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 
 import { ModelInfo } from "../../../shared/api"
-import {
-	TOKEN_BUFFER_PERCENTAGE,
-	estimateTokenCount,
-	truncateConversation,
-	truncateConversationIfNeeded,
-} from "../index"
+import { ApiHandler } from "../../../api"
+import { BaseProvider } from "../../../api/providers/base-provider"
+import { TOKEN_BUFFER_PERCENTAGE } from "../index"
+import { estimateTokenCount, truncateConversation, truncateConversationIfNeeded } from "../index"
+
+// Create a mock ApiHandler for testing
+class MockApiHandler extends BaseProvider {
+	createMessage(): any {
+		throw new Error("Method not implemented.")
+	}
+
+	getModel(): { id: string; info: ModelInfo } {
+		return {
+			id: "test-model",
+			info: {
+				contextWindow: 100000,
+				maxTokens: 50000,
+				supportsPromptCache: true,
+				supportsImages: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: "Test model",
+			},
+		}
+	}
+}
+
+// Create a singleton instance for tests
+const mockApiHandler = new MockApiHandler()
 
 /**
  * Tests for the truncateConversation function
@@ -100,134 +123,91 @@ describe("truncateConversation", () => {
 })
 
 /**
- * Tests for the getMaxTokens function (private but tested through truncateConversationIfNeeded)
+ * Tests for the estimateTokenCount function
  */
-describe("getMaxTokens", () => {
-	// We'll test this indirectly through truncateConversationIfNeeded
-	const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
-		contextWindow,
-		supportsPromptCache: true, // Not relevant for getMaxTokens
-		maxTokens,
+describe("estimateTokenCount", () => {
+	it("should return 0 for empty or undefined content", async () => {
+		expect(await estimateTokenCount([], mockApiHandler)).toBe(0)
+		// @ts-ignore - Testing with undefined
+		expect(await estimateTokenCount(undefined, mockApiHandler)).toBe(0)
 	})
 
-	// Reuse across tests for consistency
-	const messages: Anthropic.Messages.MessageParam[] = [
-		{ role: "user", content: "First message" },
-		{ role: "assistant", content: "Second message" },
-		{ role: "user", content: "Third message" },
-		{ role: "assistant", content: "Fourth message" },
-		{ role: "user", content: "Fifth message" },
-	]
-
-	it("should use maxTokens as buffer when specified", () => {
-		const modelInfo = createModelInfo(100000, 50000)
-		// Max tokens = 100000 - 50000 = 50000
-
-		// Create messages with very small content in the last one to avoid token overflow
-		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+	it("should estimate tokens for text blocks", async () => {
+		const content: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "text", text: "This is a text block with 36 characters" },
+		]
 
-		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
-		// Below max tokens and buffer - no truncation
-		const result1 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 39999, // Well below threshold + dynamic buffer
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result1).toEqual(messagesWithSmallContent)
+		// With tiktoken, the exact token count may differ from character-based estimation
+		// Instead of expecting an exact number, we verify it's a reasonable positive number
+		const result = await estimateTokenCount(content, mockApiHandler)
+		expect(result).toBeGreaterThan(0)
 
-		// Above max tokens - truncate
-		const result2 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 50001, // Above threshold
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result2).not.toEqual(messagesWithSmallContent)
-		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
+		// We can also verify that longer text results in more tokens
+		const longerContent: Array<Anthropic.Messages.ContentBlockParam> = [
+			{
+				type: "text",
+				text: "This is a longer text block with significantly more characters to encode into tokens",
+			},
+		]
+		const longerResult = await estimateTokenCount(longerContent, mockApiHandler)
+		expect(longerResult).toBeGreaterThan(result)
 	})
 
-	it("should use 20% of context window as buffer when maxTokens is undefined", () => {
-		const modelInfo = createModelInfo(100000, undefined)
-		// Max tokens = 100000 - (100000 * 0.2) = 80000
+	it("should estimate tokens for image blocks based on data size", async () => {
+		// Small image
+		const smallImage: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "small_dummy_data" } },
+		]
+		// Larger image with more data
+		const largerImage: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "image", source: { type: "base64", media_type: "image/png", data: "X".repeat(1000) } },
+		]
 
-		// Create messages with very small content in the last one to avoid token overflow
-		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+		// Verify the token count scales with the size of the image data
+		const smallImageTokens = await estimateTokenCount(smallImage, mockApiHandler)
+		const largerImageTokens = await estimateTokenCount(largerImage, mockApiHandler)
 
-		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
-		// Below max tokens and buffer - no truncation
-		const result1 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 69999, // Well below threshold + dynamic buffer
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result1).toEqual(messagesWithSmallContent)
+		// Small image should have some tokens
+		expect(smallImageTokens).toBeGreaterThan(0)
 
-		// Above max tokens - truncate
-		const result2 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 80001, // Above threshold
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result2).not.toEqual(messagesWithSmallContent)
-		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
+		// Larger image should have proportionally more tokens
+		expect(largerImageTokens).toBeGreaterThan(smallImageTokens)
+
+		// Verify the larger image calculation matches our formula including the 50% fudge factor
+		expect(largerImageTokens).toBe(48)
 	})
 
-	it("should handle small context windows appropriately", () => {
-		const modelInfo = createModelInfo(50000, 10000)
-		// Max tokens = 50000 - 10000 = 40000
+	it("should estimate tokens for mixed content blocks", async () => {
+		const content: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "text", text: "A text block with 30 characters" },
+			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "dummy_data" } },
+			{ type: "text", text: "Another text with 24 chars" },
+		]
 
-		// Create messages with very small content in the last one to avoid token overflow
-		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+		// We know image tokens calculation should be consistent
+		const imageTokens = Math.ceil(Math.sqrt("dummy_data".length)) * 1.5
 
-		// Below max tokens and buffer - no truncation
-		const result1 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 34999, // Well below threshold + buffer
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result1).toEqual(messagesWithSmallContent)
+		// With tiktoken, we can't predict exact text token counts,
+		// but we can verify the total is greater than just the image tokens
+		const result = await estimateTokenCount(content, mockApiHandler)
+		expect(result).toBeGreaterThan(imageTokens)
 
-		// Above max tokens - truncate
-		const result2 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 40001, // Above threshold
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result2).not.toEqual(messagesWithSmallContent)
-		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
+		// Also test against a version with only the image to verify text adds tokens
+		const imageOnlyContent: Array<Anthropic.Messages.ContentBlockParam> = [
+			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "dummy_data" } },
+		]
+		const imageOnlyResult = await estimateTokenCount(imageOnlyContent, mockApiHandler)
+		expect(result).toBeGreaterThan(imageOnlyResult)
 	})
 
-	it("should handle large context windows appropriately", () => {
-		const modelInfo = createModelInfo(200000, 30000)
-		// Max tokens = 200000 - 30000 = 170000
-
-		// Create messages with very small content in the last one to avoid token overflow
-		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
-
-		// Account for the dynamic buffer which is 10% of context window (20,000 tokens for this test)
-		// Below max tokens and buffer - no truncation
-		const result1 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 149999, // Well below threshold + dynamic buffer
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result1).toEqual(messagesWithSmallContent)
+	it("should handle empty text blocks", async () => {
+		const content: Array<Anthropic.Messages.ContentBlockParam> = [{ type: "text", text: "" }]
+		expect(await estimateTokenCount(content, mockApiHandler)).toBe(0)
+	})
 
-		// Above max tokens - truncate
-		const result2 = truncateConversationIfNeeded({
-			messages: messagesWithSmallContent,
-			totalTokens: 170001, // Above threshold
-			contextWindow: modelInfo.contextWindow,
-			maxTokens: modelInfo.maxTokens,
-		})
-		expect(result2).not.toEqual(messagesWithSmallContent)
-		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
+	it("should handle plain string messages", async () => {
+		const content = "This is a plain text message"
+		expect(await estimateTokenCount([{ type: "text", text: content }], mockApiHandler)).toBeGreaterThan(0)
 	})
 })
 
@@ -235,9 +215,9 @@ describe("getMaxTokens", () => {
  * Tests for the truncateConversationIfNeeded function
  */
 describe("truncateConversationIfNeeded", () => {
-	const createModelInfo = (contextWindow: number, supportsPromptCache: boolean, maxTokens?: number): ModelInfo => ({
+	const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
 		contextWindow,
-		supportsPromptCache,
+		supportsPromptCache: true,
 		maxTokens,
 	})
 
@@ -249,8 +229,8 @@ describe("truncateConversationIfNeeded", () => {
 		{ role: "user", content: "Fifth message" },
 	]
 
-	it("should not truncate if tokens are below max tokens threshold", () => {
-		const modelInfo = createModelInfo(100000, true, 30000)
+	it("should not truncate if tokens are below max tokens threshold", async () => {
+		const modelInfo = createModelInfo(100000, 30000)
 		const maxTokens = 100000 - 30000 // 70000
 		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10000
 		const totalTokens = 70000 - dynamicBuffer - 1 // Just below threshold - buffer
@@ -258,17 +238,18 @@ describe("truncateConversationIfNeeded", () => {
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
-		const result = truncateConversationIfNeeded({
+		const result = await truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
 			totalTokens,
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
 		})
 		expect(result).toEqual(messagesWithSmallContent) // No truncation occurs
 	})
 
-	it("should truncate if tokens are above max tokens threshold", () => {
-		const modelInfo = createModelInfo(100000, true, 30000)
+	it("should truncate if tokens are above max tokens threshold", async () => {
+		const modelInfo = createModelInfo(100000, 30000)
 		const maxTokens = 100000 - 30000 // 70000
 		const totalTokens = 70001 // Above threshold
 
@@ -279,68 +260,73 @@ describe("truncateConversationIfNeeded", () => {
 		// With 4 messages after the first, 0.5 fraction means remove 2 messages
 		const expectedResult = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
 
-		const result = truncateConversationIfNeeded({
+		const result = await truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
 			totalTokens,
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
 		})
 		expect(result).toEqual(expectedResult)
 	})
 
-	it("should work with non-prompt caching models the same as prompt caching models", () => {
+	it("should work with non-prompt caching models the same as prompt caching models", async () => {
 		// The implementation no longer differentiates between prompt caching and non-prompt caching models
-		const modelInfo1 = createModelInfo(100000, true, 30000)
-		const modelInfo2 = createModelInfo(100000, false, 30000)
+		const modelInfo1 = createModelInfo(100000, 30000)
+		const modelInfo2 = createModelInfo(100000, 30000)
 
 		// Create messages with very small content in the last one to avoid token overflow
 		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
 		// Test below threshold
 		const belowThreshold = 69999
-		expect(
-			truncateConversationIfNeeded({
-				messages: messagesWithSmallContent,
-				totalTokens: belowThreshold,
-				contextWindow: modelInfo1.contextWindow,
-				maxTokens: modelInfo1.maxTokens,
-			}),
-		).toEqual(
-			truncateConversationIfNeeded({
-				messages: messagesWithSmallContent,
-				totalTokens: belowThreshold,
-				contextWindow: modelInfo2.contextWindow,
-				maxTokens: modelInfo2.maxTokens,
-			}),
-		)
+		const result1 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: belowThreshold,
+			contextWindow: modelInfo1.contextWindow,
+			maxTokens: modelInfo1.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+
+		const result2 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: belowThreshold,
+			contextWindow: modelInfo2.contextWindow,
+			maxTokens: modelInfo2.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+
+		expect(result1).toEqual(result2)
 
 		// Test above threshold
 		const aboveThreshold = 70001
-		expect(
-			truncateConversationIfNeeded({
-				messages: messagesWithSmallContent,
-				totalTokens: aboveThreshold,
-				contextWindow: modelInfo1.contextWindow,
-				maxTokens: modelInfo1.maxTokens,
-			}),
-		).toEqual(
-			truncateConversationIfNeeded({
-				messages: messagesWithSmallContent,
-				totalTokens: aboveThreshold,
-				contextWindow: modelInfo2.contextWindow,
-				maxTokens: modelInfo2.maxTokens,
-			}),
-		)
+		const result3 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: aboveThreshold,
+			contextWindow: modelInfo1.contextWindow,
+			maxTokens: modelInfo1.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+
+		const result4 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: aboveThreshold,
+			contextWindow: modelInfo2.contextWindow,
+			maxTokens: modelInfo2.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+
+		expect(result3).toEqual(result4)
 	})
 
-	it("should consider incoming content when deciding to truncate", () => {
-		const modelInfo = createModelInfo(100000, true, 30000)
+	it("should consider incoming content when deciding to truncate", async () => {
+		const modelInfo = createModelInfo(100000, 30000)
 		const maxTokens = 30000
 		const availableTokens = modelInfo.contextWindow - maxTokens
 
 		// Test case 1: Small content that won't push us over the threshold
 		const smallContent = [{ type: "text" as const, text: "Small content" }]
-		const smallContentTokens = estimateTokenCount(smallContent)
+		const smallContentTokens = await estimateTokenCount(smallContent, mockApiHandler)
 		const messagesWithSmallContent: Anthropic.Messages.MessageParam[] = [
 			...messages.slice(0, -1),
 			{ role: messages[messages.length - 1].role, content: smallContent },
@@ -349,11 +335,12 @@ describe("truncateConversationIfNeeded", () => {
 		// Set base tokens so total is well below threshold + buffer even with small content added
 		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE
 		const baseTokensForSmall = availableTokens - smallContentTokens - dynamicBuffer - 10
-		const resultWithSmall = truncateConversationIfNeeded({
+		const resultWithSmall = await truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
 			totalTokens: baseTokensForSmall,
 			contextWindow: modelInfo.contextWindow,
 			maxTokens,
+			apiHandler: mockApiHandler,
 		})
 		expect(resultWithSmall).toEqual(messagesWithSmallContent) // No truncation
 
@@ -364,7 +351,7 @@ describe("truncateConversationIfNeeded", () => {
 				text: "A very large incoming message that would consume a significant number of tokens and push us over the threshold",
 			},
 		]
-		const largeContentTokens = estimateTokenCount(largeContent)
+		const largeContentTokens = await estimateTokenCount(largeContent, mockApiHandler)
 		const messagesWithLargeContent: Anthropic.Messages.MessageParam[] = [
 			...messages.slice(0, -1),
 			{ role: messages[messages.length - 1].role, content: largeContent },
@@ -372,17 +359,18 @@ describe("truncateConversationIfNeeded", () => {
 
 		// Set base tokens so we're just below threshold without content, but over with content
 		const baseTokensForLarge = availableTokens - Math.floor(largeContentTokens / 2)
-		const resultWithLarge = truncateConversationIfNeeded({
+		const resultWithLarge = await truncateConversationIfNeeded({
 			messages: messagesWithLargeContent,
 			totalTokens: baseTokensForLarge,
 			contextWindow: modelInfo.contextWindow,
 			maxTokens,
+			apiHandler: mockApiHandler,
 		})
 		expect(resultWithLarge).not.toEqual(messagesWithLargeContent) // Should truncate
 
 		// Test case 3: Very large content that will definitely exceed threshold
 		const veryLargeContent = [{ type: "text" as const, text: "X".repeat(1000) }]
-		const veryLargeContentTokens = estimateTokenCount(veryLargeContent)
+		const veryLargeContentTokens = await estimateTokenCount(veryLargeContent, mockApiHandler)
 		const messagesWithVeryLargeContent: Anthropic.Messages.MessageParam[] = [
 			...messages.slice(0, -1),
 			{ role: messages[messages.length - 1].role, content: veryLargeContent },
@@ -390,17 +378,18 @@ describe("truncateConversationIfNeeded", () => {
 
 		// Set base tokens so we're just below threshold without content
 		const baseTokensForVeryLarge = availableTokens - Math.floor(veryLargeContentTokens / 2)
-		const resultWithVeryLarge = truncateConversationIfNeeded({
+		const resultWithVeryLarge = await truncateConversationIfNeeded({
 			messages: messagesWithVeryLargeContent,
 			totalTokens: baseTokensForVeryLarge,
 			contextWindow: modelInfo.contextWindow,
 			maxTokens,
+			apiHandler: mockApiHandler,
 		})
 		expect(resultWithVeryLarge).not.toEqual(messagesWithVeryLargeContent) // Should truncate
 	})
 
-	it("should truncate if tokens are within TOKEN_BUFFER_PERCENTAGE of the threshold", () => {
-		const modelInfo = createModelInfo(100000, true, 30000)
+	it("should truncate if tokens are within TOKEN_BUFFER_PERCENTAGE of the threshold", async () => {
+		const modelInfo = createModelInfo(100000, 30000)
 		const maxTokens = 100000 - 30000 // 70000
 		const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE // 10% of 100000 = 10000
 		const totalTokens = 70000 - dynamicBuffer + 1 // Just within the dynamic buffer of threshold (70000)
@@ -412,101 +401,153 @@ describe("truncateConversationIfNeeded", () => {
 		// With 4 messages after the first, 0.5 fraction means remove 2 messages
 		const expectedResult = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]]
 
-		const result = truncateConversationIfNeeded({
+		const result = await truncateConversationIfNeeded({
 			messages: messagesWithSmallContent,
 			totalTokens,
 			contextWindow: modelInfo.contextWindow,
 			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
 		})
 		expect(result).toEqual(expectedResult)
 	})
 })
 
 /**
- * Tests for the estimateTokenCount function
+ * Tests for the getMaxTokens function (private but tested through truncateConversationIfNeeded)
  */
-describe("estimateTokenCount", () => {
-	it("should return 0 for empty or undefined content", () => {
-		expect(estimateTokenCount([])).toBe(0)
-		// @ts-ignore - Testing with undefined
-		expect(estimateTokenCount(undefined)).toBe(0)
+describe("getMaxTokens", () => {
+	// We'll test this indirectly through truncateConversationIfNeeded
+	const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
+		contextWindow,
+		supportsPromptCache: true, // Not relevant for getMaxTokens
+		maxTokens,
 	})
 
-	it("should estimate tokens for text blocks", () => {
-		const content: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "text", text: "This is a text block with 36 characters" },
-		]
+	// Reuse across tests for consistency
+	const messages: Anthropic.Messages.MessageParam[] = [
+		{ role: "user", content: "First message" },
+		{ role: "assistant", content: "Second message" },
+		{ role: "user", content: "Third message" },
+		{ role: "assistant", content: "Fourth message" },
+		{ role: "user", content: "Fifth message" },
+	]
 
-		// With tiktoken, the exact token count may differ from character-based estimation
-		// Instead of expecting an exact number, we verify it's a reasonable positive number
-		const result = estimateTokenCount(content)
-		expect(result).toBeGreaterThan(0)
+	it("should use maxTokens as buffer when specified", async () => {
+		const modelInfo = createModelInfo(100000, 50000)
+		// Max tokens = 100000 - 50000 = 50000
 
-		// We can also verify that longer text results in more tokens
-		const longerContent: Array<Anthropic.Messages.ContentBlockParam> = [
-			{
-				type: "text",
-				text: "This is a longer text block with significantly more characters to encode into tokens",
-			},
-		]
-		const longerResult = estimateTokenCount(longerContent)
-		expect(longerResult).toBeGreaterThan(result)
-	})
+		// Create messages with very small content in the last one to avoid token overflow
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
-	it("should estimate tokens for image blocks based on data size", () => {
-		// Small image
-		const smallImage: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "small_dummy_data" } },
-		]
-		// Larger image with more data
-		const largerImage: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "image", source: { type: "base64", media_type: "image/png", data: "X".repeat(1000) } },
-		]
+		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
+		// Below max tokens and buffer - no truncation
+		const result1 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 39999, // Well below threshold + dynamic buffer
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result1).toEqual(messagesWithSmallContent)
 
-		// Verify the token count scales with the size of the image data
-		const smallImageTokens = estimateTokenCount(smallImage)
-		const largerImageTokens = estimateTokenCount(largerImage)
+		// Above max tokens - truncate
+		const result2 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 50001, // Above threshold
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result2).not.toEqual(messagesWithSmallContent)
+		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
+	})
 
-		// Small image should have some tokens
-		expect(smallImageTokens).toBeGreaterThan(0)
+	it("should use 20% of context window as buffer when maxTokens is undefined", async () => {
+		const modelInfo = createModelInfo(100000, undefined)
+		// Max tokens = 100000 - (100000 * 0.2) = 80000
 
-		// Larger image should have proportionally more tokens
-		expect(largerImageTokens).toBeGreaterThan(smallImageTokens)
+		// Create messages with very small content in the last one to avoid token overflow
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
-		// Verify the larger image calculation matches our formula including the 50% fudge factor
-		expect(largerImageTokens).toBe(48)
+		// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
+		// Below max tokens and buffer - no truncation
+		const result1 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 69999, // Well below threshold + dynamic buffer
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result1).toEqual(messagesWithSmallContent)
+
+		// Above max tokens - truncate
+		const result2 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 80001, // Above threshold
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result2).not.toEqual(messagesWithSmallContent)
+		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
 	})
 
-	it("should estimate tokens for mixed content blocks", () => {
-		const content: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "text", text: "A text block with 30 characters" },
-			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "dummy_data" } },
-			{ type: "text", text: "Another text with 24 chars" },
-		]
+	it("should handle small context windows appropriately", async () => {
+		const modelInfo = createModelInfo(50000, 10000)
+		// Max tokens = 50000 - 10000 = 40000
 
-		// We know image tokens calculation should be consistent
-		const imageTokens = Math.ceil(Math.sqrt("dummy_data".length)) * 1.5
+		// Create messages with very small content in the last one to avoid token overflow
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
 
-		// With tiktoken, we can't predict exact text token counts,
-		// but we can verify the total is greater than just the image tokens
-		const result = estimateTokenCount(content)
-		expect(result).toBeGreaterThan(imageTokens)
+		// Below max tokens and buffer - no truncation
+		const result1 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 34999, // Well below threshold + buffer
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result1).toEqual(messagesWithSmallContent)
 
-		// Also test against a version with only the image to verify text adds tokens
-		const imageOnlyContent: Array<Anthropic.Messages.ContentBlockParam> = [
-			{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "dummy_data" } },
-		]
-		const imageOnlyResult = estimateTokenCount(imageOnlyContent)
-		expect(result).toBeGreaterThan(imageOnlyResult)
+		// Above max tokens - truncate
+		const result2 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 40001, // Above threshold
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result2).not.toEqual(messagesWithSmallContent)
+		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
 	})
 
-	it("should handle empty text blocks", () => {
-		const content: Array<Anthropic.Messages.ContentBlockParam> = [{ type: "text", text: "" }]
-		expect(estimateTokenCount(content)).toBe(0)
-	})
+	it("should handle large context windows appropriately", async () => {
+		const modelInfo = createModelInfo(200000, 30000)
+		// Max tokens = 200000 - 30000 = 170000
 
-	it("should handle plain string messages", () => {
-		const content = "This is a plain text message"
-		expect(estimateTokenCount([{ type: "text", text: content }])).toBeGreaterThan(0)
+		// Create messages with very small content in the last one to avoid token overflow
+		const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]
+
+		// Account for the dynamic buffer which is 10% of context window (20,000 tokens for this test)
+		// Below max tokens and buffer - no truncation
+		const result1 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 149999, // Well below threshold + dynamic buffer
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result1).toEqual(messagesWithSmallContent)
+
+		// Above max tokens - truncate
+		const result2 = await truncateConversationIfNeeded({
+			messages: messagesWithSmallContent,
+			totalTokens: 170001, // Above threshold
+			contextWindow: modelInfo.contextWindow,
+			maxTokens: modelInfo.maxTokens,
+			apiHandler: mockApiHandler,
+		})
+		expect(result2).not.toEqual(messagesWithSmallContent)
+		expect(result2.length).toBe(3) // Truncated with 0.5 fraction
 	})
 })
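
Note: mockApiHandler is referenced throughout these tests but is defined earlier in the test file, outside this hunk. A minimal sketch of the kind of stub it could be, assuming only getModel and a deterministic countTokens are needed — the cast, the concrete numbers, and the import paths are illustrative, not the repository's actual fixture:

import { Anthropic } from "@anthropic-ai/sdk"
import { ApiHandler } from "../../api" // path depends on where the test file lives
import { ModelInfo } from "../../shared/api"

// Hypothetical stand-in: a deterministic countTokens keeps the truncation
// decision driven purely by totalTokens, which is also why every test
// empties the last message's content.
const mockApiHandler = {
	getModel: () => ({
		id: "test-model",
		info: { contextWindow: 100_000, supportsPromptCache: true } as ModelInfo,
	}),
	createMessage: () => {
		throw new Error("not exercised by these tests")
	},
	countTokens: async (content: Array<Anthropic.Messages.ContentBlockParam>) =>
		content.reduce((sum, block) => sum + (block.type === "text" ? block.text.length : 0), 0),
} as unknown as ApiHandler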

+ 23 - 42
src/core/sliding-window/index.ts

@@ -1,53 +1,24 @@
 import { Anthropic } from "@anthropic-ai/sdk"
+import { ApiHandler } from "../../api"
 
-import { Tiktoken } from "js-tiktoken/lite"
-import o200kBase from "js-tiktoken/ranks/o200k_base"
-
-export const TOKEN_FUDGE_FACTOR = 1.5
 /**
  * Default percentage of the context window to use as a buffer when deciding when to truncate
  */
 export const TOKEN_BUFFER_PERCENTAGE = 0.1
 
 /**
- * Counts tokens for user content using tiktoken for text
- * and a size-based calculation for images.
+ * Counts tokens for user content using the provider's token counting implementation.
  *
  * @param {Array<Anthropic.Messages.ContentBlockParam>} content - The content to count tokens for
- * @returns {number} The token count
+ * @param {ApiHandler} apiHandler - The API handler to use for token counting
+ * @returns {Promise<number>} A promise resolving to the token count
  */
-export function estimateTokenCount(content: Array<Anthropic.Messages.ContentBlockParam>): number {
+export async function estimateTokenCount(
+	content: Array<Anthropic.Messages.ContentBlockParam>,
+	apiHandler: ApiHandler,
+): Promise<number> {
 	if (!content || content.length === 0) return 0
-
-	let totalTokens = 0
-	let encoder = null
-
-	// Create encoder
-	encoder = new Tiktoken(o200kBase)
-
-	// Process each content block
-	for (const block of content) {
-		if (block.type === "text") {
-			// Use tiktoken for text token counting
-			const text = block.text || ""
-			if (text.length > 0) {
-				const tokens = encoder.encode(text)
-				totalTokens += tokens.length
-			}
-		} else if (block.type === "image") {
-			// For images, calculate based on data size
-			const imageSource = block.source
-			if (imageSource && typeof imageSource === "object" && "data" in imageSource) {
-				const base64Data = imageSource.data as string
-				totalTokens += Math.ceil(Math.sqrt(base64Data.length))
-			} else {
-				totalTokens += 300 // Conservative estimate for unknown images
-			}
-		}
-	}
-
-	// Add a fudge factor to account for the fact that tiktoken is not always accurate
-	return Math.ceil(totalTokens * TOKEN_FUDGE_FACTOR)
+	return apiHandler.countTokens(content)
 }
 
 /**
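
With token counting delegated to the provider, estimateTokenCount becomes a thin async wrapper. A minimal usage sketch, assuming the caller already holds some ApiHandler instance (the helper name and import path are illustrative):

import { Anthropic } from "@anthropic-ai/sdk"
import { ApiHandler } from "../../api"
import { estimateTokenCount } from "../sliding-window"

// Counts tokens for a single content block via whichever provider the
// caller is configured with; providers with a native counting endpoint
// answer directly, the rest fall back to the shared tiktoken estimate.
async function countPromptTokens(handler: ApiHandler): Promise<number> {
	const blocks: Anthropic.Messages.ContentBlockParam[] = [
		{ type: "text", text: "How many tokens is this?" },
	]
	return estimateTokenCount(blocks, handler)
}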
@@ -81,6 +52,7 @@ export function truncateConversation(
  * @param {number} totalTokens - The total number of tokens in the conversation (excluding the last user message).
  * @param {number} contextWindow - The context window size.
  * @param {number} maxTokens - The maximum number of tokens allowed.
+ * @param {ApiHandler} apiHandler - The API handler to use for token counting.
  * @returns {Anthropic.Messages.MessageParam[]} The original or truncated conversation messages.
  */
 
@@ -89,14 +61,23 @@ type TruncateOptions = {
 	totalTokens: number
 	contextWindow: number
 	maxTokens?: number
+	apiHandler: ApiHandler
 }
 
-export function truncateConversationIfNeeded({
+/**
+ * Conditionally truncates the conversation messages if the total token count
+ * exceeds the model's limit, considering the size of incoming content.
+ *
+ * @param {TruncateOptions} options - The options for truncation
+ * @returns {Promise<Anthropic.Messages.MessageParam[]>} The original or truncated conversation messages.
+ */
+export async function truncateConversationIfNeeded({
 	messages,
 	totalTokens,
 	contextWindow,
 	maxTokens,
-}: TruncateOptions): Anthropic.Messages.MessageParam[] {
+	apiHandler,
+}: TruncateOptions): Promise<Anthropic.Messages.MessageParam[]> {
 	// Calculate the maximum tokens reserved for response
 	const reservedTokens = maxTokens || contextWindow * 0.2
 
@@ -104,8 +85,8 @@ export function truncateConversationIfNeeded({
 	const lastMessage = messages[messages.length - 1]
 	const lastMessageContent = lastMessage.content
 	const lastMessageTokens = Array.isArray(lastMessageContent)
-		? estimateTokenCount(lastMessageContent)
-		: estimateTokenCount([{ type: "text", text: lastMessageContent as string }])
+		? await estimateTokenCount(lastMessageContent, apiHandler)
+		: await estimateTokenCount([{ type: "text", text: lastMessageContent as string }], apiHandler)
 
 	// Calculate total effective tokens (totalTokens never includes the last message)
 	const effectiveTokens = totalTokens + lastMessageTokens