|
|
@@ -12,6 +12,8 @@ const TOKEN_FUDGE_FACTOR = 1.5
|
|
|
* Base class for API providers that implements common functionality
|
|
|
*/
|
|
|
export abstract class BaseProvider implements ApiHandler {
|
|
|
+ // Cache the Tiktoken encoder instance; reusing it across calls is safe because the encoder is stateless
|
|
|
+ private encoder: Tiktoken | null = null
|
|
|
abstract createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream
|
|
|
abstract getModel(): { id: string; info: ModelInfo }
|
|
|
|
|
|
@@ -19,6 +21,9 @@ export abstract class BaseProvider implements ApiHandler {
|
|
|
* Default token counting implementation using tiktoken
|
|
|
* Providers can override this to use their native token counting endpoints
|
|
|
*
|
|
|
+ * Reuses a cached Tiktoken encoder instance for performance; this is safe because the encoder is stateless.
|
|
|
+ * The encoder is created lazily on first use and reused for subsequent calls.
|
|
|
+ *
|
|
|
* @param content The content to count tokens for
|
|
|
* @returns A promise resolving to the token count
|
|
|
*/
|
|
|
@@ -27,17 +32,18 @@ export abstract class BaseProvider implements ApiHandler {
|
|
|
|
|
|
let totalTokens = 0
|
|
|
|
|
|
- // Create encoder - currently we only use o200kBase
|
|
|
- // In the future, providers could override this method to use more specific tokenizers
|
|
|
- const encoder = new Tiktoken(o200kBase)
|
|
|
+ // Lazily create and cache the encoder if it doesn't exist
|
|
|
+ if (!this.encoder) {
|
|
|
+ this.encoder = new Tiktoken(o200kBase)
|
|
|
+ }
|
|
|
|
|
|
- // Process each content block
|
|
|
+ // Process each content block using the cached encoder
|
|
|
for (const block of content) {
|
|
|
if (block.type === "text") {
|
|
|
// Use tiktoken for text token counting
|
|
|
const text = block.text || ""
|
|
|
if (text.length > 0) {
|
|
|
- const tokens = encoder.encode(text)
|
|
|
+ const tokens = this.encoder.encode(text)
|
|
|
totalTokens += tokens.length
|
|
|
}
|
|
|
} else if (block.type === "image") {
|