| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263 |
- package service
- import (
- "sync"
- "github.com/QuantumNous/new-api/common"
- "github.com/tiktoken-go/tokenizer"
- "github.com/tiktoken-go/tokenizer/codec"
- )
- // tokenEncoderMap won't grow after initialization
- var defaultTokenEncoder tokenizer.Codec
- // tokenEncoderMap is used to store token encoders for different models
- var tokenEncoderMap = make(map[string]tokenizer.Codec)
- // tokenEncoderMutex protects tokenEncoderMap for concurrent access
- var tokenEncoderMutex sync.RWMutex
- func InitTokenEncoders() {
- common.SysLog("initializing token encoders")
- defaultTokenEncoder = codec.NewCl100kBase()
- common.SysLog("token encoders initialized")
- }
- func getTokenEncoder(model string) tokenizer.Codec {
- // First, try to get the encoder from cache with read lock
- tokenEncoderMutex.RLock()
- if encoder, exists := tokenEncoderMap[model]; exists {
- tokenEncoderMutex.RUnlock()
- return encoder
- }
- tokenEncoderMutex.RUnlock()
- // If not in cache, create new encoder with write lock
- tokenEncoderMutex.Lock()
- defer tokenEncoderMutex.Unlock()
- // Double-check if another goroutine already created the encoder
- if encoder, exists := tokenEncoderMap[model]; exists {
- return encoder
- }
- // Create new encoder
- modelCodec, err := tokenizer.ForModel(tokenizer.Model(model))
- if err != nil {
- // Cache the default encoder for this model to avoid repeated failures
- tokenEncoderMap[model] = defaultTokenEncoder
- return defaultTokenEncoder
- }
- // Cache the new encoder
- tokenEncoderMap[model] = modelCodec
- return modelCodec
- }
- func getTokenNum(tokenEncoder tokenizer.Codec, text string) int {
- if text == "" {
- return 0
- }
- tkm, _ := tokenEncoder.Count(text)
- return tkm
- }
|