tokenizer.go 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. package service
  2. import (
  3. "sync"
  4. "github.com/QuantumNous/new-api/common"
  5. "github.com/tiktoken-go/tokenizer"
  6. "github.com/tiktoken-go/tokenizer/codec"
  7. )
  8. // tokenEncoderMap won't grow after initialization
  9. var defaultTokenEncoder tokenizer.Codec
  10. // tokenEncoderMap is used to store token encoders for different models
  11. var tokenEncoderMap = make(map[string]tokenizer.Codec)
  12. // tokenEncoderMutex protects tokenEncoderMap for concurrent access
  13. var tokenEncoderMutex sync.RWMutex
  14. func InitTokenEncoders() {
  15. common.SysLog("initializing token encoders")
  16. defaultTokenEncoder = codec.NewCl100kBase()
  17. common.SysLog("token encoders initialized")
  18. }
  19. func getTokenEncoder(model string) tokenizer.Codec {
  20. // First, try to get the encoder from cache with read lock
  21. tokenEncoderMutex.RLock()
  22. if encoder, exists := tokenEncoderMap[model]; exists {
  23. tokenEncoderMutex.RUnlock()
  24. return encoder
  25. }
  26. tokenEncoderMutex.RUnlock()
  27. // If not in cache, create new encoder with write lock
  28. tokenEncoderMutex.Lock()
  29. defer tokenEncoderMutex.Unlock()
  30. // Double-check if another goroutine already created the encoder
  31. if encoder, exists := tokenEncoderMap[model]; exists {
  32. return encoder
  33. }
  34. // Create new encoder
  35. modelCodec, err := tokenizer.ForModel(tokenizer.Model(model))
  36. if err != nil {
  37. // Cache the default encoder for this model to avoid repeated failures
  38. tokenEncoderMap[model] = defaultTokenEncoder
  39. return defaultTokenEncoder
  40. }
  41. // Cache the new encoder
  42. tokenEncoderMap[model] = modelCodec
  43. return modelCodec
  44. }
  45. func getTokenNum(tokenEncoder tokenizer.Codec, text string) int {
  46. if text == "" {
  47. return 0
  48. }
  49. tkm, _ := tokenEncoder.Count(text)
  50. return tkm
  51. }