model-utils.ts

/**
 * Utility functions for working with language models and tokens
 */

/**
 * Default maximum tokens for thinking-capable models when no specific value is provided
 */
export const DEFAULT_THINKING_MODEL_MAX_TOKENS = 16_384

/**
 * Model information interface with properties used in token calculations
 */
export interface ModelInfo {
  /**
   * Maximum number of tokens the model can process
   */
  maxTokens?: number

  /**
   * Whether the model supports thinking/reasoning capabilities
   */
  thinking?: boolean
}

/**
 * API configuration interface with token-related settings
 */
export interface ApiConfig {
  /**
   * Maximum tokens to use for model responses
   */
  modelMaxTokens?: number
}

/**
 * Result of token distribution calculation
 */
export interface TokenDistributionResult {
  /**
   * Percentage of context window used by current tokens (0-100)
   */
  currentPercent: number

  /**
   * Percentage of context window reserved for model output (0-100)
   */
  reservedPercent: number

  /**
   * Percentage of context window still available (0-100)
   */
  availablePercent: number

  /**
   * Number of tokens reserved for model output
   */
  reservedForOutput: number

  /**
   * Number of tokens still available in the context window
   */
  availableSize: number
}
/**
 * Determines the maximum tokens based on model configuration.
 * If the model supports thinking, prioritize the API configuration's modelMaxTokens,
 * falling back to DEFAULT_THINKING_MODEL_MAX_TOKENS. Otherwise, just use the
 * model's own maxTokens.
 *
 * @param modelInfo The model information object with properties like maxTokens and thinking
 * @param apiConfig The API configuration object with properties like modelMaxTokens
 * @returns The maximum tokens value, or undefined if no valid value is available
 */
export const getMaxTokensForModel = (
  modelInfo: ModelInfo | undefined,
  apiConfig: ApiConfig | undefined,
): number | undefined => {
  if (modelInfo?.thinking) {
    return apiConfig?.modelMaxTokens || DEFAULT_THINKING_MODEL_MAX_TOKENS
  }
  return modelInfo?.maxTokens
}
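
/**
 * Usage sketch (illustrative only; the literal values below are hypothetical).
 * Note that `||`, rather than `??`, means a configured modelMaxTokens of 0
 * also falls back to DEFAULT_THINKING_MODEL_MAX_TOKENS for thinking models.
 *
 *   getMaxTokensForModel({ maxTokens: 8192, thinking: true }, { modelMaxTokens: 32_768 }) // 32768
 *   getMaxTokensForModel({ maxTokens: 8192, thinking: true }, undefined) // 16384 (the default)
 *   getMaxTokensForModel({ maxTokens: 8192 }, { modelMaxTokens: 32_768 }) // 8192
 */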

/**
 * Calculates distribution of tokens within the context window.
 * This is used for visualizing the token distribution in the UI.
 *
 * @param contextWindow The total size of the context window
 * @param contextTokens The number of tokens currently used
 * @param maxTokens Optional override for tokens reserved for model output (otherwise uses 20% of window)
 * @returns Distribution of tokens with percentages and raw numbers
 */
export const calculateTokenDistribution = (
  contextWindow: number,
  contextTokens: number,
  maxTokens?: number,
): TokenDistributionResult => {
  // Clamp potentially invalid inputs to non-negative values
  const safeContextWindow = Math.max(0, contextWindow)
  const safeContextTokens = Math.max(0, contextTokens)

  // If maxTokens is a valid positive number, use it; otherwise reserve
  // 20% of the context window as a default
  const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : Math.ceil(safeContextWindow * 0.2)

  // Remaining space in the window, clamped to zero
  const availableSize = Math.max(0, safeContextWindow - safeContextTokens - reservedForOutput)

  // Express each part as a ratio of the total of all three parts, so the
  // three percentages sum to exactly 100%
  const total = safeContextTokens + reservedForOutput + availableSize

  // Safeguard against division by zero
  if (total <= 0) {
    return {
      currentPercent: 0,
      reservedPercent: 0,
      availablePercent: 0,
      reservedForOutput,
      availableSize,
    }
  }

  return {
    currentPercent: (safeContextTokens / total) * 100,
    reservedPercent: (reservedForOutput / total) * 100,
    availablePercent: (availableSize / total) * 100,
    reservedForOutput,
    availableSize,
  }
}
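
/**
 * Worked example (illustrative; the numbers are hypothetical): with a
 * 100_000-token window, 50_000 tokens used, and no explicit maxTokens, 20%
 * of the window (20_000 tokens) is reserved for output, leaving 30_000
 * available, i.e. a 50% / 20% / 30% split.
 *
 *   const dist = calculateTokenDistribution(100_000, 50_000)
 *   dist.reservedForOutput // 20000 (Math.ceil(100000 * 0.2))
 *   dist.availableSize     // 30000
 *   dist.currentPercent    // 50
 */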