|
|
@@ -98,6 +98,7 @@ func clampThinkingBudget(modelName string, budget int) int {
|
|
|
// "effort": "high" - Allocates a large portion of tokens for reasoning (approximately 80% of max_tokens)
|
|
|
// "effort": "medium" - Allocates a moderate portion of tokens (approximately 50% of max_tokens)
|
|
|
// "effort": "low" - Allocates a smaller portion of tokens (approximately 20% of max_tokens)
|
|
|
+// "effort": "minimal" - Allocates a minimal portion of tokens (approximately 5% of max_tokens)
|
|
|
func clampThinkingBudgetByEffort(modelName string, effort string) int {
|
|
|
isNew25Pro := isNew25ProModel(modelName)
|
|
|
is25FlashLite := is25FlashLiteModel(modelName)
|
|
|
@@ -118,18 +119,12 @@ func clampThinkingBudgetByEffort(modelName string, effort string) int {
|
|
|
maxBudget = maxBudget * 50 / 100
|
|
|
case "low":
|
|
|
maxBudget = maxBudget * 20 / 100
|
|
|
+ case "minimal":
|
|
|
+ maxBudget = maxBudget * 5 / 100
|
|
|
}
|
|
|
return clampThinkingBudget(modelName, maxBudget)
|
|
|
}
|
|
|
|
|
|
-func parseThinkingLevelSuffix(modelName string) (string, string) {
|
|
|
- base, level, ok := reasoning.TrimEffortSuffix(modelName)
|
|
|
- if !ok {
|
|
|
- return modelName, ""
|
|
|
- }
|
|
|
- return base, level
|
|
|
-}
|
|
|
-
|
|
|
func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo, oaiRequest ...dto.GeneralOpenAIRequest) {
|
|
|
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
|
|
|
modelName := info.UpstreamModelName
|
|
|
@@ -186,7 +181,7 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel
|
|
|
ThinkingBudget: common.GetPointer(0),
|
|
|
}
|
|
|
}
|
|
|
- } else if _, level := parseThinkingLevelSuffix(modelName); level != "" {
|
|
|
+ } else if _, level, ok := reasoning.TrimEffortSuffix(info.UpstreamModelName); ok && level != "" {
|
|
|
geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
|
|
|
IncludeThoughts: true,
|
|
|
ThinkingLevel: level,
|