Просмотр исходного кода

Merge pull request #2460 from seefs001/feature/gemini-flash-minial

fix(gemini): handle minimal reasoning effort budget
Calcium-Ion 2 дня назад
Родитель
Commit
654bb10b45

+ 2 - 1
relay/channel/gemini/adaptor.go

@@ -13,6 +13,7 @@ import (
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/relay/constant"
 	"github.com/QuantumNous/new-api/setting/model_setting"
+	"github.com/QuantumNous/new-api/setting/reasoning"
 	"github.com/QuantumNous/new-api/types"
 
 	"github.com/gin-gonic/gin"
@@ -137,7 +138,7 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
 		} else if strings.HasSuffix(info.UpstreamModelName, "-nothinking") {
 			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking")
-		} else if baseModel, level := parseThinkingLevelSuffix(info.UpstreamModelName); level != "" {
+		} else if baseModel, level, ok := reasoning.TrimEffortSuffix(info.UpstreamModelName); ok && level != "" {
 			info.UpstreamModelName = baseModel
 		}
 	}

+ 4 - 9
relay/channel/gemini/relay-gemini.go

@@ -98,6 +98,7 @@ func clampThinkingBudget(modelName string, budget int) int {
 // "effort": "high" - Allocates a large portion of tokens for reasoning (approximately 80% of max_tokens)
 // "effort": "medium" - Allocates a moderate portion of tokens (approximately 50% of max_tokens)
 // "effort": "low" - Allocates a smaller portion of tokens (approximately 20% of max_tokens)
+// "effort": "minimal" - Allocates a minimal portion of tokens (approximately 5% of max_tokens)
 func clampThinkingBudgetByEffort(modelName string, effort string) int {
 	isNew25Pro := isNew25ProModel(modelName)
 	is25FlashLite := is25FlashLiteModel(modelName)
@@ -118,18 +119,12 @@ func clampThinkingBudgetByEffort(modelName string, effort string) int {
 		maxBudget = maxBudget * 50 / 100
 	case "low":
 		maxBudget = maxBudget * 20 / 100
+	case "minimal":
+		maxBudget = maxBudget * 5 / 100
 	}
 	return clampThinkingBudget(modelName, maxBudget)
 }
 
-func parseThinkingLevelSuffix(modelName string) (string, string) {
-	base, level, ok := reasoning.TrimEffortSuffix(modelName)
-	if !ok {
-		return modelName, ""
-	}
-	return base, level
-}
-
 func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo, oaiRequest ...dto.GeneralOpenAIRequest) {
 	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
 		modelName := info.UpstreamModelName
@@ -186,7 +181,7 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel
 					ThinkingBudget: common.GetPointer(0),
 				}
 			}
-		} else if _, level := parseThinkingLevelSuffix(modelName); level != "" {
+		} else if _, level, ok := reasoning.TrimEffortSuffix(info.UpstreamModelName); ok && level != "" {
 			geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
 				IncludeThoughts: true,
 				ThinkingLevel:   level,

+ 1 - 1
setting/reasoning/suffix.go

@@ -6,7 +6,7 @@ import (
 	"github.com/samber/lo"
 )
 
-var EffortSuffixes = []string{"-high", "-medium", "-low"}
+var EffortSuffixes = []string{"-high", "-medium", "-low", "-minimal"}
 
 // TrimEffortSuffix -> modelName level(low) exists
 func TrimEffortSuffix(modelName string) (string, string, bool) {