Browse Source

Merge pull request #2344 from seefs001/feature/gemini-thinking-level

feat: gemini 3 thinking level gemini-3-pro-preview-high
Calcium-Ion 4 weeks ago
parent
commit
ffc45a756e

+ 1 - 0
README.en.md

@@ -238,6 +238,7 @@ docker run --name new-api -d --restart always \
 - `gemini-2.5-flash-nothinking` - Disable thinking mode
 - `gemini-2.5-pro-thinking` - Enable thinking mode
 - `gemini-2.5-pro-thinking-128` - Enable thinking mode with thinking budget of 128 tokens
+- You can also append `-low`, `-medium`, or `-high` to any Gemini model name to request the corresponding reasoning effort (no extra thinking-budget suffix needed).
 
 </details>
 

+ 1 - 0
README.fr.md

@@ -234,6 +234,7 @@ docker run --name new-api -d --restart always \
 - `gemini-2.5-flash-nothinking` - Désactiver le mode de pensée
 - `gemini-2.5-pro-thinking` - Activer le mode de pensée
 - `gemini-2.5-pro-thinking-128` - Activer le mode de pensée avec budget de pensée de 128 tokens
+- Vous pouvez également ajouter les suffixes `-low`, `-medium` ou `-high` aux modèles Gemini pour fixer le niveau d’effort de raisonnement (sans suffixe de budget supplémentaire).
 
 </details>
 

+ 1 - 0
README.ja.md

@@ -243,6 +243,7 @@ docker run --name new-api -d --restart always \
 - `gemini-2.5-flash-nothinking` - 思考モードを無効にする
 - `gemini-2.5-pro-thinking` - 思考モードを有効にする
 - `gemini-2.5-pro-thinking-128` - 思考モードを有効にし、思考予算を128トークンに設定する
+- Gemini モデル名の末尾に `-low` / `-medium` / `-high` を付けることで推論強度を直接指定できます(追加の思考予算サフィックスは不要です)。
 
 </details>
 

+ 1 - 0
README.md

@@ -239,6 +239,7 @@ docker run --name new-api -d --restart always \
 - `gemini-2.5-flash-nothinking` - 禁用思考模式
 - `gemini-2.5-pro-thinking` - 启用思考模式
 - `gemini-2.5-pro-thinking-128` - 启用思考模式,并设置思考预算为128tokens
+- 也可以直接在 Gemini 模型名称后追加 `-low` / `-medium` / `-high` 来控制思考力度(无需再设置思考预算后缀)
 
 </details>
 

+ 5 - 5
dto/gemini.go

@@ -142,7 +142,7 @@ type GeminiThinkingConfig struct {
 	IncludeThoughts bool `json:"includeThoughts,omitempty"`
 	ThinkingBudget  *int `json:"thinkingBudget,omitempty"`
 	// TODO: ThinkingLevel conflicts with ThinkingBudget.
-	ThinkingLevel json.RawMessage `json:"thinkingLevel,omitempty"`
+	ThinkingLevel string `json:"thinkingLevel,omitempty"`
 }
 
 // UnmarshalJSON allows GeminiThinkingConfig to accept both snake_case and camelCase fields.
@@ -150,9 +150,9 @@ func (c *GeminiThinkingConfig) UnmarshalJSON(data []byte) error {
 	type Alias GeminiThinkingConfig
 	var aux struct {
 		Alias
-		IncludeThoughtsSnake *bool           `json:"include_thoughts,omitempty"`
-		ThinkingBudgetSnake  *int            `json:"thinking_budget,omitempty"`
-		ThinkingLevelSnake   json.RawMessage `json:"thinking_level,omitempty"`
+		IncludeThoughtsSnake *bool  `json:"include_thoughts,omitempty"`
+		ThinkingBudgetSnake  *int   `json:"thinking_budget,omitempty"`
+		ThinkingLevelSnake   string `json:"thinking_level,omitempty"`
 	}
 
 	if err := common.Unmarshal(data, &aux); err != nil {
@@ -169,7 +169,7 @@ func (c *GeminiThinkingConfig) UnmarshalJSON(data []byte) error {
 		c.ThinkingBudget = aux.ThinkingBudgetSnake
 	}
 
-	if len(aux.ThinkingLevelSnake) > 0 {
+	if aux.ThinkingLevelSnake != "" {
 		c.ThinkingLevel = aux.ThinkingLevelSnake
 	}
 

+ 2 - 0
relay/channel/gemini/adaptor.go

@@ -137,6 +137,8 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
 		} else if strings.HasSuffix(info.UpstreamModelName, "-nothinking") {
 			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking")
+		} else if baseModel, level := parseThinkingLevelSuffix(info.UpstreamModelName); level != "" {
+			info.UpstreamModelName = baseModel
 		}
 	}
 

+ 15 - 1
relay/channel/gemini/relay-gemini.go

@@ -19,8 +19,8 @@ import (
 	"github.com/QuantumNous/new-api/relay/helper"
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/setting/model_setting"
+	"github.com/QuantumNous/new-api/setting/reasoning"
 	"github.com/QuantumNous/new-api/types"
-
 	"github.com/gin-gonic/gin"
 )
 
@@ -122,6 +122,14 @@ func clampThinkingBudgetByEffort(modelName string, effort string) int {
 	return clampThinkingBudget(modelName, maxBudget)
 }
 
+func parseThinkingLevelSuffix(modelName string) (string, string) {
+	base, level, ok := reasoning.TrimEffortSuffix(modelName)
+	if !ok {
+		return modelName, ""
+	}
+	return base, level
+}
+
 func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo, oaiRequest ...dto.GeneralOpenAIRequest) {
 	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
 		modelName := info.UpstreamModelName
@@ -178,6 +186,12 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel
 					ThinkingBudget: common.GetPointer(0),
 				}
 			}
+		} else if _, level := parseThinkingLevelSuffix(modelName); level != "" {
+			geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
+				IncludeThoughts: true,
+				ThinkingLevel:   level,
+			}
+			info.ReasoningEffort = level
 		}
 	}
 }

+ 1 - 1
setting/model_setting/global.go

@@ -32,7 +32,7 @@ func GetGlobalSettings() *GlobalSettings {
 	return &globalSettings
 }
 
-// ShouldPreserveThinkingSuffix 判断模型是否配置为保留 thinking/-nothinking 后缀
+// ShouldPreserveThinkingSuffix 判断模型是否配置为保留 -thinking/-nothinking/-low/-high/-medium 后缀
 func ShouldPreserveThinkingSuffix(modelName string) bool {
 	target := strings.TrimSpace(modelName)
 	if target == "" {

+ 5 - 0
setting/ratio_setting/model_ratio.go

@@ -7,6 +7,7 @@ import (
 
 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/setting/operation_setting"
+	"github.com/QuantumNous/new-api/setting/reasoning"
 )
 
 // from songquanpeng/one-api
@@ -821,6 +822,10 @@ func FormatMatchingModelName(name string) string {
 		name = handleThinkingBudgetModel(name, "gemini-2.5-pro", "gemini-2.5-pro-thinking-*")
 	}
 
+	if base, _, ok := reasoning.TrimEffortSuffix(name); ok {
+		name = base
+	}
+
 	if strings.HasPrefix(name, "gpt-4-gizmo") {
 		name = "gpt-4-gizmo-*"
 	}

+ 20 - 0
setting/reasoning/suffix.go

@@ -0,0 +1,20 @@
+package reasoning
+
+import (
+	"strings"
+
+	"github.com/samber/lo"
+)
+
+var EffortSuffixes = []string{"-high", "-medium", "-low"}
+
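+// TrimEffortSuffix strips a trailing -high, -medium, or -low from modelName and returns the base name, the effort level without its leading dash (e.g. "low"), and whether a suffix was found.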
+func TrimEffortSuffix(modelName string) (string, string, bool) {
+	suffix, found := lo.Find(EffortSuffixes, func(s string) bool {
+		return strings.HasSuffix(modelName, s)
+	})
+	if !found {
+		return modelName, "", false
+	}
+	return strings.TrimSuffix(modelName, suffix), strings.TrimPrefix(suffix, "-"), true
+}

+ 2 - 2
web/src/i18n/locales/en.json

@@ -1996,7 +1996,7 @@
     "适用于个人使用的场景,不需要设置模型价格": "Suitable for personal use, no need to set model price.",
     "适用于为多个用户提供服务的场景": "Suitable for scenarios where multiple users are provided.",
     "适用于展示系统功能的场景,提供基础功能演示": "Suitable for scenarios where the system functions are displayed, providing basic feature demonstrations.",
-    "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Adapt to -thinking, -thinking-budget number, and -nothinking suffixes",
+    "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Adapt to -thinking, -thinking-budget number, -nothinking, and -low/-medium/-high suffixes",
     "选择充值套餐": "Choose a top-up package",
     "选择充值额度": "Select recharge amount",
     "选择分组": "Select group",
@@ -2178,4 +2178,4 @@
     "默认测试模型": "Default Test Model",
     "默认补全倍率": "Default completion ratio"
   }
-}
+}

+ 2 - 2
web/src/i18n/locales/fr.json

@@ -2006,7 +2006,7 @@
     "适用于个人使用的场景,不需要设置模型价格": "Adapté à un usage personnel, pas besoin de définir le prix du modèle.",
     "适用于为多个用户提供服务的场景": "Adapté aux scénarios où plusieurs utilisateurs sont fournis.",
     "适用于展示系统功能的场景,提供基础功能演示": "Adapté aux scénarios où les fonctions du système sont affichées, fournissant des démonstrations de fonctionnalités de base.",
-    "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Adapter les suffixes -thinking, -thinking-budget et -nothinking",
+    "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Adapter les suffixes -thinking, -thinking-budget, -nothinking et -low/-medium/-high",
     "选择充值额度": "Sélectionner le montant de la recharge",
     "选择分组": "Sélectionner un groupe",
     "选择同步来源": "Sélectionner la source de synchronisation",
@@ -2227,4 +2227,4 @@
     "随机种子 (留空为随机)": "Graine aléatoire (laisser vide pour aléatoire)",
     "默认补全倍率": "Taux de complétion par défaut"
   }
-}
+}

+ 2 - 2
web/src/i18n/locales/ja.json

@@ -1903,7 +1903,7 @@
     "适用于个人使用的场景,不需要设置模型价格": "個人利用のシナリオに適しており、モデル料金の設定は不要です",
     "适用于为多个用户提供服务的场景": "複数のユーザーにサービスを提供するシナリオに適しています",
     "适用于展示系统功能的场景,提供基础功能演示": "システムの機能を紹介するシナリオに適しており、基本的な機能のデモンストレーションを提供します",
-    "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "-thinking、-thinking-予算数値、-nothinkingサフィックスに対応",
+    "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "-thinking、-thinking-予算数値、-nothinking、および -low/-medium/-high サフィックスに対応",
     "选择充值额度": "チャージ額を選択",
     "选择分组": "グループを選択",
     "选择同步来源": "同期ソースを選択",
@@ -2126,4 +2126,4 @@
     "可选,用于复现结果": "オプション、結果の再現用",
     "随机种子 (留空为随机)": "ランダムシード(空欄でランダム)"
   }
-}
+}

+ 2 - 2
web/src/i18n/locales/ru.json

@@ -2017,7 +2017,7 @@
     "适用于个人使用的场景,不需要设置模型价格": "Подходит для сценариев личного использования, не требует установки цен на модели",
     "适用于为多个用户提供服务的场景": "Подходит для сценариев предоставления услуг нескольким пользователям",
     "适用于展示系统功能的场景,提供基础功能演示": "Подходит для сценариев демонстрации системных функций, предоставляет демонстрацию базовых функций",
-    "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Адаптация суффиксов -thinking, -thinking-бюджетные-цифры и -nothinking",
+    "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Адаптация суффиксов -thinking, -thinking-бюджетные-цифры, -nothinking и -low/-medium/-high",
     "选择充值额度": "Выберите сумму пополнения",
     "选择分组": "Выберите группу",
     "选择同步来源": "Выберите источник синхронизации",
@@ -2237,4 +2237,4 @@
     "可选,用于复现结果": "Необязательно, для воспроизводимых результатов",
     "随机种子 (留空为随机)": "Случайное зерно (оставьте пустым для случайного)"
   }
-}
+}

+ 2 - 2
web/src/i18n/locales/vi.json

@@ -2197,7 +2197,7 @@
     "适用于个人使用的场景,不需要设置模型价格": "Phù hợp cho mục đích sử dụng cá nhân, không cần đặt giá mô hình.",
     "适用于为多个用户提供服务的场景": "Phù hợp cho các kịch bản cung cấp dịch vụ cho nhiều người dùng.",
     "适用于展示系统功能的场景,提供基础功能演示": "Phù hợp cho các kịch bản hiển thị chức năng hệ thống, cung cấp bản demo chức năng cơ bản.",
-    "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Thích ứng với các hậu tố -thinking, -thinking-budget number và -nothinking",
+    "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Thích ứng với các hậu tố -thinking, -thinking-budget number, -nothinking và -low/-medium/-high",
     "选择充值额度": "Chọn hạn ngạch nạp tiền",
     "选择同步来源": "Chọn nguồn đồng bộ",
     "选择同步渠道": "Chọn kênh đồng bộ",
@@ -2737,4 +2737,4 @@
     "可选,用于复现结果": "Tùy chọn, để tái tạo kết quả",
     "随机种子 (留空为随机)": "Hạt giống ngẫu nhiên (để trống cho ngẫu nhiên)"
   }
-}
+}

+ 2 - 2
web/src/i18n/locales/zh.json

@@ -1984,7 +1984,7 @@
     "适用于个人使用的场景,不需要设置模型价格": "适用于个人使用的场景,不需要设置模型价格",
     "适用于为多个用户提供服务的场景": "适用于为多个用户提供服务的场景",
     "适用于展示系统功能的场景,提供基础功能演示": "适用于展示系统功能的场景,提供基础功能演示",
-    "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀",
+    "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "适配 -thinking、-thinking-预算数字、-nothinking 以及 -low/-medium/-high 后缀",
     "选择充值额度": "选择充值额度",
     "选择分组": "选择分组",
     "选择同步来源": "选择同步来源",
@@ -2204,4 +2204,4 @@
     "可选,用于复现结果": "可选,用于复现结果",
     "随机种子 (留空为随机)": "随机种子 (留空为随机)"
   }
-}
+}