ソースを参照

feat: support adaptive thinking

Papersnake 1 週間 前
コミット
c9f5de7048

+ 1 - 0
relay/channel/aws/dto.go

@@ -26,6 +26,7 @@ type AwsClaudeRequest struct {
 	Tools            any                 `json:"tools,omitempty"`
 	ToolChoice       any                 `json:"tool_choice,omitempty"`
 	Thinking         *dto.Thinking       `json:"thinking,omitempty"`
+	OutputConfig     json.RawMessage     `json:"output_config,omitempty"`
 }
 
 func formatRequest(requestBody io.Reader, requestHeader http.Header) (*AwsClaudeRequest, error) {

+ 4 - 1
relay/channel/claude/constants.go

@@ -21,7 +21,10 @@ var ModelList = []string{
 	"claude-opus-4-5-20251101",
 	"claude-opus-4-5-20251101-thinking",
 	"claude-opus-4-6",
-	"claude-opus-4-6-thinking",
+	"claude-opus-4-6-max",
+	"claude-opus-4-6-high",
+	"claude-opus-4-6-medium",
+	"claude-opus-4-6-low",
 }
 
 var ChannelName = "claude"

+ 11 - 1
relay/channel/claude/relay-claude.go

@@ -17,6 +17,7 @@ import (
 	"github.com/QuantumNous/new-api/relay/reasonmap"
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/setting/model_setting"
+	"github.com/QuantumNous/new-api/setting/reasoning"
 	"github.com/QuantumNous/new-api/types"
 
 	"github.com/gin-gonic/gin"
@@ -141,7 +142,16 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe
 		claudeRequest.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(textRequest.Model))
 	}
 
-	if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
+	if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(textRequest.Model); ok && effortLevel != "" &&
+		strings.HasPrefix(textRequest.Model, "claude-opus-4-6") {
+		claudeRequest.Model = baseModel
+		claudeRequest.Thinking = &dto.Thinking{
+			Type: "adaptive",
+		}
+		claudeRequest.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
+		claudeRequest.TopP = 0
+		claudeRequest.Temperature = common.GetPointer[float64](1.0)
+	} else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
 		strings.HasSuffix(textRequest.Model, "-thinking") {
 
 		// 因为BudgetTokens 必须大于1024

+ 4 - 0
relay/channel/vertex/dto.go

@@ -1,6 +1,8 @@
 package vertex
 
 import (
+	"encoding/json"
+
 	"github.com/QuantumNous/new-api/dto"
 )
 
@@ -17,6 +19,7 @@ type VertexAIClaudeRequest struct {
 	Tools            any                 `json:"tools,omitempty"`
 	ToolChoice       any                 `json:"tool_choice,omitempty"`
 	Thinking         *dto.Thinking       `json:"thinking,omitempty"`
+	OutputConfig     json.RawMessage     `json:"output_config,omitempty"`
 }
 
 func copyRequest(req *dto.ClaudeRequest, version string) *VertexAIClaudeRequest {
@@ -33,5 +36,6 @@ func copyRequest(req *dto.ClaudeRequest, version string) *VertexAIClaudeRequest
 		Tools:            req.Tools,
 		ToolChoice:       req.ToolChoice,
 		Thinking:         req.Thinking,
+		OutputConfig:     req.OutputConfig,
 	}
 }

+ 11 - 1
relay/claude_handler.go

@@ -2,6 +2,7 @@ package relay
 
 import (
 	"bytes"
+	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
@@ -14,6 +15,7 @@ import (
 	"github.com/QuantumNous/new-api/relay/helper"
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/setting/model_setting"
+	"github.com/QuantumNous/new-api/setting/reasoning"
 	"github.com/QuantumNous/new-api/types"
 
 	"github.com/gin-gonic/gin"
@@ -49,7 +51,15 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
 		request.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(request.Model))
 	}
 
-	if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
+	if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(request.Model); ok && effortLevel != "" &&
+		strings.HasPrefix(request.Model, "claude-opus-4-6") {
+		request.Model = baseModel
+		request.Thinking = &dto.Thinking{
+			Type: "adaptive",
+		}
+		request.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
+		info.UpstreamModelName = request.Model
+	} else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
 		strings.HasSuffix(request.Model, "-thinking") {
 		if request.Thinking == nil {
 			// 因为BudgetTokens 必须大于1024

+ 8 - 0
setting/ratio_setting/cache_ratio.go

@@ -62,6 +62,10 @@ var defaultCacheRatio = map[string]float64{
 	"claude-opus-4-5-20251101-thinking":   0.1,
 	"claude-opus-4-6":                     0.1,
 	"claude-opus-4-6-thinking":            0.1,
+	"claude-opus-4-6-max":                 0.1,
+	"claude-opus-4-6-high":                0.1,
+	"claude-opus-4-6-medium":              0.1,
+	"claude-opus-4-6-low":                 0.1,
 }
 
 var defaultCreateCacheRatio = map[string]float64{
@@ -86,6 +90,10 @@ var defaultCreateCacheRatio = map[string]float64{
 	"claude-opus-4-5-20251101-thinking":   1.25,
 	"claude-opus-4-6":                     1.25,
 	"claude-opus-4-6-thinking":            1.25,
+	"claude-opus-4-6-max":                 1.25,
+	"claude-opus-4-6-high":                1.25,
+	"claude-opus-4-6-medium":              1.25,
+	"claude-opus-4-6-low":                 1.25,
 }
 
 //var defaultCreateCacheRatio = map[string]float64{}

+ 4 - 0
setting/ratio_setting/model_ratio.go

@@ -143,6 +143,10 @@ var defaultModelRatio = map[string]float64{
 	"claude-sonnet-4-5-20250929":                1.5,
 	"claude-opus-4-5-20251101":                  2.5,
 	"claude-opus-4-6":                           2.5,
+	"claude-opus-4-6-max":                       2.5,
+	"claude-opus-4-6-high":                      2.5,
+	"claude-opus-4-6-medium":                    2.5,
+	"claude-opus-4-6-low":                       2.5,
 	"claude-3-opus-20240229":                    7.5, // $15 / 1M tokens
 	"claude-opus-4-20250514":                    7.5,
 	"claude-opus-4-1-20250805":                  7.5,

+ 1 - 1
setting/reasoning/suffix.go

@@ -6,7 +6,7 @@ import (
 	"github.com/samber/lo"
 )
 
-var EffortSuffixes = []string{"-high", "-medium", "-low", "-minimal"}
+var EffortSuffixes = []string{"-max", "-high", "-medium", "-low", "-minimal"}
 
 // TrimEffortSuffix -> modelName level(low) exists
 func TrimEffortSuffix(modelName string) (string, string, bool) {