2 місяців тому · dd1cac3f2e
--- a/relay/channel/minimax/adaptor.go
+++ b/relay/channel/minimax/adaptor.go
@@ -0,0 +1,130 @@
 
				+package minimax
			
 
				+
			
 
				+import (
			
 
				+	"bytes"
			
 
				+	"encoding/json"
			
 
				+	"errors"
			
 
				+	"fmt"
			
 
				+	"io"
			
 
				+	"net/http"
			
 
				+
			
 
				+	"github.com/QuantumNous/new-api/dto"
			
 
				+	"github.com/QuantumNous/new-api/relay/channel"
			
 
				+	relaycommon "github.com/QuantumNous/new-api/relay/common"
			
 
				+	"github.com/QuantumNous/new-api/relay/constant"
			
 
				+	"github.com/QuantumNous/new-api/types"
			
 
				+
			
 
				+	"github.com/gin-gonic/gin"
			
 
				+)
			
 
				+
			
 
				// Adaptor is the MiniMax channel adaptor. The type has no fields; each
				// method works only from the arguments it is given.
				type Adaptor struct{}
			
 
				+
			
 
				+func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dto.GeminiChatRequest) (any, error) {
			
 
				+	return nil, errors.New("not implemented")
			
 
				+}
			
 
				+
			
 
				+func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, req *dto.ClaudeRequest) (any, error) {
			
 
				+	return nil, errors.New("not implemented")
			
 
				+}
			
 
				+
			
 
				+func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
			
 
				+	if info.RelayMode != constant.RelayModeAudioSpeech {
			
 
				+		return nil, errors.New("unsupported audio relay mode")
			
 
				+	}
			
 
				+
			
 
				+	voiceID := mapVoiceType(request.Voice)
			
 
				+	speed := request.Speed
			
 
				+	outputFormat := mapOutputFormat(request.ResponseFormat)
			
 
				+
			
 
				+	c.Set("response_format", outputFormat)
			
 
				+
			
 
				+	minimaxRequest := MiniMaxTTSRequest{
			
 
				+		Model:        getTTSModel(info.OriginModelName),
			
 
				+		Text:         request.Input,
			
 
				+		VoiceID:      voiceID,
			
 
				+		Speed:        speed,
			
 
				+		OutputFormat: outputFormat,
			
 
				+	}
			
 
				+
			
 
				+	// 同步扩展字段的厂商自定义metadata
			
 
				+	if len(request.Metadata) > 0 {
			
 
				+		if err := json.Unmarshal(request.Metadata, &minimaxRequest); err != nil {
			
 
				+			return nil, fmt.Errorf("error unmarshalling metadata to minimax request: %w", err)
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	jsonData, err := json.Marshal(minimaxRequest)
			
 
				+	if err != nil {
			
 
				+		return nil, fmt.Errorf("error marshalling minimax request: %w", err)
			
 
				+	}
			
 
				+
			
 
				+	// Debug: log the request structure
			
 
				+	fmt.Printf("MiniMax TTS Request: %s\n", string(jsonData))
			
 
				+
			
 
				+	return bytes.NewReader(jsonData), nil
			
 
				+}
			
 
				+
			
 
				+func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
			
 
				+	return request, nil
			
 
				+}
			
 
				+
			
 
				+func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
			
 
				+}
			
 
				+
			
 
				+func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
			
 
				+	return GetRequestURL(info)
			
 
				+}
			
 
				+
			
 
				+func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
			
 
				+	channel.SetupApiRequestHeader(info, c, req)
			
 
				+
			
 
				+	if info.RelayMode == constant.RelayModeAudioSpeech {
			
 
				+		req.Set("Content-Type", "application/json")
			
 
				+		return nil
			
 
				+	}
			
 
				+
			
 
				+	req.Set("Authorization", "Bearer "+info.ApiKey)
			
 
				+	return nil
			
 
				+}
			
 
				+
			
 
				+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
			
 
				+	if request == nil {
			
 
				+		return nil, errors.New("request is nil")
			
 
				+	}
			
 
				+	return request, nil
			
 
				+}
			
 
				+
			
 
				+func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
			
 
				+	return nil, nil
			
 
				+}
			
 
				+
			
 
				+func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
			
 
				+	return request, nil
			
 
				+}
			
 
				+
			
 
				+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
			
 
				+	return nil, errors.New("not implemented")
			
 
				+}
			
 
				+
			
 
				+func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
			
 
				+	return channel.DoApiRequest(a, c, info, requestBody)
			
 
				+}
			
 
				+
			
 
				+func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
			
 
				+	if info.RelayMode == constant.RelayModeAudioSpeech {
			
 
				+		return handleTTSResponse(c, resp, info)
			
 
				+	}
			
 
				+
			
 
				+	// For chat completions, just pass through the response
			
 
				+	// MiniMax API is compatible with OpenAI format
			
 
				+	return handleChatCompletionResponse(c, resp, info)
			
 
				+}
			
 
				+
			
 
				+func (a *Adaptor) GetModelList() []string {
			
 
				+	return ModelList
			
 
				+}
			
 
				+
			
 
				+func (a *Adaptor) GetChannelName() string {
			
 
				+	return ChannelName
			
 
				+}
			
--- a/relay/channel/minimax/constants.go
+++ b/relay/channel/minimax/constants.go
@@ -8,6 +8,12 @@ var ModelList = []string{
 
				 	"abab6-chat",
			
 
				 	"abab5.5-chat",
			
 
				 	"abab5.5s-chat",
			
 
				+	"speech-2.5-hd-preview",
			
 
				+	"speech-2.5-turbo-preview",
			
 
				+	"speech-02-hd",
			
 
				+	"speech-02-turbo",
			
 
				+	"speech-01-hd",
			
 
				+	"speech-01-turbo",
			
 
				 }
			
 
				 
			
 
				 // ChannelName is the channel identifier for MiniMax; the adaptor's
				 // GetChannelName method returns this value.
				 var ChannelName = "minimax"
			
--- a/relay/channel/minimax/relay-minimax.go
+++ b/relay/channel/minimax/relay-minimax.go
@@ -3,9 +3,23 @@ package minimax
 
				 import (
			
 
				 	"fmt"
			
 
				 
			
 
				+	channelconstant "github.com/QuantumNous/new-api/constant"
			
 
				 	relaycommon "github.com/QuantumNous/new-api/relay/common"
			
 
				+	"github.com/QuantumNous/new-api/relay/constant"
			
 
				 )
			
 
				 
			
 
				 func GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
			
 
				-	return fmt.Sprintf("%s/v1/text/chatcompletion_v2", info.ChannelBaseUrl), nil
			
 
				+	baseUrl := info.ChannelBaseUrl
			
 
				+	if baseUrl == "" {
			
 
				+		baseUrl = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeMiniMax]
			
 
				+	}
			
 
				+
			
 
				+	switch info.RelayMode {
			
 
				+	case constant.RelayModeChatCompletions:
			
 
				+		return fmt.Sprintf("%s/v1/text/chatcompletion_v2", baseUrl), nil
			
 
				+	case constant.RelayModeAudioSpeech:
			
 
				+		return fmt.Sprintf("%s/v1/t2a_v2", baseUrl), nil
			
 
				+	default:
			
 
				+		return "", fmt.Errorf("unsupported relay mode: %d", info.RelayMode)
			
 
				+	}
			
 
				 }
			
--- a/relay/channel/minimax/tts.go
+++ b/relay/channel/minimax/tts.go
@@ -0,0 +1,212 @@
 
				+package minimax
			
 
				+
			
 
				+import (
			
 
				+	"encoding/base64"
			
 
				+	"encoding/json"
			
 
				+	"errors"
			
 
				+	"io"
			
 
				+	"net/http"
			
 
				+
			
 
				+	"github.com/QuantumNous/new-api/dto"
			
 
				+	relaycommon "github.com/QuantumNous/new-api/relay/common"
			
 
				+	"github.com/QuantumNous/new-api/types"
			
 
				+	"github.com/gin-gonic/gin"
			
 
				+)
			
 
				+
			
 
				// MiniMaxTTSRequest is the JSON body sent for a text-to-speech request.
				// Model, Text and VoiceID are always encoded; the remaining fields are
				// omitted from the JSON when they hold their zero value.
				//
				// NOTE(review): all fields are encoded at the top level of the object.
				// Confirm this flat layout against the MiniMax T2A v2 request schema before
				// relying on it.
				type MiniMaxTTSRequest struct {
					// Model is the MiniMax speech model name.
					Model string `json:"model"`
					// Text is the input to synthesise.
					Text string `json:"text"`
					// VoiceID selects the MiniMax voice.
					VoiceID string `json:"voice_id"`
					// Speed is the speech rate; omitted when zero.
					Speed float64 `json:"speed,omitempty"`
					// Vol is the volume; omitted when zero.
					Vol float64 `json:"vol,omitempty"`
					// Pitch is the pitch adjustment; omitted when zero.
					Pitch int `json:"pitch,omitempty"`
					// AudioSampleRate is the requested sample rate; omitted when zero.
					AudioSampleRate int `json:"audio_sample_rate,omitempty"`
					// OutputFormat is the requested audio format; omitted when empty.
					OutputFormat string `json:"output_format,omitempty"`
				}
			
 
				+
			
 
				// Types used to decode a successful text-to-speech response.
				//
				// NOTE(review): handleTTSResponse decodes Data[0].Audio as base64. Confirm
				// this envelope and the audio encoding against the MiniMax T2A v2 response
				// schema.
				type (
					// MiniMaxTTSResponse is the top-level response object.
					MiniMaxTTSResponse struct {
						Created int              `json:"created"`
						Data    []MiniMaxTTSData `json:"data"`
						ID      string           `json:"id"`
						Model   string           `json:"model"`
						Object  string           `json:"object"`
						Usage   MiniMaxTTSUsage  `json:"usage"`
					}

					// MiniMaxTTSData is one entry of MiniMaxTTSResponse.Data; Audio holds
					// the encoded audio as a string.
					MiniMaxTTSData struct {
						Index        int    `json:"index"`
						Audio        string `json:"audio"`
						Text         string `json:"text"`
						FinishReason string `json:"finish_reason"`
					}

					// MiniMaxTTSUsage is the usage block of a response.
					MiniMaxTTSUsage struct {
						TotalTokens int `json:"total_tokens"`
					}
				)
			
 
				+
			
 
				// Types used to decode an error reply to a text-to-speech request.
				type (
					// MiniMaxTTSErrorResponse wraps the error object found under the
					// "error" key.
					MiniMaxTTSErrorResponse struct {
						Error MiniMaxTTSError `json:"error"`
					}

					// MiniMaxTTSError carries the code, message and type of an error.
					MiniMaxTTSError struct {
						Code    string `json:"code"`
						Message string `json:"message"`
						Type    string `json:"type"`
					}
				)
			
 
				+
			
 
				// openAIToMiniMaxVoiceMap translates a requested voice name into a MiniMax
				// voice_id. Names that are not listed here are handled by mapVoiceType.
				var openAIToMiniMaxVoiceMap = map[string]string{
					// OpenAI voice names.
					"alloy":   "male-qn-qingse",
					"echo":    "male-qn-jingying",
					"fable":   "female-shaonv",
					"nova":    "female-shaonv-jingpin",
					"onyx":    "male-qn-badao",
					"shimmer": "female-yujie",

					// Generic aliases.
					"voice-1": "male-qn-qingse",
					"voice-2": "female-shaonv",
				}
			
 
				+
			
 
				// responseFormatToOutputFormatMap translates an OpenAI response_format value
				// into the output format requested from MiniMax. Every entry maps to itself
				// except "opus", which is requested as "mp3".
				var responseFormatToOutputFormatMap = map[string]string{
					"aac":  "aac",
					"flac": "flac",
					"mp3":  "mp3",
					"opus": "mp3",
					"pcm":  "pcm",
					"wav":  "wav",
				}
			
 
				+
			
 
				// modelToTTSModelMap translates a requested model name into the MiniMax
				// speech model that is sent upstream.
				//
				// The speech models advertised in ModelList map to themselves, so a request
				// for one of them is forwarded under its own name instead of falling through
				// to a default. The OpenAI names "tts-1" and "tts-1-hd" are aliased to
				// MiniMax models.
				var modelToTTSModelMap = map[string]string{
					// MiniMax speech models, forwarded unchanged.
					"speech-2.5-hd-preview":    "speech-2.5-hd-preview",
					"speech-2.5-turbo-preview": "speech-2.5-turbo-preview",
					"speech-02-hd":             "speech-02-hd",
					"speech-02-turbo":          "speech-02-turbo",
					"speech-01-hd":             "speech-01-hd",
					"speech-01-turbo":          "speech-01-turbo",
					"speech-01":                "speech-01",

					// OpenAI model names.
					"tts-1":    "speech-01-turbo",
					"tts-1-hd": "speech-01",
				}
			
 
				+
			
 
				+func mapVoiceType(openAIVoice string) string {
			
 
				+	if voice, ok := openAIToMiniMaxVoiceMap[openAIVoice]; ok {
			
 
				+		return voice
			
 
				+	}
			
 
				+	return "female-shaonv" // default voice
			
 
				+}
			
 
				+
			
 
				+func mapOutputFormat(responseFormat string) string {
			
 
				+	if format, ok := responseFormatToOutputFormatMap[responseFormat]; ok {
			
 
				+		return format
			
 
				+	}
			
 
				+	return "mp3" // default format
			
 
				+}
			
 
				+
			
 
				+func getTTSModel(modelName string) string {
			
 
				+	if ttsModel, ok := modelToTTSModelMap[modelName]; ok {
			
 
				+		return ttsModel
			
 
				+	}
			
 
				+	return "speech-01-turbo" // default model
			
 
				+}
			
 
				+
			
 
				// getContentTypeByFormat returns the Content-Type value used for an audio
				// format name: "audio/wav", "audio/flac", "audio/aac" or "audio/pcm" for the
				// matching format, and "audio/mpeg" for "mp3" and for any other value.
				//
				// A switch is used so that no lookup table is allocated on each call.
				func getContentTypeByFormat(format string) string {
					switch format {
					case "wav":
						return "audio/wav"
					case "flac":
						return "audio/flac"
					case "aac":
						return "audio/aac"
					case "pcm":
						return "audio/pcm"
					default:
						// Covers "mp3" as well as every unrecognised format.
						return "audio/mpeg"
					}
				}
			
 
				+
			
 
				+func handleTTSResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
			
 
				+	body, readErr := io.ReadAll(resp.Body)
			
 
				+	if readErr != nil {
			
 
				+		return nil, types.NewErrorWithStatusCode(
			
 
				+			errors.New("failed to read minimax response"),
			
 
				+			types.ErrorCodeReadResponseBodyFailed,
			
 
				+			http.StatusInternalServerError,
			
 
				+		)
			
 
				+	}
			
 
				+	defer resp.Body.Close()
			
 
				+
			
 
				+	// First try to parse as error response
			
 
				+	var errorResp MiniMaxTTSErrorResponse
			
 
				+	if unmarshalErr := json.Unmarshal(body, &errorResp); unmarshalErr == nil && errorResp.Error.Code != "" {
			
 
				+		return nil, types.NewErrorWithStatusCode(
			
 
				+			errors.New(errorResp.Error.Message),
			
 
				+			types.ErrorCodeBadResponse,
			
 
				+			http.StatusBadRequest,
			
 
				+		)
			
 
				+	}
			
 
				+
			
 
				+	// Parse as successful response
			
 
				+	var minimaxResp MiniMaxTTSResponse
			
 
				+	if unmarshalErr := json.Unmarshal(body, &minimaxResp); unmarshalErr != nil {
			
 
				+		return nil, types.NewErrorWithStatusCode(
			
 
				+			errors.New("failed to parse minimax response"),
			
 
				+			types.ErrorCodeBadResponseBody,
			
 
				+			http.StatusInternalServerError,
			
 
				+		)
			
 
				+	}
			
 
				+
			
 
				+	// Check if we have audio data
			
 
				+	if len(minimaxResp.Data) == 0 || minimaxResp.Data[0].Audio == "" {
			
 
				+		return nil, types.NewErrorWithStatusCode(
			
 
				+			errors.New("no audio data in response"),
			
 
				+			types.ErrorCodeBadResponse,
			
 
				+			http.StatusBadRequest,
			
 
				+		)
			
 
				+	}
			
 
				+
			
 
				+	// Decode base64 audio data
			
 
				+	audioData, decodeErr := base64.StdEncoding.DecodeString(minimaxResp.Data[0].Audio)
			
 
				+	if decodeErr != nil {
			
 
				+		return nil, types.NewErrorWithStatusCode(
			
 
				+			errors.New("failed to decode audio data"),
			
 
				+			types.ErrorCodeBadResponseBody,
			
 
				+			http.StatusInternalServerError,
			
 
				+		)
			
 
				+	}
			
 
				+
			
 
				+	// Get output format from context or default to mp3
			
 
				+	outputFormat := c.GetString("response_format")
			
 
				+	if outputFormat == "" {
			
 
				+		outputFormat = "mp3"
			
 
				+	}
			
 
				+
			
 
				+	contentType := getContentTypeByFormat(outputFormat)
			
 
				+	c.Header("Content-Type", contentType)
			
 
				+	c.Data(http.StatusOK, contentType, audioData)
			
 
				+
			
 
				+	usage = &dto.Usage{
			
 
				+		PromptTokens:     info.PromptTokens,
			
 
				+		CompletionTokens: 0,
			
 
				+		TotalTokens:      minimaxResp.Usage.TotalTokens,
			
 
				+	}
			
 
				+
			
 
				+	return usage, nil
			
 
				+}
			
 
				+
			
 
				+func handleChatCompletionResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
			
 
				+	body, readErr := io.ReadAll(resp.Body)
			
 
				+	if readErr != nil {
			
 
				+		return nil, types.NewErrorWithStatusCode(
			
 
				+			errors.New("failed to read minimax response"),
			
 
				+			types.ErrorCodeReadResponseBodyFailed,
			
 
				+			http.StatusInternalServerError,
			
 
				+		)
			
 
				+	}
			
 
				+	defer resp.Body.Close()
			
 
				+
			
 
				+	// Set response headers
			
 
				+	for key, values := range resp.Header {
			
 
				+		for _, value := range values {
			
 
				+			c.Header(key, value)
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	c.Data(resp.StatusCode, "application/json", body)
			
 
				+	return nil, nil
			
 
				+}