|
|
@@ -0,0 +1,212 @@
|
|
|
+package minimax
|
|
|
+
|
|
|
+import (
|
|
|
+ "encoding/base64"
|
|
|
+ "encoding/json"
|
|
|
+ "errors"
|
|
|
+ "io"
|
|
|
+ "net/http"
|
|
|
+
|
|
|
+ "github.com/QuantumNous/new-api/dto"
|
|
|
+ relaycommon "github.com/QuantumNous/new-api/relay/common"
|
|
|
+ "github.com/QuantumNous/new-api/types"
|
|
|
+ "github.com/gin-gonic/gin"
|
|
|
+)
|
|
|
+
|
|
|
+type MiniMaxTTSRequest struct {
|
|
|
+ Model string `json:"model"`
|
|
|
+ Text string `json:"text"`
|
|
|
+ VoiceID string `json:"voice_id"`
|
|
|
+ Speed float64 `json:"speed,omitempty"`
|
|
|
+ Vol float64 `json:"vol,omitempty"`
|
|
|
+ Pitch int `json:"pitch,omitempty"`
|
|
|
+ AudioSampleRate int `json:"audio_sample_rate,omitempty"`
|
|
|
+ OutputFormat string `json:"output_format,omitempty"`
|
|
|
+}
|
|
|
+
|
|
|
+type MiniMaxTTSResponse struct {
|
|
|
+ Created int `json:"created"`
|
|
|
+ Data []MiniMaxTTSData `json:"data"`
|
|
|
+ ID string `json:"id"`
|
|
|
+ Model string `json:"model"`
|
|
|
+ Object string `json:"object"`
|
|
|
+ Usage MiniMaxTTSUsage `json:"usage"`
|
|
|
+}
|
|
|
+
|
|
|
+type MiniMaxTTSData struct {
|
|
|
+ Index int `json:"index"`
|
|
|
+ Audio string `json:"audio"`
|
|
|
+ Text string `json:"text"`
|
|
|
+ FinishReason string `json:"finish_reason"`
|
|
|
+}
|
|
|
+
|
|
|
+type MiniMaxTTSUsage struct {
|
|
|
+ TotalTokens int `json:"total_tokens"`
|
|
|
+}
|
|
|
+
|
|
|
+type MiniMaxTTSErrorResponse struct {
|
|
|
+ Error MiniMaxTTSError `json:"error"`
|
|
|
+}
|
|
|
+
|
|
|
+type MiniMaxTTSError struct {
|
|
|
+ Code string `json:"code"`
|
|
|
+ Message string `json:"message"`
|
|
|
+ Type string `json:"type"`
|
|
|
+}
|
|
|
+
|
|
|
+// OpenAI voice to MiniMax voice_id mapping
|
|
|
+var openAIToMiniMaxVoiceMap = map[string]string{
|
|
|
+ "alloy": "male-qn-qingse",
|
|
|
+ "echo": "male-qn-jingying",
|
|
|
+ "fable": "female-shaonv",
|
|
|
+ "onyx": "male-qn-badao",
|
|
|
+ "nova": "female-shaonv-jingpin",
|
|
|
+ "shimmer": "female-yujie",
|
|
|
+ // Add some standard MiniMax voice IDs
|
|
|
+ "voice-1": "male-qn-qingse",
|
|
|
+ "voice-2": "female-shaonv",
|
|
|
+}
|
|
|
+
|
|
|
+// OpenAI response format to MiniMax output format mapping
|
|
|
+var responseFormatToOutputFormatMap = map[string]string{
|
|
|
+ "mp3": "mp3",
|
|
|
+ "opus": "mp3",
|
|
|
+ "aac": "aac",
|
|
|
+ "flac": "flac",
|
|
|
+ "wav": "wav",
|
|
|
+ "pcm": "pcm",
|
|
|
+}
|
|
|
+
|
|
|
+// TTS model mapping - MiniMax uses speech-01 or speech-01-turbo
|
|
|
+var modelToTTSModelMap = map[string]string{
|
|
|
+ "speech-01": "speech-01",
|
|
|
+ "speech-01-turbo": "speech-01-turbo",
|
|
|
+ "tts-1": "speech-01-turbo",
|
|
|
+ "tts-1-hd": "speech-01",
|
|
|
+}
|
|
|
+
|
|
|
+func mapVoiceType(openAIVoice string) string {
|
|
|
+ if voice, ok := openAIToMiniMaxVoiceMap[openAIVoice]; ok {
|
|
|
+ return voice
|
|
|
+ }
|
|
|
+ return "female-shaonv" // default voice
|
|
|
+}
|
|
|
+
|
|
|
+func mapOutputFormat(responseFormat string) string {
|
|
|
+ if format, ok := responseFormatToOutputFormatMap[responseFormat]; ok {
|
|
|
+ return format
|
|
|
+ }
|
|
|
+ return "mp3" // default format
|
|
|
+}
|
|
|
+
|
|
|
+func getTTSModel(modelName string) string {
|
|
|
+ if ttsModel, ok := modelToTTSModelMap[modelName]; ok {
|
|
|
+ return ttsModel
|
|
|
+ }
|
|
|
+ return "speech-01-turbo" // default model
|
|
|
+}
|
|
|
+
|
|
|
+func getContentTypeByFormat(format string) string {
|
|
|
+ contentTypeMap := map[string]string{
|
|
|
+ "mp3": "audio/mpeg",
|
|
|
+ "wav": "audio/wav",
|
|
|
+ "flac": "audio/flac",
|
|
|
+ "aac": "audio/aac",
|
|
|
+ "pcm": "audio/pcm",
|
|
|
+ }
|
|
|
+ if ct, ok := contentTypeMap[format]; ok {
|
|
|
+ return ct
|
|
|
+ }
|
|
|
+ return "audio/mpeg" // default to mp3
|
|
|
+}
|
|
|
+
|
|
|
+func handleTTSResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
|
|
|
+ body, readErr := io.ReadAll(resp.Body)
|
|
|
+ if readErr != nil {
|
|
|
+ return nil, types.NewErrorWithStatusCode(
|
|
|
+ errors.New("failed to read minimax response"),
|
|
|
+ types.ErrorCodeReadResponseBodyFailed,
|
|
|
+ http.StatusInternalServerError,
|
|
|
+ )
|
|
|
+ }
|
|
|
+ defer resp.Body.Close()
|
|
|
+
|
|
|
+ // First try to parse as error response
|
|
|
+ var errorResp MiniMaxTTSErrorResponse
|
|
|
+ if unmarshalErr := json.Unmarshal(body, &errorResp); unmarshalErr == nil && errorResp.Error.Code != "" {
|
|
|
+ return nil, types.NewErrorWithStatusCode(
|
|
|
+ errors.New(errorResp.Error.Message),
|
|
|
+ types.ErrorCodeBadResponse,
|
|
|
+ http.StatusBadRequest,
|
|
|
+ )
|
|
|
+ }
|
|
|
+
|
|
|
+ // Parse as successful response
|
|
|
+ var minimaxResp MiniMaxTTSResponse
|
|
|
+ if unmarshalErr := json.Unmarshal(body, &minimaxResp); unmarshalErr != nil {
|
|
|
+ return nil, types.NewErrorWithStatusCode(
|
|
|
+ errors.New("failed to parse minimax response"),
|
|
|
+ types.ErrorCodeBadResponseBody,
|
|
|
+ http.StatusInternalServerError,
|
|
|
+ )
|
|
|
+ }
|
|
|
+
|
|
|
+ // Check if we have audio data
|
|
|
+ if len(minimaxResp.Data) == 0 || minimaxResp.Data[0].Audio == "" {
|
|
|
+ return nil, types.NewErrorWithStatusCode(
|
|
|
+ errors.New("no audio data in response"),
|
|
|
+ types.ErrorCodeBadResponse,
|
|
|
+ http.StatusBadRequest,
|
|
|
+ )
|
|
|
+ }
|
|
|
+
|
|
|
+ // Decode base64 audio data
|
|
|
+ audioData, decodeErr := base64.StdEncoding.DecodeString(minimaxResp.Data[0].Audio)
|
|
|
+ if decodeErr != nil {
|
|
|
+ return nil, types.NewErrorWithStatusCode(
|
|
|
+ errors.New("failed to decode audio data"),
|
|
|
+ types.ErrorCodeBadResponseBody,
|
|
|
+ http.StatusInternalServerError,
|
|
|
+ )
|
|
|
+ }
|
|
|
+
|
|
|
+ // Get output format from context or default to mp3
|
|
|
+ outputFormat := c.GetString("response_format")
|
|
|
+ if outputFormat == "" {
|
|
|
+ outputFormat = "mp3"
|
|
|
+ }
|
|
|
+
|
|
|
+ contentType := getContentTypeByFormat(outputFormat)
|
|
|
+ c.Header("Content-Type", contentType)
|
|
|
+ c.Data(http.StatusOK, contentType, audioData)
|
|
|
+
|
|
|
+ usage = &dto.Usage{
|
|
|
+ PromptTokens: info.PromptTokens,
|
|
|
+ CompletionTokens: 0,
|
|
|
+ TotalTokens: minimaxResp.Usage.TotalTokens,
|
|
|
+ }
|
|
|
+
|
|
|
+ return usage, nil
|
|
|
+}
|
|
|
+
|
|
|
+func handleChatCompletionResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
|
|
|
+ body, readErr := io.ReadAll(resp.Body)
|
|
|
+ if readErr != nil {
|
|
|
+ return nil, types.NewErrorWithStatusCode(
|
|
|
+ errors.New("failed to read minimax response"),
|
|
|
+ types.ErrorCodeReadResponseBodyFailed,
|
|
|
+ http.StatusInternalServerError,
|
|
|
+ )
|
|
|
+ }
|
|
|
+ defer resp.Body.Close()
|
|
|
+
|
|
|
+ // Set response headers
|
|
|
+ for key, values := range resp.Header {
|
|
|
+ for _, value := range values {
|
|
|
+ c.Header(key, value)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ c.Data(resp.StatusCode, "application/json", body)
|
|
|
+ return nil, nil
|
|
|
+}
|