Quellcode durchsuchen

Merge pull request #2068 from feitianbubu/pr/doubao-speech-emotion

豆包语音2.0音色支持情感,情绪,音量
IcedTangerine vor 2 Monaten
Ursprung
Commit
7156bf2382
3 geänderte Dateien mit 18 neuen und 20 gelöschten Zeilen
  1. 10 5
      dto/audio.go
  2. 8 1
      relay/channel/volcengine/adaptor.go
  3. 0 14
      relay/channel/volcengine/tts.go

+ 10 - 5
dto/audio.go

@@ -1,17 +1,22 @@
 package dto
 
 import (
+	"encoding/json"
+
 	"github.com/QuantumNous/new-api/types"
 
 	"github.com/gin-gonic/gin"
 )
 
 type AudioRequest struct {
-	Model          string  `json:"model"`
-	Input          string  `json:"input"`
-	Voice          string  `json:"voice"`
-	Speed          float64 `json:"speed,omitempty"`
-	ResponseFormat string  `json:"response_format,omitempty"`
+	Model          string          `json:"model"`
+	Input          string          `json:"input"`
+	Voice          string          `json:"voice"`
+	Instructions   string          `json:"instructions,omitempty"`
+	ResponseFormat string          `json:"response_format,omitempty"`
+	Speed          float64         `json:"speed,omitempty"`
+	StreamFormat   string          `json:"stream_format,omitempty"`
+	Metadata       json.RawMessage `json:"metadata,omitempty"`
 }
 
 func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta {

+ 8 - 1
relay/channel/volcengine/adaptor.go

@@ -47,7 +47,7 @@ func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInf
 	}
 
 	voiceType := mapVoiceType(request.Voice)
-	speedRatio := mapSpeedRatio(request.Speed)
+	speedRatio := request.Speed
 	encoding := mapEncoding(request.ResponseFormat)
 
 	c.Set("response_format", encoding)
@@ -75,6 +75,13 @@ func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInf
 		},
 	}
 
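+	// Merge vendor-specific metadata from the extension field into the request; keys present in metadata overwrite values set above.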
+	if len(request.Metadata) > 0 {
+		if err = json.Unmarshal(request.Metadata, &volcRequest); err != nil {
+			return nil, fmt.Errorf("error unmarshalling metadata to volcengine request: %w", err)
+		}
+	}
+
 	jsonData, err := json.Marshal(volcRequest)
 	if err != nil {
 		return nil, fmt.Errorf("error marshalling volcengine request: %w", err)

+ 0 - 14
relay/channel/volcengine/tts.go

@@ -119,20 +119,6 @@ func mapVoiceType(openAIVoice string) string {
 	return openAIVoice
 }
 
-// [0.1,2],默认为 1,通常保留一位小数即可
-func mapSpeedRatio(speed float64) float64 {
-	if speed == 0 {
-		return 1.0
-	}
-	if speed < 0.1 {
-		return 0.1
-	}
-	if speed > 2.0 {
-		return 2.0
-	}
-	return speed
-}
-
 func mapEncoding(responseFormat string) string {
 	if encoding, ok := responseFormatToEncodingMap[responseFormat]; ok {
 		return encoding