audio.go 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. package dto
  2. import (
  3. "encoding/json"
  4. "strings"
  5. "github.com/QuantumNous/new-api/types"
  6. "github.com/gin-gonic/gin"
  7. )
  8. type AudioRequest struct {
  9. Model string `json:"model"`
  10. Input string `json:"input"`
  11. Voice string `json:"voice"`
  12. Instructions string `json:"instructions,omitempty"`
  13. ResponseFormat string `json:"response_format,omitempty"`
  14. Speed float64 `json:"speed,omitempty"`
  15. StreamFormat string `json:"stream_format,omitempty"`
  16. Metadata json.RawMessage `json:"metadata,omitempty"`
  17. }
  18. func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta {
  19. meta := &types.TokenCountMeta{
  20. CombineText: r.Input,
  21. TokenType: types.TokenTypeTextNumber,
  22. }
  23. if strings.Contains(r.Model, "gpt") {
  24. meta.TokenType = types.TokenTypeTokenizer
  25. }
  26. return meta
  27. }
  28. func (r *AudioRequest) IsStream(c *gin.Context) bool {
  29. return r.StreamFormat == "sse"
  30. }
  31. func (r *AudioRequest) SetModelName(modelName string) {
  32. if modelName != "" {
  33. r.Model = modelName
  34. }
  35. }
  36. type AudioResponse struct {
  37. Text string `json:"text"`
  38. }
  39. type WhisperVerboseJSONResponse struct {
  40. Task string `json:"task,omitempty"`
  41. Language string `json:"language,omitempty"`
  42. Duration float64 `json:"duration,omitempty"`
  43. Text string `json:"text,omitempty"`
  44. Segments []Segment `json:"segments,omitempty"`
  45. }
  46. type Segment struct {
  47. Id int `json:"id"`
  48. Seek int `json:"seek"`
  49. Start float64 `json:"start"`
  50. End float64 `json:"end"`
  51. Text string `json:"text"`
  52. Tokens []int `json:"tokens"`
  53. Temperature float64 `json:"temperature"`
  54. AvgLogprob float64 `json:"avg_logprob"`
  55. CompressionRatio float64 `json:"compression_ratio"`
  56. NoSpeechProb float64 `json:"no_speech_prob"`
  57. }