audio.go 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. package dto
  2. import (
  3. "encoding/json"
  4. "github.com/QuantumNous/new-api/types"
  5. "github.com/gin-gonic/gin"
  6. )
  7. type AudioRequest struct {
  8. Model string `json:"model"`
  9. Input string `json:"input"`
  10. Voice string `json:"voice"`
  11. Instructions string `json:"instructions,omitempty"`
  12. ResponseFormat string `json:"response_format,omitempty"`
  13. Speed float64 `json:"speed,omitempty"`
  14. StreamFormat string `json:"stream_format,omitempty"`
  15. Metadata json.RawMessage `json:"metadata,omitempty"`
  16. }
  17. func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta {
  18. meta := &types.TokenCountMeta{
  19. CombineText: r.Input,
  20. TokenType: types.TokenTypeTextNumber,
  21. }
  22. return meta
  23. }
  24. func (r *AudioRequest) IsStream(c *gin.Context) bool {
  25. return false
  26. }
  27. func (r *AudioRequest) SetModelName(modelName string) {
  28. if modelName != "" {
  29. r.Model = modelName
  30. }
  31. }
  32. type AudioResponse struct {
  33. Text string `json:"text"`
  34. }
  35. type WhisperVerboseJSONResponse struct {
  36. Task string `json:"task,omitempty"`
  37. Language string `json:"language,omitempty"`
  38. Duration float64 `json:"duration,omitempty"`
  39. Text string `json:"text,omitempty"`
  40. Segments []Segment `json:"segments,omitempty"`
  41. }
  42. type Segment struct {
  43. Id int `json:"id"`
  44. Seek int `json:"seek"`
  45. Start float64 `json:"start"`
  46. End float64 `json:"end"`
  47. Text string `json:"text"`
  48. Tokens []int `json:"tokens"`
  49. Temperature float64 `json:"temperature"`
  50. AvgLogprob float64 `json:"avg_logprob"`
  51. CompressionRatio float64 `json:"compression_ratio"`
  52. NoSpeechProb float64 `json:"no_speech_prob"`
  53. }