audio.go 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. package dto
  2. import (
  3. "github.com/QuantumNous/new-api/types"
  4. "github.com/gin-gonic/gin"
  5. )
  // AudioRequest is the JSON body of an audio request.
  //
  // Model, Input and Voice are always encoded. Speed and ResponseFormat are
  // tagged omitempty, so they are left out of the encoded JSON while they hold
  // their zero value (0 and "" respectively).
  type AudioRequest struct {
  	Model          string  `json:"model"`
  	Input          string  `json:"input"` // text reported for token counting by GetTokenCountMeta
  	Voice          string  `json:"voice"`
  	Speed          float64 `json:"speed,omitempty"`
  	ResponseFormat string  `json:"response_format,omitempty"`
  }
  13. func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta {
  14. meta := &types.TokenCountMeta{
  15. CombineText: r.Input,
  16. TokenType: types.TokenTypeTextNumber,
  17. }
  18. return meta
  19. }
  20. func (r *AudioRequest) IsStream(c *gin.Context) bool {
  21. return false
  22. }
  23. func (r *AudioRequest) SetModelName(modelName string) {
  24. if modelName != "" {
  25. r.Model = modelName
  26. }
  27. }
  // AudioResponse is a JSON response that carries a single text payload.
  // Text is always encoded, even when empty.
  type AudioResponse struct {
  	Text string `json:"text"`
  }
  31. type WhisperVerboseJSONResponse struct {
  32. Task string `json:"task,omitempty"`
  33. Language string `json:"language,omitempty"`
  34. Duration float64 `json:"duration,omitempty"`
  35. Text string `json:"text,omitempty"`
  36. Segments []Segment `json:"segments,omitempty"`
  37. }
  // Segment is one entry of WhisperVerboseJSONResponse.Segments.
  //
  // No field is tagged omitempty, so every key is always present in the encoded
  // JSON; a nil Tokens slice encodes as null.
  // NOTE(review): Id does not follow the Go initialism convention (ID), but it is
  // exported, so renaming it would break callers.
  type Segment struct {
  	Id               int     `json:"id"`
  	Seek             int     `json:"seek"`
  	Start            float64 `json:"start"`
  	End              float64 `json:"end"`
  	Text             string  `json:"text"`
  	Tokens           []int   `json:"tokens"`
  	Temperature      float64 `json:"temperature"`
  	AvgLogprob       float64 `json:"avg_logprob"`
  	CompressionRatio float64 `json:"compression_ratio"`
  	NoSpeechProb     float64 `json:"no_speech_prob"`
  }