stt.go 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. package model
  2. import "github.com/labring/aiproxy/core/model"
  3. type SttJSONResponse struct {
  4. Text string `json:"text,omitempty"`
  5. }
  6. type SttVerboseJSONResponse struct {
  7. Task string `json:"task,omitempty"`
  8. Language string `json:"language,omitempty"`
  9. Text string `json:"text,omitempty"`
  10. Segments []*Segment `json:"segments,omitempty"`
  11. Duration float64 `json:"duration,omitempty"`
  12. Usage *SttUsage `json:"usage,omitempty"`
  13. }
  14. type SttSSEResponseType = string
  15. const (
  16. SttSSEResponseTypeTranscriptTextDelta SttSSEResponseType = "transcript.text.delta"
  17. SttSSEResponseTypeTranscriptTextDone SttSSEResponseType = "transcript.text.done"
  18. )
  19. type SttSSEResponse struct {
  20. Type SttSSEResponseType `json:"type,omitempty"`
  21. Delta string `json:"delta,omitempty"`
  22. Text string `json:"text,omitempty"`
  23. Usage *SttUsage `json:"usage,omitempty"`
  24. }
  25. type Segment struct {
  26. Text string `json:"text"`
  27. Tokens []int `json:"tokens"`
  28. ID int `json:"id"`
  29. Seek int `json:"seek"`
  30. Start float64 `json:"start"`
  31. End float64 `json:"end"`
  32. Temperature float64 `json:"temperature"`
  33. AvgLogprob float64 `json:"avg_logprob"`
  34. CompressionRatio float64 `json:"compression_ratio"`
  35. NoSpeechProb float64 `json:"no_speech_prob"`
  36. }
  37. type SttUsageType = string
  38. const (
  39. SttUsageTypeTokens SttUsageType = "tokens"
  40. SttUsageTypeDuration SttUsageType = "duration"
  41. )
  42. type SttUsage struct {
  43. Type SttUsageType `json:"type,omitempty"`
  44. Seconds int64 `json:"seconds,omitempty"`
  45. InputTokens int64 `json:"input_tokens,omitempty"`
  46. OutputTokens int64 `json:"output_tokens,omitempty"`
  47. TotalTokens int64 `json:"total_tokens,omitempty"`
  48. InputTokenDetails *SttUsageInputTokenDetails `json:"input_token_details,omitempty"`
  49. }
  50. type SttUsageInputTokenDetails struct {
  51. TextTokens int64 `json:"text_tokens"`
  52. AudioTokens int64 `json:"audio_tokens"`
  53. }
  54. func (u *SttUsage) ToModelUsage() model.Usage {
  55. switch u.Type {
  56. case SttUsageTypeDuration:
  57. return model.Usage{
  58. InputTokens: model.ZeroNullInt64(u.Seconds),
  59. TotalTokens: model.ZeroNullInt64(u.Seconds),
  60. }
  61. default:
  62. modelUsage := model.Usage{
  63. InputTokens: model.ZeroNullInt64(u.InputTokens),
  64. OutputTokens: model.ZeroNullInt64(u.OutputTokens),
  65. TotalTokens: model.ZeroNullInt64(u.TotalTokens),
  66. }
  67. if u.InputTokenDetails != nil {
  68. modelUsage.AudioInputTokens = model.ZeroNullInt64(u.InputTokenDetails.AudioTokens)
  69. }
  70. return modelUsage
  71. }
  72. }