monitor.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. package monitor
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "net/http"
  7. "strconv"
  8. "time"
  9. "github.com/gin-gonic/gin"
  10. "github.com/labring/aiproxy/core/common"
  11. "github.com/labring/aiproxy/core/common/conv"
  12. "github.com/labring/aiproxy/core/common/notify"
  13. "github.com/labring/aiproxy/core/common/reqlimit"
  14. "github.com/labring/aiproxy/core/model"
  15. "github.com/labring/aiproxy/core/monitor"
  16. "github.com/labring/aiproxy/core/relay/adaptor"
  17. "github.com/labring/aiproxy/core/relay/meta"
  18. "github.com/labring/aiproxy/core/relay/plugin"
  19. "github.com/labring/aiproxy/core/relay/plugin/noop"
  20. )
  21. var _ plugin.Plugin = (*ChannelMonitor)(nil)
  22. type ChannelMonitor struct {
  23. noop.Noop
  24. }
  25. func NewChannelMonitorPlugin() plugin.Plugin {
  26. return &ChannelMonitor{}
  27. }
  28. var channelNoRetryStatusCodesMap = map[int]struct{}{
  29. http.StatusBadRequest: {},
  30. http.StatusRequestEntityTooLarge: {},
  31. http.StatusUnprocessableEntity: {},
  32. http.StatusUnavailableForLegalReasons: {},
  33. }
  34. func ShouldRetry(relayErr adaptor.Error) bool {
  35. _, ok := channelNoRetryStatusCodesMap[relayErr.StatusCode()]
  36. return !ok
  37. }
  38. var channelNoPermissionStatusCodesMap = map[int]struct{}{
  39. http.StatusUnauthorized: {},
  40. http.StatusPaymentRequired: {},
  41. http.StatusForbidden: {},
  42. http.StatusNotFound: {},
  43. }
  44. func ChannelHasPermission(relayErr adaptor.Error) bool {
  45. _, ok := channelNoPermissionStatusCodesMap[relayErr.StatusCode()]
  46. return !ok
  47. }
  48. func getRequestDuration(meta *meta.Meta) time.Duration {
  49. requestAt, ok := meta.Get("requestAt")
  50. if !ok {
  51. return 0
  52. }
  53. requestAtTime, ok := requestAt.(time.Time)
  54. if !ok {
  55. return 0
  56. }
  57. return common.TruncateDuration(time.Since(requestAtTime))
  58. }
  59. func (m *ChannelMonitor) DoRequest(
  60. meta *meta.Meta,
  61. store adaptor.Store,
  62. c *gin.Context,
  63. req *http.Request,
  64. do adaptor.DoRequest,
  65. ) (*http.Response, error) {
  66. count, overLimitCount, secondCount := reqlimit.PushChannelModelRequest(
  67. context.Background(),
  68. strconv.Itoa(meta.Channel.ID),
  69. meta.OriginModel,
  70. )
  71. updateChannelModelRequestRate(c, meta, count+overLimitCount, secondCount)
  72. requestAt := time.Now()
  73. meta.Set("requestAt", requestAt)
  74. resp, err := do.DoRequest(meta, store, c, req)
  75. requestCost := common.TruncateDuration(time.Since(requestAt))
  76. log := common.GetLogger(c)
  77. log.Data["req_cost"] = requestCost.String()
  78. if err == nil {
  79. return resp, nil
  80. }
  81. var adaptorErr adaptor.Error
  82. ok := errors.As(err, &adaptorErr)
  83. if ok {
  84. if !ShouldRetry(adaptorErr) {
  85. return resp, err
  86. }
  87. handleAdaptorError(meta, c, adaptorErr)
  88. } else {
  89. handleDoRequestError(meta, c, err, requestCost)
  90. }
  91. return resp, err
  92. }
  93. func handleDoRequestError(meta *meta.Meta, c *gin.Context, err error, requestCost time.Duration) {
  94. beyondThreshold, banExecution, _err := monitor.AddRequest(
  95. context.Background(),
  96. meta.OriginModel,
  97. int64(meta.Channel.ID),
  98. true,
  99. false,
  100. meta.ModelConfig.WarnErrorRate,
  101. meta.ModelConfig.MaxErrorRate,
  102. )
  103. if _err != nil {
  104. common.GetLogger(c).Errorf("add request failed: %+v", _err)
  105. }
  106. switch {
  107. case banExecution:
  108. notifyChannelRequestIssue(
  109. meta,
  110. "autoBanned",
  111. "Auto Banned",
  112. err,
  113. requestCost,
  114. )
  115. case beyondThreshold:
  116. notifyChannelRequestIssue(
  117. meta,
  118. "beyondThreshold",
  119. "Error Rate Beyond Threshold",
  120. err,
  121. requestCost,
  122. )
  123. }
  124. }
  125. func notifyChannelRequestIssue(
  126. meta *meta.Meta,
  127. issueType, titleSuffix string,
  128. err error,
  129. requestCost time.Duration,
  130. ) {
  131. var notifyFunc func(title, message string)
  132. lockKey := fmt.Sprintf(
  133. "%s:%d:%s:%s",
  134. issueType,
  135. meta.Channel.ID,
  136. meta.OriginModel,
  137. issueType,
  138. )
  139. switch issueType {
  140. case "beyondThreshold":
  141. notifyFunc = func(title, message string) {
  142. notify.WarnThrottle(lockKey, time.Minute, title, message)
  143. }
  144. default:
  145. notifyFunc = func(title, message string) {
  146. notify.ErrorThrottle(lockKey, time.Minute, title, message)
  147. }
  148. }
  149. message := fmt.Sprintf(
  150. "channel: %s (type: %d, type name: %s, id: %d)\nmodel: %s\nmode: %s\nerror: %s\nrequest id: %s\ntime cost: %s",
  151. meta.Channel.Name,
  152. meta.Channel.Type,
  153. meta.Channel.Type.String(),
  154. meta.Channel.ID,
  155. meta.OriginModel,
  156. meta.Mode,
  157. err.Error(),
  158. meta.RequestID,
  159. requestCost.String(),
  160. )
  161. notifyFunc(
  162. fmt.Sprintf("%s `%s` %s", meta.Channel.Name, meta.OriginModel, titleSuffix),
  163. message,
  164. )
  165. }
  166. func (m *ChannelMonitor) DoResponse(
  167. meta *meta.Meta,
  168. store adaptor.Store,
  169. c *gin.Context,
  170. resp *http.Response,
  171. do adaptor.DoResponse,
  172. ) (model.Usage, adaptor.Error) {
  173. usage, relayErr := do.DoResponse(meta, store, c, resp)
  174. if usage.TotalTokens > 0 {
  175. count, overLimitCount, secondCount := reqlimit.PushChannelModelTokensRequest(
  176. context.Background(),
  177. strconv.Itoa(meta.Channel.ID),
  178. meta.OriginModel,
  179. int64(usage.TotalTokens),
  180. )
  181. updateChannelModelTokensRequestRate(c, meta, count+overLimitCount, secondCount)
  182. }
  183. if relayErr == nil {
  184. if _, _, err := monitor.AddRequest(
  185. context.Background(),
  186. meta.OriginModel,
  187. int64(meta.Channel.ID),
  188. false,
  189. false,
  190. meta.ModelConfig.WarnErrorRate,
  191. meta.ModelConfig.MaxErrorRate,
  192. ); err != nil {
  193. common.GetLogger(c).Errorf("add request failed: %+v", err)
  194. }
  195. return usage, nil
  196. }
  197. if !ShouldRetry(relayErr) {
  198. return usage, relayErr
  199. }
  200. handleAdaptorError(meta, c, relayErr)
  201. return usage, relayErr
  202. }
  203. func handleAdaptorError(meta *meta.Meta, c *gin.Context, relayErr adaptor.Error) {
  204. hasPermission := ChannelHasPermission(relayErr)
  205. beyondThreshold, banExecution, err := monitor.AddRequest(
  206. context.Background(),
  207. meta.OriginModel,
  208. int64(meta.Channel.ID),
  209. true,
  210. !hasPermission,
  211. meta.ModelConfig.WarnErrorRate,
  212. meta.ModelConfig.MaxErrorRate,
  213. )
  214. if err != nil {
  215. common.GetLogger(c).Errorf("add request failed: %+v", err)
  216. }
  217. switch {
  218. case banExecution:
  219. notifyChannelResponseIssue(c, meta, "autoBanned", "Auto Banned", relayErr)
  220. case beyondThreshold:
  221. notifyChannelResponseIssue(
  222. c,
  223. meta,
  224. "beyondThreshold",
  225. "Error Rate Beyond Threshold",
  226. relayErr,
  227. )
  228. case !hasPermission:
  229. notifyChannelResponseIssue(c, meta, "channelHasPermission", "No Permission", relayErr)
  230. }
  231. }
  232. func notifyChannelResponseIssue(
  233. c *gin.Context,
  234. meta *meta.Meta,
  235. issueType, titleSuffix string,
  236. err adaptor.Error,
  237. ) {
  238. var notifyFunc func(title, message string)
  239. lockKey := fmt.Sprintf(
  240. "%s:%d:%s:%s:%d",
  241. issueType,
  242. meta.Channel.ID,
  243. meta.OriginModel,
  244. issueType,
  245. err.StatusCode(),
  246. )
  247. switch issueType {
  248. case "beyondThreshold", "requestRateLimitExceeded":
  249. notifyFunc = func(title, message string) {
  250. notify.WarnThrottle(lockKey, time.Minute, title, message)
  251. }
  252. default:
  253. notifyFunc = func(title, message string) {
  254. notify.ErrorThrottle(lockKey, time.Minute, title, message)
  255. }
  256. }
  257. respBody, _ := err.MarshalJSON()
  258. message := fmt.Sprintf(
  259. "channel: %s (type: %d, type name: %s, id: %d)\nmodel: %s\nmode: %s\nstatus code: %d\ndetail: %s\nrequest id: %s\ntime cost: %s",
  260. meta.Channel.Name,
  261. meta.Channel.Type,
  262. meta.Channel.Type.String(),
  263. meta.Channel.ID,
  264. meta.OriginModel,
  265. meta.Mode,
  266. err.StatusCode(),
  267. conv.BytesToString(respBody),
  268. meta.RequestID,
  269. getRequestDuration(meta).String(),
  270. )
  271. if err.StatusCode() == http.StatusTooManyRequests {
  272. rate := GetChannelModelRequestRate(c, meta)
  273. message += fmt.Sprintf(
  274. "\nrpm: %d\nrps: %d\ntpm: %d\ntps: %d",
  275. rate.RPM,
  276. rate.RPS,
  277. rate.TPM,
  278. rate.TPS,
  279. )
  280. }
  281. notifyFunc(
  282. fmt.Sprintf("%s `%s` %s", meta.Channel.Name, meta.OriginModel, titleSuffix),
  283. message,
  284. )
  285. }
  286. const (
  287. MetaChannelModelKeyRPM = "channel_model_rpm"
  288. MetaChannelModelKeyRPS = "channel_model_rps"
  289. MetaChannelModelKeyTPM = "channel_model_tpm"
  290. MetaChannelModelKeyTPS = "channel_model_tps"
  291. )
  292. type RequestRate struct {
  293. RPM int64
  294. RPS int64
  295. TPM int64
  296. TPS int64
  297. }
  298. func GetChannelModelRequestRate(c *gin.Context, meta *meta.Meta) RequestRate {
  299. rate := RequestRate{}
  300. if rpm, ok := meta.Get(MetaChannelModelKeyRPM); ok {
  301. rate.RPM, _ = rpm.(int64)
  302. rate.RPS = meta.GetInt64(MetaChannelModelKeyRPS)
  303. } else {
  304. rpm, rps := reqlimit.GetChannelModelRequest(context.Background(), strconv.Itoa(meta.Channel.ID), meta.OriginModel)
  305. rate.RPM = rpm
  306. rate.RPS = rps
  307. updateChannelModelRequestRate(c, meta, rpm, rps)
  308. }
  309. if tpm, ok := meta.Get(MetaChannelModelKeyTPM); ok {
  310. rate.TPM, _ = tpm.(int64)
  311. rate.TPS = meta.GetInt64(MetaChannelModelKeyTPS)
  312. } else {
  313. tpm, tps := reqlimit.GetChannelModelTokensRequest(context.Background(), strconv.Itoa(meta.Channel.ID), meta.OriginModel)
  314. rate.TPM = tpm
  315. rate.TPS = tps
  316. updateChannelModelTokensRequestRate(c, meta, tpm, tps)
  317. }
  318. return rate
  319. }
  320. func updateChannelModelRequestRate(c *gin.Context, meta *meta.Meta, rpm, rps int64) {
  321. meta.Set(MetaChannelModelKeyRPM, rpm)
  322. meta.Set(MetaChannelModelKeyRPS, rps)
  323. log := common.GetLogger(c)
  324. log.Data["ch_rpm"] = rpm
  325. log.Data["ch_rps"] = rps
  326. }
  327. func updateChannelModelTokensRequestRate(c *gin.Context, meta *meta.Meta, tpm, tps int64) {
  328. meta.Set(MetaChannelModelKeyTPM, tpm)
  329. meta.Set(MetaChannelModelKeyTPS, tps)
  330. log := common.GetLogger(c)
  331. log.Data["ch_tpm"] = tpm
  332. log.Data["ch_tps"] = tps
  333. }