monitor.go 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. package monitor
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "net/http"
  7. "strconv"
  8. "time"
  9. "github.com/gin-gonic/gin"
  10. "github.com/labring/aiproxy/core/common"
  11. "github.com/labring/aiproxy/core/common/conv"
  12. "github.com/labring/aiproxy/core/common/notify"
  13. "github.com/labring/aiproxy/core/common/reqlimit"
  14. "github.com/labring/aiproxy/core/model"
  15. "github.com/labring/aiproxy/core/monitor"
  16. "github.com/labring/aiproxy/core/relay/adaptor"
  17. "github.com/labring/aiproxy/core/relay/meta"
  18. "github.com/labring/aiproxy/core/relay/plugin"
  19. "github.com/labring/aiproxy/core/relay/plugin/noop"
  20. )
// Compile-time assertion that *ChannelMonitor satisfies plugin.Plugin.
var _ plugin.Plugin = (*ChannelMonitor)(nil)

// ChannelMonitor is a relay plugin that embeds noop.Noop and, in this file,
// defines its own DoRequest and DoResponse methods to record per-channel
// request outcomes and rates.
type ChannelMonitor struct {
	noop.Noop
}
  25. func NewChannelMonitorPlugin() plugin.Plugin {
  26. return &ChannelMonitor{}
  27. }
  28. var channelNoRetryStatusCodesMap = map[int]struct{}{
  29. http.StatusBadRequest: {},
  30. http.StatusRequestEntityTooLarge: {},
  31. http.StatusUnprocessableEntity: {},
  32. http.StatusUnavailableForLegalReasons: {},
  33. }
  34. func ShouldRetry(relayErr adaptor.Error) bool {
  35. _, ok := channelNoRetryStatusCodesMap[relayErr.StatusCode()]
  36. return !ok
  37. }
  38. var channelNoPermissionStatusCodesMap = map[int]struct{}{
  39. http.StatusUnauthorized: {},
  40. http.StatusPaymentRequired: {},
  41. http.StatusForbidden: {},
  42. http.StatusNotFound: {},
  43. }
  44. func ChannelHasPermission(relayErr adaptor.Error) bool {
  45. _, ok := channelNoPermissionStatusCodesMap[relayErr.StatusCode()]
  46. return !ok
  47. }
  48. func getRequestDuration(meta *meta.Meta) time.Duration {
  49. requestAt, ok := meta.Get("requestAt")
  50. if !ok {
  51. return 0
  52. }
  53. requestAtTime, ok := requestAt.(time.Time)
  54. if !ok {
  55. return 0
  56. }
  57. return common.TruncateDuration(time.Since(requestAtTime))
  58. }
  59. func (m *ChannelMonitor) DoRequest(
  60. meta *meta.Meta,
  61. store adaptor.Store,
  62. c *gin.Context,
  63. req *http.Request,
  64. do adaptor.DoRequest,
  65. ) (*http.Response, error) {
  66. count, overLimitCount, secondCount := reqlimit.PushChannelModelRequest(
  67. context.Background(),
  68. strconv.Itoa(meta.Channel.ID),
  69. meta.OriginModel,
  70. )
  71. updateChannelModelRequestRate(c, meta, count+overLimitCount, secondCount)
  72. requestAt := time.Now()
  73. meta.Set("requestAt", requestAt)
  74. resp, err := do.DoRequest(meta, store, c, req)
  75. requestCost := common.TruncateDuration(time.Since(requestAt))
  76. log := common.GetLogger(c)
  77. log.Data["req_cost"] = requestCost.String()
  78. if err == nil {
  79. return resp, nil
  80. }
  81. var adaptorErr adaptor.Error
  82. ok := errors.As(err, &adaptorErr)
  83. if ok {
  84. if !ShouldRetry(adaptorErr) {
  85. return resp, err
  86. }
  87. handleAdaptorError(meta, c, adaptorErr)
  88. } else {
  89. handleDoRequestError(meta, c, err, requestCost)
  90. }
  91. return resp, err
  92. }
  93. func handleDoRequestError(meta *meta.Meta, c *gin.Context, err error, requestCost time.Duration) {
  94. beyondThreshold, banExecution, _err := monitor.AddRequest(
  95. context.Background(),
  96. meta.OriginModel,
  97. int64(meta.Channel.ID),
  98. true,
  99. false,
  100. meta.ModelConfig.WarnErrorRate,
  101. meta.ModelConfig.MaxErrorRate,
  102. )
  103. if _err != nil {
  104. common.GetLogger(c).Errorf("add request failed: %+v", _err)
  105. }
  106. switch {
  107. case banExecution:
  108. notifyChannelRequestIssue(
  109. meta,
  110. "autoBanned",
  111. "Auto Banned",
  112. err,
  113. requestCost,
  114. time.Minute*15,
  115. )
  116. case beyondThreshold:
  117. notifyChannelRequestIssue(
  118. meta,
  119. "beyondThreshold",
  120. "Error Rate Beyond Threshold",
  121. err,
  122. requestCost,
  123. time.Minute*15,
  124. )
  125. }
  126. }
  127. func notifyChannelRequestIssue(
  128. meta *meta.Meta,
  129. issueType, titleSuffix string,
  130. err error,
  131. requestCost time.Duration,
  132. interval time.Duration,
  133. ) {
  134. var notifyFunc func(title, message string)
  135. lockKey := fmt.Sprintf(
  136. "%s:%d:%s:%s",
  137. issueType,
  138. meta.Channel.ID,
  139. meta.OriginModel,
  140. issueType,
  141. )
  142. switch issueType {
  143. case "beyondThreshold":
  144. notifyFunc = func(title, message string) {
  145. notify.WarnThrottle(lockKey, interval, title, message)
  146. }
  147. default:
  148. notifyFunc = func(title, message string) {
  149. notify.ErrorThrottle(lockKey, interval, title, message)
  150. }
  151. }
  152. message := fmt.Sprintf(
  153. "channel: %s (type: %d, type name: %s, id: %d)\nmodel: %s\nmode: %s\nerror: %s\nrequest id: %s\ntime cost: %s",
  154. meta.Channel.Name,
  155. meta.Channel.Type,
  156. meta.Channel.Type.String(),
  157. meta.Channel.ID,
  158. meta.OriginModel,
  159. meta.Mode,
  160. err.Error(),
  161. meta.RequestID,
  162. requestCost.String(),
  163. )
  164. notifyFunc(
  165. fmt.Sprintf("%s `%s` %s", meta.Channel.Name, meta.OriginModel, titleSuffix),
  166. message,
  167. )
  168. }
  169. func (m *ChannelMonitor) DoResponse(
  170. meta *meta.Meta,
  171. store adaptor.Store,
  172. c *gin.Context,
  173. resp *http.Response,
  174. do adaptor.DoResponse,
  175. ) (model.Usage, adaptor.Error) {
  176. usage, relayErr := do.DoResponse(meta, store, c, resp)
  177. if usage.TotalTokens > 0 {
  178. count, overLimitCount, secondCount := reqlimit.PushChannelModelTokensRequest(
  179. context.Background(),
  180. strconv.Itoa(meta.Channel.ID),
  181. meta.OriginModel,
  182. int64(usage.TotalTokens),
  183. )
  184. updateChannelModelTokensRequestRate(c, meta, count+overLimitCount, secondCount)
  185. }
  186. if relayErr == nil {
  187. if _, _, err := monitor.AddRequest(
  188. context.Background(),
  189. meta.OriginModel,
  190. int64(meta.Channel.ID),
  191. false,
  192. false,
  193. meta.ModelConfig.WarnErrorRate,
  194. meta.ModelConfig.MaxErrorRate,
  195. ); err != nil {
  196. common.GetLogger(c).Errorf("add request failed: %+v", err)
  197. }
  198. return usage, nil
  199. }
  200. if !ShouldRetry(relayErr) {
  201. return usage, relayErr
  202. }
  203. handleAdaptorError(meta, c, relayErr)
  204. return usage, relayErr
  205. }
  206. func handleAdaptorError(meta *meta.Meta, c *gin.Context, relayErr adaptor.Error) {
  207. hasPermission := ChannelHasPermission(relayErr)
  208. beyondThreshold, banExecution, err := monitor.AddRequest(
  209. context.Background(),
  210. meta.OriginModel,
  211. int64(meta.Channel.ID),
  212. true,
  213. !hasPermission,
  214. meta.ModelConfig.WarnErrorRate,
  215. meta.ModelConfig.MaxErrorRate,
  216. )
  217. if err != nil {
  218. common.GetLogger(c).Errorf("add request failed: %+v", err)
  219. }
  220. switch {
  221. case banExecution:
  222. notifyChannelResponseIssue(c, meta, "autoBanned", "Auto Banned", relayErr, time.Minute*15)
  223. case beyondThreshold:
  224. notifyChannelResponseIssue(
  225. c,
  226. meta,
  227. "beyondThreshold",
  228. "Error Rate Beyond Threshold",
  229. relayErr,
  230. time.Minute*15,
  231. )
  232. case !hasPermission:
  233. notifyChannelResponseIssue(
  234. c,
  235. meta,
  236. "channelHasPermission",
  237. "No Permission",
  238. relayErr,
  239. time.Minute*15,
  240. )
  241. }
  242. }
  243. func notifyChannelResponseIssue(
  244. c *gin.Context,
  245. meta *meta.Meta,
  246. issueType, titleSuffix string,
  247. err adaptor.Error,
  248. interval time.Duration,
  249. ) {
  250. var notifyFunc func(title, message string)
  251. lockKey := fmt.Sprintf(
  252. "%s:%d:%s:%s:%d",
  253. issueType,
  254. meta.Channel.ID,
  255. meta.OriginModel,
  256. issueType,
  257. err.StatusCode(),
  258. )
  259. switch issueType {
  260. case "beyondThreshold", "requestRateLimitExceeded":
  261. notifyFunc = func(title, message string) {
  262. notify.WarnThrottle(lockKey, interval, title, message)
  263. }
  264. default:
  265. notifyFunc = func(title, message string) {
  266. notify.ErrorThrottle(lockKey, interval, title, message)
  267. }
  268. }
  269. respBody, _ := err.MarshalJSON()
  270. message := fmt.Sprintf(
  271. "channel: %s (type: %d, type name: %s, id: %d)\nmodel: %s\nmode: %s\nstatus code: %d\ndetail: %s\nrequest id: %s\ntime cost: %s",
  272. meta.Channel.Name,
  273. meta.Channel.Type,
  274. meta.Channel.Type.String(),
  275. meta.Channel.ID,
  276. meta.OriginModel,
  277. meta.Mode,
  278. err.StatusCode(),
  279. conv.BytesToString(respBody),
  280. meta.RequestID,
  281. getRequestDuration(meta).String(),
  282. )
  283. if err.StatusCode() == http.StatusTooManyRequests {
  284. rate := GetChannelModelRequestRate(c, meta)
  285. message += fmt.Sprintf(
  286. "\nrpm: %d\nrps: %d\ntpm: %d\ntps: %d",
  287. rate.RPM,
  288. rate.RPS,
  289. rate.TPM,
  290. rate.TPS,
  291. )
  292. }
  293. notifyFunc(
  294. fmt.Sprintf("%s `%s` %s", meta.Channel.Name, meta.OriginModel, titleSuffix),
  295. message,
  296. )
  297. }
// Meta keys under which the channel/model rate values are stored by
// updateChannelModelRequestRate and updateChannelModelTokensRequestRate and
// read back by GetChannelModelRequestRate.
const (
	// MetaChannelModelKeyRPM is the meta key for the cached RPM value.
	MetaChannelModelKeyRPM = "channel_model_rpm"
	// MetaChannelModelKeyRPS is the meta key for the cached RPS value.
	MetaChannelModelKeyRPS = "channel_model_rps"
	// MetaChannelModelKeyTPM is the meta key for the cached TPM value.
	MetaChannelModelKeyTPM = "channel_model_tpm"
	// MetaChannelModelKeyTPS is the meta key for the cached TPS value.
	MetaChannelModelKeyTPS = "channel_model_tps"
)
// RequestRate bundles the four rate values returned by
// GetChannelModelRequestRate for a channel/model pair.
// NOTE(review): the field names suggest requests/tokens per minute/second —
// confirm the exact windows against the reqlimit package.
type RequestRate struct {
	RPM int64 // request-count value paired with RPS
	RPS int64 // request-count value paired with RPM
	TPM int64 // token-count value paired with TPS
	TPS int64 // token-count value paired with TPM
}
  310. func GetChannelModelRequestRate(c *gin.Context, meta *meta.Meta) RequestRate {
  311. rate := RequestRate{}
  312. if rpm, ok := meta.Get(MetaChannelModelKeyRPM); ok {
  313. rate.RPM, _ = rpm.(int64)
  314. rate.RPS = meta.GetInt64(MetaChannelModelKeyRPS)
  315. } else {
  316. rpm, rps := reqlimit.GetChannelModelRequest(context.Background(), strconv.Itoa(meta.Channel.ID), meta.OriginModel)
  317. rate.RPM = rpm
  318. rate.RPS = rps
  319. updateChannelModelRequestRate(c, meta, rpm, rps)
  320. }
  321. if tpm, ok := meta.Get(MetaChannelModelKeyTPM); ok {
  322. rate.TPM, _ = tpm.(int64)
  323. rate.TPS = meta.GetInt64(MetaChannelModelKeyTPS)
  324. } else {
  325. tpm, tps := reqlimit.GetChannelModelTokensRequest(context.Background(), strconv.Itoa(meta.Channel.ID), meta.OriginModel)
  326. rate.TPM = tpm
  327. rate.TPS = tps
  328. updateChannelModelTokensRequestRate(c, meta, tpm, tps)
  329. }
  330. return rate
  331. }
  332. func updateChannelModelRequestRate(c *gin.Context, meta *meta.Meta, rpm, rps int64) {
  333. meta.Set(MetaChannelModelKeyRPM, rpm)
  334. meta.Set(MetaChannelModelKeyRPS, rps)
  335. log := common.GetLogger(c)
  336. log.Data["ch_rpm"] = rpm
  337. log.Data["ch_rps"] = rps
  338. }
  339. func updateChannelModelTokensRequestRate(c *gin.Context, meta *meta.Meta, tpm, tps int64) {
  340. meta.Set(MetaChannelModelKeyTPM, tpm)
  341. meta.Set(MetaChannelModelKeyTPS, tps)
  342. log := common.GetLogger(c)
  343. log.Data["ch_tpm"] = tpm
  344. log.Data["ch_tps"] = tps
  345. }