Просмотр исходного кода

🔧 fix(stream_scanner): improve resource management and error handling in StreamScannerHandler

CaIon 6 месяцев назад
Родитель
Сommit
f124404f07
2 измененных файлов с 130 добавлено и 27 удалено
  1. 128 25
      relay/helper/stream_scanner.go
  2. 2 2
      service/token_counter.go

+ 128 - 25
relay/helper/stream_scanner.go

@@ -3,6 +3,7 @@ package helper
 import (
 	"bufio"
 	"context"
+	"fmt"
 	"io"
 	"net/http"
 	"one-api/common"
@@ -19,8 +20,8 @@ import (
 )
 
 const (
-	InitialScannerBufferSize = 1 << 20  // 1MB (1*1024*1024)
-	MaxScannerBufferSize     = 10 << 20 // 10MB (10*1024*1024)
+	InitialScannerBufferSize = 64 << 10  // 64KB (64*1024)
+	MaxScannerBufferSize     = 10 << 20  // 10MB (10*1024*1024)
 	DefaultPingInterval      = 10 * time.Second
 )
 
@@ -30,7 +31,12 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
 		return
 	}
 
-	defer resp.Body.Close()
+	// 确保响应体总是被关闭
+	defer func() {
+		if resp.Body != nil {
+			resp.Body.Close()
+		}
+	}()
 
 	streamingTimeout := time.Duration(constant.StreamingTimeout) * time.Second
 	if strings.HasPrefix(info.UpstreamModelName, "o") {
@@ -39,11 +45,12 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
 	}
 
 	var (
-		stopChan   = make(chan bool, 2)
+		stopChan   = make(chan bool, 3) // 增加缓冲区避免阻塞
 		scanner    = bufio.NewScanner(resp.Body)
 		ticker     = time.NewTicker(streamingTimeout)
 		pingTicker *time.Ticker
 		writeMutex sync.Mutex // Mutex to protect concurrent writes
+		wg         sync.WaitGroup // 用于等待所有 goroutine 退出
 	)
 
 	generalSettings := operation_setting.GetGeneralSetting()
@@ -57,13 +64,32 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
 		pingTicker = time.NewTicker(pingInterval)
 	}
 
+	// 改进资源清理,确保所有 goroutine 正确退出
 	defer func() {
+		// 通知所有 goroutine 停止
+		common.SafeSendBool(stopChan, true)
+		
 		ticker.Stop()
 		if pingTicker != nil {
 			pingTicker.Stop()
 		}
+		
+		// 等待所有 goroutine 退出,最多等待5秒
+		done := make(chan struct{})
+		go func() {
+			wg.Wait()
+			close(done)
+		}()
+		
+		select {
+		case <-done:
+		case <-time.After(5 * time.Second):
+			common.LogError(c, "timeout waiting for goroutines to exit")
+		}
+		
 		close(stopChan)
 	}()
+	
 	scanner.Buffer(make([]byte, InitialScannerBufferSize), MaxScannerBufferSize)
 	scanner.Split(bufio.ScanLines)
 	SetEventStreamHeaders(c)
@@ -73,35 +99,95 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
 
 	ctx = context.WithValue(ctx, "stop_chan", stopChan)
 
-	// Handle ping data sending
+	// Handle ping data sending with improved error handling
 	if pingEnabled && pingTicker != nil {
+		wg.Add(1)
 		gopool.Go(func() {
+			defer func() {
+				wg.Done()
+				if r := recover(); r != nil {
+					common.LogError(c, fmt.Sprintf("ping goroutine panic: %v", r))
+					common.SafeSendBool(stopChan, true)
+				}
+				if common.DebugEnabled {
+					println("ping goroutine exited")
+				}
+			}()
+			
+			// 添加超时保护,防止 goroutine 无限运行
+			maxPingDuration := 30 * time.Minute // 最大 ping 持续时间
+			pingTimeout := time.NewTimer(maxPingDuration)
+			defer pingTimeout.Stop()
+			
 			for {
 				select {
 				case <-pingTicker.C:
-					writeMutex.Lock() // Lock before writing
-					err := PingData(c)
-					writeMutex.Unlock() // Unlock after writing
-					if err != nil {
-						common.LogError(c, "ping data error: "+err.Error())
-						common.SafeSendBool(stopChan, true)
+					// 使用超时机制防止写操作阻塞
+					done := make(chan error, 1)
+					go func() {
+						writeMutex.Lock()
+						defer writeMutex.Unlock()
+						done <- PingData(c)
+					}()
+					
+					select {
+					case err := <-done:
+						if err != nil {
+							common.LogError(c, "ping data error: "+err.Error())
+							return
+						}
+						if common.DebugEnabled {
+							println("ping data sent")
+						}
+					case <-time.After(10 * time.Second):
+						common.LogError(c, "ping data send timeout")
+						return
+					case <-ctx.Done():
+						return
+					case <-stopChan:
 						return
-					}
-					if common.DebugEnabled {
-						println("ping data sent")
 					}
 				case <-ctx.Done():
-					if common.DebugEnabled {
-						println("ping data goroutine stopped")
-					}
+					return
+				case <-stopChan:
+					return
+				case <-c.Request.Context().Done():
+					// 监听客户端断开连接
+					return
+				case <-pingTimeout.C:
+					common.LogError(c, "ping goroutine max duration reached")
 					return
 				}
 			}
 		})
 	}
 
+	// Scanner goroutine with improved error handling
+	wg.Add(1)
 	common.RelayCtxGo(ctx, func() {
+		defer func() {
+			wg.Done()
+			if r := recover(); r != nil {
+				common.LogError(c, fmt.Sprintf("scanner goroutine panic: %v", r))
+			}
+			common.SafeSendBool(stopChan, true)
+			if common.DebugEnabled {
+				println("scanner goroutine exited")
+			}
+		}()
+		
 		for scanner.Scan() {
+			// 检查是否需要停止
+			select {
+			case <-stopChan:
+				return
+			case <-ctx.Done():
+				return
+			case <-c.Request.Context().Done():
+				return
+			default:
+			}
+			
 			ticker.Reset(streamingTimeout)
 			data := scanner.Text()
 			if common.DebugEnabled {
@@ -119,11 +205,27 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
 			data = strings.TrimSuffix(data, "\r")
 			if !strings.HasPrefix(data, "[DONE]") {
 				info.SetFirstResponseTime()
-				writeMutex.Lock() // Lock before writing
-				success := dataHandler(data)
-				writeMutex.Unlock() // Unlock after writing
-				if !success {
-					break
+				
+				// 使用超时机制防止写操作阻塞
+				done := make(chan bool, 1)
+				go func() {
+					writeMutex.Lock()
+					defer writeMutex.Unlock()
+					done <- dataHandler(data)
+				}()
+				
+				select {
+				case success := <-done:
+					if !success {
+						return
+					}
+				case <-time.After(10 * time.Second):
+					common.LogError(c, "data handler timeout")
+					return
+				case <-ctx.Done():
+					return
+				case <-stopChan:
+					return
 				}
 			}
 		}
@@ -133,17 +235,18 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
 				common.LogError(c, "scanner error: "+err.Error())
 			}
 		}
-
-		common.SafeSendBool(stopChan, true)
 	})
 
+	// 主循环等待完成或超时
 	select {
 	case <-ticker.C:
 		// 超时处理逻辑
 		common.LogError(c, "streaming timeout")
-		common.SafeSendBool(stopChan, true)
 	case <-stopChan:
 		// 正常结束
 		common.LogInfo(c, "streaming finished")
+	case <-c.Request.Context().Done():
+		// 客户端断开连接
+		common.LogInfo(c, "client disconnected")
 	}
 }

+ 2 - 2
service/token_counter.go

@@ -68,8 +68,8 @@ func getTokenNum(tokenEncoder tokenizer.Codec, text string) int {
 	if text == "" {
 		return 0
 	}
-	ids, _, _ := tokenEncoder.Encode(text)
-	return len(ids)
+	tkm, _ := tokenEncoder.Count(text)
+	return tkm
 }
 
 func getImageToken(info *relaycommon.RelayInfo, imageUrl *dto.MessageImageUrl, model string, stream bool) (int, error) {