file_service.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. package service
  2. import (
  3. "bytes"
  4. "encoding/base64"
  5. "fmt"
  6. "image"
  7. _ "image/gif"
  8. _ "image/jpeg"
  9. _ "image/png"
  10. "io"
  11. "net/http"
  12. "strings"
  13. "github.com/QuantumNous/new-api/common"
  14. "github.com/QuantumNous/new-api/constant"
  15. "github.com/QuantumNous/new-api/logger"
  16. "github.com/QuantumNous/new-api/types"
  17. "github.com/gin-gonic/gin"
  18. "golang.org/x/image/webp"
  19. )
  20. // FileService 统一的文件处理服务
  21. // 提供文件下载、解码、缓存等功能的统一入口
  22. // getContextCacheKey 生成 context 缓存的 key
  23. func getContextCacheKey(url string) string {
  24. return fmt.Sprintf("file_cache_%s", common.GenerateHMAC(url))
  25. }
  26. // LoadFileSource 加载文件源数据
  27. // 这是统一的入口,会自动处理缓存和不同的来源类型
  28. func LoadFileSource(c *gin.Context, source *types.FileSource, reason ...string) (*types.CachedFileData, error) {
  29. if source == nil {
  30. return nil, fmt.Errorf("file source is nil")
  31. }
  32. if common.DebugEnabled {
  33. logger.LogDebug(c, fmt.Sprintf("LoadFileSource starting for: %s", source.GetIdentifier()))
  34. }
  35. // 1. 快速检查内部缓存
  36. if source.HasCache() {
  37. // 即使命中内部缓存,也要确保注册到清理列表(如果尚未注册)
  38. if c != nil {
  39. registerSourceForCleanup(c, source)
  40. }
  41. return source.GetCache(), nil
  42. }
  43. // 2. 加锁保护加载过程
  44. source.Mu().Lock()
  45. defer source.Mu().Unlock()
  46. // 3. 双重检查
  47. if source.HasCache() {
  48. if c != nil {
  49. registerSourceForCleanup(c, source)
  50. }
  51. return source.GetCache(), nil
  52. }
  53. // 4. 如果是 URL,检查 Context 缓存
  54. var contextKey string
  55. if source.IsURL() && c != nil {
  56. contextKey = getContextCacheKey(source.URL)
  57. if cachedData, exists := c.Get(contextKey); exists {
  58. data := cachedData.(*types.CachedFileData)
  59. source.SetCache(data)
  60. registerSourceForCleanup(c, source)
  61. return data, nil
  62. }
  63. }
  64. // 5. 执行加载逻辑
  65. var cachedData *types.CachedFileData
  66. var err error
  67. if source.IsURL() {
  68. cachedData, err = loadFromURL(c, source.URL, reason...)
  69. } else {
  70. cachedData, err = loadFromBase64(source.Base64Data, source.MimeType)
  71. }
  72. if err != nil {
  73. return nil, err
  74. }
  75. // 6. 设置缓存
  76. source.SetCache(cachedData)
  77. if contextKey != "" && c != nil {
  78. c.Set(contextKey, cachedData)
  79. }
  80. // 7. 注册到 context 以便请求结束时自动清理
  81. if c != nil {
  82. registerSourceForCleanup(c, source)
  83. }
  84. return cachedData, nil
  85. }
  86. // registerSourceForCleanup 注册 FileSource 到 context 以便请求结束时清理
  87. func registerSourceForCleanup(c *gin.Context, source *types.FileSource) {
  88. if source.IsRegistered() {
  89. return
  90. }
  91. key := string(constant.ContextKeyFileSourcesToCleanup)
  92. var sources []*types.FileSource
  93. if existing, exists := c.Get(key); exists {
  94. sources = existing.([]*types.FileSource)
  95. }
  96. sources = append(sources, source)
  97. c.Set(key, sources)
  98. source.SetRegistered(true)
  99. }
  100. // CleanupFileSources 清理请求中所有注册的 FileSource
  101. // 应在请求结束时调用(通常由中间件自动调用)
  102. func CleanupFileSources(c *gin.Context) {
  103. key := string(constant.ContextKeyFileSourcesToCleanup)
  104. if sources, exists := c.Get(key); exists {
  105. for _, source := range sources.([]*types.FileSource) {
  106. if cache := source.GetCache(); cache != nil {
  107. cache.Close()
  108. }
  109. }
  110. c.Set(key, nil) // 清除引用
  111. }
  112. }
  113. // loadFromURL 从 URL 加载文件
  114. func loadFromURL(c *gin.Context, url string, reason ...string) (*types.CachedFileData, error) {
  115. // 下载文件
  116. var maxFileSize = constant.MaxFileDownloadMB * 1024 * 1024
  117. if common.DebugEnabled {
  118. logger.LogDebug(c, "loadFromURL: initiating download")
  119. }
  120. resp, err := DoDownloadRequest(url, reason...)
  121. if err != nil {
  122. return nil, fmt.Errorf("failed to download file from %s: %w", url, err)
  123. }
  124. defer resp.Body.Close()
  125. if resp.StatusCode != 200 {
  126. return nil, fmt.Errorf("failed to download file, status code: %d", resp.StatusCode)
  127. }
  128. // 读取文件内容(限制大小)
  129. if common.DebugEnabled {
  130. logger.LogDebug(c, "loadFromURL: reading response body")
  131. }
  132. fileBytes, err := io.ReadAll(io.LimitReader(resp.Body, int64(maxFileSize+1)))
  133. if err != nil {
  134. return nil, fmt.Errorf("failed to read file content: %w", err)
  135. }
  136. if len(fileBytes) > maxFileSize {
  137. return nil, fmt.Errorf("file size exceeds maximum allowed size: %dMB", constant.MaxFileDownloadMB)
  138. }
  139. // 转换为 base64
  140. base64Data := base64.StdEncoding.EncodeToString(fileBytes)
  141. // 智能获取 MIME 类型
  142. mimeType := smartDetectMimeType(resp, url, fileBytes)
  143. // 判断是否使用磁盘缓存
  144. base64Size := int64(len(base64Data))
  145. var cachedData *types.CachedFileData
  146. if shouldUseDiskCache(base64Size) {
  147. // 使用磁盘缓存
  148. diskPath, err := writeToDiskCache(base64Data)
  149. if err != nil {
  150. // 磁盘缓存失败,回退到内存
  151. logger.LogWarn(c, fmt.Sprintf("Failed to write to disk cache, falling back to memory: %v", err))
  152. cachedData = types.NewMemoryCachedData(base64Data, mimeType, int64(len(fileBytes)))
  153. } else {
  154. cachedData = types.NewDiskCachedData(diskPath, mimeType, int64(len(fileBytes)))
  155. cachedData.DiskSize = base64Size
  156. cachedData.OnClose = func(size int64) {
  157. common.DecrementDiskFiles(size)
  158. }
  159. common.IncrementDiskFiles(base64Size)
  160. if common.DebugEnabled {
  161. logger.LogDebug(c, fmt.Sprintf("File cached to disk: %s, size: %d bytes", diskPath, base64Size))
  162. }
  163. }
  164. } else {
  165. // 使用内存缓存
  166. cachedData = types.NewMemoryCachedData(base64Data, mimeType, int64(len(fileBytes)))
  167. }
  168. // 如果是图片,尝试获取图片配置
  169. if strings.HasPrefix(mimeType, "image/") {
  170. if common.DebugEnabled {
  171. logger.LogDebug(c, "loadFromURL: decoding image config")
  172. }
  173. config, format, err := decodeImageConfig(fileBytes)
  174. if err == nil {
  175. cachedData.ImageConfig = &config
  176. cachedData.ImageFormat = format
  177. // 如果通过图片解码获取了更准确的格式,更新 MIME 类型
  178. if mimeType == "application/octet-stream" || mimeType == "" {
  179. cachedData.MimeType = "image/" + format
  180. }
  181. }
  182. }
  183. return cachedData, nil
  184. }
  185. // shouldUseDiskCache 判断是否应该使用磁盘缓存
  186. func shouldUseDiskCache(dataSize int64) bool {
  187. return common.ShouldUseDiskCache(dataSize)
  188. }
  189. // writeToDiskCache 将数据写入磁盘缓存
  190. func writeToDiskCache(base64Data string) (string, error) {
  191. return common.WriteDiskCacheFileString(common.DiskCacheTypeFile, base64Data)
  192. }
  193. // smartDetectMimeType 智能检测 MIME 类型
  194. func smartDetectMimeType(resp *http.Response, url string, fileBytes []byte) string {
  195. // 1. 尝试从 Content-Type header 获取
  196. mimeType := resp.Header.Get("Content-Type")
  197. if idx := strings.Index(mimeType, ";"); idx != -1 {
  198. mimeType = strings.TrimSpace(mimeType[:idx])
  199. }
  200. if mimeType != "" && mimeType != "application/octet-stream" {
  201. return mimeType
  202. }
  203. // 2. 尝试从 Content-Disposition header 的 filename 获取
  204. if cd := resp.Header.Get("Content-Disposition"); cd != "" {
  205. parts := strings.Split(cd, ";")
  206. for _, part := range parts {
  207. part = strings.TrimSpace(part)
  208. if strings.HasPrefix(strings.ToLower(part), "filename=") {
  209. name := strings.TrimSpace(strings.TrimPrefix(part, "filename="))
  210. // 移除引号
  211. if len(name) > 2 && name[0] == '"' && name[len(name)-1] == '"' {
  212. name = name[1 : len(name)-1]
  213. }
  214. if dot := strings.LastIndex(name, "."); dot != -1 && dot+1 < len(name) {
  215. ext := strings.ToLower(name[dot+1:])
  216. if ext != "" {
  217. mt := GetMimeTypeByExtension(ext)
  218. if mt != "application/octet-stream" {
  219. return mt
  220. }
  221. }
  222. }
  223. break
  224. }
  225. }
  226. }
  227. // 3. 尝试从 URL 路径获取扩展名
  228. mt := guessMimeTypeFromURL(url)
  229. if mt != "application/octet-stream" {
  230. return mt
  231. }
  232. // 4. 使用 http.DetectContentType 内容嗅探
  233. if len(fileBytes) > 0 {
  234. sniffed := http.DetectContentType(fileBytes)
  235. if sniffed != "" && sniffed != "application/octet-stream" {
  236. // 去除可能的 charset 参数
  237. if idx := strings.Index(sniffed, ";"); idx != -1 {
  238. sniffed = strings.TrimSpace(sniffed[:idx])
  239. }
  240. return sniffed
  241. }
  242. }
  243. // 5. 尝试作为图片解码获取格式
  244. if len(fileBytes) > 0 {
  245. if _, format, err := decodeImageConfig(fileBytes); err == nil && format != "" {
  246. return "image/" + strings.ToLower(format)
  247. }
  248. }
  249. // 最终回退
  250. return "application/octet-stream"
  251. }
  252. // loadFromBase64 从 base64 字符串加载文件
  253. func loadFromBase64(base64String string, providedMimeType string) (*types.CachedFileData, error) {
  254. var mimeType string
  255. var cleanBase64 string
  256. // 处理 data: 前缀
  257. if strings.HasPrefix(base64String, "data:") {
  258. idx := strings.Index(base64String, ",")
  259. if idx != -1 {
  260. header := base64String[:idx]
  261. cleanBase64 = base64String[idx+1:]
  262. if strings.Contains(header, ":") && strings.Contains(header, ";") {
  263. mimeStart := strings.Index(header, ":") + 1
  264. mimeEnd := strings.Index(header, ";")
  265. if mimeStart < mimeEnd {
  266. mimeType = header[mimeStart:mimeEnd]
  267. }
  268. }
  269. } else {
  270. cleanBase64 = base64String
  271. }
  272. } else {
  273. cleanBase64 = base64String
  274. }
  275. if providedMimeType != "" {
  276. mimeType = providedMimeType
  277. }
  278. decodedData, err := base64.StdEncoding.DecodeString(cleanBase64)
  279. if err != nil {
  280. return nil, fmt.Errorf("failed to decode base64 data: %w", err)
  281. }
  282. base64Size := int64(len(cleanBase64))
  283. var cachedData *types.CachedFileData
  284. if shouldUseDiskCache(base64Size) {
  285. diskPath, err := writeToDiskCache(cleanBase64)
  286. if err != nil {
  287. cachedData = types.NewMemoryCachedData(cleanBase64, mimeType, int64(len(decodedData)))
  288. } else {
  289. cachedData = types.NewDiskCachedData(diskPath, mimeType, int64(len(decodedData)))
  290. cachedData.DiskSize = base64Size
  291. cachedData.OnClose = func(size int64) {
  292. common.DecrementDiskFiles(size)
  293. }
  294. common.IncrementDiskFiles(base64Size)
  295. }
  296. } else {
  297. cachedData = types.NewMemoryCachedData(cleanBase64, mimeType, int64(len(decodedData)))
  298. }
  299. if mimeType == "" || strings.HasPrefix(mimeType, "image/") {
  300. config, format, err := decodeImageConfig(decodedData)
  301. if err == nil {
  302. cachedData.ImageConfig = &config
  303. cachedData.ImageFormat = format
  304. if mimeType == "" {
  305. cachedData.MimeType = "image/" + format
  306. }
  307. }
  308. }
  309. return cachedData, nil
  310. }
  311. // GetImageConfig 获取图片配置
  312. func GetImageConfig(c *gin.Context, source *types.FileSource) (image.Config, string, error) {
  313. cachedData, err := LoadFileSource(c, source, "get_image_config")
  314. if err != nil {
  315. return image.Config{}, "", err
  316. }
  317. if cachedData.ImageConfig != nil {
  318. return *cachedData.ImageConfig, cachedData.ImageFormat, nil
  319. }
  320. base64Str, err := cachedData.GetBase64Data()
  321. if err != nil {
  322. return image.Config{}, "", fmt.Errorf("failed to get base64 data: %w", err)
  323. }
  324. decodedData, err := base64.StdEncoding.DecodeString(base64Str)
  325. if err != nil {
  326. return image.Config{}, "", fmt.Errorf("failed to decode base64 for image config: %w", err)
  327. }
  328. config, format, err := decodeImageConfig(decodedData)
  329. if err != nil {
  330. return image.Config{}, "", err
  331. }
  332. cachedData.ImageConfig = &config
  333. cachedData.ImageFormat = format
  334. return config, format, nil
  335. }
  336. // GetBase64Data 获取 base64 编码的数据
  337. func GetBase64Data(c *gin.Context, source *types.FileSource, reason ...string) (string, string, error) {
  338. cachedData, err := LoadFileSource(c, source, reason...)
  339. if err != nil {
  340. return "", "", err
  341. }
  342. base64Str, err := cachedData.GetBase64Data()
  343. if err != nil {
  344. return "", "", fmt.Errorf("failed to get base64 data: %w", err)
  345. }
  346. return base64Str, cachedData.MimeType, nil
  347. }
  348. // GetMimeType 获取文件的 MIME 类型
  349. func GetMimeType(c *gin.Context, source *types.FileSource) (string, error) {
  350. if source.HasCache() {
  351. return source.GetCache().MimeType, nil
  352. }
  353. if source.IsURL() {
  354. mimeType, err := GetFileTypeFromUrl(c, source.URL, "get_mime_type")
  355. if err == nil && mimeType != "" && mimeType != "application/octet-stream" {
  356. return mimeType, nil
  357. }
  358. }
  359. cachedData, err := LoadFileSource(c, source, "get_mime_type")
  360. if err != nil {
  361. return "", err
  362. }
  363. return cachedData.MimeType, nil
  364. }
  365. // DetectFileType 检测文件类型
  366. func DetectFileType(mimeType string) types.FileType {
  367. if strings.HasPrefix(mimeType, "image/") {
  368. return types.FileTypeImage
  369. }
  370. if strings.HasPrefix(mimeType, "audio/") {
  371. return types.FileTypeAudio
  372. }
  373. if strings.HasPrefix(mimeType, "video/") {
  374. return types.FileTypeVideo
  375. }
  376. return types.FileTypeFile
  377. }
  378. // decodeImageConfig 从字节数据解码图片配置
  379. func decodeImageConfig(data []byte) (image.Config, string, error) {
  380. reader := bytes.NewReader(data)
  381. config, format, err := image.DecodeConfig(reader)
  382. if err == nil {
  383. return config, format, nil
  384. }
  385. reader.Seek(0, io.SeekStart)
  386. config, err = webp.DecodeConfig(reader)
  387. if err == nil {
  388. return config, "webp", nil
  389. }
  390. return image.Config{}, "", fmt.Errorf("failed to decode image config: unsupported format")
  391. }
  392. // guessMimeTypeFromURL 从 URL 猜测 MIME 类型
  393. func guessMimeTypeFromURL(url string) string {
  394. cleanedURL := url
  395. if q := strings.Index(cleanedURL, "?"); q != -1 {
  396. cleanedURL = cleanedURL[:q]
  397. }
  398. if slash := strings.LastIndex(cleanedURL, "/"); slash != -1 && slash+1 < len(cleanedURL) {
  399. last := cleanedURL[slash+1:]
  400. if dot := strings.LastIndex(last, "."); dot != -1 && dot+1 < len(last) {
  401. ext := strings.ToLower(last[dot+1:])
  402. return GetMimeTypeByExtension(ext)
  403. }
  404. }
  405. return "application/octet-stream"
  406. }