CrawlerHandler.cs 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. using Masuit.Tools.Mime;
  2. using System.Net;
  3. using System.Text.RegularExpressions;
  4. using Masuit.Tools.Files;
  5. using Polly;
  6. namespace Masuit.MyBlogs.Core.Extensions.UEditor;
  /// <summary>
  /// UEditor handler that fetches the remote resources listed in the posted
  /// <c>source[]</c> form field and reports, per source, the resulting state and URLs.
  /// </summary>
  public class CrawlerHandler(HttpContext context) : Handler(context)
  {
      // One client and one configuration instance are resolved per handler and handed to
      // every Crawler created in Process().
      private readonly HttpClient _httpClient = context.RequestServices.GetRequiredService<IHttpClientFactory>().CreateClient();
      private readonly IConfiguration _configuration = context.RequestServices.GetRequiredService<IConfiguration>();

      /// <summary>
      /// Reads the <c>source[]</c> form values, runs a <see cref="Crawler"/> for each one and
      /// serializes the outcome with <c>WriteJson</c>.
      /// </summary>
      /// <returns>
      /// The value produced by <c>WriteJson</c>: an object with <c>state = "SUCCESS"</c> and a
      /// <c>list</c> of <c>{ state, source, url }</c> entries, or an object carrying only an
      /// error <c>state</c> when no source was posted.
      /// </returns>
      public override async Task<string> Process()
      {
          var form = await Request.ReadFormAsync();
          string[] sources = form["source[]"];
          if (sources is not { Length: > 0 })
          {
              return WriteJson(new
              {
                  state = "参数错误:没有指定抓取源"
              });
          }

          // Overall time budget shared by all fetches: two seconds per requested source.
          using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(sources.Length * 2));

          var list = await sources.SelectAsync(async s =>
          {
              var crawler = new Crawler(s, _httpClient, _configuration, Context);

              // Policy order matters: the fallback must be the OUTER policy and the retry the
              // inner one. With the retry outside, the inner fallback converts the first
              // ObjectDisposedException into a result and the retry never gets to run.
              var retry = Policy<Crawler>.Handle<ObjectDisposedException>().RetryAsync(3);
              var fallback = Policy<Crawler>.Handle<ObjectDisposedException>().FallbackAsync(crawler);
              var fetch = await fallback.WrapAsync(retry).ExecuteAsync(() => crawler.Fetch(cts.Token));

              return new
              {
                  state = fetch.State,
                  source = fetch.SourceUrl,
                  url = fetch.ServerUrl
              };
          });

          return WriteJson(new
          {
              state = "SUCCESS",
              list
          });
      }
  }
  /// <summary>
  /// Fetches one remote resource, normalizes it to a supported image format and stores it,
  /// recording the outcome in <see cref="State"/> and <see cref="ServerUrl"/>.
  /// </summary>
  public class Crawler(string sourceUrl, HttpClient httpClient, IConfiguration configuration, HttpContext httpContext)
  {
      /// <summary>The remote address this crawler was asked to fetch.</summary>
      public string SourceUrl { get; set; } = sourceUrl;

      /// <summary>
      /// Where the fetched file ended up: the URL returned by the image-bed upload, or the
      /// formatted path under <c>wwwroot</c> when the file was saved locally.
      /// </summary>
      public string ServerUrl { get; set; }

      /// <summary>
      /// Outcome of <see cref="Fetch"/>: <c>"SUCCESS"</c>, <c>"INVALID_URL"</c>, or a failure
      /// message. Stays unset when the remote answered 200 with an empty body.
      /// </summary>
      public string State { get; set; }

      /// <summary>
      /// Downloads <see cref="SourceUrl"/> (through the configured
      /// <c>HttpClientProxy:UriPrefix</c>, if any), re-encodes it to JPEG when the detected
      /// format is not JPEG/PNG/WebP/GIF, then uploads it via <c>ImagebedClient</c> or, if the
      /// upload reports failure, writes it below <c>wwwroot</c>.
      /// </summary>
      /// <param name="token">Cancels the HTTP request, image decoding/encoding and the upload.</param>
      /// <returns>This instance, with <see cref="State"/> and <see cref="ServerUrl"/> updated.</returns>
      public async Task<Crawler> Fetch(CancellationToken token)
      {
          if (!SourceUrl.IsExternalAddress())
          {
              State = "INVALID_URL";
              return this;
          }

          // The Referer is set on the request itself rather than on
          // httpClient.DefaultRequestHeaders: the client instance is supplied by the caller
          // and may be shared by several crawlers, and default headers are shared mutable
          // state that would leak one crawler's referrer into another's request.
          using var request = new HttpRequestMessage(HttpMethod.Get, configuration["HttpClientProxy:UriPrefix"] + SourceUrl);
          request.Headers.Referrer = new Uri(SourceUrl);

          HttpResponseMessage response;
          try
          {
              response = await httpClient.SendAsync(request, token);
          }
          catch (Exception ex) when (ex is HttpRequestException or OperationCanceledException)
          {
              // Network failure, timeout or cancellation: report it instead of throwing.
              State = "远程请求失败";
              return this;
          }

          // The response owns the content stream, so it stays alive until the payload has
          // been uploaded or saved, and is disposed afterwards.
          using (response)
          {
              if (response.StatusCode != HttpStatusCode.OK)
              {
                  State = "远程地址返回了错误的状态吗:" + response.StatusCode;
                  return this;
              }

              // Base the stored name on the last word-like token of the source file name; the
              // provisional extension comes from the response Content-Type (image/jpeg if absent).
              var fileName = Path.GetFileNameWithoutExtension(SourceUrl).Next(s => Regex.Matches(s, @"\w+").LastOrDefault()?.Value);
              ServerUrl = PathFormatter.Format(fileName, CommonHelper.SystemSettings.GetOrAdd("UploadPath", "upload") + UeditorConfig.GetString("catcherPathFormat")) + MimeMapper.ExtTypes[response.Content.Headers.ContentType?.MediaType ?? "image/jpeg"];

              var stream = await response.Content.ReadAsStreamAsync(token);
              if (stream.Length == 0)
              {
                  return this;
              }

              // Detection failures are deliberately tolerated: an undetectable payload is
              // stored as-is under the Content-Type derived extension.
              var format = await Image.DetectFormatAsync(stream).ContinueWith(t => t.IsCompletedSuccessfully ? t.Result : null);
              stream.Position = 0;

              // Receives the re-encoded image when a conversion is needed; otherwise unused.
              using var converted = new MemoryStream();
              var payload = stream;
              if (format != null)
              {
                  // ChangeExtension touches only the trailing extension; a textual Replace
                  // would also rewrite the same characters elsewhere in the path.
                  // Invariant lower-casing keeps names such as "GIF" stable in every culture.
                  ServerUrl = Path.ChangeExtension(ServerUrl, "." + format.Name.ToLowerInvariant());
                  if (!Regex.IsMatch(format.Name, "JPEG|PNG|Webp|GIF", RegexOptions.IgnoreCase))
                  {
                      // Encode into a separate buffer: the source stream has just been read
                      // to its end (and may be read-only), so writing the JPEG back into it
                      // would append to, not replace, the original bytes.
                      using var image = await Image.LoadAsync(stream, token);
                      await image.SaveAsJpegAsync(converted, token);
                      converted.Position = 0;
                      payload = converted;
                      ServerUrl = Path.ChangeExtension(ServerUrl, ".jpg");
                  }
              }

              var savePath = AppContext.BaseDirectory + "wwwroot" + ServerUrl;
              var (url, success) = await httpContext.RequestServices.GetRequiredService<ImagebedClient>().UploadImage(payload, savePath, token);
              if (success)
              {
                  ServerUrl = url;
              }
              else
              {
                  // NOTE(review): the failed upload may have consumed part of the stream;
                  // rewind so the local copy starts from the first byte.
                  if (payload.CanSeek)
                  {
                      payload.Position = 0;
                  }

                  Directory.CreateDirectory(Path.GetDirectoryName(savePath));
                  await payload.SaveFileAsync(savePath);
              }

              State = "SUCCESS";
              return this;
          }
      }
  }