CrawlerHandler.cs 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. using Masuit.MyBlogs.Core.Common;
  2. using Masuit.Tools.Mime;
  3. using Masuit.Tools.Logging;
  4. using System.Diagnostics;
  5. using System.Net;
  6. using System.Text.RegularExpressions;
  7. namespace Masuit.MyBlogs.Core.Extensions.UEditor;
  8. /// <summary>
  9. /// Crawler 的摘要说明
  10. /// </summary>
  11. public class CrawlerHandler : Handler
  12. {
  13. private readonly HttpClient _httpClient;
  14. private readonly IConfiguration _configuration;
  15. public CrawlerHandler(HttpContext context) : base(context)
  16. {
  17. _httpClient = context.RequestServices.GetRequiredService<IHttpClientFactory>().CreateClient();
  18. _configuration = context.RequestServices.GetRequiredService<IConfiguration>();
  19. }
  20. public override async Task<string> Process()
  21. {
  22. var form = await Request.ReadFormAsync();
  23. string[] sources = form["source[]"];
  24. if (sources?.Length > 0 || sources?.Length <= 10)
  25. {
  26. using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30));
  27. return WriteJson(new
  28. {
  29. state = "SUCCESS",
  30. list = (await sources.SelectAsync(s =>
  31. {
  32. return new Crawler(s, _httpClient, _configuration, Context).Fetch(cts.Token).ContinueWith(t => new
  33. {
  34. state = t.Result.State,
  35. source = t.Result.SourceUrl,
  36. url = t.Result.ServerUrl
  37. });
  38. }))
  39. });
  40. }
  41. return WriteJson(new
  42. {
  43. state = "参数错误:没有指定抓取源"
  44. });
  45. }
  46. }
  47. public class Crawler
  48. {
  49. public string SourceUrl { get; set; }
  50. public string ServerUrl { get; set; }
  51. public string State { get; set; }
  52. private readonly HttpClient _httpClient;
  53. private readonly IConfiguration _configuration;
  54. private readonly HttpContext _httpContext;
  55. public Crawler(string sourceUrl, HttpClient httpClient, IConfiguration configuration, HttpContext httpContext)
  56. {
  57. SourceUrl = sourceUrl;
  58. _httpClient = httpClient;
  59. _configuration = configuration;
  60. _httpContext = httpContext;
  61. }
  62. public async Task<Crawler> Fetch(CancellationToken token)
  63. {
  64. if (!SourceUrl.IsExternalAddress())
  65. {
  66. State = "INVALID_URL";
  67. return this;
  68. }
  69. try
  70. {
  71. _httpClient.DefaultRequestHeaders.Referrer = new Uri(SourceUrl);
  72. using var response = await _httpClient.GetAsync(_configuration["HttpClientProxy:UriPrefix"] + SourceUrl);
  73. if (response.StatusCode != HttpStatusCode.OK)
  74. {
  75. State = "远程地址返回了错误的状态吗:" + response.StatusCode;
  76. return this;
  77. }
  78. ServerUrl = PathFormatter.Format(Path.GetFileNameWithoutExtension(SourceUrl), CommonHelper.SystemSettings.GetOrAdd("UploadPath", "upload") + UeditorConfig.GetString("catcherPathFormat")) + MimeMapper.ExtTypes[response.Content.Headers.ContentType?.MediaType ?? "image/jpeg"];
  79. var stream = await response.Content.ReadAsStreamAsync();
  80. var format = await Image.DetectFormatAsync(stream).ContinueWith(t => t.IsCompletedSuccessfully ? t.Result : null);
  81. stream.Position = 0;
  82. if (format != null)
  83. {
  84. ServerUrl = ServerUrl.Replace(Path.GetExtension(ServerUrl), "." + format.Name.ToLower());
  85. if (!Regex.IsMatch(format.Name, "JPEG|PNG|Webp|GIF", RegexOptions.IgnoreCase))
  86. {
  87. using var image = await Image.LoadAsync(stream);
  88. var memoryStream = new PooledMemoryStream();
  89. await image.SaveAsJpegAsync(memoryStream);
  90. await stream.DisposeAsync();
  91. stream = memoryStream;
  92. ServerUrl = ServerUrl.Replace(Path.GetExtension(ServerUrl), ".jpg");
  93. }
  94. }
  95. var savePath = AppContext.BaseDirectory + "wwwroot" + ServerUrl;
  96. var (url, success) = await _httpContext.RequestServices.GetRequiredService<ImagebedClient>().UploadImage(stream, savePath, token);
  97. if (success)
  98. {
  99. ServerUrl = url;
  100. }
  101. else
  102. {
  103. Directory.CreateDirectory(Path.GetDirectoryName(savePath));
  104. await File.WriteAllBytesAsync(savePath, await stream.ToArrayAsync());
  105. }
  106. await stream.DisposeAsync();
  107. State = "SUCCESS";
  108. }
  109. catch (Exception e)
  110. {
  111. State = "抓取错误:" + e.Message;
  112. LogManager.Error(e.Demystify());
  113. }
  114. return this;
  115. }
  116. }