浏览代码

改进正则匹配img src

懒得勤快 7 年之前
父节点
当前提交
398e700a05

+ 5 - 4
Masuit.Tools.Core/Html/HtmlHelper.cs

@@ -432,20 +432,21 @@ namespace Masuit.Tools.Core.Html
             return Regex.Replace(s, @"<img src=""(http:\/\/.+?)/", @"<img src=""/");
         }
 
+        private static readonly Regex ImgRegex = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<src>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>");
 
         /// <summary>
         /// 匹配html的所有img标签集合
         /// </summary>
         /// <param name="html"></param>
         /// <returns></returns>
-        public static MatchCollection MatchImgTags(this string html) => Regex.Matches(html, @"<img[\s]+src[\s]*=[\s]*((['""](?<src>[^'""]*)[\'""])|(?<src>[^\s]*))");
+        public static MatchCollection MatchImgTags(this string html) => ImgRegex.Matches(html);
 
         /// <summary>
         /// 匹配html的一个img标签
         /// </summary>
         /// <param name="html"></param>
         /// <returns></returns>
-        public static Match MatchImgTag(this string html) => Regex.Match(html, @"<img[\s]+src[\s]*=[\s]*((['""](?<src>[^'""]*)[\'""])|(?<src>[^\s]*))");
+        public static Match MatchImgTag(this string html) => ImgRegex.Match(html);
 
         /// <summary>
         /// 获取html中第一个img标签的src
@@ -454,7 +455,7 @@ namespace Masuit.Tools.Core.Html
         /// <returns></returns>
         public static string MatchFirstImgSrc(this string html)
         {
-            string src = Regex.Match(html, @"<img[\s]+src[\s]*=[\s]*((['""](?<src>[^'""]*)[\'""])|(?<src>[^\s]*))").Groups["src"].Value;
+            string src = ImgRegex.Match(html).Groups["src"].Value;
             int index = src.IndexOf("\"", StringComparison.Ordinal);
             if (index > 0)
             {
@@ -470,7 +471,7 @@ namespace Masuit.Tools.Core.Html
         /// <returns></returns>
         public static string MatchRandomImgSrc(this string html)
         {
-            var collection = Regex.Matches(html, @"<img[\s]+src[\s]*=[\s]*((['""](?<src>[^'""]*)[\'""])|(?<src>[^\s]*))");
+            var collection = ImgRegex.Matches(html);
             if (collection.Count > 0)
             {
                 string src = collection[new Random().StrictNext(collection.Count)].Groups["src"].Value;

+ 1 - 1
Masuit.Tools.Core/Masuit.Tools.Core.csproj

@@ -2,7 +2,7 @@
 
   <PropertyGroup>
     <TargetFramework>netcoreapp2.1</TargetFramework>
-    <Version>2.1.3</Version>
+    <Version>2.1.3.1</Version>
     <Authors>懒得勤快</Authors>
     <Company>masuit.com</Company>
     <Description>包含一些常用的操作类,大都是静态类,加密解密,反射操作,硬件信息,字符串扩展方法,日期时间扩展操作,大文件拷贝,图像裁剪,html处理,验证码、NoSql等常用封装。

+ 5 - 4
Masuit.Tools/Html/HtmlHelper.cs

@@ -432,20 +432,21 @@ namespace Masuit.Tools.Html
             return Regex.Replace(s, @"<img src=""(http:\/\/.+?)/", @"<img src=""/");
         }
 
+        private static readonly Regex ImgRegex = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<src>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>");
 
         /// <summary>
         /// 匹配html的所有img标签集合
         /// </summary>
         /// <param name="html"></param>
         /// <returns></returns>
-        public static MatchCollection MatchImgTags(this string html) => Regex.Matches(html, @"<img[\s]+src[\s]*=[\s]*((['""](?<src>[^'""]*)[\'""])|(?<src>[^\s]*))");
+        public static MatchCollection MatchImgTags(this string html) => ImgRegex.Matches(html);
 
         /// <summary>
         /// 匹配html的一个img标签
         /// </summary>
         /// <param name="html"></param>
         /// <returns></returns>
-        public static Match MatchImgTag(this string html) => Regex.Match(html, @"<img[\s]+src[\s]*=[\s]*((['""](?<src>[^'""]*)[\'""])|(?<src>[^\s]*))");
+        public static Match MatchImgTag(this string html) => ImgRegex.Match(html);
 
         /// <summary>
         /// 获取html中第一个img标签的src
@@ -454,7 +455,7 @@ namespace Masuit.Tools.Html
         /// <returns></returns>
         public static string MatchFirstImgSrc(this string html)
         {
-            string src = Regex.Match(html, @"<img[\s]+src[\s]*=[\s]*((['""](?<src>[^'""]*)[\'""])|(?<src>[^\s]*))").Groups["src"].Value;
+            string src = ImgRegex.Match(html).Groups["src"].Value;
             int index = src.IndexOf("\"", StringComparison.Ordinal);
             if (index > 0)
             {
@@ -470,7 +471,7 @@ namespace Masuit.Tools.Html
         /// <returns></returns>
         public static string MatchRandomImgSrc(this string html)
         {
-            var collection = Regex.Matches(html, @"<img[\s]+src[\s]*=[\s]*((['""](?<src>[^'""]*)[\'""])|(?<src>[^\s]*))");
+            var collection = ImgRegex.Matches(html);
             if (collection.Count > 0)
             {
                 string src = collection[new Random().StrictNext(collection.Count)].Groups["src"].Value;

+ 2 - 2
Masuit.Tools/Properties/AssemblyInfo.cs

@@ -36,7 +36,7 @@ using System.Runtime.InteropServices;
 // 方法是按如下所示使用“*”:
 // [assembly: AssemblyVersion("1.0.*")]
 
-[assembly: AssemblyVersion("2.1.3.0")]
-[assembly: AssemblyFileVersion("2.1.3.0")]
+[assembly: AssemblyVersion("2.1.3.1")]
+[assembly: AssemblyFileVersion("2.1.3.1")]
 [assembly: NeutralResourcesLanguage("zh-CN")]