using AngleSharp;
using AngleSharp.Dom;
using Ganss.Xss;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;

namespace Masuit.Tools.Html
{
    /// <summary>
    /// HTML helper extensions: XSS sanitizing, tag stripping and img/src extraction.
    /// </summary>
    public static class HtmlTools
    {
        /// <summary>
        /// Sanitizes <paramref name="html"/> with the standard rule set: the default
        /// <see cref="HtmlSanitizer"/> allow-lists minus the form/frame related tags,
        /// the <c>id</c>/<c>alt</c> attributes and the <c>font-family</c> CSS property,
        /// plus the <c>src</c>, <c>class</c> and <c>style</c> attributes.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <returns>The sanitized HTML.</returns>
        public static string HtmlSanitizerStandard(this string html)
        {
            return CreateBaseSanitizer().Sanitize(html);
        }

        /// <summary>
        /// Sanitizes <paramref name="html"/> with the standard rule set and additionally
        /// removes the given tags, attributes and CSS properties from the allow-lists.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <param name="labels">Tag names to remove from the allowed tags; may be null.</param>
        /// <param name="attributes">Attribute names to remove from the allowed attributes; may be null.</param>
        /// <param name="styles">CSS property names to remove from the allowed properties; may be null.</param>
        /// <returns>The sanitized HTML.</returns>
        public static string HtmlSanitizerCustom(this string html, string[] labels = null, string[] attributes = null, string[] styles = null)
        {
            var sanitizer = CreateBaseSanitizer();

            if (labels != null)
            {
                foreach (string label in labels)
                {
                    sanitizer.AllowedTags.Remove(label);
                }
            }

            if (attributes != null)
            {
                foreach (string attr in attributes)
                {
                    sanitizer.AllowedAttributes.Remove(attr);
                }
            }

            if (styles != null)
            {
                foreach (string p in styles)
                {
                    sanitizer.AllowedCssProperties.Remove(p);
                }
            }

            return sanitizer.Sanitize(html);
        }

        /// <summary>
        /// Returns the text content of the HTML body, optionally truncated.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <param name="length">Maximum number of characters to return; values &lt;= 0 disable truncation.</param>
        /// <returns>The body text, cut to <paramref name="length"/> characters when it is longer.</returns>
        public static string RemoveHtmlTag(this string html, int length = 0)
        {
            var strText = ParseDocument(html).Body.TextContent;
            if (length > 0 && strText.Length > length)
            {
                return strText.Substring(0, length);
            }

            return strText;
        }

        /// <summary>
        /// Prefixes img src paths in <paramref name="html"/> with <paramref name="imgDest"/>.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <param name="imgDest">Prefix placed in front of each img src path.</param>
        /// <returns>The HTML with rewritten img src values.</returns>
        // NOTE(review): the string literals of this method were truncated in the reviewed
        // source (everything after the opening quote was lost); the arguments below are a
        // reconstruction - confirm against the upstream file before merging.
        public static string ReplaceHtmlImgSource(this string html, string imgDest) => html.Replace("<img src=\"", "<img src=\"" + imgDest + "/");

        /// <summary>
        /// Rewrites absolute img src URLs to root-relative paths.
        /// </summary>
        /// <param name="s">Source HTML.</param>
        /// <returns>The HTML with rewritten img src values.</returns>
        // NOTE(review): the regex pattern and replacement were truncated in the reviewed
        // source; the values below are a reconstruction - confirm against the upstream file
        // (in particular whether https URLs are meant to be matched as well).
        public static string ConvertImgSrcToRelativePath(this string s)
        {
            return Regex.Replace(s, @"<img src=""(http:\/\/.+?)/", @"<img src=""/");
        }

        /// <summary>
        /// Parses <paramref name="html"/> and returns every img element found under the body.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <returns>The collection of img elements.</returns>
        public static IHtmlCollection<IElement> MatchImgTags(this string html)
        {
            return ParseDocument(html).Body.GetElementsByTagName("img");
        }

        /// <summary>
        /// Returns the src attribute of every img element that has one.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <returns>A lazily evaluated sequence of src values.</returns>
        public static IEnumerable<string> MatchImgSrcs(this string html)
        {
            return MatchImgTags(html).Where(n => n.HasAttribute("src")).Select(n => n.GetAttribute("src"));
        }

        /// <summary>
        /// Returns the src of the first img element that has a src attribute.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <returns>The src value, or null when no img element carries a src attribute.</returns>
        public static string MatchFirstImgSrc(this string html)
        {
            return MatchImgSrcs(html).FirstOrDefault();
        }

        /// <summary>
        /// Returns the src of one img element picked via <c>OrderByRandom</c>.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <returns>The src value, or null when no img element carries a src attribute.</returns>
        public static string MatchRandomImgSrc(this string html)
        {
            // OrderByRandom is declared elsewhere in the project; presumably it shuffles the sequence.
            return MatchImgSrcs(html).OrderByRandom().FirstOrDefault();
        }

        /// <summary>
        /// Replaces <c>,</c> with <c>&amp;def</c>, <c>'</c> with <c>&amp;dot</c> and <c>;</c> with <c>&amp;dec</c>.
        /// </summary>
        /// <param name="strHtml">Text to encode.</param>
        /// <returns>The encoded text, or an empty string when <paramref name="strHtml"/> is null or empty.</returns>
        public static string EncodeHtml(this string strHtml)
        {
            // A null input used to fall through the != "" check and throw NullReferenceException.
            if (string.IsNullOrEmpty(strHtml))
            {
                return "";
            }

            return strHtml.Replace(",", "&def").Replace("'", "&dot").Replace(";", "&dec");
        }

        /// <summary>
        /// Builds the sanitizer configuration shared by the standard and custom sanitizers.
        /// </summary>
        private static HtmlSanitizer CreateBaseSanitizer()
        {
            var sanitizer = new HtmlSanitizer
            {
                KeepChildNodes = true
            };

            sanitizer.AllowedAttributes.Remove("id");
            sanitizer.AllowedAttributes.Remove("alt");
            sanitizer.AllowedCssProperties.Remove("font-family");

            sanitizer.AllowedTags.Remove("input");
            sanitizer.AllowedTags.Remove("button");
            sanitizer.AllowedTags.Remove("iframe");
            sanitizer.AllowedTags.Remove("frame");
            sanitizer.AllowedTags.Remove("textarea");
            sanitizer.AllowedTags.Remove("select");
            sanitizer.AllowedTags.Remove("form");

            sanitizer.AllowedAttributes.Add("src");
            sanitizer.AllowedAttributes.Add("class");
            sanitizer.AllowedAttributes.Add("style");
            return sanitizer;
        }

        /// <summary>
        /// Parses an HTML string into an AngleSharp document using the default configuration.
        /// </summary>
        private static IDocument ParseDocument(string html)
        {
            var context = BrowsingContext.New(Configuration.Default);

            // Blocks on the task exactly as the original code did, so callers observe the same exceptions.
            return context.OpenAsync(req => req.Content(html)).Result;
        }
    }
}