using Ganss.XSS;
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;

namespace Masuit.Tools.Html
{
    /// <summary>
    /// HTML helper extensions: XSS sanitizing, tag stripping, cleanup of Word-exported HTML,
    /// and lookups of <c>img</c> elements and their <c>src</c> values.
    /// </summary>
    public static partial class HtmlTools
    {
        /// <summary>
        /// Shared sanitizer used by <see cref="HtmlSantinizerStandard"/>. It is configured once and
        /// never modified afterwards by the code in this file.
        /// </summary>
        private static readonly HtmlSanitizer Sanitizer = CreateDefaultSanitizer();

        /// <summary>
        /// Builds a sanitizer with the baseline rule set used throughout this class.
        /// </summary>
        /// <returns>A new, independently configurable <see cref="HtmlSanitizer"/>.</returns>
        private static HtmlSanitizer CreateDefaultSanitizer()
        {
            var sanitizer = new HtmlSanitizer();

            sanitizer.AllowedAttributes.Remove("id");
            sanitizer.AllowedAttributes.Remove("alt");
            sanitizer.AllowedCssProperties.Remove("font-family");
            sanitizer.AllowedCssProperties.Remove("background-color");
            sanitizer.KeepChildNodes = true;

            // Form controls and frames are never allowed through.
            sanitizer.AllowedTags.Remove("input");
            sanitizer.AllowedTags.Remove("button");
            sanitizer.AllowedTags.Remove("iframe");
            sanitizer.AllowedTags.Remove("frame");
            sanitizer.AllowedTags.Remove("textarea");
            sanitizer.AllowedTags.Remove("select");
            sanitizer.AllowedTags.Remove("form");

            sanitizer.AllowedAttributes.Add("src");
            sanitizer.AllowedAttributes.Add("class");
            sanitizer.AllowedAttributes.Add("style");

            return sanitizer;
        }

        /// <summary>
        /// Sanitizes HTML against XSS using the baseline rule set.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <returns>The sanitized HTML.</returns>
        public static string HtmlSantinizerStandard(this string html)
        {
            return Sanitizer.Sanitize(html);
        }

        /// <summary>
        /// Sanitizes HTML against XSS using the baseline rule set with extra tags, attributes
        /// and CSS properties disallowed for this call only.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <param name="labels">Tag names to disallow in addition to the baseline; may be null.</param>
        /// <param name="attributes">Attribute names to disallow in addition to the baseline; may be null.</param>
        /// <param name="styles">CSS property names to disallow in addition to the baseline; may be null.</param>
        /// <returns>The sanitized HTML.</returns>
        public static string HtmlSantinizerCustom(this string html, string[] labels = null, string[] attributes = null, string[] styles = null)
        {
            // Work on a private instance: removing entries from the shared sanitizer would
            // permanently tighten the rules for every later call, including
            // HtmlSantinizerStandard, and would mutate shared state under concurrent callers.
            var sanitizer = CreateDefaultSanitizer();

            if (labels != null)
            {
                foreach (string label in labels)
                {
                    sanitizer.AllowedTags.Remove(label);
                }
            }

            if (attributes != null)
            {
                foreach (string attr in attributes)
                {
                    sanitizer.AllowedAttributes.Remove(attr);
                }
            }

            if (styles != null)
            {
                foreach (string property in styles)
                {
                    sanitizer.AllowedCssProperties.Remove(property);
                }
            }

            return sanitizer.Sanitize(html);
        }

        /// <summary>
        /// Strips HTML tags and optionally truncates the remaining text.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <param name="length">Maximum number of characters to keep; zero or a negative value disables truncation.</param>
        /// <returns>The document's inner text, cut to <paramref name="length"/> characters when it is longer.</returns>
        public static string RemoveHtmlTag(this string html, int length = 0)
        {
            var doc = new HtmlDocument();
            doc.LoadHtml(html);
            string text = doc.DocumentNode.InnerText;

            bool mustTruncate = length > 0 && text.Length > length;
            return mustTruncate ? text.Substring(0, length) : text;
        }

        /// <summary>
        /// Cleans up redundant tags and attributes left in HTML converted from a Word document.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <returns>
        /// The cleaned content of the <c>body</c> element, or an empty string when no
        /// <c>body</c> element is matched.
        /// </returns>
        public static string ClearHtml(this string html)
        {
            // NOTE(review): the reviewed copy of this file had markup stripped out of its string
            // literals. The "<body[^>" prefix of the body pattern and the "&nbsp;" literal below
            // are reconstructions - confirm against version control.
            string withoutFonts = Regex.Replace(html, @"background-color:#?\w{3,7}|font-family:'?[\w|\(|\)]*'?;?", string.Empty);
            string s = Regex.Match(withoutFonts, @"<body[^>]*>([\s\S]*)<\/body>").Groups[1].Value.Replace("&nbsp;", string.Empty);

            s = Regex.Replace(s, @"\w+-?\w+:0\w+;?", string.Empty); // remove redundant zero-valued properties
            s = Regex.Replace(s, "alt=\"(.+?)\"", string.Empty); // remove alt attributes
            s = Regex.Replace(s, @"-aw.+?\s", string.Empty); // remove the -aw attributes produced by Word
            return s;
        }

        /// <summary>
        /// Rewrites the image paths in the HTML to absolute paths under <paramref name="imgDest"/>.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <param name="imgDest">Prefix placed in front of each image path.</param>
        /// <returns>The HTML with rewritten image sources.</returns>
        public static string ReplaceHtmlImgSource(this string html, string imgDest)
        {
            // NOTE(review): both string literals of this method were lost in the reviewed copy
            // (markup stripped). This body is a reconstruction from the signature and the
            // original summary - confirm against version control before relying on it.
            return html.Replace("<img src=\"", "<img src=\"" + imgDest + "/");
        }

        /// <summary>
        /// Converts absolute image <c>src</c> values to relative paths.
        /// </summary>
        /// <param name="s">Source HTML.</param>
        /// <returns>The HTML with the scheme and host removed from image sources.</returns>
        public static string ConvertImgSrcToRelativePath(this string s)
        {
            // NOTE(review): the pattern and replacement were lost in the reviewed copy (markup
            // stripped). This body is a reconstruction from the signature and the original
            // summary - confirm against version control before relying on it.
            return Regex.Replace(s, @"<img src=""(http:\/\/.+?)/", @"<img src=""/");
        }

        /// <summary>
        /// Finds every <c>img</c> element in the HTML.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <returns>The <c>img</c> nodes of the parsed document.</returns>
        public static IEnumerable<HtmlNode> MatchImgTags(this string html)
        {
            var doc = new HtmlDocument();
            doc.LoadHtml(html);
            return doc.DocumentNode.Descendants("img");
        }

        /// <summary>
        /// Collects the <c>src</c> value of every <c>img</c> element that has one.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <returns>The <c>src</c> attribute values, in document order.</returns>
        public static IEnumerable<string> MatchImgSrcs(this string html)
        {
            return MatchImgTags(html)
                .Where(n => n.Attributes.Contains("src"))
                .Select(n => n.Attributes["src"].Value);
        }

        /// <summary>
        /// Gets the <c>src</c> of the first <c>img</c> element in the HTML.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <returns>The first <c>src</c> value, or null when there is none.</returns>
        public static string MatchFirstImgSrc(this string html)
        {
            return MatchImgSrcs(html).FirstOrDefault();
        }

        /// <summary>
        /// Picks the <c>src</c> of a randomly chosen <c>img</c> element in the HTML.
        /// </summary>
        /// <param name="html">Source HTML.</param>
        /// <returns>A randomly selected <c>src</c> value, or null when there is none.</returns>
        public static string MatchRandomImgSrc(this string html)
        {
            // Materialize once: MatchImgSrcs is lazy and re-parses the document on every enumeration.
            List<string> sources = MatchImgSrcs(html).ToList();
            if (sources.Count == 0)
            {
                return null;
            }

            var rnd = new Random();
            return sources[rnd.Next(sources.Count)];
        }

        /// <summary>
        /// Replaces carriage-return/line-feed sequences with HTML line breaks.
        /// </summary>
        /// <param name="str">Source text.</param>
        /// <returns>The text with line breaks converted; null or empty input is returned unchanged.</returns>
        public static string StrFormat(this string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return str;
            }

            // NOTE(review): the replacement literal was lost in the reviewed copy (markup
            // stripped); "<br />" is a reconstruction - confirm the exact form.
            // "\r\n" is handled first so a Windows line ending yields a single break.
            return str.Replace("\r\n", "<br />").Replace("\n", "<br />");
        }

        /// <summary>
        /// Replaces the characters <c>,</c> <c>'</c> and <c>;</c> with the placeholder tokens
        /// <c>&amp;def</c>, <c>&amp;dot</c> and <c>&amp;dec</c>.
        /// </summary>
        /// <param name="strHtml">Source text.</param>
        /// <returns>The encoded text, or an empty string when the input is null or empty.</returns>
        public static string EncodeHtml(this string strHtml)
        {
            if (string.IsNullOrEmpty(strHtml))
            {
                return "";
            }

            return strHtml
                .Replace(",", "&def")
                .Replace("'", "&dot")
                .Replace(";", "&dec");
        }
    }
}