using Ganss.XSS;
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
namespace Masuit.Tools.Html
{
/// <summary>
/// HTML utility class.
/// </summary>
public static partial class HtmlTools
{
/// <summary>
/// Sanitizer instance shared by the sanitizing extension methods of this class.
/// </summary>
private static readonly HtmlSanitizer Sanitizer = new HtmlSanitizer();

/// <summary>
/// Configures the shared sanitizer once: trims its allowed tag, attribute and
/// CSS property lists and makes sure src/class/style attributes are allowed.
/// </summary>
static HtmlTools()
{
    // Form controls and frames are dropped from the allowed tags.
    foreach (var tag in new[] { "input", "button", "iframe", "frame", "textarea", "select", "form" })
    {
        Sanitizer.AllowedTags.Remove(tag);
    }

    foreach (var attribute in new[] { "id", "alt" })
    {
        Sanitizer.AllowedAttributes.Remove(attribute);
    }

    foreach (var attribute in new[] { "src", "class", "style" })
    {
        Sanitizer.AllowedAttributes.Add(attribute);
    }

    foreach (var property in new[] { "font-family", "background-color" })
    {
        Sanitizer.AllowedCssProperties.Remove(property);
    }

    // Keep the children of elements that get removed instead of dropping them too.
    Sanitizer.KeepChildNodes = true;
}
/// <summary>
/// Standard XSS sanitizer for html: runs the input through the shared sanitizer of this class.
/// </summary>
/// <param name="html">The html to sanitize.</param>
/// <returns>The sanitized html produced by the shared sanitizer.</returns>
public static string HtmlSantinizerStandard(this string html) => Sanitizer.Sanitize(html);
/// <summary>
/// Customisable XSS sanitizer for html: starts from the configuration of the shared
/// sanitizer and additionally disallows the given tags, attributes and CSS properties
/// for this call only.
/// </summary>
/// <param name="html">Source html.</param>
/// <param name="labels">Tags to remove from the allowed set; <c>null</c> removes none.</param>
/// <param name="attributes">Attributes to remove from the allowed set; <c>null</c> removes none.</param>
/// <param name="styles">CSS properties to remove from the allowed set; <c>null</c> removes none.</param>
/// <returns>The sanitized html.</returns>
public static string HtmlSantinizerCustom(this string html, string[] labels = null, string[] attributes = null, string[] styles = null)
{
    // Use a per-call sanitizer. Removing entries from the shared instance would
    // permanently change what HtmlSantinizerStandard and later calls allow.
    var sanitizer = new HtmlSanitizer();
    CopyAllowedExcept(Sanitizer.AllowedTags, sanitizer.AllowedTags, labels);
    CopyAllowedExcept(Sanitizer.AllowedAttributes, sanitizer.AllowedAttributes, attributes);
    CopyAllowedExcept(Sanitizer.AllowedCssProperties, sanitizer.AllowedCssProperties, styles);
    sanitizer.KeepChildNodes = true;
    return sanitizer.Sanitize(html);
}

/// <summary>
/// Makes <paramref name="target"/> contain the items of <paramref name="baseline"/>
/// minus the items listed in <paramref name="removed"/>.
/// </summary>
private static void CopyAllowedExcept(IEnumerable<string> baseline, ICollection<string> target, IEnumerable<string> removed)
{
    target.Clear();
    foreach (string item in baseline)
    {
        target.Add(item);
    }

    if (removed == null)
    {
        return;
    }

    // Remove through the target collection so its own comparer decides what matches.
    foreach (string item in removed)
    {
        target.Remove(item);
    }
}
/// <summary>
/// Strips the html tags from the input and optionally truncates the remaining text.
/// </summary>
/// <param name="html">Source html.</param>
/// <param name="length">Maximum length of the result; values of 0 or less disable truncation.</param>
/// <returns>The inner text of the parsed document, cut to <paramref name="length"/> characters when it is longer.</returns>
public static string RemoveHtmlTag(this string html, int length = 0)
{
    var document = new HtmlDocument();
    document.LoadHtml(html);

    string text = document.DocumentNode.InnerText;
    bool mustTruncate = length > 0 && text.Length > length;
    return mustTruncate ? text.Substring(0, length) : text;
}
/// <summary>
/// Cleans up redundant tags and attributes in html that was converted from a Word document.
/// </summary>
/// <param name="html">The html to clean.</param>
/// <returns>
/// Capture group 1 of the second pattern (an empty string when that pattern does not match),
/// after the replacements below have been applied.
/// </returns>
public static string ClearHtml(this string html)
{
// Step 1: drop background-color and font-family declarations, then take capture group 1 of the second pattern.
// NOTE(review): the second pattern starts with a line break followed by "]*>", which looks like the
// remains of a truncated opening-tag pattern (presumably "<body[^>]*>") - confirm against the upstream source.
// NOTE(review): Replace(" ", ...) removes every space from the captured text; presumably this was meant to
// remove a non-breaking-space entity - confirm.
string s = Regex.Match(Regex.Replace(html, @"background-color:#?\w{3,7}|font-family:'?[\w|\(|\)]*'?;?", string.Empty), @"
]*>([\s\S]*)<\/body>").Groups[1].Value.Replace(" ", string.Empty);
s = Regex.Replace(s, @"\w+-?\w+:0\w+;?", string.Empty); // remove redundant zero-valued properties
s = Regex.Replace(s, "alt=\"(.+?)\"", string.Empty); // remove alt attributes
s = Regex.Replace(s, @"-aw.+?\s", string.Empty); // remove the -aw attributes generated by Word
return s;
}
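/// <summary>
/// Replaces the img paths in the html with absolute paths.
/// </summary>
/// <param name="html">The html whose img paths are replaced.</param>
/// <param name="imgDest">The destination used for the img paths.</param>
/// <returns>The html with the replaced img paths.</returns>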
public static string ReplaceHtmlImgSource(this string html, string imgDest) => html.Replace("
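/// <summary>
/// Converts absolute src paths to relative paths.
/// </summary>
/// <param name="s">The input string.</param>
/// <returns>The string with the converted src paths.</returns>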
public static string ConvertImgSrcToRelativePath(this string s)
{
return Regex.Replace(s, @"
/// <summary>
/// Finds all img tags in the html.
/// </summary>
/// <param name="html">The html to parse.</param>
/// <returns>The img descendants of the parsed document.</returns>
public static IEnumerable<HtmlNode> MatchImgTags(this string html)
{
    var doc = new HtmlDocument();
    doc.LoadHtml(html);
    return doc.DocumentNode.Descendants("img");
}
/// <summary>
/// Finds the src values of all img tags in the html.
/// </summary>
/// <param name="html">The html to parse.</param>
/// <returns>The src attribute value of every img tag that has a src attribute.</returns>
public static IEnumerable<string> MatchImgSrcs(this string html)
{
    return MatchImgTags(html)
        .Where(n => n.Attributes.Contains("src")) // img tags without a src attribute are skipped
        .Select(n => n.Attributes["src"].Value);
}
/// <summary>
/// Gets the src of the first img tag in the html.
/// </summary>
/// <param name="html">The html to parse.</param>
/// <returns>The first src value, or <c>null</c> when there is none.</returns>
public static string MatchFirstImgSrc(this string html) => html.MatchImgSrcs().FirstOrDefault();
/// <summary>
/// Picks the src of a randomly chosen img tag in the html.
/// </summary>
/// <param name="html">The html to parse.</param>
/// <returns>One of the src values, or <c>null</c> when the html has no img tag with a src attribute.</returns>
public static string MatchRandomImgSrc(this string html)
{
    // Materialise once: enumerating MatchImgSrcs twice would parse the html twice.
    var srcs = MatchImgSrcs(html).ToList();
    if (srcs.Count == 0)
    {
        return null;
    }

    var rnd = new Random();
    return srcs[rnd.Next(srcs.Count)];
}
/// <summary>
/// Replaces carriage-return/line-feed and line-feed line breaks with the html line-break tag (&lt;br /&gt;).
/// </summary>
/// <param name="str">The html text.</param>
/// <returns>The text with every "\r\n" and every remaining "\n" replaced by the line-break tag.</returns>
public static string StrFormat(this string str)
{
    // "\r\n" goes first so a Windows line ending yields a single tag rather than a stray "\r" plus a tag.
    return str.Replace("\r\n", "<br />").Replace("\n", "<br />");
}
/// <summary>
/// Replaces selected characters in the html: "," becomes "&amp;def", "'" becomes "&amp;dot" and ";" becomes "&amp;dec".
/// </summary>
/// <param name="strHtml">The html text.</param>
/// <returns>The text with the replacements applied; an empty string for <c>null</c> or empty input.</returns>
public static string EncodeHtml(this string strHtml)
{
    // A null input used to pass the != "" check and then fail on Replace; treat it like empty input.
    if (string.IsNullOrEmpty(strHtml))
    {
        return "";
    }

    return strHtml
        .Replace(",", "&def")
        .Replace("'", "&dot")
        .Replace(";", "&dec");
}
}
}