using AngleSharp;
using AngleSharp.Dom;
using Ganss.XSS;
using Masuit.Tools.RandomSelector;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
namespace Masuit.Tools.Html
{
///
/// html工具类
///
public static partial class HtmlTools
{
private static readonly HtmlSanitizer Sanitizer = new HtmlSanitizer();
static HtmlTools()
{
Sanitizer.AllowedAttributes.Remove("id");
Sanitizer.AllowedAttributes.Remove("alt");
Sanitizer.AllowedCssProperties.Remove("font-family");
Sanitizer.AllowedCssProperties.Remove("background-color");
Sanitizer.KeepChildNodes = true;
Sanitizer.AllowedTags.Remove("input");
Sanitizer.AllowedTags.Remove("button");
Sanitizer.AllowedTags.Remove("iframe");
Sanitizer.AllowedTags.Remove("frame");
Sanitizer.AllowedTags.Remove("textarea");
Sanitizer.AllowedTags.Remove("select");
Sanitizer.AllowedTags.Remove("form");
Sanitizer.AllowedAttributes.Add("src");
Sanitizer.AllowedAttributes.Add("class");
Sanitizer.AllowedAttributes.Add("style");
}
///
/// 标准的防止html的xss净化器
///
///
///
public static string HtmlSantinizerStandard(this string html)
{
return Sanitizer.Sanitize(html);
}
///
/// 自定义的防止html的xss净化器
///
/// 源html
/// 需要移除的标签集合
/// 需要移除的属性集合
/// 需要移除的样式集合
///
public static string HtmlSantinizerCustom(this string html, string[] labels = null, string[] attributes = null, string[] styles = null)
{
if (labels != null)
{
foreach (string label in labels)
{
Sanitizer.AllowedTags.Remove(label);
}
}
if (attributes != null)
{
foreach (string attr in attributes)
{
Sanitizer.AllowedAttributes.Remove(attr);
}
}
if (styles != null)
{
foreach (string p in styles)
{
Sanitizer.AllowedCssProperties.Remove(p);
}
}
Sanitizer.KeepChildNodes = true;
return Sanitizer.Sanitize(html);
}
///
/// 去除html标签后并截取字符串
///
/// 源html
/// 截取长度
///
public static string RemoveHtmlTag(this string html, int length = 0)
{
var context = BrowsingContext.New(Configuration.Default);
var doc = context.OpenAsync(req => req.Content(html)).Result;
var strText = doc.Body.TextContent;
if (length > 0 && strText.Length > length)
{
return strText.Substring(0, length);
}
return strText;
}
///
/// 替换html的img路径为绝对路径
///
///
///
///
public static string ReplaceHtmlImgSource(this string html, string imgDest) => html.Replace("
/// 将src的绝对路径换成相对路径
///
///
///
public static string ConvertImgSrcToRelativePath(this string s)
{
return Regex.Replace(s, @"
/// 匹配html的所有img标签集合
///
///
///
public static IHtmlCollection MatchImgTags(this string html)
{
var context = BrowsingContext.New(Configuration.Default);
var doc = context.OpenAsync(req => req.Content(html)).Result;
return doc.Body.GetElementsByTagName("img");
}
///
/// 匹配html的所有img标签的src集合
///
///
///
public static IEnumerable MatchImgSrcs(this string html)
{
return MatchImgTags(html).Where(n => n.HasAttribute("src")).Select(n => n.GetAttribute("src"));
}
///
/// 获取html中第一个img标签的src
///
///
///
public static string MatchFirstImgSrc(this string html)
{
return MatchImgSrcs(html).FirstOrDefault();
}
///
/// 随机获取html代码中的img标签的src属性
///
///
///
public static string MatchRandomImgSrc(this string html)
{
var srcs = MatchImgSrcs(html).ToList();
var rnd = new Random();
return srcs.Count > 0 ? srcs[rnd.Next(srcs.Count)] : default;
}
///
/// 按顺序优先获取html代码中的img标签的src属性
///
///
///
public static string MatchSeqRandomImgSrc(this string html)
{
var srcs = MatchImgSrcs(html).ToList();
return srcs.Count > 0 ? srcs.Select((s, i) => new WeightedItem(s, srcs.Count - i)).WeightedItem() : default;
}
///
/// 替换回车换行符为html换行符
///
/// html
public static string StrFormat(this string str)
{
return str.Replace("\r\n", "
").Replace("\n", "
");
}
///
/// 替换html字符
///
/// html
public static string EncodeHtml(this string strHtml)
{
if (strHtml != "")
{
return strHtml.Replace(",", "&def").Replace("'", "&dot").Replace(";", "&dec");
}
return "";
}
}
}