using AngleSharp;
using AngleSharp.Css;
using AngleSharp.Css.Dom;
using AngleSharp.Css.Parser;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using AngleSharp.Html.Parser;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace Ganss.Xss;

/// <summary>
/// Cleans HTML documents and fragments from constructs that can lead to XSS attacks.
/// </summary>
/// <remarks>
/// XSS attacks can occur at several levels within an HTML document or fragment:
/// <list type="bullet">
/// <item>HTML tags (e.g. the &lt;script&gt; tag)</item>
/// <item>HTML attributes (e.g. the "onload" attribute)</item>
/// <item>CSS styles (url property values)</item>
/// <item>malformed HTML or HTML that exploits parser bugs in specific browsers</item>
/// </list>
/// <para>
/// The HtmlSanitizer class addresses all of these possible attack vectors by using a sophisticated HTML parser (AngleSharp).
/// </para>
/// <para>
/// In order to facilitate different use cases, HtmlSanitizer can be customized at the levels mentioned above:
/// <list type="bullet">
/// <item>You can specify the allowed HTML tags through the property <see cref="AllowedTags"/>. All other tags will be stripped.</item>
/// <item>You can specify the allowed HTML attributes through the property <see cref="AllowedAttributes"/>. All other attributes will be stripped.</item>
/// <item>You can specify the allowed CSS property names through the property <see cref="AllowedCssProperties"/>. All other styles will be stripped.</item>
/// <item>You can specify the allowed URI schemes through the property <see cref="AllowedSchemes"/>. All other URIs will be stripped.</item>
/// <item>You can specify the HTML attributes that contain URIs (such as "src", "href" etc.) through the property <see cref="UriAttributes"/>.</item>
/// </list>
/// </para>
/// </remarks>
/// <example>
/// <code>
/// <![CDATA[
/// var sanitizer = new HtmlSanitizer();
/// var html = @"<script>alert('xss')</script><div onload=""alert('xss')"">Test</div>";
/// var sanitized = sanitizer.Sanitize(html, "http://www.example.com");
/// // -> "<div>Test</div>"
/// ]]>
/// </code>
/// </example>
public class HtmlSanitizer : IHtmlSanitizer
{
    private const string StyleAttributeName = "style";

    // from http://genshi.edgewall.org/
    private static readonly Regex CssUnicodeEscapes = new(@"\\([0-9a-fA-F]{1,6})\s?|\\([^\r\n\f0-9a-fA-F'""{};:()#*])", RegexOptions.Compiled);
    private static readonly Regex CssComments = new(@"/\*.*?\*/", RegexOptions.Compiled);
    // IE6 "expression" keyword, including full-width and small-capital look-alike letters
    private static readonly Regex CssExpression = new(@"[eE\uFF25\uFF45][xX\uFF38\uFF58][pP\uFF30\uFF50][rR\u0280\uFF32\uFF52][eE\uFF25\uFF45][sS\uFF33\uFF53]{2}[iI\u026A\uFF29\uFF49][oO\uFF2F\uFF4F][nN\u0274\uFF2E\uFF4E]", RegexOptions.Compiled);
    private static readonly Regex CssUrl = new(@"[Uu][Rr\u0280][Ll\u029F]\((['""]?)([^'"")]+)(['""]?)", RegexOptions.Compiled);
    private static readonly Regex WhitespaceRegex = new(@"\s*", RegexOptions.Compiled);

    private static readonly IConfiguration defaultConfiguration = Configuration.Default.WithCss(new CssParserOptions
    {
        IsIncludingUnknownDeclarations = true,
        IsIncludingUnknownRules = true,
        IsToleratingInvalidSelectors = true,
    });

    private static readonly HtmlParser defaultHtmlParser = new(new HtmlParserOptions { IsScripting = true }, BrowsingContext.New(defaultConfiguration));

    /// <summary>
    /// Initializes a new instance of the <see cref="HtmlSanitizer"/> class
    /// with the default options.
    /// </summary>
    public HtmlSanitizer()
    {
        AllowedTags = new HashSet<string>(HtmlSanitizerDefaults.AllowedTags, StringComparer.OrdinalIgnoreCase);
        AllowedSchemes = new HashSet<string>(HtmlSanitizerDefaults.AllowedSchemes, StringComparer.OrdinalIgnoreCase);
        AllowedAttributes = new HashSet<string>(HtmlSanitizerDefaults.AllowedAttributes, StringComparer.OrdinalIgnoreCase);
        UriAttributes = new HashSet<string>(HtmlSanitizerDefaults.UriAttributes, StringComparer.OrdinalIgnoreCase);
        AllowedCssProperties = new HashSet<string>(HtmlSanitizerDefaults.AllowedCssProperties, StringComparer.OrdinalIgnoreCase);
        AllowedAtRules = new HashSet<CssRuleType>(HtmlSanitizerDefaults.AllowedAtRules);
        AllowedClasses = new HashSet<string>(HtmlSanitizerDefaults.AllowedClasses);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="HtmlSanitizer"/> class
    /// with the given options.
    /// </summary>
    /// <param name="options">Options to control the sanitizing.</param>
    public HtmlSanitizer(HtmlSanitizerOptions options)
    {
        AllowedTags = new HashSet<string>(options.AllowedTags, StringComparer.OrdinalIgnoreCase);
        AllowedSchemes = new HashSet<string>(options.AllowedSchemes, StringComparer.OrdinalIgnoreCase);
        AllowedAttributes = new HashSet<string>(options.AllowedAttributes, StringComparer.OrdinalIgnoreCase);
        UriAttributes = new HashSet<string>(options.UriAttributes, StringComparer.OrdinalIgnoreCase);
        AllowedClasses = new HashSet<string>(options.AllowedCssClasses, StringComparer.OrdinalIgnoreCase);
        AllowedCssProperties = new HashSet<string>(options.AllowedCssProperties, StringComparer.OrdinalIgnoreCase);
        AllowedAtRules = new HashSet<CssRuleType>(options.AllowedAtRules);
        AllowCssCustomProperties = options.AllowCssCustomProperties;
        AllowDataAttributes = options.AllowDataAttributes;
    }

    /// <summary>
    /// Gets or sets the method that encodes comments. It is invoked for every comment
    /// before the <see cref="RemovingComment"/> event is raised.
    /// </summary>
    public Action<IComment> EncodeComment { get; set; } = DefaultEncodeComment;

    /// <summary>
    /// Gets or sets the method that encodes the content of literal text (raw data) elements.
    /// </summary>
    public Action<IElement> EncodeLiteralTextElementContent { get; set; } = DefaultEncodeLiteralTextElementContent;

    /// <summary>
    /// Gets or sets the default value indicating whether to keep child nodes of elements that are removed.
    /// Default is false.
    /// </summary>
    public static bool DefaultKeepChildNodes { get; set; } = false;

    /// <summary>
    /// Gets or sets a value indicating whether to keep child nodes of elements that are removed.
    /// Default is <see cref="DefaultKeepChildNodes"/>.
    /// </summary>
    public bool KeepChildNodes { get; set; } = DefaultKeepChildNodes;

    /// <summary>
    /// Gets or sets the default <see cref="Func{IHtmlParser}"/> object that creates the parser used for parsing the input.
    /// </summary>
    public static Func<IHtmlParser> DefaultHtmlParserFactory { get; set; } = () => defaultHtmlParser;

    /// <summary>
    /// Gets or sets the <see cref="Func{IHtmlParser}"/> object that creates the parser used for parsing the input.
    /// </summary>
    public Func<IHtmlParser> HtmlParserFactory { get; set; } = DefaultHtmlParserFactory;

    /// <summary>
    /// Gets or sets the default <see cref="IMarkupFormatter"/> object used for generating output.
    /// Default is <see cref="HtmlFormatter.Instance"/>.
    /// </summary>
    public static IMarkupFormatter DefaultOutputFormatter { get; set; } = HtmlFormatter.Instance;

    /// <summary>
    /// Gets or sets the <see cref="IMarkupFormatter"/> object used for generating output.
    /// Default is <see cref="DefaultOutputFormatter"/>.
    /// </summary>
    public IMarkupFormatter OutputFormatter { get; set; } = DefaultOutputFormatter;

    /// <summary>
    /// Gets or sets the default <see cref="IStyleFormatter"/> object used for generating CSS output.
    /// Default is <see cref="CssStyleFormatter.Instance"/>.
    /// </summary>
    public static IStyleFormatter DefaultStyleFormatter { get; set; } = CssStyleFormatter.Instance;

    /// <summary>
    /// Gets or sets the <see cref="IStyleFormatter"/> object used for generating CSS output.
    /// Default is <see cref="DefaultStyleFormatter"/>.
    /// </summary>
    public IStyleFormatter StyleFormatter { get; set; } = DefaultStyleFormatter;

    /// <summary>
    /// Gets the allowed CSS at-rules such as "@media" and "@font-face".
    /// </summary>
    /// <value>
    /// The allowed CSS at-rules.
    /// </value>
    public ISet<CssRuleType> AllowedAtRules { get; private set; }

    /// <summary>
    /// Gets the allowed URI schemes such as "http" and "https".
    /// </summary>
    /// <value>
    /// The allowed URI schemes.
    /// </value>
    public ISet<string> AllowedSchemes { get; private set; }

    /// <summary>
    /// Gets the allowed HTML tag names such as "a" and "div".
    /// </summary>
    /// <value>
    /// The allowed tag names.
    /// </value>
    public ISet<string> AllowedTags { get; private set; }

    /// <summary>
    /// Gets the allowed HTML attributes such as "href" and "alt".
    /// </summary>
    /// <value>
    /// The allowed HTML attributes.
    /// </value>
    public ISet<string> AllowedAttributes { get; private set; }

    /// <summary>
    /// Allow all HTML5 data attributes; the attributes prefixed with <c>data-</c>.
    /// </summary>
    public bool AllowDataAttributes { get; set; }

    /// <summary>
    /// Gets the HTML attributes that can contain a URI such as "href".
    /// </summary>
    /// <value>
    /// The URI attributes.
    /// </value>
    public ISet<string> UriAttributes { get; private set; }

    /// <summary>
    /// Gets the allowed CSS properties such as "font" and "margin".
    /// </summary>
    /// <value>
    /// The allowed CSS properties.
    /// </value>
    public ISet<string> AllowedCssProperties { get; private set; }

    /// <summary>
    /// Allow all custom CSS properties (variables) prefixed with <c>--</c>.
    /// </summary>
    public bool AllowCssCustomProperties { get; set; }

    /// <summary>
    /// Gets or sets a regex that must not match for legal CSS property values.
    /// </summary>
    /// <value>
    /// The regex.
    /// </value>
    public Regex DisallowCssPropertyValue { get; set; } = DefaultDisallowedCssPropertyValue;

    /// <summary>
    /// Gets the allowed CSS classes. If the set is empty, all classes will be allowed.
    /// </summary>
    /// <value>
    /// The allowed CSS classes. An empty set means all classes are allowed.
    /// </value>
    public ISet<string> AllowedClasses { get; private set; }

    /// <summary>
    /// Occurs after sanitizing the document and post processing nodes.
    /// </summary>
    public event EventHandler<PostProcessDomEventArgs>? PostProcessDom;

    /// <summary>
    /// Occurs for every node after sanitizing.
    /// </summary>
    public event EventHandler<PostProcessNodeEventArgs>? PostProcessNode;

    /// <summary>
    /// Occurs before a tag is removed.
    /// </summary>
    public event EventHandler<RemovingTagEventArgs>? RemovingTag;

    /// <summary>
    /// Occurs before an attribute is removed.
    /// </summary>
    public event EventHandler<RemovingAttributeEventArgs>? RemovingAttribute;

    /// <summary>
    /// Occurs before a style is removed.
    /// </summary>
    public event EventHandler<RemovingStyleEventArgs>? RemovingStyle;

    /// <summary>
    /// Occurs before an at-rule is removed.
    /// </summary>
    public event EventHandler<RemovingAtRuleEventArgs>? RemovingAtRule;

    /// <summary>
    /// Occurs before a comment is removed.
    /// </summary>
    public event EventHandler<RemovingCommentEventArgs>? RemovingComment;

    /// <summary>
    /// Occurs before a CSS class is removed.
    /// </summary>
    public event EventHandler<RemovingCssClassEventArgs>? RemovingCssClass;

    /// <summary>
    /// Occurs when a URL is being sanitized.
    /// </summary>
    public event EventHandler<FilterUrlEventArgs>? FilterUrl;

    /// <summary>
    /// Raises the <see cref="PostProcessDom"/> event.
    /// </summary>
    /// <param name="e">The <see cref="PostProcessDomEventArgs"/> instance containing the event data.</param>
    protected virtual void OnPostProcessDom(PostProcessDomEventArgs e)
    {
        PostProcessDom?.Invoke(this, e);
    }

    /// <summary>
    /// Raises the <see cref="PostProcessNode"/> event.
    /// </summary>
    /// <param name="e">The <see cref="PostProcessNodeEventArgs"/> instance containing the event data.</param>
    protected virtual void OnPostProcessNode(PostProcessNodeEventArgs e)
    {
        PostProcessNode?.Invoke(this, e);
    }

    /// <summary>
    /// Raises the <see cref="RemovingTag"/> event.
    /// </summary>
    /// <param name="e">The <see cref="RemovingTagEventArgs"/> instance containing the event data.</param>
    protected virtual void OnRemovingTag(RemovingTagEventArgs e)
    {
        RemovingTag?.Invoke(this, e);
    }

    /// <summary>
    /// Raises the <see cref="RemovingAttribute"/> event.
    /// </summary>
    /// <param name="e">The <see cref="RemovingAttributeEventArgs"/> instance containing the event data.</param>
    protected virtual void OnRemovingAttribute(RemovingAttributeEventArgs e)
    {
        RemovingAttribute?.Invoke(this, e);
    }

    /// <summary>
    /// Raises the <see cref="RemovingStyle"/> event.
    /// </summary>
    /// <param name="e">The <see cref="RemovingStyleEventArgs"/> instance containing the event data.</param>
    protected virtual void OnRemovingStyle(RemovingStyleEventArgs e)
    {
        RemovingStyle?.Invoke(this, e);
    }

    /// <summary>
    /// Raises the <see cref="RemovingAtRule"/> event.
    /// </summary>
    /// <param name="e">The <see cref="RemovingAtRuleEventArgs"/> instance containing the event data.</param>
    protected virtual void OnRemovingAtRule(RemovingAtRuleEventArgs e)
    {
        RemovingAtRule?.Invoke(this, e);
    }

    /// <summary>
    /// Raises the <see cref="RemovingComment"/> event.
    /// </summary>
    /// <param name="e">The <see cref="RemovingCommentEventArgs"/> instance containing the event data.</param>
    protected virtual void OnRemovingComment(RemovingCommentEventArgs e)
    {
        RemovingComment?.Invoke(this, e);
    }

    /// <summary>
    /// The default regex for disallowed CSS property values.
    /// </summary>
    public static readonly Regex DefaultDisallowedCssPropertyValue = new(@"[<>]", RegexOptions.Compiled);

    /// <summary>
    /// Raises the <see cref="RemovingCssClass"/> event.
    /// </summary>
    /// <param name="e">The <see cref="RemovingCssClassEventArgs"/> instance containing the event data.</param>
    protected virtual void OnRemovingCssClass(RemovingCssClassEventArgs e)
    {
        RemovingCssClass?.Invoke(this, e);
    }

    /// <summary>
    /// Raises the <see cref="FilterUrl"/> event.
    /// </summary>
    /// <param name="e">The <see cref="FilterUrlEventArgs"/> instance containing the event data.</param>
    protected virtual void OnFilteringUrl(FilterUrlEventArgs e)
    {
        FilterUrl?.Invoke(this, e);
    }

    /// <summary>
    /// Return all nested subnodes of a node. The nodes are returned in DOM order.
    /// </summary>
    /// <param name="dom">The root node.</param>
    /// <returns>All nested subnodes.</returns>
    private static IEnumerable<INode> GetAllNodes(INode dom)
    {
        if (dom.ChildNodes.Length == 0) yield break;

        var s = new Stack<INode>();

        // Children are pushed in reverse so that the first child is popped first.
        for (var i = dom.ChildNodes.Length - 1; i >= 0; i--)
        {
            s.Push(dom.ChildNodes[i]);
        }

        while (s.Count > 0)
        {
            var n = s.Pop();

            yield return n;

            for (var i = n.ChildNodes.Length - 1; i >= 0; i--)
            {
                s.Push(n.ChildNodes[i]);
            }
        }
    }

    /// <summary>
    /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned.
    /// </summary>
    /// <param name="html">The HTML body fragment to sanitize.</param>
    /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
    /// <param name="outputFormatter">The formatter used to render the DOM. Using the <see cref="OutputFormatter"/> if null.</param>
    /// <returns>The sanitized HTML body fragment, or an empty string if the parsed document has no body.</returns>
    public string Sanitize(string html, string baseUrl = "", IMarkupFormatter? outputFormatter = null)
    {
        using var dom = SanitizeDom(html, baseUrl);

        if (dom.Body == null) return string.Empty;

        var output = dom.Body.ChildNodes.ToHtml(outputFormatter ?? OutputFormatter);

        return output;
    }

    /// <summary>
    /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned.
    /// </summary>
    /// <param name="html">The HTML body fragment to sanitize.</param>
    /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
    /// <returns>The sanitized HTML document.</returns>
    public IHtmlDocument SanitizeDom(string html, string baseUrl = "")
    {
        var parser = HtmlParserFactory();

        // Prefix the fragment with a minimal document prologue so it is parsed as body content.
        var dom = parser.ParseDocument("<!doctype html><html><body>" + html);

        if (dom.Body != null)
            DoSanitize(dom, dom.Body, baseUrl);

        return dom;
    }

    /// <summary>
    /// Sanitizes the specified parsed HTML body fragment.
    /// If the document has not been parsed with CSS support then all styles will be removed.
    /// </summary>
    /// <param name="document">The parsed HTML document.</param>
    /// <param name="context">The node within which to sanitize. The whole document is sanitized if null.</param>
    /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
    /// <returns>The sanitized HTML document (the same instance that was passed in).</returns>
    public IHtmlDocument SanitizeDom(IHtmlDocument document, IHtmlElement? context = null, string baseUrl = "")
    {
        DoSanitize(document, context ?? (IParentNode)document, baseUrl);
        return document;
    }

    /// <summary>
    /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned.
    /// </summary>
    /// <param name="html">The HTML document to sanitize.</param>
    /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
    /// <param name="outputFormatter">The formatter used to render the DOM. Using the <see cref="OutputFormatter"/> if null.</param>
    /// <returns>The sanitized HTML document.</returns>
    public string SanitizeDocument(string html, string baseUrl = "", IMarkupFormatter? outputFormatter = null)
    {
        var parser = HtmlParserFactory();
        using var dom = parser.ParseDocument(html);

        DoSanitize(dom, dom, baseUrl);

        var output = dom.ToHtml(outputFormatter ?? OutputFormatter);

        return output;
    }

    /// <summary>
    /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned.
    /// </summary>
    /// <param name="html">The HTML document to sanitize.</param>
    /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
    /// <param name="outputFormatter">The formatter used to render the DOM. Using the <see cref="OutputFormatter"/> if null.</param>
    /// <returns>The sanitized HTML document.</returns>
    public string SanitizeDocument(Stream html, string baseUrl = "", IMarkupFormatter? outputFormatter = null)
    {
        var parser = HtmlParserFactory();
        using var dom = parser.ParseDocument(html);

        DoSanitize(dom, dom, baseUrl);

        var output = dom.ToHtml(outputFormatter ?? OutputFormatter);

        return output;
    }

    /// <summary>
    /// Encodes and then removes all comment nodes below a node. Each removal can be
    /// cancelled through the <see cref="RemovingComment"/> event, in which case the
    /// (already encoded) comment stays in the document.
    /// </summary>
    /// <param name="context">The node within which to remove comments.</param>
    private void RemoveComments(INode context)
    {
        // Snapshot the comments first because removing nodes mutates the tree being walked.
        foreach (var comment in GetAllNodes(context).OfType<IComment>().ToList())
        {
            EncodeComment(comment);

            var e = new RemovingCommentEventArgs(comment);
            OnRemovingComment(e);

            if (!e.Cancel)
                comment.Remove();
        }
    }

    /// <summary>
    /// Default comment encoder: replaces angle brackets in the comment text with HTML entities.
    /// </summary>
    /// <param name="comment">The comment to encode.</param>
    private static void DefaultEncodeComment(IComment comment)
    {
        var escapedText = comment.TextContent.Replace("<", "&lt;").Replace(">", "&gt;");
        if (escapedText != comment.TextContent)
            comment.TextContent = escapedText;
    }

    /// <summary>
    /// Default encoder for literal text elements: replaces angle brackets in the element's
    /// inner HTML with HTML entities.
    /// </summary>
    /// <param name="tag">The literal text element to encode.</param>
    private static void DefaultEncodeLiteralTextElementContent(IElement tag)
    {
        var escapedHtml = tag.InnerHtml.Replace("<", "&lt;").Replace(">", "&gt;");
        if (escapedHtml != tag.InnerHtml)
            tag.InnerHtml = escapedHtml;
        if (tag.InnerHtml != escapedHtml) // setting InnerHtml does not work for noscript
            tag.SetInnerText(escapedHtml);
    }

    /// <summary>
    /// Runs the complete sanitizing pipeline: removes disallowed tags, encodes raw text content,
    /// sanitizes style sheets, attributes, URLs, inline styles and CSS classes, removes comments
    /// and finally runs post processing.
    /// </summary>
    /// <param name="dom">The document that owns <paramref name="context"/>.</param>
    /// <param name="context">The node within which to sanitize.</param>
    /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
    private void DoSanitize(IHtmlDocument dom, IParentNode context, string baseUrl = "")
    {
        // remove disallowed tags
        foreach (var tag in context.QuerySelectorAll("*").Where(t => !IsAllowedTag(t)).ToList())
        {
            RemoveTag(tag, RemoveReason.NotAllowedTag);
        }

        // always encode text in raw data content
        foreach (var tag in context.QuerySelectorAll("*")
            .Where(t => t is not IHtmlStyleElement
                && t.Flags.HasFlag(NodeFlags.LiteralText)
                && !string.IsNullOrWhiteSpace(t.InnerHtml)))
        {
            EncodeLiteralTextElementContent(tag);
        }

        SanitizeStyleSheets(dom, baseUrl);

        // cleanup attributes
        foreach (var tag in context.QuerySelectorAll("*").ToList())
        {
            // remove disallowed attributes
            foreach (var attribute in tag.Attributes.Where(a => !IsAllowedAttribute(a)).ToList())
            {
                RemoveAttribute(tag, attribute, RemoveReason.NotAllowedAttribute);
            }

            // sanitize URLs in URL-marked attributes
            foreach (var attribute in tag.Attributes.Where(IsUriAttribute).ToList())
            {
                var url = SanitizeUrl(tag, attribute.Value, baseUrl);

                if (url == null)
                    RemoveAttribute(tag, attribute, RemoveReason.NotAllowedUrlValue);
                else
                    tag.SetAttribute(attribute.Name, url);
            }

            // sanitize the style attribute; remember whether it was empty beforehand so that
            // only styles emptied by sanitizing are reported as StyleAttributeEmpty below
            var oldStyleEmpty = string.IsNullOrEmpty(tag.GetAttribute(StyleAttributeName));
            SanitizeStyle(tag, baseUrl);

            // sanitize the value of the attributes
            foreach (var attribute in tag.Attributes.ToList())
            {
                // The '& Javascript include' is a possible method to execute Javascript and can lead to XSS.
                // (see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#.26_JavaScript_includes)
                if (attribute.Value.Contains("&{"))
                {
                    RemoveAttribute(tag, attribute, RemoveReason.NotAllowedValue);
                }
                else
                {
                    if (AllowedClasses.Any() && attribute.Name == "class")
                    {
                        var removedClasses = tag.ClassList.Except(AllowedClasses).ToArray();

                        foreach (var removedClass in removedClasses)
                            RemoveCssClass(tag, removedClass, RemoveReason.NotAllowedCssClass);

                        if (tag.ClassList.Length == 0)
                            RemoveAttribute(tag, attribute, RemoveReason.ClassAttributeEmpty);
                    }
                    else if (!oldStyleEmpty && attribute.Name == StyleAttributeName && string.IsNullOrEmpty(attribute.Value))
                    {
                        RemoveAttribute(tag, attribute, RemoveReason.StyleAttributeEmpty);
                    }
                }
            }
        }

        if (context is INode node)
        {
            RemoveComments(node);
        }

        DoPostProcess(dom, context as INode);
    }

    /// <summary>
    /// Sanitizes every CSS style sheet of the document and writes the result back
    /// into the owning style element.
    /// </summary>
    /// <param name="dom">The HTML document.</param>
    /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
    private void SanitizeStyleSheets(IHtmlDocument dom, string baseUrl)
    {
        foreach (var styleSheet in dom.StyleSheets.OfType<ICssStyleSheet>())
        {
            var styleTag = styleSheet.OwnerNode;
            var i = 0;

            // Index-based loop because rules are removed while iterating.
            while (i < styleSheet.Rules.Length)
            {
                var rule = styleSheet.Rules[i];
                if (!SanitizeStyleRule(rule, styleTag, baseUrl) && RemoveAtRule(styleTag, rule))
                    styleSheet.RemoveAt(i);
                else
                    i++;
            }

            // Escape "<" so the serialized CSS cannot introduce markup in the style element.
            styleTag.InnerHtml = styleSheet.ToCss(StyleFormatter).Replace("<", "\\3c ");
        }
    }

    /// <summary>
    /// Sanitizes a single CSS rule, recursing into nested rules.
    /// </summary>
    /// <param name="rule">The rule to sanitize.</param>
    /// <param name="styleTag">The style element that owns the rule.</param>
    /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
    /// <returns><c>false</c> if the rule's type is not in <see cref="AllowedAtRules"/>; otherwise, <c>true</c>.</returns>
    private bool SanitizeStyleRule(ICssRule rule, IElement styleTag, string baseUrl)
    {
        if (!AllowedAtRules.Contains(rule.Type)) return false;

        if (rule is ICssStyleRule styleRule)
        {
            SanitizeStyleDeclaration(styleTag, styleRule.Style, baseUrl);
        }
        else
        {
            if (rule is ICssGroupingRule groupingRule)
            {
                var i = 0;

                while (i < groupingRule.Rules.Length)
                {
                    var childRule = groupingRule.Rules[i];
                    if (!SanitizeStyleRule(childRule, styleTag, baseUrl) && RemoveAtRule(styleTag, childRule))
                        groupingRule.RemoveAt(i);
                    else
                        i++;
                }
            }
            else if (rule is ICssPageRule pageRule)
            {
                SanitizeStyleDeclaration(styleTag, pageRule.Style, baseUrl);
            }
            else if (rule is ICssKeyframesRule keyFramesRule)
            {
                foreach (var childRule in keyFramesRule.Rules.OfType<ICssKeyframeRule>().ToList())
                {
                    if (!SanitizeStyleRule(childRule, styleTag, baseUrl) && RemoveAtRule(styleTag, childRule))
                        keyFramesRule.Remove(childRule.KeyText);
                }
            }
            else if (rule is ICssKeyframeRule keyFrameRule)
            {
                SanitizeStyleDeclaration(styleTag, keyFrameRule.Style, baseUrl);
            }
        }

        return true;
    }

    /// <summary>
    /// Performs post processing on all nodes in the document.
    /// Nodes are only visited when a <see cref="PostProcessNode"/> handler is attached.
    /// </summary>
    /// <param name="dom">The HTML document.</param>
    /// <param name="context">The node within which to post process all nodes.</param>
    private void DoPostProcess(IHtmlDocument dom, INode? context)
    {
        if (PostProcessNode != null)
        {
            dom.Normalize();

            if (context != null)
            {
                // Snapshot the nodes because handlers may replace them.
                var nodes = GetAllNodes(context).ToList();

                foreach (var node in nodes)
                {
                    var e = new PostProcessNodeEventArgs(dom, node);
                    OnPostProcessNode(e);
                    if (e.ReplacementNodes.Count != 0)
                    {
                        ((IChildNode)node).Replace([.. e.ReplacementNodes]);
                    }
                }
            }
        }

        if (PostProcessDom != null)
        {
            var e = new PostProcessDomEventArgs(dom);
            OnPostProcessDom(e);
        }
    }

    /// <summary>
    /// Determines whether the specified attribute can contain a URI.
    /// </summary>
    /// <param name="attribute">The attribute.</param>
    /// <returns><c>true</c> if the attribute can contain a URI; otherwise, <c>false</c>.</returns>
    private bool IsUriAttribute(IAttr attribute)
    {
        return UriAttributes.Contains(attribute.Name);
    }

    /// <summary>
    /// Determines whether the specified tag is allowed.
    /// </summary>
    /// <param name="tag">The tag.</param>
    /// <returns><c>true</c> if the tag is allowed; otherwise, <c>false</c>.</returns>
    private bool IsAllowedTag(IElement tag)
    {
        return AllowedTags.Contains(tag.NodeName);
    }

    /// <summary>
    /// Determines whether the specified attribute is allowed.
    /// </summary>
    /// <param name="attribute">The attribute.</param>
    /// <returns><c>true</c> if the attribute is allowed; otherwise, <c>false</c>.</returns>
    private bool IsAllowedAttribute(IAttr attribute)
    {
        return AllowedAttributes.Contains(attribute.Name)
            // test html5 data- attributes
            || (AllowDataAttributes && attribute.Name != null && attribute.Name.StartsWith("data-", StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Sanitizes the style attribute of an element.
    /// </summary>
    /// <param name="element">The element.</param>
    /// <param name="baseUrl">The base URL.</param>
    protected void SanitizeStyle(IElement element, string baseUrl)
    {
        // filter out invalid CSS declarations
        // see https://github.com/AngleSharp/AngleSharp/issues/101
        var attribute = element.GetAttribute(StyleAttributeName);
        if (attribute == null)
            return;

        // No style object is available for the element, so the raw attribute is dropped.
        if (element.GetStyle() == null)
        {
            element.RemoveAttribute(StyleAttributeName);
            return;
        }

        element.SetAttribute(StyleAttributeName, element.GetStyle().ToCss(StyleFormatter));

        var styles = element.GetStyle();
        if (styles == null || styles.Length == 0)
            return;

        SanitizeStyleDeclaration(element, styles, baseUrl);
    }

    /// <summary>
    /// Verify if the given CSS property name is allowed. By default this will
    /// check if the property is in the <see cref="AllowedCssProperties"/> set,
    /// or if the property is a custom property and <see cref="AllowCssCustomProperties"/> is true.
    /// </summary>
    /// <param name="propertyName">The name of the CSS property.</param>
    /// <returns><c>true</c> if the property is allowed; otherwise, <c>false</c>.</returns>
    protected virtual bool IsAllowedCssProperty(string propertyName)
    {
        return AllowedCssProperties.Contains(propertyName)
            || AllowCssCustomProperties && propertyName != null && propertyName.StartsWith("--", StringComparison.Ordinal);
    }

    /// <summary>
    /// Sanitizes a CSS style declaration: removes properties that are not allowed or whose value
    /// is not allowed, and sanitizes URLs contained in property values.
    /// </summary>
    /// <param name="element">The element the declaration belongs to.</param>
    /// <param name="styles">The style declaration to sanitize.</param>
    /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
    private void SanitizeStyleDeclaration(IElement element, ICssStyleDeclaration styles, string baseUrl)
    {
        // Changes are collected and applied after the loop so the declaration is not
        // modified while it is being enumerated.
        var removeStyles = new List<Tuple<ICssProperty, RemoveReason>>();
        var setStyles = new Dictionary<string, string>();

        foreach (var style in styles)
        {
            var key = DecodeCss(style.Name);
            var val = DecodeCss(style.Value);

            if (!IsAllowedCssProperty(key))
            {
                removeStyles.Add(new Tuple<ICssProperty, RemoveReason>(style, RemoveReason.NotAllowedStyle));
                continue;
            }

            if (CssExpression.IsMatch(val) || DisallowCssPropertyValue.IsMatch(val))
            {
                removeStyles.Add(new Tuple<ICssProperty, RemoveReason>(style, RemoveReason.NotAllowedValue));
                continue;
            }

            val = WhitespaceRegex.Replace(val, string.Empty);

            // Materialize once: a deferred query would call SanitizeUrl (and raise FilterUrl)
            // again on every enumeration below.
            var urls = CssUrl.Matches(val)
                .Cast<Match>()
                .Select(m => (Match: m, Url: SanitizeUrl(element, m.Groups[2].Value, baseUrl)))
                .ToList();

            if (urls.Count > 0)
            {
                if (urls.Any(u => u.Url == null))
                    removeStyles.Add(new Tuple<ICssProperty, RemoveReason>(style, RemoveReason.NotAllowedUrlValue));
                else
                {
                    // Rebuild the value with every url(...) replaced by its sanitized form.
                    var sb = new StringBuilder();
                    var ix = 0;

                    foreach (var url in urls)
                    {
                        sb.Append(val, ix, url.Match.Index - ix);
                        sb.Append("url(");
                        sb.Append(url.Match.Groups[1].Value);
                        sb.Append(url.Url);
                        sb.Append(url.Match.Groups[3].Value);
                        ix = url.Match.Index + url.Match.Length;
                    }

                    sb.Append(val, ix, val.Length - ix);

                    var s = sb.ToString();

                    if (s != val)
                    {
                        // The property was written under an escaped name: drop it and
                        // set the value under the decoded name instead.
                        if (key != style.Name)
                        {
                            removeStyles.Add(new Tuple<ICssProperty, RemoveReason>(style, RemoveReason.NotAllowedUrlValue));
                        }
                        setStyles[key] = s;
                    }
                }
            }
        }

        foreach (var style in setStyles)
        {
            styles.SetProperty(style.Key, style.Value);
        }

        foreach (var style in removeStyles)
        {
            RemoveStyle(element, styles, style.Item1, style.Item2);
        }
    }

    /// <summary>
    /// Decodes CSS Unicode escapes and removes comments.
    /// </summary>
    /// <param name="css">The CSS string.</param>
    /// <returns>The decoded CSS string.</returns>
    protected static string DecodeCss(string css)
    {
        var r = CssUnicodeEscapes.Replace(css, m =>
        {
            // Group 1: hexadecimal escape (1-6 digits); group 2: single escaped character.
            if (m.Groups[1].Success)
                return ((char)int.Parse(m.Groups[1].Value, NumberStyles.HexNumber, CultureInfo.InvariantCulture)).ToString();
            var t = m.Groups[2].Value;
            return t == "\\" ? @"\\" : t;
        });

        r = CssComments.Replace(r, m => "");

        return r;
    }

    // Matches the scheme part of a URL; the delimiter may be a literal colon or a
    // decimal/hexadecimal character reference for a colon.
    private static readonly Regex SchemeRegex = new(@"^([^\/#]*?)(?:\:|&#0*58|&#x0*3a)", RegexOptions.Compiled | RegexOptions.IgnoreCase);

    /// <summary>
    /// Tries to create a safe <see cref="Iri"/> object from a string.
    /// </summary>
    /// <param name="url">The URL.</param>
    /// <returns>
    /// The <see cref="Iri"/> object or null if no safe <see cref="Iri"/> can be created,
    /// i.e. the URL has a scheme that is not in <see cref="AllowedSchemes"/>.
    /// </returns>
    protected Iri? GetSafeIri(string url)
    {
        url = url.TrimStart();

        var schemeMatch = SchemeRegex.Match(url);

        if (schemeMatch.Success)
        {
            var scheme = schemeMatch.Groups[1].Value;
            return AllowedSchemes.Contains(scheme, StringComparer.OrdinalIgnoreCase) ? new Iri(url, scheme) : null;
        }

        return new Iri(url);
    }

    /// <summary>
    /// Sanitizes a URL. The <see cref="FilterUrl"/> event is raised with the result,
    /// and the value returned is the one left in the event arguments by its handlers.
    /// </summary>
    /// <param name="element">The tag containing the URL being sanitized.</param>
    /// <param name="url">The URL.</param>
    /// <param name="baseUrl">The base URL relative URLs are resolved against (empty or null for no resolution).</param>
    /// <returns>The sanitized URL or null if no safe URL can be created.</returns>
    protected virtual string? SanitizeUrl(IElement element, string url, string baseUrl)
    {
        var iri = GetSafeIri(url);

        if (iri != null && !iri.IsAbsolute && !string.IsNullOrEmpty(baseUrl))
        {
            // resolve relative URI
            if (Uri.TryCreate(baseUrl, UriKind.Absolute, out Uri? baseUri))
            {
                try
                {
                    var sanitizedUrl = new Uri(baseUri, iri.Value).AbsoluteUri;
                    var ev = new FilterUrlEventArgs(element, url, sanitizedUrl);

                    OnFilteringUrl(ev);

                    return ev.SanitizedUrl;
                }
                catch (UriFormatException)
                {
                    // The relative URL cannot be combined with the base URL: treat it as unsafe.
                    iri = null;
                }
            }
            else iri = null;
        }

        var e = new FilterUrlEventArgs(element, url, iri?.Value);

        OnFilteringUrl(e);

        return e.SanitizedUrl;
    }

    /// <summary>
    /// Removes a tag from the document unless a <see cref="RemovingTag"/> handler cancels the removal.
    /// If <see cref="KeepChildNodes"/> is set, the tag is replaced by its child nodes.
    /// </summary>
    /// <param name="tag">Tag to be removed.</param>
    /// <param name="reason">Reason for removal.</param>
    private void RemoveTag(IElement tag, RemoveReason reason)
    {
        var e = new RemovingTagEventArgs(tag, reason);
        OnRemovingTag(e);

        if (!e.Cancel)
        {
            if (KeepChildNodes && tag.HasChildNodes)
                tag.Replace([.. tag.ChildNodes]);
            else
                tag.Remove();
        }
    }

    /// <summary>
    /// Removes an attribute from the document unless a <see cref="RemovingAttribute"/> handler cancels the removal.
    /// </summary>
    /// <param name="tag">Tag the attribute belongs to.</param>
    /// <param name="attribute">Attribute to be removed.</param>
    /// <param name="reason">Reason for removal.</param>
    private void RemoveAttribute(IElement tag, IAttr attribute, RemoveReason reason)
    {
        var e = new RemovingAttributeEventArgs(tag, attribute, reason);
        OnRemovingAttribute(e);

        if (!e.Cancel)
            tag.RemoveAttribute(attribute.Name);
    }

    /// <summary>
    /// Removes a style from the document unless a <see cref="RemovingStyle"/> handler cancels the removal.
    /// </summary>
    /// <param name="tag">Tag the style belongs to.</param>
    /// <param name="styles">Style rule that contains the style to be removed.</param>
    /// <param name="style">Style to be removed.</param>
    /// <param name="reason">Reason for removal.</param>
    private void RemoveStyle(IElement tag, ICssStyleDeclaration styles, ICssProperty style, RemoveReason reason)
    {
        var e = new RemovingStyleEventArgs(tag, style, reason);
        OnRemovingStyle(e);

        if (!e.Cancel)
            styles.RemoveProperty(style.Name);
    }

    /// <summary>
    /// Raises the <see cref="RemovingAtRule"/> event and reports whether the rule may be removed.
    /// The removal itself is performed by the caller.
    /// </summary>
    /// <param name="tag">Tag the style belongs to.</param>
    /// <param name="rule">Rule to be removed.</param>
    /// <returns><c>true</c> if the rule can be removed; <c>false</c> otherwise.</returns>
    private bool RemoveAtRule(IElement tag, ICssRule rule)
    {
        var e = new RemovingAtRuleEventArgs(tag, rule);
        OnRemovingAtRule(e);
        return !e.Cancel;
    }

    /// <summary>
    /// Removes a CSS class from a class attribute unless a <see cref="RemovingCssClass"/> handler cancels the removal.
    /// </summary>
    /// <param name="tag">Tag the style belongs to.</param>
    /// <param name="cssClass">Class to be removed.</param>
    /// <param name="reason">Reason for removal.</param>
    private void RemoveCssClass(IElement tag, string cssClass, RemoveReason reason)
    {
        var e = new RemovingCssClassEventArgs(tag, cssClass, reason);
        OnRemovingCssClass(e);

        if (!e.Cancel)
            tag.ClassList.Remove(cssClass);
    }
}