using AngleSharp;
using AngleSharp.Css.Dom;
using AngleSharp.Html.Dom;
using AngleSharp.Html.Parser;
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;

namespace Ganss.Xss;

/// <summary>
/// Enables an inheriting class to implement an HtmlSanitizer class, which cleans HTML documents and fragments
/// from constructs that can lead to XSS attacks.
/// </summary>
public interface IHtmlSanitizer
{
    /// <summary>
    /// Gets or sets a value indicating whether to keep child nodes of elements that are removed.
    /// </summary>
    bool KeepChildNodes { get; set; }

    /// <summary>
    /// Gets or sets the object that creates the parser used for parsing the input.
    /// </summary>
    Func<HtmlParser> HtmlParserFactory { get; set; }

    /// <summary>
    /// Gets or sets the object used for generating output.
    /// </summary>
    IMarkupFormatter OutputFormatter { get; set; }

    /// <summary>
    /// Gets the allowed CSS at-rules such as "@media" and "@font-face".
    /// </summary>
    /// <value>
    /// The allowed CSS at-rules.
    /// </value>
    ISet<CssRuleType> AllowedAtRules { get; }

    /// <summary>
    /// Gets the allowed URI schemes such as "http" and "https".
    /// </summary>
    /// <value>
    /// The allowed URI schemes.
    /// </value>
    ISet<string> AllowedSchemes { get; }

    /// <summary>
    /// Gets the allowed HTML tag names such as "a" and "div".
    /// </summary>
    /// <value>
    /// The allowed tag names.
    /// </value>
    ISet<string> AllowedTags { get; }

    /// <summary>
    /// Gets the allowed HTML attributes such as "href" and "alt".
    /// </summary>
    /// <value>
    /// The allowed HTML attributes.
    /// </value>
    ISet<string> AllowedAttributes { get; }

    /// <summary>
    /// Allow all HTML5 data attributes; the attributes prefixed with data-
    /// </summary>
    bool AllowDataAttributes { get; set; }

    /// <summary>
    /// Gets the HTML attributes that can contain a URI such as "href".
    /// </summary>
    /// <value>
    /// The URI attributes.
    /// </value>
    ISet<string> UriAttributes { get; }

    /// <summary>
    /// Gets the allowed CSS properties such as "font" and "margin".
    /// </summary>
    /// <value>
    /// The allowed CSS properties.
    /// </value>
    ISet<string> AllowedCssProperties { get; }

    /// <summary>
    /// Gets or sets a regex that must not match for legal CSS property values.
    /// </summary>
    /// <value>
    /// The regex.
    /// </value>
    Regex DisallowCssPropertyValue { get; set; }

    /// <summary>
    /// Gets the allowed CSS classes. If the set is empty, all classes will be allowed.
    /// </summary>
    /// <value>
    /// The allowed CSS classes. An empty set means all classes are allowed.
    /// </value>
    ISet<string> AllowedClasses { get; }

    /// <summary>
    /// Occurs after sanitizing the document and post processing nodes.
    /// </summary>
    event EventHandler<PostProcessDomEventArgs> PostProcessDom;

    /// <summary>
    /// Occurs for every node after sanitizing.
    /// </summary>
    event EventHandler<PostProcessNodeEventArgs> PostProcessNode;

    /// <summary>
    /// Occurs before a tag is removed.
    /// </summary>
    event EventHandler<RemovingTagEventArgs> RemovingTag;

    /// <summary>
    /// Occurs before an attribute is removed.
    /// </summary>
    event EventHandler<RemovingAttributeEventArgs> RemovingAttribute;

    /// <summary>
    /// Occurs before a style is removed.
    /// </summary>
    event EventHandler<RemovingStyleEventArgs> RemovingStyle;

    /// <summary>
    /// Occurs before an at-rule is removed.
    /// </summary>
    event EventHandler<RemovingAtRuleEventArgs> RemovingAtRule;

    /// <summary>
    /// Occurs before a comment is removed.
    /// </summary>
    event EventHandler<RemovingCommentEventArgs> RemovingComment;

    /// <summary>
    /// Occurs before a CSS class is removed.
    /// </summary>
    event EventHandler<RemovingCssClassEventArgs> RemovingCssClass;

    /// <summary>
    /// Occurs when a URL is being sanitized.
    /// </summary>
    event EventHandler<FilterUrlEventArgs>? FilterUrl;

    /// <summary>
    /// Sanitizes the specified HTML.
    /// </summary>
    /// <param name="html">The HTML to sanitize.</param>
    /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
    /// <param name="outputFormatter">The formatter used to render the DOM. Using the default formatter if null.</param>
    /// <returns>The sanitized HTML.</returns>
    string Sanitize(string html, string baseUrl = "", IMarkupFormatter? outputFormatter = null);

    /// <summary>
    /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned.
    /// </summary>
    /// <param name="html">The HTML body fragment to sanitize.</param>
    /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
    /// <returns>The sanitized HTML document.</returns>
    IHtmlDocument SanitizeDom(string html, string baseUrl = "");

    /// <summary>
    /// Sanitizes the specified parsed HTML body fragment.
    /// If the document has not been parsed with CSS support then all styles will be removed.
    /// </summary>
    /// <param name="document">The parsed HTML document.</param>
    /// <param name="context">The node within which to sanitize.</param>
    /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
    /// <returns>The sanitized HTML document.</returns>
    IHtmlDocument SanitizeDom(IHtmlDocument document, IHtmlElement? context = null, string baseUrl = "");

    /// <summary>
    /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned.
    /// </summary>
    /// <param name="html">The HTML document to sanitize.</param>
    /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
    /// <param name="outputFormatter">The formatter used to render the DOM. Using the <see cref="OutputFormatter"/> if null.</param>
    /// <returns>The sanitized HTML document.</returns>
    string SanitizeDocument(string html, string baseUrl = "", IMarkupFormatter? outputFormatter = null);
}