@@ -19,78 +19,82 @@ namespace CommonUtilities.Helpers.Html;
1919
2020/// <summary>
2121/// Helper class for sanitizing HTML-like input to prevent XSS attacks.
22+ /// Allows img tags with src and class attributes; disallows script tags and unsafe attributes.
2223/// </summary>
2324public static class HtmlSanitizerHelper
2425{
25- /// <summary>
26- /// The HTML sanitizer instance with a configurable whitelist/policy.
27- /// </summary>
2826 private static readonly HtmlSanitizer Sanitizer ;
2927
/// <summary>
/// Builds the shared sanitizer policy once: explicit whitelists for tags, attributes
/// and URI schemes, no inline CSS, and a forced <c>rel</c> value on every anchor.
/// </summary>
static HtmlSanitizerHelper()
{
    Sanitizer = new HtmlSanitizer();

    // Replace the library defaults with our own tag whitelist.
    Sanitizer.AllowedTags.Clear();
    string[] allowedTags =
    {
        "a", "b", "i", "strong", "em", "u",
        "p", "br", "ul", "ol", "li", "blockquote",
        "code", "pre", "img"
    };
    foreach (string tag in allowedTags)
    {
        Sanitizer.AllowedTags.Add(tag);
    }

    // Replace the library defaults with our own attribute whitelist.
    // Inline event handlers (onclick, onload, ...) are deliberately absent from this
    // list; no RemovingAttribute hook is registered for them because the previous
    // hook only assigned args.Cancel = false, i.e. it never changed the outcome.
    Sanitizer.AllowedAttributes.Clear();
    string[] allowedAttributes = { "href", "title", "target", "rel", "src", "class" };
    foreach (string attribute in allowedAttributes)
    {
        Sanitizer.AllowedAttributes.Add(attribute);
    }

    // Only these URI schemes are accepted.
    Sanitizer.AllowedSchemes.Clear();
    Sanitizer.AllowedSchemes.Add("http");
    Sanitizer.AllowedSchemes.Add("https");
    Sanitizer.AllowedSchemes.Add("mailto");

    // Attributes whose values are treated as URIs.
    Sanitizer.UriAttributes.Clear();
    Sanitizer.UriAttributes.Add("href");
    Sanitizer.UriAttributes.Add("src");

    // No CSS properties are allowed.
    Sanitizer.AllowedCssProperties.Clear();

    // Overwrite rel on every <a>, replacing any user-supplied value,
    // to avoid reverse tabnabbing.
    Sanitizer.PostProcessNode += (sender, args) =>
    {
        if (args.Node is IElement element &&
            element.TagName.Equals("a", StringComparison.OrdinalIgnoreCase))
        {
            element.SetAttribute("rel", "noopener noreferrer nofollow");
        }
    };
}
7783
7884 /// <summary>
7985 /// Sanitizes the provided HTML-like input and returns a string safe to render as HTML.
80- /// Newlines are converted to <br/> so plain-text line breaks are preserved.
81- /// Allowed tags and attributes are limited by the sanitizer configuration above.
86+ /// Plain-text newlines are converted to &lt;br/&gt; so simple line breaks are preserved.
8287 /// </summary>
8388 /// <param name="input">The user-provided HTML-like string.</param>
84- /// <returns>A sanitized string safe for HTML rendering.</returns>
89+ /// <returns>A sanitized string safe for HTML body rendering.</returns>
8590 public static string SanitizeAndFormat ( string input )
8691 {
8792 if ( string . IsNullOrWhiteSpace ( input ) ) return string . Empty ;
8893
89- // Sanitize the raw input (removes unsafe tags/attributes, normalizes HTML )
94+ // Sanitize the raw input (removes disallowed tags/attributes)
9095 string sanitized = Sanitizer . Sanitize ( input ) ;
9196
92- // Convert newline sequences to <br/>.
93- // Note: If you prefer using <p> wrappers, you might adjust this accordingly.
97+ // Convert newline sequences to <br/> for simple formatting
9498 sanitized = sanitized
9599 . Replace ( "\r \n " , "\n " )
96100 . Replace ( "\r " , "\n " )
0 commit comments