@@ -489,3 +489,99 @@ def test_unicode_escape_in_style(self):
489489 with self .subTest (html = html ):
490490 cleaned = clean_html (html )
491491 self .assertEqual ('<div><style>/* deleted */</style></div>' , cleaned )
492+
def test_unicode_escape_mixed_with_comments(self):
    """CSS hex escapes interleaved with CSS comments must still be caught.

    Every case is expected to have its ``<style>`` content replaced by the
    ``/* deleted */`` placeholder.
    """
    expected = '<div><style>/* deleted */</style></div>'
    # The hex digits must follow the backslash directly: in CSS a backslash
    # followed by a space escapes the space instead of starting a hex escape.
    test_cases = [
        # \69 = 'i' with comment before
        '<style>@/*comment*/\\69mport url(evil.css)</style>',
        # \69 = 'i' with comment after
        '<style>@\\69mport/*comment*/ url(evil.css)</style>',
        # "expression" spelled with escapes only, split by a comment
        '<style>\\65\\78/*comment*/\\70\\72\\65\\73\\73\\69\\6f\\6e(alert(1))</style>',
    ]

    for html in test_cases:
        with self.subTest(html=html):
            cleaned = clean_html(html)
            self.assertEqual(expected, cleaned)
508+
def test_unicode_escape_case_insensitive(self):
    """CSS hex escapes must be recognised with upper- and lowercase digits.

    Both payloads spell ``import`` (\\69 = 'i', \\6D = 'm', \\70 = 'p',
    \\6F = 'o', \\72 = 'r', \\74 = 't') and are expected to be replaced by
    the ``/* deleted */`` placeholder.
    """
    expected = '<div><style>/* deleted */</style></div>'
    # The hex digits must follow the backslash directly: in CSS a backslash
    # followed by a space escapes the space instead of starting a hex escape.
    test_cases = [
        # @import with uppercase hex digits: \69\6D\70\6F\72\74
        '<style>@\\69\\6D\\70\\6F\\72\\74 url(evil.css)</style>',
        # @import with mixed-case hex digits: \6D uppercase, \6f lowercase
        '<style>@\\69\\6D\\70\\6f\\72\\74 url(evil.css)</style>',
    ]

    for html in test_cases:
        with self.subTest(html=html):
            cleaned = clean_html(html)
            self.assertEqual(expected, cleaned)
523+
def test_unicode_escape_various_schemes(self):
    """Malicious URL schemes spelled with CSS hex escapes must be caught.

    Every case is expected to have its ``<style>`` content replaced by the
    ``/* deleted */`` placeholder.
    """
    expected = '<div><style>/* deleted */</style></div>'
    # The hex digits must follow the backslash directly: in CSS a backslash
    # followed by a space escapes the space instead of starting a hex escape.
    test_cases = [
        # \76\62\73\63\72\69\70\74 = "vbscript"
        '<style>url(\\76\\62\\73\\63\\72\\69\\70\\74:alert(1))</style>',
        # \6a\73\63\72\69\70\74 = "jscript"
        '<style>url(\\6a\\73\\63\\72\\69\\70\\74:alert(1))</style>',
        # \6c\69\76\65\73\63\72\69\70\74 = "livescript"
        '<style>url(\\6c\\69\\76\\65\\73\\63\\72\\69\\70\\74:alert(1))</style>',
        # \6d\6f\63\68\61 = "mocha"
        '<style>url(\\6d\\6f\\63\\68\\61:alert(1))</style>',
    ]

    for html in test_cases:
        with self.subTest(html=html):
            cleaned = clean_html(html)
            self.assertEqual(expected, cleaned)
541+
def test_unicode_escape_with_whitespace_variations(self):
    """A hex escape may be terminated by different whitespace characters.

    Each payload is ``@`` + ``\\69`` + one whitespace character + ``mport``
    inside a ``style`` attribute. With ``safe_attrs_only=False`` the test
    expects the ``style`` attribute to be dropped from the output.
    """
    cleaner = Cleaner(safe_attrs_only=False)
    # The whitespace character must sit directly between the hex digits and
    # "mport"; any additional space would keep the keyword split apart.
    test_cases = [
        # Tab after escape
        ('<div style="@\\69\tmport url(evil.css)">test</div>', '<div>test</div>'),
        # Newline after escape (note: actual newline, not \n)
        ('<div style="@\\69\nmport url(evil.css)">test</div>', '<div>test</div>'),
        # Form feed after escape
        ('<div style="@\\69\fmport url(evil.css)">test</div>', '<div>test</div>'),
    ]

    for html, expected in test_cases:
        with self.subTest(html=html):
            cleaned = cleaner.clean_html(html)
            self.assertEqual(expected, cleaned)
558+
def test_backslash_removal_after_unicode_decode(self):
    """Hex escapes combined with plain backslash escapes must be caught.

    After decoding Unicode escapes, remaining backslashes are removed, so
    double obfuscation (hex escapes + backslashes) is expected to end up as
    ``@import`` and be replaced by the ``/* deleted */`` placeholder.
    """
    expected = '<div><style>/* deleted */</style></div>'
    # The escaped character must follow the backslash directly: in CSS a
    # backslash followed by a space escapes the space itself.
    test_cases = [
        # Step 1: \69 → 'i', Step 2: remove \, Result: @import
        '<style>@\\69\\m\\p\\o\\r\\t url(evil.css)</style>',
        # Multiple unicode escapes with backslashes mixed in
        '<style>@\\69\\6d\\p\\6f\\r\\t url(evil.css)</style>',
    ]

    for html in test_cases:
        with self.subTest(html=html):
            cleaned = clean_html(html)
            self.assertEqual(expected, cleaned)
573+
def test_backslash_obfuscation_without_unicode(self):
    """Patterns obfuscated ONLY with backslashes (no hex escapes) are caught.

    Step 1: no unicode escapes to decode, Step 2: remove ``\\``, Result: the
    malicious pattern. Every case is expected to be replaced by the
    ``/* deleted */`` placeholder.
    """
    expected = '<div><style>/* deleted */</style></div>'
    # The escaped character must follow the backslash directly: in CSS a
    # backslash followed by a space escapes the space itself.
    test_cases = [
        # @\i\m\p\o\r\t → @import (caught by '@import' check)
        '<style>@\\i\\m\\p\\o\\r\\t url(evil.css)</style>',
        # A single backslash in front of the keyword: @\import → @import
        '<style>@\\import url(evil.css)</style>',
    ]

    for html in test_cases:
        with self.subTest(html=html):
            cleaned = clean_html(html)
            self.assertEqual(expected, cleaned)
0 commit comments