@@ -21,8 +21,8 @@ use crate::{
2121
2222use super :: { ConstantEvaluation , ConstantEvaluationCtx , ConstantValue } ;
2323
24- // Characters that are always unescaped in URI encoding: A-Z a-z 0-9 - _ . ! ~ * ' ( )
25- const URI_ALWAYS_UNESCAPED : & str = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_ .!~*'()" ;
24+ // Characters that are always unescaped in URI encoding: A-Z a-z 0-9 _ - . ! ~ * ' ( )
25+ const URI_ALWAYS_UNESCAPED : & str = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_- .!~*'()" ;
2626
2727fn is_uri_always_unescaped ( c : char ) -> bool {
2828 URI_ALWAYS_UNESCAPED . contains ( c)
@@ -50,10 +50,11 @@ pub fn try_fold_known_global_methods<'a>(
5050 arguments : & Vec < ' a , Argument < ' a > > ,
5151 ctx : & impl ConstantEvaluationCtx < ' a > ,
5252) -> Option < ConstantValue < ' a > > {
53- // Handle global function calls (e.g., encodeURI, decodeURI)
5453 if let Expression :: Identifier ( ident) = callee {
55- if let Some ( result) = try_fold_url_related_function ( ident, arguments, ctx) {
56- return Some ( result) ;
54+ if matches ! ( ident. name. as_str( ) , "encodeURI" | "encodeURIComponent" | "decodeURI" | "decodeURIComponent" ) {
55+ if let Some ( result) = try_fold_url_related_function ( ident, arguments, ctx) {
56+ return Some ( result) ;
57+ }
5758 }
5859 }
5960
@@ -550,13 +551,13 @@ fn try_fold_encode_uri<'a>(
550551 fn should_encode_uri ( c : char ) -> bool {
551552 match c {
552553 c if is_uri_always_unescaped ( c) => false ,
553- ';' | ',' | ' /' | '?' | ':' | '@' | '&' | '=' | '+' | '$' | '#' => false ,
554+ ';' | '/' | '?' | ':' | '@' | '&' | '=' | '+' | '$' | ', ' | '#' => false ,
554555 _ => true ,
555556 }
556557 }
557558
558559 let encoded = encode_uri_chars ( & string_value, should_encode_uri) ;
559- Some ( ConstantValue :: String ( Cow :: Owned ( encoded) ) )
560+ Some ( ConstantValue :: String ( encoded. into_owned ( ) . into ( ) ) )
560561}
561562
562563fn try_fold_encode_uri_component < ' a > (
@@ -575,7 +576,7 @@ fn try_fold_encode_uri_component<'a>(
575576 }
576577
577578 let encoded = encode_uri_chars ( & string_value, should_encode_uri_component) ;
578- Some ( ConstantValue :: String ( Cow :: Owned ( encoded) ) )
579+ Some ( ConstantValue :: String ( encoded. into_owned ( ) . into ( ) ) )
579580}
580581
581582fn try_fold_decode_uri < ' a > (
@@ -597,7 +598,7 @@ fn try_fold_decode_uri<'a>(
597598 }
598599
599600 let decoded = decode_uri_chars ( & string_value, should_not_decode_uri) ?;
600- Some ( ConstantValue :: String ( Cow :: Owned ( decoded) ) )
601+ Some ( ConstantValue :: String ( decoded. into_owned ( ) . into ( ) ) )
601602}
602603
603604fn try_fold_decode_uri_component < ' a > (
@@ -613,23 +614,31 @@ fn try_fold_decode_uri_component<'a>(
613614
614615 // decodeURIComponent decodes all percent-encoded sequences
615616 let decoded = decode_uri_chars ( & string_value, |_| false ) ?;
616- Some ( ConstantValue :: String ( Cow :: Owned ( decoded) ) )
617+ Some ( ConstantValue :: String ( decoded. into_owned ( ) . into ( ) ) )
617618}
618619
619620// this function is based on https://github.com/kornelski/rust_urlencoding/blob/a617c89d16f390e3ab4281ea68c514660b111301/src/enc.rs#L67
620621// MIT license: https://github.com/kornelski/rust_urlencoding/blob/a617c89d16f390e3ab4281ea68c514660b111301/LICENSE
621- fn encode_uri_chars ( input : & str , should_encode : fn ( char ) -> bool ) -> String {
622- let mut result = String :: new ( ) ;
623- let mut bytes = input. bytes ( ) ;
624- while let Some ( byte) = bytes. next ( ) {
625- let ch = byte as char ;
626- if byte < 128 && !should_encode ( ch) {
627- result. push ( ch) ;
628- } else {
629- result. push_str ( & format ! ( "%{:02X}" , byte) ) ;
622+ fn encode_uri_chars < ' a > ( input : & ' a str , should_encode : fn ( char ) -> bool ) -> Cow < ' a , str > {
623+ let mut out = std:: vec:: Vec :: with_capacity ( input. len ( ) ) ;
624+ let bytes = input. as_bytes ( ) ;
625+ let mut start = 0 ;
626+
627+ for ( i, & byte) in bytes. iter ( ) . enumerate ( ) {
628+ if byte >= 128 || should_encode ( byte as char ) {
629+ out. extend_from_slice ( & bytes[ start..i] ) ;
630+ out. extend_from_slice ( format ! ( "%{:02X}" , byte) . as_bytes ( ) ) ;
631+ start = i + 1 ;
630632 }
631633 }
632- result
634+
635+ if start == 0 {
636+ Cow :: Borrowed ( input)
637+ } else {
638+ out. extend_from_slice ( & bytes[ start..] ) ;
639+ // SAFETY: we started with a valid UTF-8 string and only changed ASCII characters
640+ Cow :: Owned ( unsafe { String :: from_utf8_unchecked ( out) } )
641+ }
633642}
634643
635644// this function is based on https://github.com/kornelski/rust_urlencoding/blob/a617c89d16f390e3ab4281ea68c514660b111301/src/dec.rs#L21
@@ -643,64 +652,114 @@ fn from_hex_digit(digit: u8) -> Option<u8> {
643652 }
644653}
645654
646- fn decode_uri_chars ( input : & str , should_not_decode : fn ( char ) -> bool ) -> Option < String > {
647- let mut out = std:: vec:: Vec :: with_capacity ( input. len ( ) ) ;
648- let mut bytes = input. bytes ( ) ;
649- while let Some ( byte) = bytes. next ( ) {
655+ // this function is based on https://github.com/kornelski/rust_urlencoding/blob/a617c89d16f390e3ab4281ea68c514660b111301/src/dec.rs#L21
656+ // MIT license: https://github.com/kornelski/rust_urlencoding/blob/a617c89d16f390e3ab4281ea68c514660b111301/LICENSE
657+ fn decode_uri_chars < ' a > ( input : & ' a str , should_not_decode : fn ( char ) -> bool ) -> Option < Cow < ' a , str > > {
658+ let mut out = std:: vec:: Vec :: new ( ) ;
659+ let mut has_changes = false ;
660+ let input_bytes = input. as_bytes ( ) ;
661+ let mut i = 0 ;
662+
663+ while i < input_bytes. len ( ) {
664+ let byte = input_bytes[ i] ;
650665 if byte == b'%' {
651- let h1 = bytes. next ( ) ?;
652- let h2 = bytes. next ( ) ?;
653- let decoded_byte = from_hex_digit ( h1) ? * 16 + from_hex_digit ( h2) ?;
666+ // Check if we have enough characters for a percent-encoded sequence
667+ if i + 2 >= input_bytes. len ( ) {
668+ // Invalid: % at end of string or incomplete sequence
669+ return None ;
670+ }
654671
655- // For ASCII characters, check if we should decode them
656- if decoded_byte < 0x80 {
657- let decoded_char = decoded_byte as char ;
658- if should_not_decode ( decoded_char) {
659- // Keep the original percent-encoded form
660- out. push ( b'%' ) ;
661- out. push ( h1) ;
662- out. push ( h2) ;
663- } else {
664- out. push ( decoded_byte) ;
665- }
666- } else {
667- // For multi-byte UTF-8 sequences, we need to collect all bytes
668- let mut utf8_bytes = vec ! [ decoded_byte] ;
669- let expected_bytes = if decoded_byte >= 0xF0 {
670- 4
671- } else if decoded_byte >= 0xE0 {
672- 3
672+ let h1 = input_bytes[ i + 1 ] ;
673+ let h2 = input_bytes[ i + 2 ] ;
674+
675+ if let ( Some ( d1) , Some ( d2) ) = ( from_hex_digit ( h1) , from_hex_digit ( h2) ) {
676+ let decoded_byte = d1 * 16 + d2;
677+
678+ if decoded_byte < 0x80 {
679+ let decoded_char = decoded_byte as char ;
680+ if should_not_decode ( decoded_char) {
681+ // Keep the original percent-encoded form
682+ if !has_changes {
683+ has_changes = true ;
684+ out. extend_from_slice ( & input_bytes[ 0 ..i + 3 ] ) ;
685+ } else {
686+ out. push ( b'%' ) ;
687+ out. push ( h1) ;
688+ out. push ( h2) ;
689+ }
690+ i += 3 ;
691+ continue ;
692+ } else {
693+ // Decode the character
694+ if !has_changes {
695+ has_changes = true ;
696+ out. extend_from_slice ( & input_bytes[ 0 ..i] ) ;
697+ }
698+ out. push ( decoded_byte) ;
699+ i += 3 ;
700+ continue ;
701+ }
673702 } else {
674- 2
675- } ;
676-
677- for _ in 1 ..expected_bytes {
678- if bytes. next ( ) != Some ( b'%' ) {
703+ // Multi-byte UTF-8 handling
704+ let expected_bytes = if decoded_byte >= 0xF0 {
705+ 4
706+ } else if decoded_byte >= 0xE0 {
707+ 3
708+ } else {
709+ 2
710+ } ;
711+
712+ if i + expected_bytes * 3 > input_bytes. len ( ) {
679713 return None ;
680714 }
681- let h1 = bytes. next ( ) ?;
682- let h2 = bytes. next ( ) ?;
683- let byte = from_hex_digit ( h1) ? * 16 + from_hex_digit ( h2) ?;
684- utf8_bytes. push ( byte) ;
685- }
686-
687- // Convert bytes to string and check if we should decode
688- let decoded_str = String :: from_utf8 ( utf8_bytes) . ok ( ) ?;
689- if let Some ( decoded_char) = decoded_str. chars ( ) . next ( ) {
690- if should_not_decode ( decoded_char) {
691- // For multi-byte sequences that shouldn't be decoded,
692- // we conservatively don't fold to avoid complexity
693- return None ;
715+
716+ let mut utf8_bytes = vec ! [ decoded_byte] ;
717+ let mut byte_index = i + 3 ;
718+
719+ for _ in 1 ..expected_bytes {
720+ if input_bytes[ byte_index] != b'%' {
721+ return None ;
722+ }
723+ let h1 = input_bytes[ byte_index + 1 ] ;
724+ let h2 = input_bytes[ byte_index + 2 ] ;
725+ let byte = from_hex_digit ( h1) ? * 16 + from_hex_digit ( h2) ?;
726+ utf8_bytes. push ( byte) ;
727+ byte_index += 3 ;
728+ }
729+
730+ let decoded_str = String :: from_utf8 ( utf8_bytes) . ok ( ) ?;
731+ if let Some ( decoded_char) = decoded_str. chars ( ) . next ( ) {
732+ if should_not_decode ( decoded_char) {
733+ return None ;
734+ } else {
735+ if !has_changes {
736+ has_changes = true ;
737+ out. extend_from_slice ( & input_bytes[ 0 ..i] ) ;
738+ }
739+ out. extend_from_slice ( decoded_str. as_bytes ( ) ) ;
740+ i = byte_index;
741+ continue ;
742+ }
694743 } else {
695- out . extend_from_slice ( decoded_str . as_bytes ( ) ) ;
744+ return None ;
696745 }
697- } else {
698- return None ;
699746 }
747+ } else {
748+ // Invalid hex digits - this should cause a URIError at runtime
749+ return None ;
700750 }
701751 } else {
702- out. push ( byte) ;
752+ // Regular character, copy if we're tracking changes
753+ if has_changes {
754+ out. push ( byte) ;
755+ }
756+ i += 1 ;
703757 }
704758 }
705- String :: from_utf8 ( out) . ok ( )
759+
760+ if has_changes {
761+ Some ( Cow :: Owned ( String :: from_utf8 ( out) . ok ( ) ?) )
762+ } else {
763+ Some ( Cow :: Borrowed ( input) )
764+ }
706765}
0 commit comments