@@ -747,6 +747,13 @@ static uint8_t unescaped_uri_component_set[16] =
747
747
*/
748
748
#define URI_ENCODED_BYTE_SIZE (3 )
749
749
750
+ /*
751
+ * These two types show whether the byte is present in
752
+ * the original stream or decoded from a %xx sequence.
753
+ */
754
+ #define URI_DECODE_ORIGINAL_BYTE 0
755
+ #define URI_DECODE_DECODED_BYTE 1
756
+
750
757
/* *
751
758
* Helper function to decode URI.
752
759
*
@@ -835,23 +842,27 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
835
842
if (ecma_is_completion_value_empty (ret_value))
836
843
{
837
844
MEM_DEFINE_LOCAL_ARRAY (output_start_p,
838
- output_size,
845
+ output_size * 2 ,
839
846
lit_utf8_byte_t );
840
847
841
848
input_char_p = input_start_p;
842
849
lit_utf8_byte_t *output_char_p = output_start_p;
850
+ lit_utf8_byte_t *output_type_p = output_start_p + output_size;
843
851
844
852
while (input_char_p < input_end_p)
845
853
{
846
854
/* Input decode. */
847
855
if (*input_char_p != ' %' )
848
856
{
857
+ *output_type_p++ = URI_DECODE_ORIGINAL_BYTE;
849
858
*output_char_p = *input_char_p;
850
859
output_char_p++;
851
860
input_char_p++;
852
861
continue ;
853
862
}
854
863
864
+ *output_type_p++ = URI_DECODE_DECODED_BYTE;
865
+
855
866
lit_code_point_t decoded_byte;
856
867
857
868
lit_read_code_point_from_hex (input_char_p + 1 , 2 , &decoded_byte);
@@ -886,16 +897,38 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
886
897
if (valid_utf8)
887
898
{
888
899
lit_utf8_iterator_t characters = lit_utf8_iterator_create (output_start_p, output_size);
900
+ output_type_p = output_start_p + output_size;
901
+
889
902
while (!lit_utf8_iterator_is_eos (&characters))
890
903
{
904
+ bool original_byte = output_type_p[characters.buf_pos .offset ] == URI_DECODE_ORIGINAL_BYTE;
905
+
891
906
ecma_char_t character = lit_utf8_iterator_read_next (&characters);
892
907
893
908
/* Surrogate fragments are allowed in JS, but not accepted by URI decoding. */
894
- if (lit_is_code_unit_low_surrogate (character)
895
- || lit_is_code_unit_high_surrogate (character))
909
+ if (!original_byte)
896
910
{
897
- valid_utf8 = false ;
898
- break ;
911
+ if (lit_is_code_unit_high_surrogate (character))
912
+ {
913
+ /* Note: stray (unpaired) high/low surrogates are not allowed in the stream. */
914
+ if (lit_utf8_iterator_is_eos (&characters))
915
+ {
916
+ valid_utf8 = false ;
917
+ break ;
918
+ }
919
+
920
+ if (output_type_p[characters.buf_pos .offset ] == URI_DECODE_ORIGINAL_BYTE
921
+ || !lit_is_code_unit_low_surrogate (lit_utf8_iterator_read_next (&characters)))
922
+ {
923
+ valid_utf8 = false ;
924
+ break ;
925
+ }
926
+ }
927
+ else if (lit_is_code_unit_low_surrogate (character))
928
+ {
929
+ valid_utf8 = false ;
930
+ break ;
931
+ }
899
932
}
900
933
}
901
934
}
0 commit comments