Skip to content

Commit 2630048

Browse files
committed
Fixes for URI decoding.
JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg@inf.u-szeged.hu
1 parent caeae0f commit 2630048

File tree

2 files changed

+47
-5
lines changed

2 files changed

+47
-5
lines changed

jerry-core/ecma/builtin-objects/ecma-builtin-global.cpp

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,13 @@ static uint8_t unescaped_uri_component_set[16] =
747747
*/
748748
#define URI_ENCODED_BYTE_SIZE (3)
749749

750+
/*
751+
* These two types show whether the byte is present in
752+
* the original stream or decoded from a %xx sequence.
753+
*/
754+
#define URI_DECODE_ORIGINAL_BYTE 0
755+
#define URI_DECODE_DECODED_BYTE 1
756+
750757
/**
751758
* Helper function to decode URI.
752759
*
@@ -835,23 +842,27 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
835842
if (ecma_is_completion_value_empty (ret_value))
836843
{
837844
MEM_DEFINE_LOCAL_ARRAY (output_start_p,
838-
output_size,
845+
output_size * 2,
839846
lit_utf8_byte_t);
840847

841848
input_char_p = input_start_p;
842849
lit_utf8_byte_t *output_char_p = output_start_p;
850+
lit_utf8_byte_t *output_type_p = output_start_p + output_size;
843851

844852
while (input_char_p < input_end_p)
845853
{
846854
/* Input decode. */
847855
if (*input_char_p != '%')
848856
{
857+
*output_type_p++ = URI_DECODE_ORIGINAL_BYTE;
849858
*output_char_p = *input_char_p;
850859
output_char_p++;
851860
input_char_p++;
852861
continue;
853862
}
854863

864+
*output_type_p++ = URI_DECODE_DECODED_BYTE;
865+
855866
lit_code_point_t decoded_byte;
856867

857868
lit_read_code_point_from_hex (input_char_p + 1, 2, &decoded_byte);
@@ -886,16 +897,38 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
886897
if (valid_utf8)
887898
{
888899
lit_utf8_iterator_t characters = lit_utf8_iterator_create (output_start_p, output_size);
900+
output_type_p = output_start_p + output_size;
901+
889902
while (!lit_utf8_iterator_is_eos (&characters))
890903
{
904+
bool original_byte = output_type_p[characters.buf_pos.offset] == URI_DECODE_ORIGINAL_BYTE;
905+
891906
ecma_char_t character = lit_utf8_iterator_read_next (&characters);
892907

893908
/* Surrogate fragments are allowed in JS, but not accepted by URI decoding. */
894-
if (lit_is_code_unit_low_surrogate (character)
895-
|| lit_is_code_unit_high_surrogate (character))
909+
if (!original_byte)
896910
{
897-
valid_utf8 = false;
898-
break;
911+
if (lit_is_code_unit_high_surrogate (character))
912+
{
913+
/* Note: unpaired high/low surrogates decoded from %xx sequences are not allowed in the stream. */
914+
if (lit_utf8_iterator_is_eos (&characters))
915+
{
916+
valid_utf8 = false;
917+
break;
918+
}
919+
920+
if (output_type_p[characters.buf_pos.offset] == URI_DECODE_ORIGINAL_BYTE
921+
|| !lit_is_code_unit_low_surrogate (lit_utf8_iterator_read_next (&characters)))
922+
{
923+
valid_utf8 = false;
924+
break;
925+
}
926+
}
927+
else if (lit_is_code_unit_low_surrogate (character))
928+
{
929+
valid_utf8 = false;
930+
break;
931+
}
899932
}
900933
}
901934
}

tests/jerry/global-uri-coding.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,12 @@ assert (decodeURI ({ x:1 }) === "[object Object]");
120120
assert (encodeURI (void 0) === "undefined");
121121
assert (encodeURI (216.000e1) === "2160");
122122

123+
// Combining surrogate fragments
124+
125+
assert (decodeURI("\ud800\udc00 \ud800 \udc00") === "\ud800\udc00 \ud800 \udc00");
126+
assert (decodeURI("%f0%90%80%80") === "\ud800\udc00");
127+
assert (decodeURI("\ud800%f0%90%80%80\ud800") === "\ud800\ud800\udc00\ud800");
128+
assert (decodeURI("\udc00%f0%90%80%80\udc00") === "\udc00\ud800\udc00\udc00");
129+
130+
checkDecodeURIParseError ("\ud800%ed%b0%80");
131+
checkDecodeURIParseError ("%ed%a0%80\udc00");

0 commit comments

Comments
 (0)