Skip to content

Commit 577c492

Browse files
committed
Change internal encoding of strings to CESU-8
JerryScript-DCO-1.0-Signed-off-by: Zsolt Borbély <zsborbely.u-szeged@partner.samsung.com>
JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai <dbatyai.u-szeged@partner.samsung.com>
1 parent 6697523 commit 577c492

36 files changed

+936
-746
lines changed

jerry-core/ecma/base/ecma-globals.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -836,12 +836,6 @@ typedef struct ecma_string_t
836836
{
837837
mem_cpointer_t string1_cp : ECMA_POINTER_FIELD_WIDTH;
838838
mem_cpointer_t string2_cp : ECMA_POINTER_FIELD_WIDTH;
839-
840-
/**
841-
* Flag indicating that last code_unit of first string in concatenation is high surrogate
842-
* and first code_unit of second string is low surrogate
843-
*/
844-
unsigned int is_surrogate_pair_sliced : 1;
845839
} concatenation;
846840

847841
/** Identifier of magic string */

jerry-core/ecma/base/ecma-helpers-conversion.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
459459
}
460460

461461
/* Checking if significant part of parse string is equal to "Infinity" */
462-
const lit_utf8_byte_t *infinity_zt_str_p = lit_get_magic_string_utf8 (LIT_MAGIC_STRING_INFINITY_UL);
462+
const lit_utf8_byte_t *infinity_zt_str_p = lit_get_magic_string_cesu8 (LIT_MAGIC_STRING_INFINITY_UL);
463463

464464
for (const lit_utf8_byte_t *iter_p = begin_p, *iter_infinity_p = infinity_zt_str_p;
465465
;
@@ -782,9 +782,9 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
782782
* @return number of bytes copied to buffer
783783
*/
784784
ssize_t
785-
ecma_uint32_to_utf8_string (uint32_t value, /**< value to convert */
786-
lit_utf8_byte_t *out_buffer_p, /**< buffer for string */
787-
ssize_t buffer_size) /**< size of buffer */
785+
ecma_uint32_to_cesu8_string (uint32_t value, /**< value to convert */
786+
lit_utf8_byte_t *out_buffer_p, /**< buffer for string */
787+
ssize_t buffer_size) /**< size of buffer */
788788
{
789789
const lit_utf8_byte_t digits[10] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
790790

@@ -813,7 +813,7 @@ ecma_uint32_to_utf8_string (uint32_t value, /**< value to convert */
813813
}
814814

815815
return (ssize_t) bytes_copied;
816-
} /* ecma_uint32_to_utf8_string */
816+
} /* ecma_uint32_to_cesu8_string */
817817

818818
/**
819819
* ECMA-defined conversion of UInt32 value to Number value
@@ -1321,9 +1321,9 @@ ecma_number_to_decimal (ecma_number_t num, /**< ecma-number */
13211321
* @return size of utf-8 string
13221322
*/
13231323
lit_utf8_size_t
1324-
ecma_number_to_utf8_string (ecma_number_t num, /**< ecma-number */
1325-
lit_utf8_byte_t *buffer_p, /**< buffer for utf-8 string */
1326-
ssize_t buffer_size) /**< size of buffer */
1324+
ecma_number_to_cesu8_string (ecma_number_t num, /**< ecma-number */
1325+
lit_utf8_byte_t *buffer_p, /**< buffer for utf-8 string */
1326+
ssize_t buffer_size) /**< size of buffer */
13271327
{
13281328
const lit_utf8_byte_t digits[10] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
13291329
const lit_utf8_byte_t e_chars[2] = { 'e', 'E' };
@@ -1355,7 +1355,7 @@ ecma_number_to_utf8_string (ecma_number_t num, /**< ecma-number */
13551355
// 3.
13561356
*dst_p++ = minus_char;
13571357
ssize_t new_buffer_size = (buffer_size - (dst_p - buffer_p));
1358-
size = 1 + ecma_number_to_utf8_string (ecma_number_negate (num), dst_p, new_buffer_size);
1358+
size = 1 + ecma_number_to_cesu8_string (ecma_number_negate (num), dst_p, new_buffer_size);
13591359
}
13601360
else if (ecma_number_is_infinity (num))
13611361
{
@@ -1373,7 +1373,7 @@ ecma_number_to_utf8_string (ecma_number_t num, /**< ecma-number */
13731373
uint32_t num_uint32 = ecma_number_to_uint32 (num);
13741374
if (ecma_uint32_to_number (num_uint32) == num)
13751375
{
1376-
size = (lit_utf8_size_t) ecma_uint32_to_utf8_string (num_uint32, dst_p, buffer_size);
1376+
size = (lit_utf8_size_t) ecma_uint32_to_cesu8_string (num_uint32, dst_p, buffer_size);
13771377
}
13781378
else
13791379
{
@@ -1522,7 +1522,7 @@ ecma_number_to_utf8_string (ecma_number_t num, /**< ecma-number */
15221522
}
15231523

15241524
return size;
1525-
} /* ecma_number_to_utf8_string */
1525+
} /* ecma_number_to_cesu8_string */
15261526

15271527
/**
15281528
* @}

jerry-core/ecma/base/ecma-helpers-string.cpp

Lines changed: 101 additions & 159 deletions
Large diffs are not rendered by default.

jerry-core/ecma/base/ecma-helpers.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ extern bool ecma_is_completion_value_normal_false (ecma_completion_value_t value
110110
extern bool ecma_is_completion_value_empty (ecma_completion_value_t value);
111111

112112
/* ecma-helpers-string.c */
113-
extern ecma_string_t* ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *, lit_utf8_size_t);
113+
extern ecma_string_t* ecma_new_ecma_string_from_cesu8 (const lit_utf8_byte_t *, lit_utf8_size_t);
114114
extern ecma_string_t* ecma_new_ecma_string_from_code_unit (ecma_char_t);
115115
extern ecma_string_t* ecma_new_ecma_string_from_uint32 (uint32_t uint_number);
116116
extern ecma_string_t* ecma_new_ecma_string_from_number (ecma_number_t number);
@@ -129,9 +129,9 @@ extern ecma_number_t ecma_string_to_number (const ecma_string_t *str_p);
129129
extern bool ecma_string_get_array_index (const ecma_string_t *str_p, uint32_t *index);
130130

131131
extern ssize_t __attr_return_value_should_be_checked___
132-
ecma_string_to_utf8_string (const ecma_string_t *string_desc_p,
133-
lit_utf8_byte_t *buffer_p,
134-
ssize_t buffer_size);
132+
ecma_string_to_cesu8_string (const ecma_string_t *string_desc_p,
133+
lit_utf8_byte_t *buffer_p,
134+
ssize_t buffer_size);
135135

136136
extern bool ecma_compare_ecma_strings_equal_hashes (const ecma_string_t *string1_p,
137137
const ecma_string_t *string2_p);
@@ -318,12 +318,12 @@ ecma_free_external_pointer_in_property (ecma_property_t *prop_p);
318318

319319
/* ecma-helpers-conversion.cpp */
320320
extern ecma_number_t ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, lit_utf8_size_t str_size);
321-
extern ssize_t ecma_uint32_to_utf8_string (uint32_t value, lit_utf8_byte_t *out_buffer_p, ssize_t buffer_size);
321+
extern ssize_t ecma_uint32_to_cesu8_string (uint32_t value, lit_utf8_byte_t *out_buffer_p, ssize_t buffer_size);
322322
extern uint32_t ecma_number_to_uint32 (ecma_number_t value);
323323
extern int32_t ecma_number_to_int32 (ecma_number_t value);
324324
extern ecma_number_t ecma_int32_to_number (int32_t value);
325325
extern ecma_number_t ecma_uint32_to_number (uint32_t value);
326-
extern lit_utf8_size_t ecma_number_to_utf8_string (ecma_number_t, lit_utf8_byte_t *, ssize_t);
326+
extern lit_utf8_size_t ecma_number_to_cesu8_string (ecma_number_t, lit_utf8_byte_t *, ssize_t);
327327

328328
#endif /* !JERRY_ECMA_HELPERS_H */
329329

jerry-core/ecma/builtin-objects/ecma-builtin-date.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ ecma_date_parse_date_chars (lit_utf8_iterator_t *iter, /**< iterator of the utf8
6464
return ecma_number_make_nan ();
6565
}
6666

67-
copy_size += lit_get_unicode_char_size_by_utf8_first_byte (*(iter->buf_p + iter->buf_pos.offset));
67+
copy_size += lit_get_unicode_char_size_by_cesu8_first_byte (*(iter->buf_p + iter->buf_pos.offset));
6868
lit_utf8_iterator_incr (iter);
6969
}
7070

@@ -208,7 +208,7 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
208208
lit_utf8_size_t date_str_size = ecma_string_get_size (date_str_p);
209209
MEM_DEFINE_LOCAL_ARRAY (date_start_p, date_str_size, lit_utf8_byte_t);
210210

211-
ssize_t sz = ecma_string_to_utf8_string (date_str_p, date_start_p, (ssize_t) date_str_size);
211+
ssize_t sz = ecma_string_to_cesu8_string (date_str_p, date_start_p, (ssize_t) date_str_size);
212212
JERRY_ASSERT (sz >= 0);
213213

214214
lit_utf8_iterator_t iter = lit_utf8_iterator_create (date_start_p, date_str_size);

jerry-core/ecma/builtin-objects/ecma-builtin-error-prototype.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ ecma_builtin_error_prototype_object_to_string (ecma_value_t this_arg) /**< this
143143
MEM_DEFINE_LOCAL_ARRAY (ret_str_buffer, buffer_size, lit_utf8_byte_t);
144144
lit_utf8_byte_t *ret_str_buffer_p = ret_str_buffer;
145145

146-
ssize_t bytes = ecma_string_to_utf8_string (name_string_p, ret_str_buffer_p, buffer_size_left);
146+
ssize_t bytes = ecma_string_to_cesu8_string (name_string_p, ret_str_buffer_p, buffer_size_left);
147147
JERRY_ASSERT (bytes >= 0 && buffer_size_left - bytes >= 0);
148148

149149
buffer_size_left -= bytes;
@@ -161,14 +161,14 @@ ecma_builtin_error_prototype_object_to_string (ecma_value_t this_arg) /**< this
161161
buffer_size_left = buffer_size - (ret_str_buffer_p - ret_str_buffer);
162162
JERRY_ASSERT (buffer_size_left >= 0);
163163

164-
bytes = ecma_string_to_utf8_string (msg_string_p, ret_str_buffer_p, buffer_size_left);
164+
bytes = ecma_string_to_cesu8_string (msg_string_p, ret_str_buffer_p, buffer_size_left);
165165
JERRY_ASSERT (bytes >= 0 && buffer_size_left - bytes >= 0);
166166

167167
buffer_size_left -= bytes;
168168
JERRY_ASSERT (buffer_size_left >= 0);
169169

170-
ret_str_p = ecma_new_ecma_string_from_utf8 (ret_str_buffer,
171-
(jerry_api_size_t) (buffer_size - buffer_size_left));
170+
ret_str_p = ecma_new_ecma_string_from_cesu8 (ret_str_buffer,
171+
(jerry_api_size_t) (buffer_size - buffer_size_left));
172172

173173
MEM_FINALIZE_LOCAL_ARRAY (ret_str_buffer);
174174
}

jerry-core/ecma/builtin-objects/ecma-builtin-function.cpp

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -94,14 +94,16 @@ ecma_builtin_function_helper_get_arguments (const ecma_value_t *arguments_list_p
9494
lit_utf8_size_t str_size = ecma_string_get_size (str_p);
9595
MEM_DEFINE_LOCAL_ARRAY (start_p, str_size, lit_utf8_byte_t);
9696

97-
ssize_t sz = ecma_string_to_utf8_string (str_p, start_p, (ssize_t) str_size);
97+
ssize_t sz = ecma_string_to_cesu8_string (str_p, start_p, (ssize_t) str_size);
9898
JERRY_ASSERT (sz >= 0);
9999

100-
lit_utf8_iterator_t iter = lit_utf8_iterator_create (start_p, str_size);
100+
lit_utf8_byte_t *current_p = start_p;
101+
const lit_utf8_byte_t *string_end_p = start_p + str_size;
101102

102-
while (!lit_utf8_iterator_is_eos (&iter))
103+
while (current_p < string_end_p)
103104
{
104-
ecma_char_t current_char = lit_utf8_iterator_read_next (&iter);
105+
ecma_char_t current_char;
106+
current_p += lit_read_code_unit_from_cesu8 (current_p, &current_char);
105107

106108
if (current_char == ',')
107109
{
@@ -194,36 +196,39 @@ ecma_builtin_function_dispatch_construct (const ecma_value_t *arguments_list_p,
194196
{
195197
MEM_DEFINE_LOCAL_ARRAY (start_p, str_size, lit_utf8_byte_t);
196198

197-
ssize_t sz = ecma_string_to_utf8_string (arguments_str_p, start_p, (ssize_t) str_size);
199+
ssize_t sz = ecma_string_to_cesu8_string (arguments_str_p, start_p, (ssize_t) str_size);
198200
JERRY_ASSERT (sz >= 0);
199201

200-
lit_utf8_iterator_t iter = lit_utf8_iterator_create (start_p, str_size);
201-
ecma_length_t last_separator = lit_utf8_iterator_get_index (&iter);
202-
ecma_length_t end_position;
202+
lit_utf8_byte_t *current_p = start_p;
203+
lit_utf8_byte_t *last_separator = start_p;
204+
lit_utf8_byte_t *end_position;
205+
const lit_utf8_byte_t *string_end_p = start_p + str_size;
203206
ecma_string_t *param_str_p;
204207

205-
while (!lit_utf8_iterator_is_eos (&iter))
208+
while (current_p < string_end_p)
206209
{
207-
ecma_char_t current_char = lit_utf8_iterator_read_next (&iter);
210+
ecma_char_t current_char;
211+
lit_utf8_size_t read_size = lit_read_code_unit_from_cesu8 (current_p, &current_char);
208212

209213
if (current_char == ',')
210214
{
211-
lit_utf8_iterator_decr (&iter);
212-
end_position = lit_utf8_iterator_get_index (&iter);
215+
end_position = current_p;
213216

214-
param_str_p = ecma_string_substr (arguments_str_p, last_separator, end_position);
217+
param_str_p = ecma_new_ecma_string_from_cesu8 (last_separator,
218+
(lit_utf8_size_t) (end_position - last_separator));
215219
string_params_p[params_count] = ecma_string_trim (param_str_p);
216220
ecma_deref_ecma_string (param_str_p);
217221

218-
lit_utf8_iterator_incr (&iter);
219-
last_separator = lit_utf8_iterator_get_index (&iter);
220-
222+
last_separator = current_p + read_size;
221223
params_count++;
222224
}
225+
226+
current_p += read_size;
223227
}
224228

225-
end_position = lit_utf8_string_length (start_p, str_size);
226-
param_str_p = ecma_string_substr (arguments_str_p, last_separator, end_position);
229+
end_position = (lit_utf8_byte_t *) string_end_p;
230+
param_str_p = ecma_new_ecma_string_from_cesu8 (last_separator,
231+
(lit_utf8_size_t) (end_position - last_separator));
227232
string_params_p[params_count] = ecma_string_trim (param_str_p);
228233
ecma_deref_ecma_string (param_str_p);
229234
params_count++;
@@ -260,9 +265,9 @@ ecma_builtin_function_dispatch_construct (const ecma_value_t *arguments_list_p,
260265
ssize_t utf8_string_buffer_pos = 0;
261266
for (uint32_t i = 0; i < params_count; i++)
262267
{
263-
ssize_t sz = ecma_string_to_utf8_string (string_params_p[i],
264-
&utf8_string_buffer_p[utf8_string_buffer_pos],
265-
(ssize_t) strings_buffer_size - utf8_string_buffer_pos);
268+
ssize_t sz = ecma_string_to_cesu8_string (string_params_p[i],
269+
&utf8_string_buffer_p[utf8_string_buffer_pos],
270+
(ssize_t) strings_buffer_size - utf8_string_buffer_pos);
266271
JERRY_ASSERT (sz >= 0);
267272

268273
utf8_string_params_p[i] = utf8_string_buffer_p + utf8_string_buffer_pos;

0 commit comments

Comments
 (0)