Skip to content

Commit 579b1ed

Browse files
committed
Refactor builtins to handle CESU-8 encoded strings.
JerryScript-DCO-1.0-Signed-off-by: Zsolt Borbély zsborbely.u-szeged@partner.samsung.com
JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai.u-szeged@partner.samsung.com
1 parent dcd610b commit 579b1ed

17 files changed: +517 -696 lines changed

jerry-core/ecma/base/ecma-helpers-conversion.cpp

Lines changed: 11 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -354,42 +354,40 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
354354
return ECMA_NUMBER_ZERO;
355355
}
356356

357-
lit_utf8_iterator_t iter = lit_utf8_iterator_create (str_p, str_size);
357+
lit_utf8_byte_t *str_curr_p = (lit_utf8_byte_t *) str_p;
358+
const lit_utf8_byte_t *str_end_p = str_p + str_size;
358359
ecma_char_t code_unit;
359360

360-
while (!lit_utf8_iterator_is_eos (&iter))
361+
while (str_curr_p < str_end_p)
361362
{
362-
code_unit = lit_utf8_iterator_peek_next (&iter);
363+
code_unit = lit_utf8_peek_next (str_curr_p);
363364
if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
364365
{
365-
lit_utf8_iterator_incr (&iter);
366+
lit_utf8_incr (&str_curr_p);
366367
}
367368
else
368369
{
369370
break;
370371
}
371372
}
372373

373-
JERRY_ASSERT (!iter.buf_pos.is_non_bmp_middle);
374-
const lit_utf8_byte_t *begin_p = iter.buf_p + iter.buf_pos.offset;
374+
const lit_utf8_byte_t *begin_p = str_curr_p;
375+
str_curr_p = (lit_utf8_byte_t *) str_end_p;
375376

376-
iter = lit_utf8_iterator_create (iter.buf_p + iter.buf_pos.offset, str_size - iter.buf_pos.offset);
377-
lit_utf8_iterator_seek_eos (&iter);
378-
while (!lit_utf8_iterator_is_bos (&iter))
377+
while (str_curr_p > str_p)
379378
{
380-
code_unit = lit_utf8_iterator_peek_prev (&iter);
379+
code_unit = lit_utf8_peek_prev (str_curr_p);
381380
if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
382381
{
383-
lit_utf8_iterator_decr (&iter);
382+
lit_utf8_decr (&str_curr_p);
384383
}
385384
else
386385
{
387386
break;
388387
}
389388
}
390389

391-
JERRY_ASSERT (!iter.buf_pos.is_non_bmp_middle);
392-
const lit_utf8_byte_t *end_p = iter.buf_p + iter.buf_pos.offset - 1;
390+
const lit_utf8_byte_t *end_p = str_curr_p - 1;
393391

394392
if (begin_p > end_p)
395393
{

jerry-core/ecma/builtin-objects/ecma-builtin-date.cpp

Lines changed: 50 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -48,27 +48,22 @@
4848
* @return NaN if cannot read from string, ToNumber() otherwise
4949
*/
5050
static ecma_number_t
51-
ecma_date_parse_date_chars (lit_utf8_iterator_t *iter, /**< iterator of the utf8 string */
51+
ecma_date_parse_date_chars (lit_utf8_byte_t **str_p, /**< pointer to the cesu8 string */
52+
const lit_utf8_byte_t *str_end_p, /**< pointer to the end of the string */
5253
uint32_t num_of_chars) /**< number of characters to read and convert */
5354
{
5455
JERRY_ASSERT (num_of_chars > 0);
55-
56-
lit_utf8_size_t copy_size = 0;
57-
const lit_utf8_byte_t *str_start_p = iter->buf_p + iter->buf_pos.offset;
56+
const lit_utf8_byte_t *str_start_p = *str_p;
5857

5958
while (num_of_chars--)
6059
{
61-
if (lit_utf8_iterator_is_eos (iter)
62-
|| !lit_char_is_decimal_digit (lit_utf8_iterator_peek_next (iter)))
60+
if (*str_p >= str_end_p || !lit_char_is_decimal_digit (lit_utf8_read_next (str_p)))
6361
{
6462
return ecma_number_make_nan ();
6563
}
66-
67-
copy_size += lit_get_unicode_char_size_by_utf8_first_byte (*(iter->buf_p + iter->buf_pos.offset));
68-
lit_utf8_iterator_incr (iter);
6964
}
7065

71-
return ecma_utf8_string_to_number (str_start_p, copy_size);
66+
return ecma_utf8_string_to_number (str_start_p, (lit_utf8_size_t) (*str_p - str_start_p));
7267
} /* ecma_date_parse_date_chars */
7368

7469
/**
@@ -211,10 +206,11 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
211206
ssize_t sz = ecma_string_to_utf8_string (date_str_p, date_start_p, (ssize_t) date_str_size);
212207
JERRY_ASSERT (sz >= 0);
213208

214-
lit_utf8_iterator_t iter = lit_utf8_iterator_create (date_start_p, date_str_size);
209+
lit_utf8_byte_t *date_str_curr_p = date_start_p;
210+
const lit_utf8_byte_t *date_str_end_p = date_start_p + date_str_size;
215211

216212
/* 1. read year */
217-
ecma_number_t year = ecma_date_parse_date_chars (&iter, 4);
213+
ecma_number_t year = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 4);
218214

219215
if (!ecma_number_is_nan (year)
220216
&& year >= 0)
@@ -224,12 +220,12 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
224220
ecma_number_t time = ECMA_NUMBER_ZERO;
225221

226222
/* 2. read month if any */
227-
if (!lit_utf8_iterator_is_eos (&iter)
228-
&& lit_utf8_iterator_peek_next (&iter) == '-')
223+
if (date_str_curr_p < date_str_end_p
224+
&& *date_str_curr_p == '-')
229225
{
230226
/* eat up '-' */
231-
lit_utf8_iterator_incr (&iter);
232-
month = ecma_date_parse_date_chars (&iter, 2);
227+
date_str_curr_p++;
228+
month = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 2);
233229

234230
if (month > 12 || month < 1)
235231
{
@@ -238,12 +234,12 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
238234
}
239235

240236
/* 3. read day if any */
241-
if (!lit_utf8_iterator_is_eos (&iter)
242-
&& lit_utf8_iterator_peek_next (&iter) == '-')
237+
if (date_str_curr_p < date_str_end_p
238+
&& *date_str_curr_p == '-')
243239
{
244240
/* eat up '-' */
245-
lit_utf8_iterator_incr (&iter);
246-
day = ecma_date_parse_date_chars (&iter, 2);
241+
date_str_curr_p++;
242+
day = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 2);
247243

248244
if (day < 1 || day > 31)
249245
{
@@ -252,24 +248,24 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
252248
}
253249

254250
/* 4. read time if any */
255-
if (!lit_utf8_iterator_is_eos (&iter)
256-
&& lit_utf8_iterator_peek_next (&iter) == 'T')
251+
if (date_str_curr_p < date_str_end_p
252+
&& *date_str_curr_p == 'T')
257253
{
254+
/* eat up 'T' */
255+
date_str_curr_p++;
256+
258257
ecma_number_t hours = ECMA_NUMBER_ZERO;
259258
ecma_number_t minutes = ECMA_NUMBER_ZERO;
260259
ecma_number_t seconds = ECMA_NUMBER_ZERO;
261260
ecma_number_t milliseconds = ECMA_NUMBER_ZERO;
262261

263-
ecma_length_t num_of_visited_chars = lit_utf8_iterator_get_index (&iter);
264-
ecma_length_t date_str_len = lit_utf8_string_length (iter.buf_p, iter.buf_size) - 1;
262+
ecma_length_t remaining_length = lit_utf8_string_length (date_str_curr_p,
263+
(lit_utf8_size_t) (date_str_end_p - date_str_curr_p));
265264

266-
if ((date_str_len - num_of_visited_chars) >= 5)
265+
if (remaining_length >= 5)
267266
{
268-
/* eat up 'T' */
269-
lit_utf8_iterator_incr (&iter);
270-
271267
/* 4.1 read hours and minutes */
272-
hours = ecma_date_parse_date_chars (&iter, 2);
268+
hours = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 2);
273269

274270
if (hours < 0 || hours > 24)
275271
{
@@ -281,33 +277,35 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
281277
}
282278

283279
/* eat up ':' */
284-
lit_utf8_iterator_incr (&iter);
280+
date_str_curr_p++;
285281

286-
minutes = ecma_date_parse_date_chars (&iter, 2);
282+
minutes = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 2);
287283

288284
if (minutes < 0 || minutes > 59)
289285
{
290286
minutes = ecma_number_make_nan ();
291287
}
292288

293289
/* 4.2 read seconds if any */
294-
if (!lit_utf8_iterator_is_eos (&iter) && lit_utf8_iterator_peek_next (&iter) == ':')
290+
if (date_str_curr_p < date_str_end_p
291+
&& *date_str_curr_p == ':')
295292
{
296293
/* eat up ':' */
297-
lit_utf8_iterator_incr (&iter);
298-
seconds = ecma_date_parse_date_chars (&iter, 2);
294+
date_str_curr_p++;
295+
seconds = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 2);
299296

300297
if (seconds < 0 || seconds > 59)
301298
{
302299
seconds = ecma_number_make_nan ();
303300
}
304301

305302
/* 4.3 read milliseconds if any */
306-
if (!lit_utf8_iterator_is_eos (&iter) && lit_utf8_iterator_peek_next (&iter) == '.')
303+
if (date_str_curr_p < date_str_end_p
304+
&& *date_str_curr_p == '.')
307305
{
308306
/* eat up '.' */
309-
lit_utf8_iterator_incr (&iter);
310-
milliseconds = ecma_date_parse_date_chars (&iter, 3);
307+
date_str_curr_p++;
308+
milliseconds = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 3);
311309

312310
if (milliseconds < 0)
313311
{
@@ -324,34 +322,34 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
324322
}
325323

326324
/* 4.4 read timezone if any */
327-
if (!lit_utf8_iterator_is_eos (&iter)
328-
&& lit_utf8_iterator_peek_next (&iter) == 'Z'
325+
if (date_str_curr_p < date_str_end_p
326+
&& *date_str_curr_p == 'Z'
329327
&& !ecma_number_is_nan (time))
330328
{
331-
lit_utf8_iterator_incr (&iter);
329+
date_str_curr_p++;
332330
time = ecma_date_make_time (hours, minutes, seconds, milliseconds);
333331
}
334-
else if (!lit_utf8_iterator_is_eos (&iter)
335-
&& (lit_utf8_iterator_peek_next (&iter) == '+'
336-
|| lit_utf8_iterator_peek_next (&iter) == '-'))
332+
else if (date_str_curr_p < date_str_end_p
333+
&& (*date_str_curr_p == '+' || *date_str_curr_p == '-'))
337334
{
338-
ecma_length_t num_of_visited_chars = lit_utf8_iterator_get_index (&iter);
339-
ecma_length_t date_str_len = lit_utf8_string_length (iter.buf_p, iter.buf_size) - 1;
335+
ecma_length_t remaining_length;
336+
remaining_length = lit_utf8_string_length (date_str_curr_p,
337+
(lit_utf8_size_t) (date_str_end_p - date_str_curr_p)) - 1;
340338

341-
if ((date_str_len - num_of_visited_chars) == 5)
339+
if (remaining_length == 5)
342340
{
343341
bool is_negative = false;
344342

345-
if (lit_utf8_iterator_peek_next (&iter) == '-')
343+
if (*date_str_curr_p == '-')
346344
{
347345
is_negative = true;
348346
}
349347

350348
/* eat up '+/-' */
351-
lit_utf8_iterator_incr (&iter);
349+
date_str_curr_p++;
352350

353351
/* read hours and minutes */
354-
hours = ecma_date_parse_date_chars (&iter, 2);
352+
hours = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 2);
355353

356354
if (hours < 0 || hours > 24)
357355
{
@@ -363,9 +361,9 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
363361
}
364362

365363
/* eat up ':' */
366-
lit_utf8_iterator_incr (&iter);
364+
date_str_curr_p++;
367365

368-
minutes = ecma_date_parse_date_chars (&iter, 2);
366+
minutes = ecma_date_parse_date_chars (&date_str_curr_p, date_str_end_p, 2);
369367

370368
if (minutes < 0 || minutes > 59)
371369
{
@@ -384,7 +382,7 @@ ecma_builtin_date_parse (ecma_value_t this_arg __attr_unused___, /**< this argum
384382
}
385383
}
386384

387-
if (lit_utf8_iterator_is_eos (&iter))
385+
if (date_str_curr_p >= date_str_end_p)
388386
{
389387
ecma_number_t date = ecma_date_make_day (year, month - 1, day);
390388
*date_num_p = ecma_date_make_date (date, time);

0 commit comments

Comments
 (0)