Skip to content

Commit fd9ff8e

Browse files
committed
Add core unicode functionality.
Add utf-8 processing routines. Change ecma_char_t from char/uint16_t to uint16_t. Apply all utf-8 processing routines. Change char to jerry_api_char in API functions' declarations. JerryScript-DCO-1.0-Signed-off-by: Andrey Shitov a.shitov@samsung.com
1 parent c4b0cd2 commit fd9ff8e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+2450
-1462
lines changed

jerry-core/ecma/base/ecma-globals.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,7 @@ typedef struct
715715
mem_cpointer_t next_chunk_cp;
716716

717717
/** Characters */
718-
uint8_t data[ sizeof (uint64_t) - sizeof (mem_cpointer_t) ];
718+
lit_utf8_byte_t data[ sizeof (uint64_t) - sizeof (mem_cpointer_t) ];
719719
} ecma_collection_chunk_t;
720720

721721
/**

jerry-core/ecma/base/ecma-helpers-conversion.cpp

Lines changed: 70 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "ecma-globals.h"
2424
#include "ecma-helpers.h"
2525
#include "jrt-libc-includes.h"
26+
#include "lit-magic-strings.h"
2627

2728
/*
2829
* \addtogroup ecmahelpersbigintegers Helpers for operations on intermediate 128-bit integers
@@ -325,36 +326,36 @@
325326
*/
326327

327328
/**
328-
* ECMA-defined conversion of string (zero-terminated) to Number.
329+
* ECMA-defined conversion of string to Number.
329330
*
330331
* See also:
331332
* ECMA-262 v5, 9.3.1
332333
*
333334
* @return ecma-number
334335
*/
335336
ecma_number_t
336-
ecma_zt_string_to_number (const ecma_char_t *str_p) /**< zero-terminated string */
337+
ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
338+
lit_utf8_size_t str_size) /**< string size */
337339
{
338340
TODO (Check license issues);
339341

340-
const ecma_char_t dec_digits_range[10] = { '0', '9' };
341-
const ecma_char_t hex_lower_digits_range[10] = { 'a', 'f' };
342-
const ecma_char_t hex_upper_digits_range[10] = { 'A', 'F' };
343-
const ecma_char_t hex_x_chars[2] = { 'x', 'X' };
344-
const ecma_char_t white_space[2] = { ' ', '\n' };
345-
const ecma_char_t e_chars[2] = { 'e', 'E' };
346-
const ecma_char_t plus_char = '+';
347-
const ecma_char_t minus_char = '-';
348-
const ecma_char_t dot_char = '.';
349-
350-
const ecma_char_t *begin_p = str_p;
351-
const ecma_char_t *end_p = begin_p;
352-
353-
while (*end_p != ECMA_CHAR_NULL)
342+
const lit_utf8_byte_t dec_digits_range[10] = { '0', '9' };
343+
const lit_utf8_byte_t hex_lower_digits_range[10] = { 'a', 'f' };
344+
const lit_utf8_byte_t hex_upper_digits_range[10] = { 'A', 'F' };
345+
const lit_utf8_byte_t hex_x_chars[2] = { 'x', 'X' };
346+
const lit_utf8_byte_t white_space[2] = { ' ', '\n' };
347+
const lit_utf8_byte_t e_chars[2] = { 'e', 'E' };
348+
const lit_utf8_byte_t plus_char = '+';
349+
const lit_utf8_byte_t minus_char = '-';
350+
const lit_utf8_byte_t dot_char = '.';
351+
352+
if (str_size == 0)
354353
{
355-
end_p++;
354+
return ECMA_NUMBER_ZERO;
356355
}
357-
end_p--;
356+
357+
const lit_utf8_byte_t *begin_p = str_p;
358+
const lit_utf8_byte_t *end_p = begin_p + str_size - 1;
358359

359360
while (begin_p <= end_p
360361
&& (*begin_p == white_space[0]
@@ -387,7 +388,7 @@ ecma_zt_string_to_number (const ecma_char_t *str_p) /**< zero-terminated string
387388

388389
ecma_number_t num = 0;
389390

390-
for (const ecma_char_t* iter_p = begin_p;
391+
for (const lit_utf8_byte_t * iter_p = begin_p;
391392
iter_p <= end_p;
392393
iter_p++)
393394
{
@@ -438,9 +439,9 @@ ecma_zt_string_to_number (const ecma_char_t *str_p) /**< zero-terminated string
438439
}
439440

440441
/* Checking if significant part of parse string is equal to "Infinity" */
441-
const ecma_char_t *infinity_zt_str_p = lit_get_magic_string_zt (LIT_MAGIC_STRING_INFINITY_UL);
442+
const lit_utf8_byte_t *infinity_zt_str_p = lit_get_magic_string_utf8 (LIT_MAGIC_STRING_INFINITY_UL);
442443

443-
for (const ecma_char_t *iter_p = begin_p, *iter_infinity_p = infinity_zt_str_p;
444+
for (const lit_utf8_byte_t *iter_p = begin_p, *iter_infinity_p = infinity_zt_str_p;
444445
;
445446
iter_infinity_p++, iter_p++)
446447
{
@@ -750,7 +751,7 @@ ecma_zt_string_to_number (const ecma_char_t *str_p) /**< zero-terminated string
750751

751752
return num;
752753
#endif /* CONFIG_ECMA_NUMBER_TYPE == CONFIG_ECMA_NUMBER_FLOAT32 */
753-
} /* ecma_zt_string_to_number */
754+
} /* ecma_utf8_string_to_number */
754755

755756
/**
756757
* ECMA-defined conversion of UInt32 to String (utf-8, not zero-terminated).
@@ -761,16 +762,14 @@ ecma_zt_string_to_number (const ecma_char_t *str_p) /**< zero-terminated string
761762
* @return number of bytes copied to buffer
762763
*/
763764
ssize_t
764-
ecma_uint32_to_string (uint32_t value, /**< value to convert */
765-
ecma_char_t *out_buffer_p, /**< buffer for zero-terminated string */
766-
ssize_t buffer_size) /**< size of buffer */
765+
ecma_uint32_to_utf8_string (uint32_t value, /**< value to convert */
766+
lit_utf8_byte_t *out_buffer_p, /**< buffer for string */
767+
ssize_t buffer_size) /**< size of buffer */
767768
{
768-
const ecma_char_t digits[10] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
769+
const lit_utf8_byte_t digits[10] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
769770

770-
ecma_char_t *p = (ecma_char_t*) ((uint8_t*) out_buffer_p + buffer_size) - 1;
771-
*p-- = ECMA_CHAR_NULL;
772-
773-
size_t bytes_copied = sizeof (ecma_char_t);
771+
lit_utf8_byte_t *p = out_buffer_p + buffer_size - 1;
772+
size_t bytes_copied = 0;
774773

775774
do
776775
{
@@ -779,7 +778,7 @@ ecma_uint32_to_string (uint32_t value, /**< value to convert */
779778
*p-- = digits[value % 10];
780779
value /= 10;
781780

782-
bytes_copied += sizeof (ecma_char_t);
781+
bytes_copied ++;
783782
}
784783
while (value != 0);
785784

@@ -789,12 +788,12 @@ ecma_uint32_to_string (uint32_t value, /**< value to convert */
789788

790789
if (likely (p != out_buffer_p))
791790
{
792-
ssize_t bytes_to_move = ((uint8_t*) out_buffer_p + buffer_size) - (uint8_t*) p;
791+
ssize_t bytes_to_move = out_buffer_p + buffer_size - p;
793792
memmove (out_buffer_p, p, (size_t) bytes_to_move);
794793
}
795794

796795
return (ssize_t) bytes_copied;
797-
} /* ecma_uint32_to_string */
796+
} /* ecma_uint32_to_utf8_string */
798797

799798
/**
800799
* ECMA-defined conversion of UInt32 value to Number value
@@ -1299,51 +1298,50 @@ ecma_number_to_decimal (ecma_number_t num, /**< ecma-number */
12991298
* ECMA-262 v5, 9.8.1
13001299
*
13011300
*
1302-
* @return length of zt-string
1301+
* @return size of utf-8 string
13031302
*/
1304-
ecma_length_t
1305-
ecma_number_to_zt_string (ecma_number_t num, /**< ecma-number */
1306-
ecma_char_t *buffer_p, /**< buffer for zt-string */
1307-
ssize_t buffer_size) /**< size of buffer */
1303+
lit_utf8_size_t
1304+
ecma_number_to_utf8_string (ecma_number_t num, /**< ecma-number */
1305+
lit_utf8_byte_t *buffer_p, /**< buffer for utf-8 string */
1306+
ssize_t buffer_size) /**< size of buffer */
13081307
{
1309-
const ecma_char_t digits[10] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
1310-
const ecma_char_t e_chars[2] = { 'e', 'E' };
1311-
const ecma_char_t plus_char = '+';
1312-
const ecma_char_t minus_char = '-';
1313-
const ecma_char_t dot_char = '.';
1308+
const lit_utf8_byte_t digits[10] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
1309+
const lit_utf8_byte_t e_chars[2] = { 'e', 'E' };
1310+
const lit_utf8_byte_t plus_char = '+';
1311+
const lit_utf8_byte_t minus_char = '-';
1312+
const lit_utf8_byte_t dot_char = '.';
1313+
lit_utf8_size_t size;
13141314

13151315
if (ecma_number_is_nan (num))
13161316
{
13171317
// 1.
1318-
ecma_copy_zt_string_to_buffer (lit_get_magic_string_zt (LIT_MAGIC_STRING_NAN),
1319-
buffer_p,
1320-
buffer_size);
1318+
lit_copy_magic_string_to_buffer (LIT_MAGIC_STRING_NAN, buffer_p, buffer_size);
1319+
size = lit_get_magic_string_size (LIT_MAGIC_STRING_NAN);
13211320
}
13221321
else
13231322
{
1324-
ecma_char_t *dst_p = buffer_p;
1323+
lit_utf8_byte_t *dst_p = buffer_p;
13251324

13261325
if (ecma_number_is_zero (num))
13271326
{
13281327
// 2.
13291328
*dst_p++ = digits[0];
1330-
*dst_p++ = ECMA_CHAR_NULL;
13311329

1332-
JERRY_ASSERT ((uint8_t*)dst_p - (uint8_t*)buffer_p <= (ssize_t) buffer_size);
1330+
JERRY_ASSERT (dst_p - buffer_p <= (ssize_t) buffer_size);
1331+
size = (lit_utf8_size_t) (dst_p - buffer_p);
13331332
}
13341333
else if (ecma_number_is_negative (num))
13351334
{
13361335
// 3.
13371336
*dst_p++ = minus_char;
1338-
ssize_t new_buffer_size = (buffer_size - ((uint8_t*)dst_p - (uint8_t*)buffer_p));
1339-
ecma_number_to_zt_string (ecma_number_negate (num), dst_p, new_buffer_size);
1337+
ssize_t new_buffer_size = (buffer_size - (dst_p - buffer_p));
1338+
size = 1 + ecma_number_to_utf8_string (ecma_number_negate (num), dst_p, new_buffer_size);
13401339
}
13411340
else if (ecma_number_is_infinity (num))
13421341
{
13431342
// 4.
1344-
ecma_copy_zt_string_to_buffer (lit_get_magic_string_zt (LIT_MAGIC_STRING_INFINITY_UL),
1345-
buffer_p,
1346-
buffer_size);
1343+
dst_p = lit_copy_magic_string_to_buffer (LIT_MAGIC_STRING_INFINITY_UL, buffer_p, buffer_size);
1344+
size = (lit_utf8_size_t) (dst_p - buffer_p);
13471345
}
13481346
else
13491347
{
@@ -1355,7 +1353,7 @@ ecma_number_to_zt_string (ecma_number_t num, /**< ecma-number */
13551353
uint32_t num_uint32 = ecma_number_to_uint32 (num);
13561354
if (ecma_uint32_to_number (num_uint32) == num)
13571355
{
1358-
ecma_uint32_to_string (num_uint32, dst_p, buffer_size);
1356+
size = (lit_utf8_size_t) ecma_uint32_to_utf8_string (num_uint32, dst_p, buffer_size);
13591357
}
13601358
else
13611359
{
@@ -1372,9 +1370,9 @@ ecma_number_to_zt_string (ecma_number_t num, /**< ecma-number */
13721370
if (k <= n && n <= 21)
13731371
{
13741372
dst_p += n;
1375-
JERRY_ASSERT ((ssize_t) sizeof (ecma_char_t) * ((dst_p - buffer_p) + 1) <= buffer_size);
1373+
JERRY_ASSERT ((ssize_t) (dst_p - buffer_p) <= buffer_size);
13761374

1377-
*dst_p = ECMA_CHAR_NULL;
1375+
size = (lit_utf8_size_t) (dst_p - buffer_p);
13781376

13791377
for (int32_t i = 0; i < n - k; i++)
13801378
{
@@ -1391,9 +1389,9 @@ ecma_number_to_zt_string (ecma_number_t num, /**< ecma-number */
13911389
{
13921390
// 7.
13931391
dst_p += k + 1;
1394-
JERRY_ASSERT ((ssize_t) sizeof (ecma_char_t) * ((dst_p - buffer_p) + 1) <= buffer_size);
1392+
JERRY_ASSERT ((ssize_t) (dst_p - buffer_p) <= buffer_size);
13951393

1396-
*dst_p = ECMA_CHAR_NULL;
1394+
size = (lit_utf8_size_t) (dst_p - buffer_p);
13971395

13981396
for (int32_t i = 0; i < k - n; i++)
13991397
{
@@ -1413,9 +1411,9 @@ ecma_number_to_zt_string (ecma_number_t num, /**< ecma-number */
14131411
{
14141412
// 8.
14151413
dst_p += k - n + 1 + 1;
1416-
JERRY_ASSERT ((ssize_t) sizeof (ecma_char_t) * ((dst_p - buffer_p) + 1) <= buffer_size);
1414+
JERRY_ASSERT ((ssize_t) (dst_p - buffer_p) <= buffer_size);
14171415

1418-
*dst_p = ECMA_CHAR_NULL;
1416+
size = (lit_utf8_size_t) (dst_p - buffer_p);
14191417

14201418
for (int32_t i = 0; i < k; i++)
14211419
{
@@ -1436,7 +1434,9 @@ ecma_number_to_zt_string (ecma_number_t num, /**< ecma-number */
14361434
if (k == 1)
14371435
{
14381436
// 9.
1439-
JERRY_ASSERT ((ssize_t) sizeof (ecma_char_t) <= buffer_size);
1437+
JERRY_ASSERT (1 <= buffer_size);
1438+
1439+
size = 1;
14401440

14411441
*dst_p++ = digits[s % 10];
14421442
s /= 10;
@@ -1445,7 +1445,7 @@ ecma_number_to_zt_string (ecma_number_t num, /**< ecma-number */
14451445
{
14461446
// 10.
14471447
dst_p += k + 1;
1448-
JERRY_ASSERT ((ssize_t) sizeof (ecma_char_t) * (dst_p - buffer_p) <= buffer_size);
1448+
JERRY_ASSERT ((ssize_t) (dst_p - buffer_p) <= buffer_size);
14491449

14501450
for (int32_t i = 0; i < k - 1; i++)
14511451
{
@@ -1461,14 +1461,14 @@ ecma_number_to_zt_string (ecma_number_t num, /**< ecma-number */
14611461
}
14621462

14631463
// 9., 10.
1464-
JERRY_ASSERT ((ssize_t) sizeof (ecma_char_t) * (dst_p - buffer_p + 2) <= buffer_size);
1464+
JERRY_ASSERT ((ssize_t) (dst_p - buffer_p + 2) <= buffer_size);
14651465
*dst_p++ = e_chars[0];
14661466
*dst_p++ = (n >= 1) ? plus_char : minus_char;
14671467
int32_t t = (n >= 1) ? (n - 1) : -(n - 1);
14681468

14691469
if (t == 0)
14701470
{
1471-
JERRY_ASSERT ((ssize_t) sizeof (ecma_char_t) * (dst_p - buffer_p + 1) <= buffer_size);
1471+
JERRY_ASSERT ((ssize_t) (dst_p - buffer_p) <= buffer_size);
14721472
*dst_p++ = digits[0];
14731473
}
14741474
else
@@ -1484,27 +1484,25 @@ ecma_number_to_zt_string (ecma_number_t num, /**< ecma-number */
14841484

14851485
while (t_mod != 0)
14861486
{
1487-
JERRY_ASSERT ((ssize_t) sizeof (ecma_char_t) * (dst_p - buffer_p + 1) <= buffer_size);
1487+
JERRY_ASSERT ((ssize_t) (dst_p - buffer_p + 1) <= buffer_size);
14881488
*dst_p++ = digits[t / t_mod];
14891489

14901490
t -= (t / t_mod) * t_mod;
14911491
t_mod /= 10;
14921492
}
14931493
}
14941494

1495-
JERRY_ASSERT ((ssize_t) sizeof (ecma_char_t) * (dst_p - buffer_p + 1) <= buffer_size);
1496-
*dst_p++ = ECMA_CHAR_NULL;
1495+
JERRY_ASSERT ((ssize_t) (dst_p - buffer_p) <= buffer_size);
1496+
size = (lit_utf8_size_t) (dst_p - buffer_p);
14971497
}
14981498

14991499
JERRY_ASSERT (s == 0);
15001500
}
15011501
}
15021502
}
15031503

1504-
ecma_length_t length = ecma_zt_string_length (buffer_p);
1505-
1506-
return length;
1507-
} /* ecma_number_to_zt_string */
1504+
return size;
1505+
} /* ecma_number_to_utf8_string */
15081506

15091507
/**
15101508
* @}

0 commit comments

Comments
 (0)