Skip to content

Commit 217df05

Browse files
committed
Use a bit vector to store the CESU-8 lookup table to improve lit_get_unicode_char_size_by_utf8_first_byte performance

JerryScript-DCO-1.0-Signed-off-by: Xin Hu Xin.A.Hu@intel.com
1 parent 50d124b commit 217df05

File tree

1 file changed

+18
-13
lines changed

1 file changed

+18
-13
lines changed

jerry-core/lit/lit-strings.cpp

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -765,19 +765,24 @@ lit_utf8_string_code_unit_at (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 stri
765765
lit_utf8_size_t
766766
lit_get_unicode_char_size_by_utf8_first_byte (const lit_utf8_byte_t first_byte) /**< buffer with characters */
767767
{
768-
if ((first_byte & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
769-
{
770-
return 1;
771-
}
772-
else if ((first_byte & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
773-
{
774-
return 2;
775-
}
776-
else
777-
{
778-
JERRY_ASSERT ((first_byte & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER);
779-
return 3;
780-
}
768+
JERRY_ASSERT (((first_byte >> 4) <= 7 || (first_byte >> 4) == 12 ||
769+
(first_byte >> 4) == 13 || (first_byte >> 4) == 14));
770+
771+
// Compact the CESU-8 length lookup table into a uint32_t; every two bits represent one table entry
772+
//const lit_utf8_byte_t table[]
773+
//{
774+
// 1, 1, 1, 1, 1, 1, 1, 1,
775+
// 0, 0, 0, 0,
776+
// 2, 2,
777+
// 3, 0
778+
//};
779+
// uint32 00 11 10 10 00 00 00 00 01 01 01 01 01 01 01 01
780+
// table index: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
781+
782+
const uint32_t cesu_8_store = 0x3a005555;
783+
int shift = (first_byte >> 4) << 1;
784+
785+
return (cesu_8_store >> shift) & 0x3;
781786
} /* lit_get_unicode_char_size_by_utf8_first_byte */
782787

783788
/**

0 commit comments

Comments
 (0)