Skip to content

Commit fac0585

Browse files
committed
lit_get_unicode_char_size_by_utf8_first_byte performance improvement
- inline
- on x86, use a bit vector to store the CESU-8 lookup table

JerryScript-DCO-1.0-Signed-off-by: Xin Hu Xin.A.Hu@intel.com
1 parent 50d124b commit fac0585

File tree

1 file changed

+24
-1
lines changed

1 file changed

+24
-1
lines changed

jerry-core/lit/lit-strings.cpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -762,9 +762,31 @@ lit_utf8_string_code_unit_at (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 stri
762762
*
763763
* @return number of bytes occupied in CESU-8
764764
*/
765-
lit_utf8_size_t
765+
lit_utf8_size_t __attr_always_inline___
766766
lit_get_unicode_char_size_by_utf8_first_byte (const lit_utf8_byte_t first_byte) /**< buffer with characters */
767767
{
768+
#if defined (__i386__) || defined (__x86_64__)
769+
JERRY_ASSERT (((first_byte >> 4) <= 7 || (first_byte >> 4) == 12 ||
770+
(first_byte >> 4) == 13 || (first_byte >> 4) == 14));
771+
772+
// Compact the CESU-8 length lookup table into a uint32_t; every two bits represent one item.
773+
//const lit_utf8_byte_t table[]
774+
//{
775+
// 1, 1, 1, 1, 1, 1, 1, 1,
776+
// 0, 0, 0, 0,
777+
// 2, 2,
778+
// 3, 0
779+
//};
780+
// uint32 00 11 10 10 00 00 00 00 01 01 01 01 01 01 01 01
781+
// table index 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
782+
783+
const uint32_t cesu_8_store = 0x3a005555;
784+
int shift = (first_byte >> 4) << 1;
785+
786+
return (cesu_8_store >> shift) & 0x3;
787+
788+
#else
789+
768790
if ((first_byte & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
769791
{
770792
return 1;
@@ -778,6 +800,7 @@ lit_get_unicode_char_size_by_utf8_first_byte (const lit_utf8_byte_t first_byte)
778800
JERRY_ASSERT ((first_byte & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER);
779801
return 3;
780802
}
803+
#endif
781804
} /* lit_get_unicode_char_size_by_utf8_first_byte */
782805

783806
/**

0 commit comments

Comments
 (0)