Skip to content

Optimize string #4

@lygstate

Description

@lygstate
  • Optimize lit_is_utf8_string_pair_magic by suffix-array
  • Optimize ecma_string_t to support short direct strings (without the need for a ref) (size <= 13)
#include <assert.h>
#include <stdint.h>

/** 32-bit hash value type; stored in the 'hash' field of the string and symbol headers. */
typedef uint32_t lit_string_hash_t;
/** 32-bit value type; used by the symbol header for its descriptor and key value. */
typedef uint32_t ecma_value_t;
/** Byte type used for string data (see ecma_external_string_t::string_p). */
typedef unsigned char lit_utf8_byte_t;

/**
 * Kind of storage used by an ecma_string_t.
 *
 * For the two "small" kinds the whole string content is resident in the
 * ecma_string_t itself.
 */
typedef enum
{
  /**
   * Small integer string: 1 <= size < 11, [0..9999999999].toString().
   * This range contains all UINT32 values.
   */
  ECMA_STRING_CONTAINER_SMALL_INTEGER_STRING = 0,

  /** Small string: 0 <= size < 11, UTF8 except [0..9999999999].toString(). */
  ECMA_STRING_CONTAINER_SMALL_STRING,

  /** Short string: 11 <= size < 2^16, actual data is on the heap. */
  ECMA_STRING_CONTAINER_SHORT_STRING,

  /** Long string: 2^16 <= size < 2^32, actual data is on the heap. */
  ECMA_STRING_CONTAINER_LONG_STRING,

  /** External string: any size, actual data is allocated externally. */
  ECMA_STRING_CONTAINER_EXTERNAL_STRING,

  /** The ecma-string is a symbol. */
  ECMA_STRING_CONTAINER_SYMBOL,
} ecma_string_container_t;

/**
 * ECMA string descriptor.
 *
 * One 32-bit word of bit-fields (stack flag, container kind, reference counter)
 * followed by a union whose interpretation depends on the 'container' field.
 * The static assertion after the definition pins the total size to 16 bytes.
 */
typedef struct
{
  uint32_t on_stack : 1; /**< set when this ecma_string_t resides on the stack and must not be freed */
  uint32_t container : 5; /**< container kind, an ecma_string_container_t value */
  /** Reference counter for the string */
  uint32_t refs : 26;

  /**
   * Actual data or identifier of its place in the container (depending on the 'container' field)
   */
  union
  {
    /**
     * Small string descriptor.
     * NOTE(review): presumably used by the two ECMA_STRING_CONTAINER_SMALL_* kinds - confirm.
     */
    struct
    {
      /* size == length means ASCII string */
      uint8_t size : 4; /**< size of this utf-8 string in bytes (4-bit field) */
      uint8_t length : 4; /**< length of this utf-8 string in characters (4-bit field) */
      /* NOTE(review): presumably the inline string bytes, despite the name 'digits' - confirm */
      uint8_t digits[11];
    } small;

    /** Short CESU8 string-value descriptor header */
    struct
    {
      lit_string_hash_t hash; /**< hash of the ASCII/UTF8 string */
      /* size == length means ASCII string */
      uint16_t size; /**< size of this utf-8 string in bytes */
      uint16_t length; /**< length of this utf-8 string in characters */
      /* NOTE(review): purpose not shown here; presumably refers to the heap-resident data - confirm */
      uint8_t heap[4];
    } s_header;

    /** Long CESU8 string-value descriptor header */
    struct
    {
      lit_string_hash_t hash; /**< hash of the ASCII/UTF8 string */
      /* size == length means ASCII string */
      uint32_t size; /**< size of this utf-8 string in bytes */
      uint32_t length; /**< length of this utf-8 string in characters */
    } l_header;

    /** External CESU8 string-value descriptor header (same fields as l_header) */
    struct
    {
      lit_string_hash_t hash; /**< hash of the ASCII/UTF8 string */
      /* size == length means ASCII string */
      uint32_t size; /**< size of this utf-8 string in bytes */
      uint32_t length; /**< length of this utf-8 string in characters */
    } external;

    /** Symbol descriptor header */
    struct
    {
      lit_string_hash_t hash; /**< hash of the symbol */

      ecma_value_t descriptor; /**< symbol descriptor string-value */
      ecma_value_t value; /**< original key value corresponding to the map key string */
    } symbol;
  } u;
} ecma_string_t;
/* The descriptor must occupy exactly 16 bytes. */
static_assert (sizeof (ecma_string_t) == 16, "");

/**
 * External CESU8 string-value descriptor
 */
typedef struct
{
  ecma_string_t header; /**< string header */
  const lit_utf8_byte_t *string_p; /**< string data */
  void *user_p; /**< user pointer passed to the callback when the string is freed */
} ecma_external_string_t;

static_assert (sizeof (ecma_external_string_t) == 32, "");
  • 8 ECMA_TYPE_INTEGER
    • directly encoded 31 bit signed integer
  • 2 ECMA_TYPE_DIRECT
    • directly encoded bigint value (28 bit signed integer)
    • directly encoded string value (0 <= length <= 3); one bit indicates whether it is an integer (length <= 3) string
    • directly encoded symbol value (0 <= length <= 3); one bit indicates whether it is an integer (length <= 3) string
    • directly encoded magic string value (length > 3); one bit indicates whether it is an integer (length < 11) string
    • directly encoded magic symbol value (length > 3); one bit indicates whether it is an integer (length < 11) string
    • directly encoded simple value
  • 2 ECMA_TYPE_FLOAT
    • pointer to a 64 bit floating point number (8 byte aligned)
  • 2 ECMA_TYPE_POINTER
    • pointer to (object, function, bigint, exception, extend point for future) (8 byte aligned)
  • 1 ECMA_TYPE_STRING
    • pointer to the descriptor of a string (an ecma_string_t) whose length > 3 (16 byte aligned)
  • 1 ECMA_TYPE_SYMBOL
    • pointer to the descriptor of a symbol (an ecma_string_t) whose length > 3 (16 byte aligned)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions