Skip to content

RFC: Atom #18721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Zend/zend.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "zend_call_stack.h"
#include "zend_max_execution_timer.h"
#include "zend_hrtime.h"
#include "zend_atom.h"
#include "Optimizer/zend_optimizer.h"
#include "php.h"
#include "php_globals.h"
Expand Down Expand Up @@ -1055,6 +1056,7 @@ void zend_startup(zend_utility_functions *utility_functions) /* {{{ */

zend_interned_strings_init();
zend_startup_builtin_functions();
zend_startup_atoms();
zend_register_standard_constants();
zend_register_auto_global(zend_string_init_interned("GLOBALS", sizeof("GLOBALS") - 1, 1), 1, php_auto_globals_create_globals);

Expand Down
2 changes: 2 additions & 0 deletions Zend/zend_API.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,8 @@ ZEND_API zend_string *zend_zval_get_legacy_type(const zval *arg) /* {{{ */
return ZSTR_KNOWN(ZEND_STR_STRING);
case IS_ARRAY:
return ZSTR_KNOWN(ZEND_STR_ARRAY);
case IS_ATOM:
return ZSTR_KNOWN(ZEND_STR_ATOM);
case IS_OBJECT:
return ZSTR_KNOWN(ZEND_STR_OBJECT);
case IS_RESOURCE:
Expand Down
168 changes: 168 additions & 0 deletions Zend/zend_atom.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
/*
+----------------------------------------------------------------------+
| Zend Engine |
+----------------------------------------------------------------------+
| Copyright (c) Zend Technologies Ltd. (http://www.zend.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 2.00 of the Zend license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.zend.com/license/2_00.txt. |
| If you did not receive a copy of the Zend license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@zend.com so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: [Your Name] |
+----------------------------------------------------------------------+
*/

#include "zend.h"
#include "zend_atom.h"
#include "zend_hash.h"
#include "zend_string.h"
#include "zend_alloc.h"

/* Registry of atoms keyed by atom name; each value is a pointer to a zend_atom. */
ZEND_API HashTable atom_table;
/* ID that will be assigned to the next newly created atom. */
ZEND_API uint32_t next_atom_id = 1; /* Start from 1, 0 is reserved for invalid */
/* True once atom_table has been initialized and may be queried. */
static bool atoms_initialized = false;

static void atom_dtor(zval *zv)
{
zend_atom *atom = (zend_atom *)Z_PTR_P(zv);
zend_string_release(atom->name);
efree(atom);
}

ZEND_API void zend_atoms_init(void)
{
zend_hash_init(&atom_table, 64, NULL, atom_dtor, 0);
next_atom_id = 1;
}

ZEND_API void zend_atoms_shutdown(void)
{
zend_hash_destroy(&atom_table);
}

ZEND_API bool zend_atom_name_is_valid(const char *name, size_t name_len)
{
if (name_len == 0) {
return false;
}

/* First character must be [a-zA-Z_\x80-\xff] */
unsigned char c = (unsigned char)name[0];
if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c >= 0x80)) {
return false;
}

/* Subsequent characters must be [a-zA-Z0-9_\x80-\xff] */
for (size_t i = 1; i < name_len; i++) {
c = (unsigned char)name[i];
if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
(c >= '0' && c <= '9') || c == '_' || c >= 0x80)) {
return false;
}
}

return true;
}

ZEND_API uint32_t zend_atom_create(zend_string *name)
{
zval *existing;
zend_atom *atom;
zval atom_zv;

if (!atoms_initialized) {
zend_startup_atoms();
}

/* Check if atom already exists */
existing = zend_hash_find(&atom_table, name);
if (existing) {
atom = (zend_atom *)Z_PTR_P(existing);
return atom->id;
}

/* Validate atom name */
if (!zend_atom_name_is_valid(ZSTR_VAL(name), ZSTR_LEN(name))) {
return ZEND_ATOM_INVALID_ID;
}

/* Create new atom */
atom = emalloc(sizeof(zend_atom));
atom->name = zend_string_copy(name);
atom->id = next_atom_id++;

ZVAL_PTR(&atom_zv, atom);
zend_hash_add(&atom_table, name, &atom_zv);

return atom->id;
}

/*
 * Convenience wrapper around zend_atom_create() for a raw byte buffer.
 *
 * Builds a temporary non-persistent zend_string from name[0..name_len),
 * passes it to zend_atom_create(), and releases the temporary before
 * returning that call's result.
 */
ZEND_API uint32_t zend_atom_create_cstr(const char *name, size_t name_len)
{
	zend_string *tmp_name = zend_string_init(name, name_len, 0);
	uint32_t atom_id = zend_atom_create(tmp_name);

	zend_string_release(tmp_name);

	return atom_id;
}

/*
 * Looks up an atom by name without creating it.
 *
 * Returns the atom's ID, or ZEND_ATOM_INVALID_ID when the registry has not
 * been initialized or holds no atom with that name.
 */
ZEND_API uint32_t zend_atom_find(zend_string *name)
{
	/* Only consult the table once it has been initialized. */
	const zval *slot = atoms_initialized ? zend_hash_find(&atom_table, name) : NULL;

	if (slot == NULL) {
		return ZEND_ATOM_INVALID_ID;
	}

	return ((const zend_atom *)Z_PTR_P(slot))->id;
}

/*
 * Looks up an atom by a raw byte buffer name without creating it.
 *
 * The table is queried directly with name[0..name_len), so no temporary
 * zend_string is allocated for what is a read-only lookup.
 *
 * Returns the atom's ID, or ZEND_ATOM_INVALID_ID when the registry has not
 * been initialized or holds no atom with that name.
 */
ZEND_API uint32_t zend_atom_find_cstr(const char *name, size_t name_len)
{
	if (!atoms_initialized) {
		return ZEND_ATOM_INVALID_ID;
	}

	const zval *existing = zend_hash_str_find(&atom_table, name, name_len);
	if (existing == NULL) {
		return ZEND_ATOM_INVALID_ID;
	}

	return ((const zend_atom *)Z_PTR_P(existing))->id;
}

/*
 * Returns the name of the atom with the given ID.
 *
 * The returned string is the registry's own reference: no refcount is added,
 * so callers that keep it must take their own copy/reference and must not
 * release it.
 *
 * Returns NULL when atom_id is ZEND_ATOM_INVALID_ID, when the registry has
 * not been initialized, or when no atom carries that ID.
 */
ZEND_API zend_string *zend_atom_name(uint32_t atom_id)
{
	zval *entry;
	zend_atom *atom;

	/* Like zend_atom_find(), never touch the table before it is initialized. */
	if (atom_id == ZEND_ATOM_INVALID_ID || !atoms_initialized) {
		return NULL;
	}

	/* Linear search through atom table to find by ID */
	/* TODO: Consider maintaining a reverse lookup table for performance */
	ZEND_HASH_FOREACH_VAL(&atom_table, entry) {
		atom = (zend_atom *)Z_PTR_P(entry);
		if (atom->id == atom_id) {
			return atom->name;
		}
	} ZEND_HASH_FOREACH_END();

	return NULL;
}

/*
 * Reports whether an atom named `name` is registered.
 *
 * Returns false when the registry has not been initialized, consistent with
 * zend_atom_find(), instead of probing an uninitialized table.
 */
ZEND_API bool zend_atom_exists(zend_string *name)
{
	return atoms_initialized && zend_hash_exists(&atom_table, name);
}

/*
 * One-time initialization of the atom registry.
 *
 * The first call sets up atom_table as a persistent hash table whose entries
 * are released by atom_dtor, then marks the registry initialized. Any later
 * call does nothing.
 */
ZEND_API void zend_startup_atoms(void)
{
	if (!atoms_initialized) {
		zend_hash_init(&atom_table, 0, NULL, atom_dtor, 1);
		atoms_initialized = true;
	}
}
57 changes: 57 additions & 0 deletions Zend/zend_atom.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
+----------------------------------------------------------------------+
| Zend Engine |
+----------------------------------------------------------------------+
| Copyright (c) Zend Technologies Ltd. (http://www.zend.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 2.00 of the Zend license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.zend.com/license/2_00.txt. |
| If you did not receive a copy of the Zend license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@zend.com so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: [Your Name] |
+----------------------------------------------------------------------+
*/

#ifndef ZEND_ATOM_H
#define ZEND_ATOM_H

#include "zend_types.h"
#include "zend_hash.h"

BEGIN_EXTERN_C()

/* One entry of the atom registry: pairs an atom's name with its numeric ID. */
typedef struct _zend_atom {
zend_string *name; /* atom name; this reference is released when the entry is destroyed */
uint32_t id; /* unique atom ID; never ZEND_ATOM_INVALID_ID for a registered atom */
} zend_atom;

/* Atom table globals (defined in zend_atom.c) */
/* Registry of atoms keyed by atom name. */
ZEND_API extern HashTable atom_table;
/* ID that will be assigned to the next newly created atom. */
ZEND_API extern uint32_t next_atom_id;

/* Core atom management functions */
/* Initializes atom_table and resets next_atom_id to 1. */
ZEND_API void zend_atoms_init(void);
/* Destroys atom_table together with all of its entries. */
ZEND_API void zend_atoms_shutdown(void);
/* Returns the ID of the atom called `name`, registering it first if it does
 * not exist yet. Returns ZEND_ATOM_INVALID_ID if `name` is not a valid atom
 * name (see zend_atom_name_is_valid). */
ZEND_API uint32_t zend_atom_create(zend_string *name);
/* Returns the ID of the atom called `name`, or ZEND_ATOM_INVALID_ID if there
 * is none. Never registers a new atom. */
ZEND_API uint32_t zend_atom_find(zend_string *name);
/* Returns the name of the atom with ID `atom_id`, or NULL if `atom_id` is
 * ZEND_ATOM_INVALID_ID or unknown. The string is the one held by the
 * registry; no reference is added for the caller. */
ZEND_API zend_string *zend_atom_name(uint32_t atom_id);
/* Returns true if an atom called `name` is present in atom_table. */
ZEND_API bool zend_atom_exists(zend_string *name);
/* Initializes atom_table on the first call; later calls are no-ops. */
ZEND_API void zend_startup_atoms(void);

/* Atom creation from C string (for internal use) */
/* Same as zend_atom_create(), taking the name as name[0..name_len). */
ZEND_API uint32_t zend_atom_create_cstr(const char *name, size_t name_len);
/* Same as zend_atom_find(), taking the name as name[0..name_len). */
ZEND_API uint32_t zend_atom_find_cstr(const char *name, size_t name_len);

/* Validation */
/* Returns true if name[0..name_len) is non-empty, starts with
 * [a-zA-Z_\x80-\xff] and continues with [a-zA-Z0-9_\x80-\xff]. */
ZEND_API bool zend_atom_name_is_valid(const char *name, size_t name_len);

/* Utility macros */
/* Reserved ID that never names an atom; returned by the create/find
 * functions when no atom ID can be produced. */
#define ZEND_ATOM_INVALID_ID 0

END_EXTERN_C()

#endif /* ZEND_ATOM_H */
2 changes: 2 additions & 0 deletions Zend/zend_language_parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
%token <ast> T_LNUMBER "integer"
%token <ast> T_DNUMBER "floating-point number"
%token <ast> T_STRING "identifier"
%token <ast> T_ATOM "atom"
%token <ast> T_NAME_FULLY_QUALIFIED "fully qualified name"
%token <ast> T_NAME_RELATIVE "namespace-relative name"
%token <ast> T_NAME_QUALIFIED "namespaced name"
Expand Down Expand Up @@ -1443,6 +1444,7 @@ dereferenceable_scalar:
scalar:
T_LNUMBER { $$ = $1; }
| T_DNUMBER { $$ = $1; }
| T_ATOM { $$ = $1; }
| T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC { $$ = $2; }
| T_START_HEREDOC T_END_HEREDOC
{ $$ = zend_ast_create_zval_from_str(ZSTR_EMPTY_ALLOC()); }
Expand Down
4 changes: 4 additions & 0 deletions Zend/zend_language_scanner.l
Original file line number Diff line number Diff line change
Expand Up @@ -2394,6 +2394,10 @@ inline_char_handler:
RETURN_TOKEN(T_NS_SEPARATOR);
}

<ST_IN_SCRIPTING>":"{LABEL} {
/* Atom literal: a ':' immediately followed by a label. */
/* NOTE(review): this pattern also fits a ':' directly followed by an
 * identifier in existing syntax, e.g. `function f():int`, `$a ?:b`,
 * `$c ? X :Y` or `case 1:foo();` - confirm such code still tokenizes as
 * before, since the longer match would presumably win here. */
/* NOTE(review): the second argument is presumably the offset that skips the
 * leading ':'; the token then appears to carry a plain string rather than an
 * IS_ATOM zval - verify against RETURN_TOKEN_WITH_STR and the compiler. */
RETURN_TOKEN_WITH_STR(T_ATOM, 1);
}

<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
RETURN_TOKEN_WITH_STR(T_STRING, 0);
}
Expand Down
1 change: 1 addition & 0 deletions Zend/zend_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,7 @@ EMPTY_SWITCH_DEFAULT_CASE()
_(ZEND_STR_INTEGER, "integer") \
_(ZEND_STR_DOUBLE, "double") \
_(ZEND_STR_ARRAY, "array") \
_(ZEND_STR_ATOM, "atom") \
_(ZEND_STR_RESOURCE, "resource") \
_(ZEND_STR_CLOSED_RESOURCE, "resource (closed)") \
_(ZEND_STR_NAME, "name") \
Expand Down
35 changes: 23 additions & 12 deletions Zend/zend_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ typedef union _zend_value {
void *ptr;
zend_class_entry *ce;
zend_function *func;
uint32_t atom_id; /* atom identifier */
struct {
uint32_t w1;
uint32_t w2;
Expand Down Expand Up @@ -609,25 +610,26 @@ struct _zend_ast_ref {
#define IS_RESOURCE 9
#define IS_REFERENCE 10
#define IS_CONSTANT_AST 11 /* Constant expressions */
#define IS_ATOM 12 /* Atom type */

/* Fake types used only for type hinting.
* These are allowed to overlap with the types below. */
#define IS_CALLABLE 12
#define IS_ITERABLE 13
#define IS_VOID 14
#define IS_STATIC 15
#define IS_MIXED 16
#define IS_NEVER 17
#define IS_CALLABLE 13
#define IS_ITERABLE 14
#define IS_VOID 15
#define IS_STATIC 16
#define IS_MIXED 17
#define IS_NEVER 18

/* internal types */
#define IS_INDIRECT 12
#define IS_PTR 13
#define IS_ALIAS_PTR 14
#define _IS_ERROR 15
#define IS_INDIRECT 13
#define IS_PTR 14
#define IS_ALIAS_PTR 15
#define _IS_ERROR 16

/* used for casts */
#define _IS_BOOL 18
#define _IS_NUMBER 19
#define _IS_BOOL 19
#define _IS_NUMBER 20

/* guard flags */
#define ZEND_GUARD_PROPERTY_GET (1<<0)
Expand Down Expand Up @@ -1045,6 +1047,9 @@ static zend_always_inline uint32_t zval_gc_info(uint32_t gc_type_info) {
#define Z_PTR(zval) (zval).value.ptr
#define Z_PTR_P(zval_p) Z_PTR(*(zval_p))

/* Access the atom_id member of a zval's value union (by value / by pointer).
 * The macros do not check that the zval's type is IS_ATOM. */
#define Z_ATOM_ID(zval) (zval).value.atom_id
#define Z_ATOM_ID_P(zval_p) Z_ATOM_ID(*(zval_p))

#define ZVAL_UNDEF(z) do { \
Z_TYPE_INFO_P(z) = IS_UNDEF; \
} while (0)
Expand Down Expand Up @@ -1261,6 +1266,12 @@ static zend_always_inline uint32_t zval_gc_info(uint32_t gc_type_info) {
Z_TYPE_INFO_P(z) = IS_ALIAS_PTR; \
} while (0)

/* Turns the zval pointed to by `z` into an atom: stores `id` in its atom_id
 * member and sets its type info to IS_ATOM. `z` is evaluated once. */
#define ZVAL_ATOM(z, id) do { \
zval *__z = (z); \
Z_ATOM_ID_P(__z) = (id); \
Z_TYPE_INFO_P(__z) = IS_ATOM; \
} while (0)

#define ZVAL_ERROR(z) do { \
Z_TYPE_INFO_P(z) = _IS_ERROR; \
} while (0)
Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -1711,6 +1711,7 @@ PHP_ADD_SOURCES([Zend], m4_normalize([
Optimizer/zend_optimizer.c
Optimizer/zend_ssa.c
zend_alloc.c
zend_atom.c
zend_API.c
zend_ast.c
zend_atomic.c
Expand Down
8 changes: 8 additions & 0 deletions ext/standard/basic_functions.stub.php
Original file line number Diff line number Diff line change
Expand Up @@ -3597,6 +3597,14 @@ function gettype(mixed $value): string {}
*/
function get_debug_type(mixed $value): string {}

function atom(string $name): mixed {}

function string(mixed $atom): string {}

function get_defined_atoms(): array {}

function atom_exists(string $name): bool {}

function settype(mixed &$var, string $type): bool {}

/**
Expand Down
Loading