Skip to content

Proposal: Support overriding short_open_tag in token_get_all() #9612

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Zend/zend_globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@ struct _zend_php_scanner_globals {
int heredoc_indentation;
bool heredoc_indentation_uses_spaces;

/* Effective short_open_tag setting for the current scan: initialized from CG(short_tags), and may be overridden by the tokenizer extension */
bool short_tags;

/* original (unfiltered) script */
unsigned char *script_org;
size_t script_org_size;
Expand Down
2 changes: 2 additions & 0 deletions Zend/zend_language_scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ typedef struct _zend_lex_state {

zend_ast *ast;
zend_arena *ast_arena;

bool short_tags;
} zend_lex_state;

typedef struct _zend_heredoc_label {
Expand Down
10 changes: 7 additions & 3 deletions Zend/zend_language_scanner.l
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)

lex_state->on_event = SCNG(on_event);
lex_state->on_event_context = SCNG(on_event_context);
lex_state->short_tags = SCNG(short_tags);

lex_state->ast = CG(ast);
lex_state->ast_arena = CG(ast_arena);
Expand Down Expand Up @@ -289,6 +290,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)

SCNG(on_event) = lex_state->on_event;
SCNG(on_event_context) = lex_state->on_event_context;
SCNG(short_tags) = lex_state->short_tags;

CG(ast) = lex_state->ast;
CG(ast_arena) = lex_state->ast_arena;
Expand Down Expand Up @@ -542,6 +544,7 @@ ZEND_API zend_result open_file_for_scanning(zend_file_handle *file_handle)
/* Reset the scanner for scanning the new file */
SCNG(yy_in) = file_handle;
SCNG(yy_start) = NULL;
SCNG(short_tags) = CG(short_tags);

if (size != (size_t)-1) {
if (CG(multibyte)) {
Expand Down Expand Up @@ -731,6 +734,7 @@ ZEND_API void zend_prepare_string_for_scanning(zval *str, zend_string *filename)

SCNG(yy_in) = NULL;
SCNG(yy_start) = NULL;
SCNG(short_tags) = CG(short_tags);

buf = Z_STRVAL_P(str);
size = old_len;
Expand Down Expand Up @@ -2239,7 +2243,7 @@ string:
RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
}
/* Degenerate case: <?phpX is interpreted as <? phpX with short tags. */
if (CG(short_tags)) {
if (SCNG(short_tags)) {
yyless(2);
BEGIN(ST_IN_SCRIPTING);
RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
Expand All @@ -2248,7 +2252,7 @@ string:
}

<INITIAL>"<?" {
if (CG(short_tags)) {
if (SCNG(short_tags)) {
BEGIN(ST_IN_SCRIPTING);
RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
} else {
Expand All @@ -2273,7 +2277,7 @@ inline_char_handler:
}

if (*YYCURSOR == '?') {
if (CG(short_tags) /* <? */
if (SCNG(short_tags) /* <? */
|| (*(YYCURSOR + 1) == '=') /* <?= */
|| (!strncasecmp((char*)YYCURSOR + 1, "php", 3) && /* <?php[ \t\r\n] */
(YYCURSOR + 4 == YYLIMIT ||
Expand Down
2 changes: 2 additions & 0 deletions ext/tokenizer/php_tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ extern zend_module_entry tokenizer_module_entry;
#define PHP_TOKENIZER_VERSION PHP_VERSION

#define TOKEN_PARSE (1 << 0)
#define TOKEN_ENABLE_SHORT_OPEN_TAG (1 << 1)
#define TOKEN_DISABLE_SHORT_OPEN_TAG (1 << 2)

#ifdef ZTS
#include "TSRM.h"
Expand Down
2 changes: 2 additions & 0 deletions ext/tokenizer/tests/bug81342.phpt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
--TEST--
Bug #81342: New ampersand token parsing depends on new line after it
--EXTENSIONS--
tokenizer
--FILE--
<?php

Expand Down
20 changes: 20 additions & 0 deletions ext/tokenizer/tests/short_open_tag.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
--TEST--
token_get_all() TOKEN_*_SHORT_OPEN_TAG flag
--EXTENSIONS--
tokenizer
--FILE--
<?php
// Every call below tokenizes the same five-character input and prints the
// resulting token list as JSON, one line per call.
//
// The short open tag behaviour can be overridden for individual calls to
// token_get_all(): with short tags disabled the input stays a single token,
// with short tags enabled it is split into two tokens.
echo json_encode(token_get_all('<?xml', TOKEN_DISABLE_SHORT_OPEN_TAG)), "\n";
echo json_encode(token_get_all('<?xml', TOKEN_ENABLE_SHORT_OPEN_TAG)), "\n";
// When setting both flags, TOKEN_DISABLE_SHORT_OPEN_TAG takes precedence
echo json_encode(token_get_all('<?xml', TOKEN_DISABLE_SHORT_OPEN_TAG|TOKEN_ENABLE_SHORT_OPEN_TAG)), "\n";
// PhpToken::tokenize() accepts the same flags; its tokens are objects that
// also carry a "pos" field next to id, text and line.
echo json_encode(PhpToken::tokenize('<?xml', TOKEN_DISABLE_SHORT_OPEN_TAG)), "\n";
echo json_encode(PhpToken::tokenize('<?xml', TOKEN_ENABLE_SHORT_OPEN_TAG)), "\n";
?>
--EXPECTF--
[[%d,"<?xml",1]]
[[%d,"<?",1],[%d,"xml",1]]
[[%d,"<?xml",1]]
[{"id":%d,"text":"<?xml","line":1,"pos":0}]
[{"id":%d,"text":"<?","line":1,"pos":0},{"id":%d,"text":"xml","line":1,"pos":2}]
21 changes: 16 additions & 5 deletions ext/tokenizer/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ static void add_token(
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &token);
}

static bool tokenize(zval *return_value, zend_string *source, zend_class_entry *token_class)
static bool tokenize(zval *return_value, zend_string *source, zend_class_entry *token_class, bool short_tags)
{
zval source_zval;
zend_lex_state original_lex_state;
Expand All @@ -335,6 +335,7 @@ static bool tokenize(zval *return_value, zend_string *source, zend_class_entry *
zend_prepare_string_for_scanning(&source_zval, ZSTR_EMPTY_ALLOC());

LANG_SCNG(yy_state) = yycINITIAL;
LANG_SCNG(short_tags) = short_tags;
zend_hash_init(&interned_strings, 0, NULL, NULL, 0);
array_init(return_value);

Expand Down Expand Up @@ -452,7 +453,7 @@ void on_event(
}

static bool tokenize_parse(
zval *return_value, zend_string *source, zend_class_entry *token_class)
zval *return_value, zend_string *source, zend_class_entry *token_class, bool short_tags)
{
zval source_zval;
struct event_context ctx;
Expand All @@ -478,6 +479,7 @@ static bool tokenize_parse(
LANG_SCNG(yy_state) = yycINITIAL;
LANG_SCNG(on_event) = on_event;
LANG_SCNG(on_event_context) = &ctx;
LANG_SCNG(short_tags) = short_tags;

if((success = (zendparse() == SUCCESS))) {
ZVAL_COPY_VALUE(return_value, &token_stream);
Expand All @@ -500,14 +502,23 @@ static bool tokenize_parse(
static bool tokenize_common(
zval *return_value, zend_string *source, zend_long flags, zend_class_entry *token_class)
{
bool result;
bool short_tags;
if (flags & (TOKEN_ENABLE_SHORT_OPEN_TAG|TOKEN_DISABLE_SHORT_OPEN_TAG)) {
/* TOKEN_DISABLE_SHORT_OPEN_TAG takes precedence over TOKEN_ENABLE_SHORT_OPEN_TAG */
short_tags = (flags & TOKEN_DISABLE_SHORT_OPEN_TAG) == 0;
} else {
short_tags = CG(short_tags);
}
if (flags & TOKEN_PARSE) {
return tokenize_parse(return_value, source, token_class);
result = tokenize_parse(return_value, source, token_class, short_tags);
} else {
int success = tokenize(return_value, source, token_class);
int success = tokenize(return_value, source, token_class, short_tags);
/* Normal token_get_all() should not throw. */
zend_clear_exception();
return success;
result = success;
}
return result;
}

/* }}} */
Expand Down
10 changes: 10 additions & 0 deletions ext/tokenizer/tokenizer.stub.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@
* @cvalue TOKEN_PARSE
*/
const TOKEN_PARSE = UNKNOWN;
/**
* @var int
* @cvalue TOKEN_ENABLE_SHORT_OPEN_TAG
*/
const TOKEN_ENABLE_SHORT_OPEN_TAG = UNKNOWN;
/**
* @var int
* @cvalue TOKEN_DISABLE_SHORT_OPEN_TAG
*/
const TOKEN_DISABLE_SHORT_OPEN_TAG = UNKNOWN;

function token_get_all(string $code, int $flags = 0): array {}

Expand Down
4 changes: 3 additions & 1 deletion ext/tokenizer/tokenizer_arginfo.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.