Skip to content

Commit 580109a

Browse files
committed
Proposal: Support overriding short_open_tag in token_get_all()
Details
-------

Add the flags TOKEN_ENABLE_SHORT_OPEN_TAG/TOKEN_DISABLE_SHORT_OPEN_TAG to `token_get_all()` and `PhpToken::tokenize()`.

If both flags are accidentally combined, TOKEN_DISABLE_SHORT_OPEN_TAG takes precedence. If neither bit flag is provided, then `token_get_all()` continues to use the value of the system ini setting `short_open_tag` - https://www.php.net/manual/en/ini.core.php#ini.short-open-tag

Motivation
----------

- Allow linters/analyzers/IDEs to parse/analyze/lint projects targeting a deployment environment that supports (or does not support) short open tags, regardless of what the user configured locally.
- Make it more convenient to programmatically convert short open tags to standard `<?php` tags in migration scripts.
- Allow linters/scripts to easily warn about code with short open tags that would be echoed instead of compiled/run (T_INLINE_HTML) when short tags are disabled.
- In applications/libraries that use token_get_all, avoid platform dependence when running the test suites (in PHP versions including this change), and make it convenient to test both values of the setting in the same test suite.

Implementation details
----------------------

When preparing to scan tokens from a string or from a file, this copies the value of `CG(short_tags)` (which corresponds to the system ini setting `short_open_tag`) to `SCNG(short_tags)`. PHP modules such as `tokenizer` can then conditionally override it after the call to `zend_prepare_string_for_scanning` (PECL extensions can also safely make use of this, starting with the PHP versions including this change).

To handle edge cases such as compilation triggering an error handler which calls token_get_all, `zend_save_lexical_state` and `zend_restore_lexical_state` also save/restore `short_tags`.
1 parent 036009b commit 580109a

File tree

9 files changed

+64
-9
lines changed

9 files changed

+64
-9
lines changed

Zend/zend_globals.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,9 @@ struct _zend_php_scanner_globals {
323323
int heredoc_indentation;
324324
bool heredoc_indentation_uses_spaces;
325325

326+
/* Short tags - either from defaults or tokenizer extension overrides */
327+
bool short_tags;
328+
326329
/* original (unfiltered) script */
327330
unsigned char *script_org;
328331
size_t script_org_size;

Zend/zend_language_scanner.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ typedef struct _zend_lex_state {
5757

5858
zend_ast *ast;
5959
zend_arena *ast_arena;
60+
61+
bool short_tags;
6062
} zend_lex_state;
6163

6264
typedef struct _zend_heredoc_label {

Zend/zend_language_scanner.l

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
246246

247247
lex_state->on_event = SCNG(on_event);
248248
lex_state->on_event_context = SCNG(on_event_context);
249+
lex_state->short_tags = SCNG(short_tags);
249250

250251
lex_state->ast = CG(ast);
251252
lex_state->ast_arena = CG(ast_arena);
@@ -289,6 +290,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
289290

290291
SCNG(on_event) = lex_state->on_event;
291292
SCNG(on_event_context) = lex_state->on_event_context;
293+
SCNG(short_tags) = lex_state->short_tags;
292294

293295
CG(ast) = lex_state->ast;
294296
CG(ast_arena) = lex_state->ast_arena;
@@ -731,6 +733,7 @@ ZEND_API void zend_prepare_string_for_scanning(zval *str, zend_string *filename)
731733

732734
SCNG(yy_in) = NULL;
733735
SCNG(yy_start) = NULL;
736+
SCNG(short_tags) = CG(short_tags);
734737

735738
buf = Z_STRVAL_P(str);
736739
size = old_len;
@@ -2239,7 +2242,7 @@ string:
22392242
RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
22402243
}
22412244
/* Degenerate case: <?phpX is interpreted as <? phpX with short tags. */
2242-
if (CG(short_tags)) {
2245+
if (SCNG(short_tags)) {
22432246
yyless(2);
22442247
BEGIN(ST_IN_SCRIPTING);
22452248
RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
@@ -2248,7 +2251,7 @@ string:
22482251
}
22492252

22502253
<INITIAL>"<?" {
2251-
if (CG(short_tags)) {
2254+
if (SCNG(short_tags)) {
22522255
BEGIN(ST_IN_SCRIPTING);
22532256
RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
22542257
} else {
@@ -2273,7 +2276,7 @@ inline_char_handler:
22732276
}
22742277

22752278
if (*YYCURSOR == '?') {
2276-
if (CG(short_tags) /* <? */
2279+
if (SCNG(short_tags) /* <? */
22772280
|| (*(YYCURSOR + 1) == '=') /* <?= */
22782281
|| (!strncasecmp((char*)YYCURSOR + 1, "php", 3) && /* <?php[ \t\r\n] */
22792282
(YYCURSOR + 4 == YYLIMIT ||

ext/tokenizer/php_tokenizer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ extern zend_module_entry tokenizer_module_entry;
2424
#define PHP_TOKENIZER_VERSION PHP_VERSION
2525

2626
#define TOKEN_PARSE (1 << 0)
27+
#define TOKEN_ENABLE_SHORT_OPEN_TAG (1 << 1)
28+
#define TOKEN_DISABLE_SHORT_OPEN_TAG (1 << 2)
2729

2830
#ifdef ZTS
2931
#include "TSRM.h"

ext/tokenizer/tests/bug81342.phpt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
--TEST--
22
Bug #81342: New ampersand token parsing depends on new line after it
3+
--EXTENSIONS--
4+
tokenizer
35
--FILE--
46
<?php
57

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
--TEST--
2+
token_get_all() TOKEN_*_SHORT_OPEN_TAG flag
3+
--EXTENSIONS--
4+
tokenizer
5+
--FILE--
6+
<?php
7+
// This can be overridden for individual calls to token_get_all
8+
echo json_encode(token_get_all('<?xml', TOKEN_DISABLE_SHORT_OPEN_TAG)), "\n";
9+
echo json_encode(token_get_all('<?xml', TOKEN_ENABLE_SHORT_OPEN_TAG)), "\n";
10+
// When setting both flags, TOKEN_DISABLE_SHORT_OPEN_TAG takes precedence
11+
echo json_encode(token_get_all('<?xml', TOKEN_DISABLE_SHORT_OPEN_TAG|TOKEN_ENABLE_SHORT_OPEN_TAG)), "\n";
12+
echo json_encode(PhpToken::tokenize('<?xml', TOKEN_DISABLE_SHORT_OPEN_TAG)), "\n";
13+
echo json_encode(PhpToken::tokenize('<?xml', TOKEN_ENABLE_SHORT_OPEN_TAG)), "\n";
14+
?>
15+
--EXPECTF--
16+
[[%d,"<?xml",1]]
17+
[[%d,"<?",1],[%d,"xml",1]]
18+
[[%d,"<?xml",1]]
19+
[{"id":%d,"text":"<?xml","line":1,"pos":0}]
20+
[{"id":%d,"text":"<?","line":1,"pos":0},{"id":%d,"text":"xml","line":1,"pos":2}]

ext/tokenizer/tokenizer.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ static void add_token(
319319
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &token);
320320
}
321321

322-
static bool tokenize(zval *return_value, zend_string *source, zend_class_entry *token_class)
322+
static bool tokenize(zval *return_value, zend_string *source, zend_class_entry *token_class, bool short_tags)
323323
{
324324
zval source_zval;
325325
zend_lex_state original_lex_state;
@@ -335,6 +335,7 @@ static bool tokenize(zval *return_value, zend_string *source, zend_class_entry *
335335
zend_prepare_string_for_scanning(&source_zval, ZSTR_EMPTY_ALLOC());
336336

337337
LANG_SCNG(yy_state) = yycINITIAL;
338+
LANG_SCNG(short_tags) = short_tags;
338339
zend_hash_init(&interned_strings, 0, NULL, NULL, 0);
339340
array_init(return_value);
340341

@@ -452,7 +453,7 @@ void on_event(
452453
}
453454

454455
static bool tokenize_parse(
455-
zval *return_value, zend_string *source, zend_class_entry *token_class)
456+
zval *return_value, zend_string *source, zend_class_entry *token_class, bool short_tags)
456457
{
457458
zval source_zval;
458459
struct event_context ctx;
@@ -478,6 +479,7 @@ static bool tokenize_parse(
478479
LANG_SCNG(yy_state) = yycINITIAL;
479480
LANG_SCNG(on_event) = on_event;
480481
LANG_SCNG(on_event_context) = &ctx;
482+
LANG_SCNG(short_tags) = short_tags;
481483

482484
if((success = (zendparse() == SUCCESS))) {
483485
ZVAL_COPY_VALUE(return_value, &token_stream);
@@ -500,14 +502,23 @@ static bool tokenize_parse(
500502
static bool tokenize_common(
501503
zval *return_value, zend_string *source, zend_long flags, zend_class_entry *token_class)
502504
{
505+
bool result;
506+
bool short_tags;
507+
if (flags & (TOKEN_ENABLE_SHORT_OPEN_TAG|TOKEN_DISABLE_SHORT_OPEN_TAG)) {
508+
/* TOKEN_DISABLE_SHORT_OPEN_TAG takes precedence over TOKEN_ENABLE_SHORT_OPEN_TAG */
509+
short_tags = (flags & TOKEN_DISABLE_SHORT_OPEN_TAG) == 0;
510+
} else {
511+
short_tags = CG(short_tags);
512+
}
503513
if (flags & TOKEN_PARSE) {
504-
return tokenize_parse(return_value, source, token_class);
514+
result = tokenize_parse(return_value, source, token_class, short_tags);
505515
} else {
506-
int success = tokenize(return_value, source, token_class);
516+
int success = tokenize(return_value, source, token_class, short_tags);
507517
/* Normal token_get_all() should not throw. */
508518
zend_clear_exception();
509-
return success;
519+
result = success;
510520
}
521+
return result;
511522
}
512523

513524
/* }}} */

ext/tokenizer/tokenizer.stub.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,16 @@
77
* @cvalue TOKEN_PARSE
88
*/
99
const TOKEN_PARSE = UNKNOWN;
10+
/**
11+
* @var int
12+
* @cvalue TOKEN_ENABLE_SHORT_OPEN_TAG
13+
*/
14+
const TOKEN_ENABLE_SHORT_OPEN_TAG = UNKNOWN;
15+
/**
16+
* @var int
17+
* @cvalue TOKEN_DISABLE_SHORT_OPEN_TAG
18+
*/
19+
const TOKEN_DISABLE_SHORT_OPEN_TAG = UNKNOWN;
1020

1121
function token_get_all(string $code, int $flags = 0): array {}
1222

ext/tokenizer/tokenizer_arginfo.h

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)