Skip to content

Commit 3c43510

Browse files
nicolas-grekas authored and remicollet committed
add PREG_UNMATCHED_AS_NULL flag to allow distinguishing between unmatched subpatterns and empty matches
1 parent 0f20970 commit 3c43510

File tree

9 files changed

+83
-74
lines changed

9 files changed

+83
-74
lines changed

UPGRADING

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,6 @@ PHP 7.2 UPGRADE NOTES
5656
parameter (assoc) is null. Previously JSON_OBJECT_AS_ARRAY was always
5757
ignored.
5858

59-
- PCRE:
60-
. preg_match() and other PCRE functions now distinguish between unmatched
61-
subpatterns and empty matches by reporting NULL and "" (empty string),
62-
respectively. Formerly, either was reported as empty string.
63-
6459
- Session:
6560
. Removed register_globals related code and "!" can be used as $_SESSION key name.
6661
. Session is made to manage session status correctly and prevents invalid operations.
@@ -109,6 +104,9 @@ PHP 7.2 UPGRADE NOTES
109104

110105
- PCRE:
111106
. Added `J` modifier for setting PCRE_DUPNAMES.
107+
. Added `PREG_UNMATCHED_AS_NULL` flag to allow distinguishing between unmatched
108+
subpatterns and empty matches by reporting NULL and "" (empty string),
109+
respectively.
112110

113111
- Standard:
114112
. Simplified password hashing API updated to support Argon2i hashes when PHP is compiled with libargon2
@@ -278,6 +276,9 @@ See also: https://wiki.php.net/rfc/deprecations_php_7_2
278276
. IMG_EFFECT_MULTIPLY
279277
. IMG_BMP
280278

279+
- PCRE:
280+
. PREG_UNMATCHED_AS_NULL
281+
281282
- Standard:
282283
. PASSWORD_ARGON2_DEFAULT_MEMORY_COST
283284
. PASSWORD_ARGON2_DEFAULT_TIME_COST

ext/pcre/php_pcre.c

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#define PREG_PATTERN_ORDER 1
3434
#define PREG_SET_ORDER 2
3535
#define PREG_OFFSET_CAPTURE (1<<8)
36+
#define PREG_UNMATCHED_AS_NULL (1<<9)
3637

3738
#define PREG_SPLIT_NO_EMPTY (1<<0)
3839
#define PREG_SPLIT_DELIM_CAPTURE (1<<1)
@@ -188,6 +189,7 @@ static PHP_MINIT_FUNCTION(pcre)
188189
REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
189190
REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
190191
REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
192+
REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
191193
REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
192194
REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
193195
REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
@@ -639,14 +641,14 @@ PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra,
639641
/* }}} */
640642

641643
/* {{{ add_offset_pair */
642-
static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
644+
static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name, int unmatched_as_null)
643645
{
644646
zval match_pair, tmp;
645647

646648
array_init_size(&match_pair, 2);
647649

648650
/* Add (match, offset) to the return value */
649-
if (offset < 0) { /* unset substring */
651+
if (unmatched_as_null && offset < 0) {
650652
ZVAL_NULL(&tmp);
651653
} else {
652654
ZVAL_STRINGL(&tmp, str, len);
@@ -705,7 +707,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
705707
{
706708
zval result_set, /* Holds a set of subpatterns after
707709
a global match */
708-
*match_sets = NULL; /* An array of sets of matches for each
710+
*match_sets = NULL; /* An array of sets of matches for each
709711
subpattern after a global match */
710712
pcre_extra *extra = pce->extra;/* Holds results of studying */
711713
pcre_extra extra_data; /* Used locally for exec options */
@@ -720,9 +722,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
720722
char **subpat_names; /* Array for named subpatterns */
721723
int i;
722724
int subpats_order; /* Order of subpattern matches */
723-
int offset_capture; /* Capture match offsets: yes/no */
724-
unsigned char *mark = NULL; /* Target for MARK name */
725-
zval marks; /* Array of marks for PREG_PATTERN_ORDER */
725+
int offset_capture; /* Capture match offsets: yes/no */
726+
int unmatched_as_null; /* Null non-matches: yes/no */
727+
unsigned char *mark = NULL; /* Target for MARK name */
728+
zval marks; /* Array of marks for PREG_PATTERN_ORDER */
726729
ALLOCA_FLAG(use_heap);
727730

728731
ZVAL_UNDEF(&marks);
@@ -737,6 +740,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
737740

738741
if (use_flags) {
739742
offset_capture = flags & PREG_OFFSET_CAPTURE;
743+
unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
740744

741745
/*
742746
* subpats_order is pre-set to pattern mode so we change it only if
@@ -752,6 +756,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
752756
}
753757
} else {
754758
offset_capture = 0;
759+
unmatched_as_null = 0;
755760
}
756761

757762
/* Negative offset counts from the end of the string. */
@@ -847,11 +852,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
847852
if (offset_capture) {
848853
for (i = 0; i < count; i++) {
849854
add_offset_pair(&match_sets[i], (char *)stringlist[i],
850-
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
855+
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
851856
}
852857
} else {
853858
for (i = 0; i < count; i++) {
854-
if (offsets[i<<1] < 0) { /* unset substring */
859+
if (unmatched_as_null && offsets[i<<1] < 0) {
855860
add_next_index_null(&match_sets[i]);
856861
} else {
857862
add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
@@ -869,11 +874,15 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
869874
/*
870875
* If the number of captured subpatterns on this run is
871876
* less than the total possible number, pad the result
872-
* arrays with NULLs.
877+
* arrays with NULLs or empty strings.
873878
*/
874879
if (count < num_subpats) {
875880
for (; i < num_subpats; i++) {
876-
add_next_index_null(&match_sets[i]);
881+
if (unmatched_as_null) {
882+
add_next_index_null(&match_sets[i]);
883+
} else {
884+
add_next_index_string(&match_sets[i], "");
885+
}
877886
}
878887
}
879888
} else {
@@ -885,19 +894,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
885894
if (offset_capture) {
886895
for (i = 0; i < count; i++) {
887896
add_offset_pair(&result_set, (char *)stringlist[i],
888-
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
897+
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], unmatched_as_null);
889898
}
890899
} else {
891900
for (i = 0; i < count; i++) {
892901
if (subpat_names[i]) {
893-
if (offsets[i<<1] < 0) { /* unset substring */
894-
add_assoc_null(&result_set, subpat_names[i]);
895-
} else {
902+
if (unmatched_as_null && offsets[i<<1] < 0) {
903+
add_assoc_null(&result_set, subpat_names[i]);
904+
} else {
896905
add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
897-
offsets[(i<<1)+1] - offsets[i<<1]);
898-
}
906+
offsets[(i<<1)+1] - offsets[i<<1]);
907+
}
899908
}
900-
if (offsets[i<<1] < 0) { /* unset substring */
909+
if (unmatched_as_null && offsets[i<<1] < 0) {
901910
add_next_index_null(&result_set);
902911
} else {
903912
add_next_index_stringl(&result_set, (char *)stringlist[i],
@@ -909,11 +918,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
909918
if (offset_capture) {
910919
for (i = 0; i < count; i++) {
911920
add_offset_pair(&result_set, (char *)stringlist[i],
912-
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
921+
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
913922
}
914923
} else {
915924
for (i = 0; i < count; i++) {
916-
if (offsets[i<<1] < 0) { /* unset substring */
925+
if (unmatched_as_null && offsets[i<<1] < 0) {
917926
add_next_index_null(&result_set);
918927
} else {
919928
add_next_index_stringl(&result_set, (char *)stringlist[i],
@@ -936,19 +945,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
936945
for (i = 0; i < count; i++) {
937946
add_offset_pair(subpats, (char *)stringlist[i],
938947
offsets[(i<<1)+1] - offsets[i<<1],
939-
offsets[i<<1], subpat_names[i]);
948+
offsets[i<<1], subpat_names[i], unmatched_as_null);
940949
}
941950
} else {
942951
for (i = 0; i < count; i++) {
943952
if (subpat_names[i]) {
944-
if (offsets[i<<1] < 0) { /* unset substring */
953+
if (unmatched_as_null && offsets[i<<1] < 0) {
945954
add_assoc_null(subpats, subpat_names[i]);
946955
} else {
947956
add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
948957
offsets[(i<<1)+1] - offsets[i<<1]);
949958
}
950959
}
951-
if (offsets[i<<1] < 0) { /* unset substring */
960+
if (unmatched_as_null && offsets[i<<1] < 0) {
952961
add_next_index_null(subpats);
953962
} else {
954963
add_next_index_stringl(subpats, (char *)stringlist[i],
@@ -961,11 +970,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
961970
for (i = 0; i < count; i++) {
962971
add_offset_pair(subpats, (char *)stringlist[i],
963972
offsets[(i<<1)+1] - offsets[i<<1],
964-
offsets[i<<1], NULL);
973+
offsets[i<<1], NULL, unmatched_as_null);
965974
}
966975
} else {
967976
for (i = 0; i < count; i++) {
968-
if (offsets[i<<1] < 0) { /* unset substring */
977+
if (unmatched_as_null && offsets[i<<1] < 0) {
969978
add_next_index_null(subpats);
970979
} else {
971980
add_next_index_stringl(subpats, (char *)stringlist[i],
@@ -1869,7 +1878,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
18691878

18701879
if (offset_capture) {
18711880
/* Add (match, offset) pair to the return value */
1872-
add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
1881+
add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL, 0);
18731882
} else {
18741883
/* Add the piece to the return value */
18751884
ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
@@ -1891,7 +1900,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
18911900
/* If we have matched a delimiter */
18921901
if (!no_empty || match_len > 0) {
18931902
if (offset_capture) {
1894-
add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1903+
add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
18951904
} else {
18961905
ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
18971906
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
@@ -1928,11 +1937,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
19281937

19291938
start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
19301939

1931-
if (!no_empty || start_offset < subject_len)
1932-
{
1940+
if (!no_empty || start_offset < subject_len) {
19331941
if (offset_capture) {
19341942
/* Add the last (match, offset) pair to the return value */
1935-
add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1943+
add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL, 0);
19361944
} else {
19371945
/* Add the last piece to the return value */
19381946
ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);

ext/pcre/tests/001.phpt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ array(10) {
5252
[2]=>
5353
string(2) "06"
5454
[3]=>
55-
NULL
55+
string(0) ""
5656
["month"]=>
5757
string(2) "12"
5858
[4]=>
@@ -75,7 +75,7 @@ array(10) {
7575
[2]=>
7676
string(2) "12"
7777
[3]=>
78-
NULL
78+
string(0) ""
7979
["month"]=>
8080
string(3) "Aug"
8181
[4]=>

ext/pcre/tests/003.phpt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ array(10) {
5858
[0]=>
5959
string(2) "20"
6060
[1]=>
61-
NULL
61+
string(0) ""
6262
}
6363
["month"]=>
6464
array(2) {
@@ -127,7 +127,7 @@ array(2) {
127127
[2]=>
128128
string(2) "12"
129129
[3]=>
130-
NULL
130+
string(0) ""
131131
["month"]=>
132132
string(3) "Aug"
133133
[4]=>

ext/pcre/tests/004.phpt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ array(2) {
2424
[1]=>
2525
string(12) "unsigned int"
2626
[2]=>
27-
NULL
27+
string(0) ""
2828
[3]=>
2929
string(0) ""
3030
[4]=>
@@ -41,13 +41,13 @@ array(2) {
4141
[1]=>
4242
string(5) "short"
4343
[2]=>
44-
NULL
44+
string(0) ""
4545
[3]=>
4646
string(0) ""
4747
[4]=>
4848
string(1) "a"
4949
[5]=>
50-
NULL
50+
string(0) ""
5151
[6]=>
5252
string(3) ", b"
5353
}

ext/pcre/tests/bug61780.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Bug #61780 (Inconsistent PCRE captures in match results): basics
33
--FILE--
44
<?php
5-
preg_match('/(a)?([a-z]*)(\d*)/', '123', $matches);
5+
preg_match('/(a)?([a-z]*)(\d*)/', '123', $matches, PREG_UNMATCHED_AS_NULL);
66
var_dump($matches);
77
?>
88
--EXPECT--

ext/pcre/tests/bug61780_1.phpt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,22 @@
22
Bug #61780 (Inconsistent PCRE captures in match results): numeric subpatterns
33
--FILE--
44
<?php
5-
preg_match('/(4)?(2)?\d/', '23456', $matches);
5+
preg_match('/(4)?(2)?\d/', '23456', $matches, PREG_UNMATCHED_AS_NULL);
66
var_export($matches);
77
echo "\n\n";
8-
preg_match('/(4)?(2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE);
8+
preg_match('/(4)?(2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
99
var_export($matches);
1010
echo "\n\n";
11-
preg_match_all('/(4)?(2)?\d/', '123456', $matches);
11+
preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_UNMATCHED_AS_NULL);
1212
var_export($matches);
1313
echo "\n\n";
14-
preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE);
14+
preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
1515
var_export($matches);
1616
echo "\n\n";
17-
preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_SET_ORDER);
17+
preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL);
1818
var_export($matches);
1919
echo "\n\n";
20-
preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
20+
preg_match_all('/(4)?(2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
2121
var_export($matches);
2222
?>
2323
--EXPECT--

ext/pcre/tests/bug61780_2.phpt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,22 @@
22
Bug #61780 (Inconsistent PCRE captures in match results): named subpatterns
33
--FILE--
44
<?php
5-
preg_match('/(?<a>4)?(?<b>2)?\d/', '23456', $matches);
5+
preg_match('/(?<a>4)?(?<b>2)?\d/', '23456', $matches, PREG_UNMATCHED_AS_NULL);
66
var_export($matches);
77
echo "\n\n";
8-
preg_match('/(?<a>4)?(?<b>2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE);
8+
preg_match('/(?<a>4)?(?<b>2)?\d/', '23456', $matches, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
99
var_export($matches);
1010
echo "\n\n";
11-
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches);
11+
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_UNMATCHED_AS_NULL);
1212
var_export($matches);
1313
echo "\n\n";
14-
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE);
14+
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
1515
var_export($matches);
1616
echo "\n\n";
17-
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_SET_ORDER);
17+
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL);
1818
var_export($matches);
1919
echo "\n\n";
20-
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
20+
preg_match_all('/(?<a>4)?(?<b>2)?\d/', '123456', $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL);
2121
var_export($matches);
2222
?>
2323
--EXPECT--

0 commit comments

Comments
 (0)