33
33
#define PREG_PATTERN_ORDER 1 /* Exported to PHP as the constant PREG_PATTERN_ORDER in MINIT. NOTE(review): presumably the "pattern mode" value for subpats_order - confirm. */
34
34
#define PREG_SET_ORDER 2 /* Exported to PHP as the constant PREG_SET_ORDER in MINIT. NOTE(review): presumably the alternative subpats_order mode - confirm. */
35
35
#define PREG_OFFSET_CAPTURE (1<<8) /* Bit flag: php_pcre_match_impl masks `flags` with it to set offset_capture, which makes each result a (match, offset) pair. */
36
+ #define PREG_UNMATCHED_AS_NULL (1<<9) /* Bit flag: php_pcre_match_impl masks `flags` with it; when set, subpatterns with a negative offset are added as NULL, otherwise as strings. */
36
37
37
38
#define PREG_SPLIT_NO_EMPTY (1<<0) /* Bit flag exported to PHP in MINIT. NOTE(review): presumably what drives `no_empty` in php_pcre_split_impl - confirm. */
38
39
#define PREG_SPLIT_DELIM_CAPTURE (1<<1) /* Bit flag exported to PHP as the constant PREG_SPLIT_DELIM_CAPTURE in MINIT. */
@@ -188,6 +189,7 @@ static PHP_MINIT_FUNCTION(pcre)
188
189
REGISTER_LONG_CONSTANT ("PREG_PATTERN_ORDER" , PREG_PATTERN_ORDER , CONST_CS | CONST_PERSISTENT );
189
190
REGISTER_LONG_CONSTANT ("PREG_SET_ORDER" , PREG_SET_ORDER , CONST_CS | CONST_PERSISTENT );
190
191
REGISTER_LONG_CONSTANT ("PREG_OFFSET_CAPTURE" , PREG_OFFSET_CAPTURE , CONST_CS | CONST_PERSISTENT );
192
+ REGISTER_LONG_CONSTANT ("PREG_UNMATCHED_AS_NULL" , PREG_UNMATCHED_AS_NULL , CONST_CS | CONST_PERSISTENT );
191
193
REGISTER_LONG_CONSTANT ("PREG_SPLIT_NO_EMPTY" , PREG_SPLIT_NO_EMPTY , CONST_CS | CONST_PERSISTENT );
192
194
REGISTER_LONG_CONSTANT ("PREG_SPLIT_DELIM_CAPTURE" , PREG_SPLIT_DELIM_CAPTURE , CONST_CS | CONST_PERSISTENT );
193
195
REGISTER_LONG_CONSTANT ("PREG_SPLIT_OFFSET_CAPTURE" , PREG_SPLIT_OFFSET_CAPTURE , CONST_CS | CONST_PERSISTENT );
@@ -639,14 +641,14 @@ PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra,
639
641
/* }}} */
640
642
641
643
/* {{{ add_offset_pair */
642
- static inline void add_offset_pair (zval * result , char * str , int len , int offset , char * name )
644
+ static inline void add_offset_pair (zval * result , char * str , int len , int offset , char * name , int unmatched_as_null )
643
645
{
644
646
zval match_pair , tmp ;
645
647
646
648
array_init_size (& match_pair , 2 );
647
649
648
650
/* Add (match, offset) to the return value */
649
- if (offset < 0 ) { /* unset substring */
651
+ if (unmatched_as_null && offset < 0 ) {
650
652
ZVAL_NULL (& tmp );
651
653
} else {
652
654
ZVAL_STRINGL (& tmp , str , len );
@@ -705,7 +707,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
705
707
{
706
708
zval result_set , /* Holds a set of subpatterns after
707
709
a global match */
708
- * match_sets = NULL ; /* An array of sets of matches for each
710
+ * match_sets = NULL ; /* An array of sets of matches for each
709
711
subpattern after a global match */
710
712
pcre_extra * extra = pce -> extra ;/* Holds results of studying */
711
713
pcre_extra extra_data ; /* Used locally for exec options */
@@ -720,9 +722,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
720
722
char * * subpat_names ; /* Array for named subpatterns */
721
723
int i ;
722
724
int subpats_order ; /* Order of subpattern matches */
723
- int offset_capture ; /* Capture match offsets: yes/no */
724
- unsigned char * mark = NULL ; /* Target for MARK name */
725
- zval marks ; /* Array of marks for PREG_PATTERN_ORDER */
725
+ int offset_capture ; /* Capture match offsets: yes/no */
726
+ int unmatched_as_null ; /* Null non-matches: yes/no */
727
+ unsigned char * mark = NULL ; /* Target for MARK name */
728
+ zval marks ; /* Array of marks for PREG_PATTERN_ORDER */
726
729
ALLOCA_FLAG (use_heap );
727
730
728
731
ZVAL_UNDEF (& marks );
@@ -737,6 +740,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
737
740
738
741
if (use_flags ) {
739
742
offset_capture = flags & PREG_OFFSET_CAPTURE ;
743
+ unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL ;
740
744
741
745
/*
742
746
* subpats_order is pre-set to pattern mode so we change it only if
@@ -752,6 +756,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
752
756
}
753
757
} else {
754
758
offset_capture = 0 ;
759
+ unmatched_as_null = 0 ;
755
760
}
756
761
757
762
/* Negative offset counts from the end of the string. */
@@ -847,11 +852,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
847
852
if (offset_capture ) {
848
853
for (i = 0 ; i < count ; i ++ ) {
849
854
add_offset_pair (& match_sets [i ], (char * )stringlist [i ],
850
- offsets [(i <<1 )+ 1 ] - offsets [i <<1 ], offsets [i <<1 ], NULL );
855
+ offsets [(i <<1 )+ 1 ] - offsets [i <<1 ], offsets [i <<1 ], NULL , unmatched_as_null );
851
856
}
852
857
} else {
853
858
for (i = 0 ; i < count ; i ++ ) {
854
- if (offsets [i <<1 ] < 0 ) { /* unset substring */
859
+ if (unmatched_as_null && offsets [i <<1 ] < 0 ) {
855
860
add_next_index_null (& match_sets [i ]);
856
861
} else {
857
862
add_next_index_stringl (& match_sets [i ], (char * )stringlist [i ],
@@ -869,11 +874,15 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
869
874
/*
870
875
* If the number of captured subpatterns on this run is
871
876
* less than the total possible number, pad the result
872
- * arrays with NULLs.
877
+ * arrays with NULLs or empty strings .
873
878
*/
874
879
if (count < num_subpats ) {
875
880
for (; i < num_subpats ; i ++ ) {
876
- add_next_index_null (& match_sets [i ]);
881
+ if (unmatched_as_null ) {
882
+ add_next_index_null (& match_sets [i ]);
883
+ } else {
884
+ add_next_index_string (& match_sets [i ], "" );
885
+ }
877
886
}
878
887
}
879
888
} else {
@@ -885,19 +894,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
885
894
if (offset_capture ) {
886
895
for (i = 0 ; i < count ; i ++ ) {
887
896
add_offset_pair (& result_set , (char * )stringlist [i ],
888
- offsets [(i <<1 )+ 1 ] - offsets [i <<1 ], offsets [i <<1 ], subpat_names [i ]);
897
+ offsets [(i <<1 )+ 1 ] - offsets [i <<1 ], offsets [i <<1 ], subpat_names [i ], unmatched_as_null );
889
898
}
890
899
} else {
891
900
for (i = 0 ; i < count ; i ++ ) {
892
901
if (subpat_names [i ]) {
893
- if (offsets [i <<1 ] < 0 ) { /* unset substring */
894
- add_assoc_null (& result_set , subpat_names [i ]);
895
- } else {
902
+ if (unmatched_as_null && offsets [i <<1 ] < 0 ) {
903
+ add_assoc_null (& result_set , subpat_names [i ]);
904
+ } else {
896
905
add_assoc_stringl (& result_set , subpat_names [i ], (char * )stringlist [i ],
897
- offsets [(i <<1 )+ 1 ] - offsets [i <<1 ]);
898
- }
906
+ offsets [(i <<1 )+ 1 ] - offsets [i <<1 ]);
907
+ }
899
908
}
900
- if (offsets [i <<1 ] < 0 ) { /* unset substring */
909
+ if (unmatched_as_null && offsets [i <<1 ] < 0 ) {
901
910
add_next_index_null (& result_set );
902
911
} else {
903
912
add_next_index_stringl (& result_set , (char * )stringlist [i ],
@@ -909,11 +918,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
909
918
if (offset_capture ) {
910
919
for (i = 0 ; i < count ; i ++ ) {
911
920
add_offset_pair (& result_set , (char * )stringlist [i ],
912
- offsets [(i <<1 )+ 1 ] - offsets [i <<1 ], offsets [i <<1 ], NULL );
921
+ offsets [(i <<1 )+ 1 ] - offsets [i <<1 ], offsets [i <<1 ], NULL , unmatched_as_null );
913
922
}
914
923
} else {
915
924
for (i = 0 ; i < count ; i ++ ) {
916
- if (offsets [i <<1 ] < 0 ) { /* unset substring */
925
+ if (unmatched_as_null && offsets [i <<1 ] < 0 ) {
917
926
add_next_index_null (& result_set );
918
927
} else {
919
928
add_next_index_stringl (& result_set , (char * )stringlist [i ],
@@ -936,19 +945,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
936
945
for (i = 0 ; i < count ; i ++ ) {
937
946
add_offset_pair (subpats , (char * )stringlist [i ],
938
947
offsets [(i <<1 )+ 1 ] - offsets [i <<1 ],
939
- offsets [i <<1 ], subpat_names [i ]);
948
+ offsets [i <<1 ], subpat_names [i ], unmatched_as_null );
940
949
}
941
950
} else {
942
951
for (i = 0 ; i < count ; i ++ ) {
943
952
if (subpat_names [i ]) {
944
- if (offsets [i <<1 ] < 0 ) { /* unset substring */
953
+ if (unmatched_as_null && offsets [i <<1 ] < 0 ) {
945
954
add_assoc_null (subpats , subpat_names [i ]);
946
955
} else {
947
956
add_assoc_stringl (subpats , subpat_names [i ], (char * )stringlist [i ],
948
957
offsets [(i <<1 )+ 1 ] - offsets [i <<1 ]);
949
958
}
950
959
}
951
- if (offsets [i <<1 ] < 0 ) { /* unset substring */
960
+ if (unmatched_as_null && offsets [i <<1 ] < 0 ) {
952
961
add_next_index_null (subpats );
953
962
} else {
954
963
add_next_index_stringl (subpats , (char * )stringlist [i ],
@@ -961,11 +970,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
961
970
for (i = 0 ; i < count ; i ++ ) {
962
971
add_offset_pair (subpats , (char * )stringlist [i ],
963
972
offsets [(i <<1 )+ 1 ] - offsets [i <<1 ],
964
- offsets [i <<1 ], NULL );
973
+ offsets [i <<1 ], NULL , unmatched_as_null );
965
974
}
966
975
} else {
967
976
for (i = 0 ; i < count ; i ++ ) {
968
- if (offsets [i <<1 ] < 0 ) { /* unset substring */
977
+ if (unmatched_as_null && offsets [i <<1 ] < 0 ) {
969
978
add_next_index_null (subpats );
970
979
} else {
971
980
add_next_index_stringl (subpats , (char * )stringlist [i ],
@@ -1869,7 +1878,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
1869
1878
1870
1879
if (offset_capture ) {
1871
1880
/* Add (match, offset) pair to the return value */
1872
- add_offset_pair (return_value , last_match , (int )(& subject [offsets [0 ]]- last_match ), next_offset , NULL );
1881
+ add_offset_pair (return_value , last_match , (int )(& subject [offsets [0 ]]- last_match ), next_offset , NULL , 0 );
1873
1882
} else {
1874
1883
/* Add the piece to the return value */
1875
1884
ZVAL_STRINGL (& tmp , last_match , & subject [offsets [0 ]]- last_match );
@@ -1891,7 +1900,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
1891
1900
/* If we have matched a delimiter */
1892
1901
if (!no_empty || match_len > 0 ) {
1893
1902
if (offset_capture ) {
1894
- add_offset_pair (return_value , & subject [offsets [i <<1 ]], match_len , offsets [i <<1 ], NULL );
1903
+ add_offset_pair (return_value , & subject [offsets [i <<1 ]], match_len , offsets [i <<1 ], NULL , 0 );
1895
1904
} else {
1896
1905
ZVAL_STRINGL (& tmp , & subject [offsets [i <<1 ]], match_len );
1897
1906
zend_hash_next_index_insert_new (Z_ARRVAL_P (return_value ), & tmp );
@@ -1928,11 +1937,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
1928
1937
1929
1938
start_offset = (int )(last_match - subject ); /* the offset might have been incremented, but without further successful matches */
1930
1939
1931
- if (!no_empty || start_offset < subject_len )
1932
- {
1940
+ if (!no_empty || start_offset < subject_len ) {
1933
1941
if (offset_capture ) {
1934
1942
/* Add the last (match, offset) pair to the return value */
1935
- add_offset_pair (return_value , & subject [start_offset ], subject_len - start_offset , start_offset , NULL );
1943
+ add_offset_pair (return_value , & subject [start_offset ], subject_len - start_offset , start_offset , NULL , 0 );
1936
1944
} else {
1937
1945
/* Add the last piece to the return value */
1938
1946
ZVAL_STRINGL (& tmp , last_match , subject + subject_len - last_match );
0 commit comments