@@ -827,21 +827,19 @@ The following structure is used as the C<pprivate> struct by perl's
827
827
regex engine. Since it is specific to perl it is only of curiosity
828
828
value to other engine implementations.
829
829
830
- typedef struct regexp_internal {
831
- U32 *offsets; /* offset annotations 20001228 MJD
832
- * data about mapping the program to
833
- * the string*/
834
- regnode *regstclass; /* Optional startclass as identified or
835
- * constructed by the optimiser */
836
- struct reg_data *data; /* Additional miscellaneous data used
837
- * by the program. Used to make it
838
- * easier to clone and free arbitrary
839
- * data that the regops need. Often the
840
- * ARG field of a regop is an index
841
- * into this structure */
842
- regnode program[1]; /* Unwarranted chumminess with
843
- * compiler. */
844
- } regexp_internal;
830
+ typedef struct regexp_internal {
831
+ union {
832
+ U32 *offsets;
833
+ U32 proglen;
834
+ } u;
835
+ regnode *regstclass;
836
+ struct reg_data *data;
837
+ struct reg_code_blocks *code_blocks;
838
+ int name_list_idx;
839
+ regnode program[1];
840
+ } regexp_internal;
841
+
842
+ The attributes are described as follows:
845
843
846
844
=over 5
847
845
@@ -851,6 +849,10 @@ Offsets holds a mapping of offset in the C<program>
851
849
to offset in the C<precomp> string. This is only used by ActiveState's
852
850
visual regex debugger.
853
851
852
+ =item C<proglen>
853
+
854
+ Stores the length of the compiled program in units of regops.
855
+
854
856
=item C<regstclass>
855
857
856
858
Special regop that is used by C<re_intuit_start()> to check if a pattern
@@ -878,6 +880,38 @@ what array. During compilation regops that need special structures stored
878
880
will add an element to each array using the add_data() routine and then store
879
881
the index in the regop.
880
882
883
+ In modern perls the 0th element of this structure is reserved and is NEVER
884
+ used to store anything of use. This lets anything that needs to index into
885
+ this array use an index of 0 to represent "no value".
886
+
887
+ =item C<code_blocks>
888
+
889
+ This optional structure is used to manage C<(?{})> constructs in the
890
+ pattern. It is made up of the following structures.
891
+
892
+ /* record the position of a (?{...}) within a pattern */
893
+ struct reg_code_block {
894
+ STRLEN start;
895
+ STRLEN end;
896
+ OP *block;
897
+ REGEXP *src_regex;
898
+ };
899
+
900
+ /* array of reg_code_block's plus header info */
901
+ struct reg_code_blocks {
902
+ int refcnt; /* we may be pointed to from a regex
903
+ and from the savestack */
904
+ int count; /* how many code blocks */
905
+ struct reg_code_block *cb; /* array of reg_code_block's */
906
+ };
907
+
908
+ =item C<name_list_idx>
909
+
910
+ This is the index into the data array where an AV is stored that contains
911
+ the names of any named capture buffers in the pattern, should there be
912
+ any. This is only used in the debugging version of the regex engine. It
913
+ will be 0 if there is no such data.
914
+
881
915
=item C<program>
882
916
883
917
Compiled program. Inlined into the structure so the entire struct can be
0 commit comments