1
+ package com .turbolent .regex ;
2
+
3
+ import com .turbolent .regex .instructions .*;
4
+
5
+ import java .util .ArrayList ;
6
+ import java .util .HashSet ;
7
+ import java .util .List ;
8
+
9
+ /**
10
+ * A {@code Parser} matches a pattern, compiled into
11
+ * {@linkplain com.turbolent.regex.instructions.Instruction matching instructions},
12
+ * against a list of {@link #values values}.
13
+ * <p>
14
+ * An implementation of Rob Pike's Virtual Machine-based regular expression engine, as described
15
+ * in great detail by Russ Cox in "Regular Expression Matching: the Virtual Machine Approach"
16
+ * (see http://swtch.com/~rsc/regexp/regexp2.html)
17
+ *
18
+ * @param <Value> the type of the input values
19
+ * @param <Result> the type of the match result
20
+ */
21
+ public class Parser <Value , Result > {
22
+ private final HashSet <Instruction <Value , Result >> seen = new HashSet <>();
23
+ final Instruction <Value , Result > code ;
24
+ final List <Value > values ;
25
+
26
+ private Parser (Instruction <Value , Result > code , List <Value > values ) {
27
+ this .code = code ;
28
+ this .values = values ;
29
+ }
30
+
31
+ public static <Value , Result > Match <Value , Result > match (Instruction <Value , Result > code ,
32
+ List <Value > values )
33
+ {
34
+ return new Parser <>(code , values ).match ();
35
+ }
36
+
37
+ private Match <Value , Result > match () {
38
+ List <Thread <Value , Result >> currentThreads = new ArrayList <>();
39
+ List <Thread <Value , Result >> newThreads = new ArrayList <>();
40
+
41
+ ThreadState <Result > matchedState = null ;
42
+
43
+ int index = 0 ;
44
+ addThread (new Thread <>(this .code , new ThreadState <>()),
45
+ index , currentThreads );
46
+ for (; !currentThreads .isEmpty (); index ++) {
47
+ Value value = null ;
48
+ if (index < this .values .size ()) {
49
+ value = this .values .get (index );
50
+ }
51
+
52
+ this .seen .clear ();
53
+ for (int i = 0 ; i < currentThreads .size (); i ++) {
54
+ final Thread <Value , Result > thread = currentThreads .get (i );
55
+ final Instruction <Value , Result > instruction = thread .instruction ;
56
+ final ThreadState <Result > state = thread .state ;
57
+
58
+ if (instruction instanceof Atom ) {
59
+ Atom <Value , Result > atom = (Atom <Value , Result >)instruction ;
60
+ if (value != null && atom .predicate .test (value ))
61
+ addThread (new Thread <>(instruction .next , state ),
62
+ index + 1 , newThreads );
63
+ else
64
+ state .decrementReferenceCount ();
65
+ } else if (instruction instanceof Accept ) {
66
+ if (matchedState != null )
67
+ matchedState .decrementReferenceCount ();
68
+
69
+ matchedState = state ;
70
+
71
+ for (i ++; i < currentThreads .size (); i ++) {
72
+ Thread remainingThread = currentThreads .get (i );
73
+ remainingThread .state .decrementReferenceCount ();
74
+ }
75
+
76
+ break ;
77
+ } else
78
+ throw Instruction .newUnsupportedException (instruction );
79
+ }
80
+
81
+ // swap currentThreads for newThreads
82
+ List <Thread <Value , Result >> threads = currentThreads ;
83
+ currentThreads = newThreads ;
84
+ newThreads = threads ;
85
+
86
+ newThreads .clear ();
87
+
88
+ if (value == null )
89
+ break ;
90
+ }
91
+
92
+ if (matchedState == null )
93
+ return null ;
94
+ return new Match <>(this .values , matchedState );
95
+ }
96
+
97
+ private void addThread (Thread <Value , Result > thread , int index ,
98
+ List <Thread <Value , Result >> threads )
99
+ {
100
+ Instruction <Value , Result > instruction = thread .instruction ;
101
+
102
+ if (this .skipIfSeen (instruction )) {
103
+ thread .state .decrementReferenceCount ();
104
+ return ;
105
+ }
106
+
107
+ if (instruction instanceof Split ) {
108
+ final Split <Value , Result > split = (Split <Value , Result >)instruction ;
109
+ thread .state .incrementReferenceCount ();
110
+ addThread (new Thread <>(split .next , thread .state ), index , threads );
111
+ addThread (new Thread <>(split .split , thread .state ), index , threads );
112
+ } else if (instruction instanceof Save ) {
113
+ final Save <Value , Result > save = (Save <Value , Result >)instruction ;
114
+ final ThreadState <Result > state = thread .state .maybeCloneState ();
115
+ switch (save .position ) {
116
+ case START :
117
+ state .updateStartIndex (save .identifier , index );
118
+ break ;
119
+ case END :
120
+ state .updateEndIndex (save .identifier , index );
121
+ break ;
122
+ default :
123
+ throw Save .Position .newUnsupportedException (save .position );
124
+ }
125
+ addThread (new Thread <>(instruction .next , state ), index , threads );
126
+ } else if (instruction instanceof Mark ) {
127
+ final Mark <Value , Result > mark = (Mark <Value , Result >) instruction ;
128
+ final ThreadState <Result > state = thread .state .maybeCloneState ();
129
+ switch (mark .position ) {
130
+ case START :
131
+ state .addMark ();
132
+ break ;
133
+ case END :
134
+ state .removeMark ();
135
+ break ;
136
+ default :
137
+ throw Mark .Position .newUnsupportedException (mark .position );
138
+ }
139
+ addThread (new Thread <>(instruction .next , state ), index , threads );
140
+ } else if (instruction instanceof Call ) {
141
+ final Call <Value , Result > call = (Call <Value , Result >)instruction ;
142
+ final ThreadState <Result > state = thread .state .maybeCloneState ();
143
+ call .consumer .accept (this , new PartialMatch <>(this .values , state ));
144
+ addThread (new Thread <>(instruction .next , state ), index , threads );
145
+ } else
146
+ threads .add (thread );
147
+ }
148
+
149
+ private boolean skipIfSeen (Instruction <Value , Result > instruction ) {
150
+ if (this .seen .contains (instruction )) {
151
+ return true ;
152
+ } else {
153
+ this .seen .add (instruction );
154
+ return false ;
155
+ }
156
+ }
157
+ }
0 commit comments