Skip to content

Commit 13f085d

Browse files
committed
initial commit
0 parents  commit 13f085d

32 files changed

+3182
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
.idea/
2+
*.iml
3+
target/

pom.xml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5+
<modelVersion>4.0.0</modelVersion>
6+
7+
<groupId>com.turbolent</groupId>
8+
<artifactId>regex</artifactId>
9+
<version>0.1-SNAPSHOT</version>
10+
11+
<build>
12+
<plugins>
13+
<plugin>
14+
<groupId>org.apache.maven.plugins</groupId>
15+
<artifactId>maven-compiler-plugin</artifactId>
16+
<version>3.2</version>
17+
<configuration>
18+
<source>8</source>
19+
<target>8</target>
20+
</configuration>
21+
</plugin>
22+
</plugins>
23+
</build>
24+
25+
<dependencies>
26+
<dependency>
27+
<groupId>junit</groupId>
28+
<artifactId>junit</artifactId>
29+
<version>4.12</version>
30+
<scope>test</scope>
31+
</dependency>
32+
<dependency>
33+
<groupId>org.hamcrest</groupId>
34+
<artifactId>hamcrest-library</artifactId>
35+
<version>1.3</version>
36+
<scope>test</scope>
37+
</dependency>
38+
</dependencies>
39+
40+
<properties>
41+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
42+
</properties>
43+
</project>
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package com.turbolent.regex;
2+
3+
/**
 * A {@code Marker} is a unique identifier valid while matching
 * a {@linkplain com.turbolent.regex.patterns.Marked marked pattern}.
 * <p>
 * The class declares no state and does not override {@code equals} or
 * {@code hashCode}; its string form embeds the instance's identity hash code.
 */
public class Marker {
    /**
     * Renders this marker as {@code Marker(<identity hash code>)}.
     *
     * @return the textual form of this marker
     */
    @Override
    public String toString() {
        final int identity = System.identityHashCode(this);
        return "Marker(" + identity + ")";
    }
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package com.turbolent.regex;
2+
3+
import java.util.List;
4+
5+
/**
6+
* A {@code Match} is the result of a successful
7+
* {@linkplain Parser#match(com.turbolent.regex.instructions.Instruction, java.util.List) parse}
8+
*
9+
* @param <Value> the type of the input values
10+
* @param <Result> the type of the match result
11+
*/
12+
13+
public class Match<Value, Result> {
14+
private final List<Value> input;
15+
protected final ThreadState<Result> state;
16+
17+
Match(List<Value> input, ThreadState<Result> state) {
18+
this.input = input;
19+
this.state = state;
20+
}
21+
22+
public List<Value> group(Object identifier) {
23+
Integer start = this.state.getStart(identifier);
24+
if (start == null)
25+
return null;
26+
Integer end = this.state.getEnd(identifier);
27+
return this.input.subList(start, end);
28+
}
29+
30+
public Result getResult() {
31+
return this.state.getResult();
32+
}
33+
34+
@Override
35+
public String toString() {
36+
return String.format("Match(%s)", this.state);
37+
}
38+
}
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
package com.turbolent.regex;
2+
3+
import com.turbolent.regex.instructions.*;
4+
5+
import java.util.ArrayList;
6+
import java.util.HashSet;
7+
import java.util.List;
8+
9+
/**
10+
* A {@code Parser} matches a pattern, compiled into
11+
* {@linkplain com.turbolent.regex.instructions.Instruction matching instructions},
12+
* against a list of {@link #values values}.
13+
* <p>
14+
* An implementation of Rob Pike's Virtual Machine-based regular expression engine, as described
15+
* in great detail by Russ Cox in "Regular Expression Matching: the Virtual Machine Approach"
16+
* (see http://swtch.com/~rsc/regexp/regexp2.html)
17+
*
18+
* @param <Value> the type of the input values
19+
* @param <Result> the type of the match result
20+
*/
21+
public class Parser<Value, Result> {
22+
private final HashSet<Instruction<Value, Result>> seen = new HashSet<>();
23+
final Instruction<Value, Result> code;
24+
final List<Value> values;
25+
26+
private Parser(Instruction<Value, Result> code, List<Value> values) {
27+
this.code = code;
28+
this.values = values;
29+
}
30+
31+
public static <Value, Result> Match<Value, Result> match(Instruction<Value, Result> code,
32+
List<Value> values)
33+
{
34+
return new Parser<>(code, values).match();
35+
}
36+
37+
private Match<Value, Result> match() {
38+
List<Thread<Value, Result>> currentThreads = new ArrayList<>();
39+
List<Thread<Value, Result>> newThreads = new ArrayList<>();
40+
41+
ThreadState<Result> matchedState = null;
42+
43+
int index = 0;
44+
addThread(new Thread<>(this.code, new ThreadState<>()),
45+
index, currentThreads);
46+
for (; !currentThreads.isEmpty(); index++) {
47+
Value value = null;
48+
if (index < this.values.size()) {
49+
value = this.values.get(index);
50+
}
51+
52+
this.seen.clear();
53+
for (int i = 0; i < currentThreads.size(); i++) {
54+
final Thread<Value, Result> thread = currentThreads.get(i);
55+
final Instruction<Value, Result> instruction = thread.instruction;
56+
final ThreadState<Result> state = thread.state;
57+
58+
if (instruction instanceof Atom) {
59+
Atom<Value, Result> atom = (Atom<Value, Result>)instruction;
60+
if (value != null && atom.predicate.test(value))
61+
addThread(new Thread<>(instruction.next, state),
62+
index + 1, newThreads);
63+
else
64+
state.decrementReferenceCount();
65+
} else if (instruction instanceof Accept) {
66+
if (matchedState != null)
67+
matchedState.decrementReferenceCount();
68+
69+
matchedState = state;
70+
71+
for (i++; i < currentThreads.size(); i++) {
72+
Thread remainingThread = currentThreads.get(i);
73+
remainingThread.state.decrementReferenceCount();
74+
}
75+
76+
break;
77+
} else
78+
throw Instruction.newUnsupportedException(instruction);
79+
}
80+
81+
// swap currentThreads for newThreads
82+
List<Thread<Value, Result>> threads = currentThreads;
83+
currentThreads = newThreads;
84+
newThreads = threads;
85+
86+
newThreads.clear();
87+
88+
if (value == null)
89+
break;
90+
}
91+
92+
if (matchedState == null)
93+
return null;
94+
return new Match<>(this.values, matchedState);
95+
}
96+
97+
private void addThread(Thread<Value, Result> thread, int index,
98+
List<Thread<Value, Result>> threads)
99+
{
100+
Instruction<Value, Result> instruction = thread.instruction;
101+
102+
if (this.skipIfSeen(instruction)) {
103+
thread.state.decrementReferenceCount();
104+
return;
105+
}
106+
107+
if (instruction instanceof Split) {
108+
final Split<Value, Result> split = (Split<Value, Result>)instruction;
109+
thread.state.incrementReferenceCount();
110+
addThread(new Thread<>(split.next, thread.state), index, threads);
111+
addThread(new Thread<>(split.split, thread.state), index, threads);
112+
} else if (instruction instanceof Save) {
113+
final Save<Value, Result> save = (Save<Value, Result>)instruction;
114+
final ThreadState<Result> state = thread.state.maybeCloneState();
115+
switch (save.position) {
116+
case START:
117+
state.updateStartIndex(save.identifier, index);
118+
break;
119+
case END:
120+
state.updateEndIndex(save.identifier, index);
121+
break;
122+
default:
123+
throw Save.Position.newUnsupportedException(save.position);
124+
}
125+
addThread(new Thread<>(instruction.next, state), index, threads);
126+
} else if (instruction instanceof Mark) {
127+
final Mark<Value, Result> mark = (Mark<Value, Result>) instruction;
128+
final ThreadState<Result> state = thread.state.maybeCloneState();
129+
switch (mark.position) {
130+
case START:
131+
state.addMark();
132+
break;
133+
case END:
134+
state.removeMark();
135+
break;
136+
default:
137+
throw Mark.Position.newUnsupportedException(mark.position);
138+
}
139+
addThread(new Thread<>(instruction.next, state), index, threads);
140+
} else if (instruction instanceof Call) {
141+
final Call<Value, Result> call = (Call<Value, Result>)instruction;
142+
final ThreadState<Result> state = thread.state.maybeCloneState();
143+
call.consumer.accept(this, new PartialMatch<>(this.values, state));
144+
addThread(new Thread<>(instruction.next, state), index, threads);
145+
} else
146+
threads.add(thread);
147+
}
148+
149+
private boolean skipIfSeen(Instruction<Value, Result> instruction) {
150+
if (this.seen.contains(instruction)) {
151+
return true;
152+
} else {
153+
this.seen.add(instruction);
154+
return false;
155+
}
156+
}
157+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package com.turbolent.regex;
2+
3+
import java.util.List;
4+
5+
/**
6+
* A {@code PartialMatch} is a {@link Match} that was constructed up until
7+
* now in the current {@linkplain com.turbolent.regex.Thread thread}.
8+
* <p>
9+
* The {@link com.turbolent.regex.patterns.Call#consumer consumer} of a
10+
* {@linkplain com.turbolent.regex.patterns.Call Call pattern} has access to a partial match
11+
* and may use {@link #setResult(Object)} to update the current thread's result,
12+
* and {@link #getCurrentMarker()} to get the current {@link Marker},
13+
* if the {@linkplain com.turbolent.regex.patterns.Call Call pattern}
14+
* is part of a {@linkplain com.turbolent.regex.patterns.Marked Marked pattern}.
15+
*
16+
* @param <Value> the type of the input values
17+
* @param <Result> the type of the match result
18+
*/
19+
public class PartialMatch<Value, Result> extends Match<Value, Result> {
20+
21+
public PartialMatch(List<Value> input, ThreadState<Result> state) {
22+
super(input, state);
23+
}
24+
25+
public void setResult(Result result) {
26+
this.state.setResult(result);
27+
}
28+
29+
public Marker getCurrentMarker() {
30+
return this.state.getCurrentMarker();
31+
}
32+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package com.turbolent.regex;
2+
3+
import com.turbolent.regex.instructions.Instruction;
4+
5+
/**
6+
*
7+
* @param <Value> the type of the input values
8+
* @param <Result> the type of the match result
9+
*/
10+
public class Thread<Value, Result> {
11+
final Instruction<Value, Result> instruction;
12+
final ThreadState<Result> state;
13+
14+
Thread(Instruction<Value, Result> instruction, ThreadState<Result> state) {
15+
this.instruction = instruction;
16+
this.state = state;
17+
}
18+
19+
@Override
20+
public String toString() {
21+
return String.format("Thread(%s, %s)",
22+
this.instruction, this.state);
23+
}
24+
}

0 commit comments

Comments
 (0)