Skip to content

Commit c64d7e1

Browse files
authored
Reorder epsilon transitions (#145)
1 parent ae47b9a commit c64d7e1

File tree

8 files changed

+78
-62
lines changed

8 files changed

+78
-62
lines changed

src/regex.nim

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1562,10 +1562,10 @@ when isMainModule:
15621562

15631563
doAssert graph(toRegex(re2"^a+$")) == """digraph graphname {
15641564
0 [label="q0";color=blue];
1565-
2 [label="q1";color=black];
1566-
4 [label="q2";color=blue];
1567-
0 -> 2 [label="a, {^}, i=0"];
1568-
2 -> 2 [label="a, i=0"];2 -> 4 [label="{eoe}, {$}, i=1"];
1565+
1 [label="q1";color=black];
1566+
3 [label="q2";color=blue];
1567+
0 -> 1 [label="a, {^}, i=0"];
1568+
1 -> 1 [label="a, i=0"];1 -> 3 [label="{eoe}, {$}, i=1"];
15691569
}
15701570
"""
15711571

src/regex/dotgraph.nim

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@ import std/os
55
import ./nfatype
66
import ./types
77

8+
func getEpsilonTransitions(nfa: Nfa, n: Node, nti: int): seq[int] =
## Collects the entries of `n.next` that come right after index `nti`,
## stopping at the first entry whose node in `nfa.s` is not an
## epsilon transition. The entry at `nti` itself is not included.
## The result is empty when the entry after `nti` is not an epsilon
## transition or when `nti` is the last index.
9+
# `n` is required to be a non-epsilon node.
doAssert not isEpsilonTransition(n)
10+
# `nti` must not exceed the last index of `n.next`.
doAssert nti <= n.next.len-1
11+
# Scan forward starting from the entry after `nti`.
for i in nti+1 .. n.next.len-1:
12+
if not isEpsilonTransition(nfa.s[n.next[i]]):
13+
# The run of epsilon transitions ends here.
break
14+
result.add n.next[i]
15+
816
func color(n: Node): string =
917
case n.kind
1018
of matchableKind: "black"
@@ -29,12 +37,13 @@ func graph*(nfa: Nfa): string =
2937
result.add tab
3038
var t = ""
3139
var ii = 0
32-
for n2 in n.next:
40+
for nti, n2 in pairs n.next:
3341
if isEpsilonTransition(nfa.s[n2]):
42+
continue
43+
for n3 in getEpsilonTransitions(nfa, n, nti):
3444
if t.len > 0:
3545
t &= ", "
36-
t &= $nfa.s[n2]
37-
continue
46+
t &= $nfa.s[n3]
3847
if t.len > 0:
3948
t = ", {" & t & "}"
4049
let label = ($nfa.s[n2] & t & ", i=" & $ii).replace(r"\", r"\\")

src/regex/nfa.nim

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,8 @@ func eNfa*(exp: RpnExp): Enfa {.raises: [RegexError].} =
147147
result.s.add initSkipNode(states)
148148

149149
type
150-
Etransitions = seq[int16] # xxx transitions
151-
TeClosure = seq[(int16, Etransitions)]
150+
Transitions = seq[int16]
151+
TeClosure = seq[(int16, Transitions)]
152152

153153
func isEpsilonTransition2(n: Node): bool {.inline.} =
154154
result = case n.kind
@@ -164,24 +164,24 @@ func teClosure(
164164
eNfa: Enfa,
165165
state: int16,
166166
processing: var seq[int16],
167-
eTransitions: Etransitions
167+
transitions: Transitions
168168
) =
169-
var eTransitionsCurr = eTransitions
169+
var transitionsCurr = transitions
170170
if isEpsilonTransition2 eNfa.s[state]:
171-
eTransitionsCurr.add state
171+
transitionsCurr.add state
172172
if eNfa.s[state].kind in matchableKind + {reEOE}:
173-
result.add (state, eTransitionsCurr)
173+
result.add (state, transitionsCurr)
174174
return
175175
for i, s in pairs eNfa.s[state].next:
176176
# Enter loops only once. "a", re"(a*)*" -> ["a", ""]
177177
if eNfa.s[state].kind in repetitionKind:
178178
if s notin processing or i == int(eNfa.s[state].isGreedy):
179179
processing.add s
180-
teClosure(result, eNfa, s, processing, eTransitionsCurr)
180+
teClosure(result, eNfa, s, processing, transitionsCurr)
181181
discard processing.pop()
182182
# else skip loop
183183
else:
184-
teClosure(result, eNfa, s, processing, eTransitionsCurr)
184+
teClosure(result, eNfa, s, processing, transitionsCurr)
185185

186186
func teClosure(
187187
result: var TeClosure,
@@ -190,9 +190,9 @@ func teClosure(
190190
processing: var seq[int16]
191191
) =
192192
doAssert processing.len == 0
193-
var eTransitions: Etransitions
193+
var transitions: Transitions
194194
for s in eNfa.s[state].next:
195-
teClosure(result, eNfa, s, processing, eTransitions)
195+
teClosure(result, eNfa, s, processing, transitions)
196196

197197
func eRemoval*(eNfa: Enfa): Nfa {.raises: [].} =
198198
## Remove e-transitions and return
@@ -225,16 +225,16 @@ func eRemoval*(eNfa: Enfa): Nfa {.raises: [].} =
225225
teClosure(closure, eNfa, qa, processing)
226226
doAssert statesMap[qa] > -1
227227
result.s[statesMap[qa]].next.setLen 0
228-
for qb, eTransitions in closure.items:
229-
for eti in eTransitions:
230-
if statesMap[eti] == -1:
231-
result.s.add eNfa.s[eti]
232-
statesMap[eti] = result.s.len.int16-1
233-
result.s[statesMap[qa]].next.add statesMap[eti]
228+
for qb, transitions in closure.items:
234229
if statesMap[qb] == -1:
235230
result.s.add eNfa.s[qb]
236231
statesMap[qb] = result.s.len.int16-1
237232
result.s[statesMap[qa]].next.add statesMap[qb]
233+
for eti in transitions:
234+
if statesMap[eti] == -1:
235+
result.s.add eNfa.s[eti]
236+
statesMap[eti] = result.s.len.int16-1
237+
result.s[statesMap[qa]].next.add statesMap[eti]
238238
if qb notin qu:
239239
qu.incl qb
240240
qw.addFirst qb

src/regex/nfafindall.nim

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,16 @@ func submatch(
125125
var eoeFound = false
126126
var smi = 0
127127
while smi < smA.len:
128+
let L = nfa[n].next.len
128129
var nti = 0
129-
while nti <= nfa[n].next.len-1:
130-
matched = true
130+
while nti < L:
131+
let isEoe = ntn.kind == reEoe
132+
let nt0 = nt
133+
matched = not smB.hasState(nt) and
134+
(ntn.match(c.Rune) or ntn.kind == reEoe)
135+
inc nti
131136
captx = capt
132-
while isEpsilonTransition(ntn):
137+
while nti < L and isEpsilonTransition(ntn):
133138
if matched:
134139
case ntn.kind
135140
of groupKind:
@@ -146,10 +151,8 @@ func submatch(
146151
doAssert false
147152
discard
148153
inc nti
149-
if matched and
150-
not smB.hasState(nt) and
151-
(ntn.match(c.Rune) or ntn.kind == reEoe):
152-
if ntn.kind == reEoe:
154+
if matched:
155+
if isEoe:
153156
#debugEcho "eoe ", bounds, " ", ms.m
154157
ms.m.add (captx, bounds.a .. i-1)
155158
smA.clear()
@@ -158,8 +161,7 @@ func submatch(
158161
smA.add (0'i16, -1'i32, i .. i-1)
159162
smi = -1
160163
break
161-
smB.add (nt, captx, bounds.a .. i-1)
162-
inc nti
164+
smB.add (nt0, captx, bounds.a .. i-1)
163165
inc smi
164166
swap smA, smB
165167

src/regex/nfafindall2.nim

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,16 @@ func submatch(
165165
while smi < smA.len:
166166
if capt != -1:
167167
capts.keepAlive capt
168+
let L = nfa[n].next.len
168169
var nti = 0
169-
while nti <= nfa[n].next.len-1:
170-
matched = true
170+
while nti < L:
171+
let isEoe = ntn.kind == reEoe
172+
let nt0 = nt
173+
matched = not smB.hasState(nt) and
174+
(ntn.match(c.Rune) or ntn.kind == reEoe)
175+
inc nti
171176
captx = capt
172-
while isEpsilonTransition(ntn):
177+
while nti < L and isEpsilonTransition(ntn):
173178
if matched:
174179
case ntn.kind
175180
of reGroupStart:
@@ -192,10 +197,8 @@ func submatch(
192197
doAssert false
193198
discard
194199
inc nti
195-
if matched and
196-
not smB.hasState(nt) and
197-
(ntn.match(c.Rune) or ntn.kind == reEoe):
198-
if ntn.kind == reEoe:
200+
if matched:
201+
if isEoe:
199202
#debugEcho "eoe ", bounds, " ", ms.m
200203
ms.add (captx, bounds.a .. i-1)
201204
smA.clear()
@@ -204,8 +207,7 @@ func submatch(
204207
smA.add (0'i16, -1'i32, i .. i-1)
205208
smi = -1
206209
break
207-
smB.add (nt, captx, bounds.a .. i-1)
208-
inc nti
210+
smB.add (nt0, captx, bounds.a .. i-1)
209211
inc smi
210212
swap smA, smB
211213
capts.recycle()

src/regex/nfamacro.nim

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ type
3434
nfa: Nfa,
3535
look: Lookaround,
3636
flags: set[MatchFlag]
37-
): NimNode {.noSideEffect, raises: [].}
37+
): NimNode {.nimcall, noSideEffect, raises: [].}
3838
Lookaround = object
3939
ahead, behind: Sig
4040
smL: NimNode
@@ -266,11 +266,12 @@ func genLookaroundMatch(
266266
removeLast `smL`
267267

268268
func getEpsilonTransitions(nfa: Nfa, n: Node, nti: int): seq[int] =
269-
for i in countdown(nti-1, 0):
269+
doAssert not isEpsilonTransition(n)
270+
doAssert nti <= n.next.len-1
271+
for i in nti+1 .. n.next.len-1:
270272
if not isEpsilonTransition(nfa.s[n.next[i]]):
271273
break
272274
result.add n.next[i]
273-
result.reverse()
274275

275276
func genMatchedBody(
276277
smB, ntLit, capt, bounds, matched, captx,

src/regex/nfamatch.nim

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ type
1818
look: var Lookaround,
1919
start: int,
2020
flags: set[MatchFlag]
21-
): bool {.noSideEffect, raises: [].}
21+
): bool {.nimcall, noSideEffect, raises: [].}
2222
BehindSig = proc (
2323
smA, smB: var Submatches,
2424
capts: var Capts,
@@ -28,7 +28,7 @@ type
2828
look: var Lookaround,
2929
start, limit: int,
3030
flags: set[MatchFlag]
31-
): int {.noSideEffect, raises: [].}
31+
): int {.nimcall, noSideEffect, raises: [].}
3232
Lookaround* = object
3333
ahead*: AheadSig
3434
behind*: BehindSig
@@ -78,11 +78,15 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
7878
if not smB.hasState n:
7979
smB.add (n, capt, bounds)
8080
break
81+
let L = nfa.s[n].next.len
8182
var nti = 0
82-
while nti <= nfa.s[n].next.len-1:
83-
matched = true
83+
while nti < L:
84+
let nt0 = nt
85+
matched = not smB.hasState(nt) and
86+
(ntn.match(c) or (anchored and ntn.kind == reEoe))
87+
inc nti
8488
captx = capt
85-
while isEpsilonTransition(ntn):
89+
while nti < L and isEpsilonTransition(ntn):
8690
if matched:
8791
case ntn.kind
8892
of groupKind:
@@ -102,11 +106,8 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
102106
doAssert false
103107
discard
104108
inc nti
105-
if matched and
106-
not smB.hasState(nt) and
107-
(ntn.match(c) or (anchored and ntn.kind == reEoe)):
108-
smB.add (nt, captx, bounds2)
109-
inc nti
109+
if matched:
110+
smB.add (nt0, captx, bounds2)
110111
swap smA, smB
111112

112113
func matchImpl(

src/regex/nfamatch2.nim

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,15 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
9191
if not smB.hasState n:
9292
smB.add (n, capt, bounds)
9393
break
94+
let L = nfa.s[n].next.len
9495
var nti = 0
95-
while nti <= nfa.s[n].next.len-1:
96-
matched = true
96+
while nti < L:
97+
let nt0 = nt
98+
matched = not smB.hasState(nt) and
99+
(ntn.match(c) or (anchored and ntn.kind == reEoe))
100+
inc nti
97101
captx = capt
98-
while isEpsilonTransition(ntn):
102+
while nti < L and isEpsilonTransition(ntn):
99103
if matched:
100104
case ntn.kind
101105
of reGroupStart:
@@ -124,11 +128,8 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} =
124128
doAssert false
125129
discard
126130
inc nti
127-
if matched and
128-
not smB.hasState(nt) and
129-
(ntn.match(c) or (anchored and ntn.kind == reEoe)):
130-
smB.add (nt, captx, bounds2)
131-
inc nti
131+
if matched:
132+
smB.add (nt0, captx, bounds2)
132133
swap smA, smB
133134
capts.recycle()
134135

0 commit comments

Comments
 (0)