Skip to content

Parallel states refactor #153

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
nim: [1.6.18, 2.0.0, 2.2.0]
nim: [1.6.18, 1.6.20, 2.0.0, 2.0.14, 2.2.0]
steps:
- uses: actions/checkout@v2
- name: Run Tests
Expand Down
4 changes: 4 additions & 0 deletions bench/bench.nim
Original file line number Diff line number Diff line change
Expand Up @@ -244,4 +244,8 @@ when isMainModule:
# open the log with KCachegrind

$ nim c --debugger:native --threads:off -d:danger -d:useMalloc -o:bin/bench2 bench/bench2.nim && valgrind --tool=callgrind -v ./bin/bench2

# Bench

$ nim c -r --threads:off -d:danger --mm:arc -o:bin/bench bench/bench.nim
]#
30 changes: 13 additions & 17 deletions src/regex/nfafindall.nim
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ type
s: seq[MatchItem]
i: int
RegexMatches* = object
a, b: Submatches
a, b: Pstates
m: Matches
c: Capts
look: Lookaround
Expand All @@ -46,22 +46,18 @@ func add(ms: var Matches, m: MatchItem) {.inline.} =
func clear(ms: var Matches) {.inline.} =
ms.i = 0

template initMaybeImpl(
func initMaybeImpl(
ms: var RegexMatches,
size: int
) =
if ms.a == nil:
assert ms.b == nil
ms.a = newSubmatches size
ms.b = newSubmatches size
ms.look = initLook()
doAssert ms.a.cap >= size and
ms.b.cap >= size
) {.inline.} =
ms.a.reset size
ms.b.reset size
ms.look = initLook()

template initMaybeImpl(
func initMaybeImpl(
ms: var RegexMatches,
regex: Regex
) =
) {.inline.} =
initMaybeImpl(ms, regex.nfa.s.len)

func hasMatches(ms: RegexMatches): bool {.inline.} =
Expand Down Expand Up @@ -130,7 +126,7 @@ func submatch(
while nti < L:
let isEoe = ntn.kind == reEoe
let nt0 = nt
matched = not smB.hasState(nt) and
matched = nt notin smB and
(ntn.match(c.Rune) or ntn.kind == reEoe)
inc nti
captx = capt
Expand Down Expand Up @@ -158,10 +154,10 @@ func submatch(
smA.clear()
if not eoeFound:
eoeFound = true
smA.add (0'i16, -1.CaptIdx, i .. i-1)
smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1)
smi = -1
break
smB.add (nt0, captx, bounds.a .. i-1)
smB.add initPstate(nt0, captx, bounds.a .. i-1)
inc smi
swap smA, smB

Expand All @@ -181,7 +177,7 @@ func findSomeImpl*(
i = start.int
iPrev = start.int
optFlag = mfFindMatchOpt in flags
smA.add (0'i16, -1.CaptIdx, i .. i-1)
smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1)
if start-1 in 0 .. text.len-1:
cPrev = bwRuneAt(text, start-1).int32
while i < text.len:
Expand All @@ -200,7 +196,7 @@ func findSomeImpl*(
# else: # XXX clear captures
if optFlag:
return i
smA.add (0'i16, -1.CaptIdx, i .. i-1)
smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1)
iPrev = i
cPrev = c.int32
submatch(ms, text, regex, iPrev, cPrev, -1'i32)
Expand Down
32 changes: 14 additions & 18 deletions src/regex/nfafindall2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -54,28 +54,24 @@ type
bounds: Bounds
Matches = seq[MatchItem]
RegexMatches2* = object
a, b: Submatches
a, b: Pstates
m: Matches
c: Capts3
look: Lookaround

template initMaybeImpl(
func initMaybeImpl(
ms: var RegexMatches2,
size, groupsLen: int
) =
if ms.a == nil:
assert ms.b == nil
ms.a = newSubmatches size
ms.b = newSubmatches size
ms.c = initCapts3 groupsLen
ms.look = initLook()
doAssert ms.a.cap >= size and
ms.b.cap >= size
) {.inline.} =
ms.a.reset(size)
ms.b.reset(size)
ms.c.reset(groupsLen)
ms.look = initLook()

template initMaybeImpl(
func initMaybeImpl(
ms: var RegexMatches2,
regex: Regex
) =
) {.inline.} =
initMaybeImpl(ms, regex.nfa.s.len, regex.groupsCount)

func add(ms: var RegexMatches2, m: MatchItem) {.inline.} =
Expand Down Expand Up @@ -170,7 +166,7 @@ func nextState(
while nti < L:
let isEoe = ntn.kind == reEoe
let nt0 = nt
matched = not smB.hasState(nt) and
matched = nt notin smB and
(ntn.match(c.Rune) or ntn.kind == reEoe)
inc nti
captx = capt
Expand All @@ -187,10 +183,10 @@ func nextState(
smA.clear()
if not eoeFound:
eoeFound = true
smA.add (0'i16, -1.CaptIdx, i .. i-1)
smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1)
smi = -1
break
smB.add (nt0, captx, bounds.a .. i-1)
smB.add initPstate(nt0, captx, bounds.a .. i-1)
inc smi
swap smA, smB
capts.recycle()
Expand All @@ -214,7 +210,7 @@ func findSomeImpl*(
flags = regex.flags.toMatchFlags + flags
optFlag = mfFindMatchOpt in flags
binFlag = mfBytesInput in flags
smA.add (0'i16, -1.CaptIdx, i .. i-1)
smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1)
if start-1 in 0 .. text.len-1:
cPrev = if binFlag:
text[start-1].int32
Expand All @@ -236,7 +232,7 @@ func findSomeImpl*(
return i
if optFlag:
return i
smA.add (0'i16, -1.CaptIdx, i .. i-1)
smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1)
iPrev = i
cPrev = c.int32
nextState(ms, text, regex, iPrev, cPrev, -1'i32, flags)
Expand Down
52 changes: 25 additions & 27 deletions src/regex/nfamacro.nim
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ type
): NimNode {.nimcall, noSideEffect, raises: [].}
Lookaround = object
ahead, behind: Sig
smL: NimNode

# todo: cannot use unicodeplus due to
# https://github.com/nim-lang/Nim/issues/7059
Expand Down Expand Up @@ -240,9 +239,7 @@ func genLookaroundMatch(
look: Lookaround
): NimNode =
template nfa: untyped = n.subExp.nfa
template smL: untyped = look.smL
let smlA = quote do: lastA(`smL`)
let smlB = quote do: lastB(`smL`)
defVars smlA, smlB
var flags = {mfAnchored}
if n.subExp.reverseCapts:
flags.incl mfReverseCapts
Expand All @@ -262,10 +259,9 @@ func genLookaroundMatch(
`matched` = not `matched`
let nfaLenLit = newLit nfa.s.len
result = quote do:
grow `smL`
`smL`.last.setLen `nfaLenLit`
var `smlA` = initPstates(`nfaLenLit`)
var `smlB` = initPstates(`nfaLenLit`)
`lookaroundStmt`
removeLast `smL`

func getEpsilonTransitions(nfa: Nfa, n: Node, nti: int): seq[int] =
doAssert not isEpsilonTransition(n)
Expand Down Expand Up @@ -293,7 +289,7 @@ func genMatchedBody(
let eTransitions = getEpsilonTransitions(nfa, n, nti)
if eTransitions.len == 0:
return quote do:
add(`smB`, (`ntLit`, `capt`, `bounds2`))
add(`smB`, initPstate(`ntLit`, `capt`, `bounds2`))
var matchedBody = newSeq[NimNode]()
matchedBody.add quote do:
`matched` = true
Expand Down Expand Up @@ -325,7 +321,7 @@ func genMatchedBody(
doAssert false
matchedBody.add quote do:
if `matched`:
add(`smB`, (`ntLit`, `captx`, `bounds2`))
add(`smB`, initPstate(`ntLit`, `captx`, `bounds2`))
return newStmtList matchedBody

func genNextState(
Expand All @@ -339,10 +335,10 @@ func genNextState(
#[
case n
of 0:
if not smB.hasState(1):
if not smB.contains(1):
if c == 'a':
smB.add((1, capt, bounds))
if not smB.hasState(4):
if not smB.contains(4):
if c == 'b':
smB.add((4, capt, bounds))
of 1:
Expand Down Expand Up @@ -384,11 +380,11 @@ func genNextState(
i, nti, nfa, look, flags)
if mfAnchored in flags and s[nt].kind == reEoe:
branchBodyN.add quote do:
if not hasState(`smB`, `ntLit`):
if not contains(`smB`, `ntLit`):
`matchedBodyStmt`
else:
branchBodyN.add quote do:
if not hasState(`smB`, `ntLit`) and `matchCond`:
if not contains(`smB`, `ntLit`) and `matchCond`:
`matchedBodyStmt`
doAssert eoeOnly or branchBodyN.len > 0
if branchBodyN.len > 0:
Expand Down Expand Up @@ -418,12 +414,15 @@ func nextState(
flags: set[MatchFlag],
eoeOnly = false
): NimNode =
defForVars n, capt, bounds
defForVars pstate
let n = quote do: `pstate`.ni
let capt = quote do: `pstate`.ci
let bounds = quote do: `pstate`.bounds
let eoeBailOut = if mfAnchored in flags:
quote do:
if `n` == `eoe`:
if not hasState(`smB`, `n`):
add(`smB`, (`n`, `capt`, `bounds`))
if not contains(`smB`, `n`):
add(`smB`, initPstate(`n`, `capt`, `bounds`))
break
else:
newEmptyNode()
Expand All @@ -433,7 +432,7 @@ func nextState(
flags, eoeOnly)
result = quote do:
`smB`.clear()
for `n`, `capt`, `bounds` in `smA`.items:
for `pstate` in `smA`.items:
`eoeBailOut`
`nextStateStmt`
swap `smA`, `smB`
Expand Down Expand Up @@ -483,7 +482,7 @@ func matchImpl(
if `start`-1 in 0 .. `text`.len-1:
`cPrev` = bwRuneAt(`text`, `start`-1).int32
clear(`smA`)
add(`smA`, (0'i16, `captIdx`, `i` .. `i`-1))
add(`smA`, initPstate(0'i16, `captIdx`, `i` .. `i`-1))
while `i` < `text`.len:
fastRuneAt(`text`, iNext, `c`, true)
`nextStateStmt`
Expand Down Expand Up @@ -534,7 +533,7 @@ func reversedMatchImpl(
if `start` in 0 .. `text`.len-1:
`cPrev` = runeAt(`text`, `start`).int32
clear(`smA`)
add(`smA`, (0'i16, `captIdx`, `i` .. `i`-1))
add(`smA`, initPstate(0'i16, `captIdx`, `i` .. `i`-1))
while iNext > 0:
bwFastRuneAt(`text`, iNext, `c`)
`nextStateStmt`
Expand All @@ -551,11 +550,11 @@ func reversedMatchImpl(
`captsStmt`
`matched` = `smA`.len > 0

template look(smL: NimNode): untyped =
template look: untyped =
Lookaround(
ahead: matchImpl,
behind: reversedMatchImpl,
smL: smL)
behind: reversedMatchImpl
)

template constructSubmatches2(
captures, txt, capts, capt, size: untyped
Expand All @@ -578,24 +577,23 @@ proc matchImpl*(text, expLit, body: NimNode): NimNode =
if not (expLit.kind == nnkCallStrLit and $expLit[0] == "rex"):
error "not a regex literal; only rex\"regex\" is allowed", expLit
let exp = expLit[1]
defVars smA, smB, capts, capt, matched, smL
defVars smA, smB, capts, capt, matched
let regex = reCt(exp.strVal)
let startLit = newLit 0
let flags: set[MatchFlag] = {}
let matchImplStmt = matchImpl(
smA, smB, capts, capt, matched,
text, startLit, regex.nfa, look(smL), flags)
text, startLit, regex.nfa, look(), flags)
let nfaLenLit = newLit regex.nfa.s.len
let nfaGroupsLen = int(regex.groupsCount)
result = quote do:
block:
var
`smA` = newSubmatches `nfaLenLit`
`smB` = newSubmatches `nfaLenLit`
`smA` = initPstates `nfaLenLit`
`smB` = initPstates `nfaLenLit`
`capts` = default(Capts)
`capt` = -1'i32
`matched` = false
`smL` {.used.} = default(SmLookaround)
`matchImplStmt`
if `matched`:
var matches {.used, inject.} = newSeq[string]()
Expand Down
Loading