Skip to content

Commit 519d13d

Browse files
Multimodcraftershilangyu
authored andcommitted
Address review comments
1 parent ccdab18 commit 519d13d

File tree

4 files changed

+32
-39
lines changed

4 files changed

+32
-39
lines changed

regex-automata/src/nfa/thompson/builder.rs

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,7 @@ enum State {
4141
},
4242
/// A state that only transitions to another state if the current input
4343
/// byte is in a particular range of bytes.
44-
ByteRange {
45-
trans: Transition,
46-
},
44+
ByteRange { trans: Transition },
4745
/// A state with possibly many transitions, represented in a sparse
4846
/// fashion. Transitions must be ordered lexicographically by input range
4947
/// and be non-overlapping. As such, this may only be used when every
@@ -57,15 +55,10 @@ enum State {
5755
/// that `Sparse` is used for via `Union`. But this creates a more bloated
5856
/// NFA with more epsilon transitions than is necessary in the special case
5957
/// of character classes.
60-
Sparse {
61-
transitions: Vec<Transition>,
62-
},
58+
Sparse { transitions: Vec<Transition> },
6359
/// A conditional epsilon transition satisfied via some sort of
6460
/// look-around.
65-
Look {
66-
look: Look,
67-
next: StateID,
68-
},
61+
Look { look: Look, next: StateID },
6962
/// An empty state that records the start of a capture location. This is an
7063
/// unconditional epsilon transition like `Empty`, except it can be used to
7164
/// record position information for a capture group when using the NFA for
@@ -98,20 +91,21 @@ enum State {
9891
/// The next state that this state should transition to.
9992
next: StateID,
10093
},
101-
WriteLookaround {
102-
lookaround_index: usize,
103-
},
94+
/// An empty state that behaves analogously to a `Match` state but for
95+
/// the look-around sub-expression with the given index.
96+
WriteLookaround { lookaround_index: SmallIndex },
97+
/// A conditional epsilon transition that will only be taken if the
98+
/// look-around sub-expression with the given index evaluates to `positive`
99+
/// at the current position in the haystack.
104100
CheckLookaround {
105-
lookaround_index: usize,
101+
lookaround_index: SmallIndex,
106102
positive: bool,
107103
next: StateID,
108104
},
109105
/// An alternation such that there exists an epsilon transition to all
110106
/// states in `alternates`, where matches found via earlier transitions
111107
/// are preferred over later transitions.
112-
Union {
113-
alternates: Vec<StateID>,
114-
},
108+
Union { alternates: Vec<StateID> },
115109
/// An alternation such that there exists an epsilon transition to all
116110
/// states in `alternates`, where matches found via later transitions are
117111
/// preferred over earlier transitions.
@@ -127,9 +121,7 @@ enum State {
127121
/// to be amortized constant time. But if we used a `Union`, we'd need to
128122
/// prepend the state, which takes O(n) time. There are other approaches we
129123
/// could use to solve this, but this seems simple enough.
130-
UnionReverse {
131-
alternates: Vec<StateID>,
132-
},
124+
UnionReverse { alternates: Vec<StateID> },
133125
/// A state that cannot be transitioned out of. This is useful for cases
134126
/// where you want to prevent matching from occurring. For example, if your
135127
/// regex parser permits empty character classes, then one could choose a
@@ -143,9 +135,7 @@ enum State {
143135
///
144136
/// `pattern_id` refers to the ID of the pattern itself, which corresponds
145137
/// to the pattern's index (starting at 0).
146-
Match {
147-
pattern_id: PatternID,
148-
},
138+
Match { pattern_id: PatternID },
149139
}
150140

151141
impl State {
@@ -736,7 +726,7 @@ impl Builder {
736726
/// is satisfied at the current position.
737727
pub fn add_write_lookaround(
738728
&mut self,
739-
index: usize,
729+
index: SmallIndex,
740730
) -> Result<StateID, BuildError> {
741731
self.add(State::WriteLookaround { lookaround_index: index })
742732
}
@@ -745,7 +735,7 @@ impl Builder {
745735
/// index is satisfied at the current position.
746736
pub fn add_check_lookaround(
747737
&mut self,
748-
index: usize,
738+
index: SmallIndex,
749739
positive: bool,
750740
next: StateID,
751741
) -> Result<StateID, BuildError> {

regex-automata/src/nfa/thompson/compiler.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use crate::{
1919
},
2020
util::{
2121
look::{Look, LookMatcher},
22-
primitives::{PatternID, StateID},
22+
primitives::{PatternID, SmallIndex, StateID},
2323
},
2424
};
2525

@@ -1681,14 +1681,14 @@ impl Compiler {
16811681

16821682
fn add_write_lookaround(
16831683
&self,
1684-
index: usize,
1684+
index: SmallIndex,
16851685
) -> Result<StateID, BuildError> {
16861686
self.builder.borrow_mut().add_write_lookaround(index)
16871687
}
16881688

16891689
fn add_check_lookaround(
16901690
&self,
1691-
index: usize,
1691+
index: SmallIndex,
16921692
positive: bool,
16931693
) -> Result<StateID, BuildError> {
16941694
self.builder.borrow_mut().add_check_lookaround(

regex-automata/src/nfa/thompson/nfa.rs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,8 +1102,8 @@ impl NFA {
11021102

11031103
/// Returns how many lookaround sub-expressions this nfa contains
11041104
#[inline]
1105-
pub fn look_count(&self) -> usize {
1106-
self.0.look_count
1105+
pub fn lookaround_count(&self) -> SmallIndex {
1106+
self.0.lookaround_count
11071107
}
11081108

11091109
// FIXME: The `look_set_prefix_all` computation was not correct, and it
@@ -1266,7 +1266,10 @@ pub(super) struct Inner {
12661266
/// zero-length prefix for any of the patterns in this NFA.
12671267
look_set_prefix_all: LookSet,
12681268
*/
1269-
look_count: usize,
1269+
/// How many look-around expression this NFA contains.
1270+
/// This is needed to initialize the table for storing the result of
1271+
/// look-around evaluation
1272+
lookaround_count: SmallIndex,
12701273
/// Heap memory used indirectly by NFA states and other things (like the
12711274
/// various capturing group representations above). Since each state
12721275
/// might use a different amount of heap, we need to keep track of this
@@ -1384,7 +1387,7 @@ impl Inner {
13841387
}
13851388
State::CheckLookaround { look_idx, .. }
13861389
| State::WriteLookaround { look_idx } => {
1387-
self.look_count = self.look_count.max(look_idx);
1390+
self.lookaround_count = self.lookaround_count.max(look_idx);
13881391
}
13891392
State::Union { .. }
13901393
| State::BinaryUnion { .. }
@@ -1565,15 +1568,15 @@ pub enum State {
15651568
/// index `look_idx`
15661569
WriteLookaround {
15671570
/// The index of the lookaround expression that matches
1568-
look_idx: usize,
1571+
look_idx: SmallIndex,
15691572
},
15701573
/// This indicates that we need to check whether lookaround expression with
15711574
/// index `look_idx` holds at the current position in the haystack
15721575
/// If `positive` is false, then the lookaround expression is negative and
15731576
/// hence must NOT hold.
15741577
CheckLookaround {
15751578
/// The index of the lookaround expression that must be satisfied
1576-
look_idx: usize,
1579+
look_idx: SmallIndex,
15771580
/// Whether this is a positive lookaround expression
15781581
positive: bool,
15791582
/// The next state to transition if the lookaround assertion is satisfied
@@ -1791,13 +1794,13 @@ impl fmt::Debug for State {
17911794
write!(f, "{:?} => {:?}", look, next.as_usize())
17921795
}
17931796
State::WriteLookaround { look_idx } => {
1794-
write!(f, "Write Lookaround: {}", look_idx)
1797+
write!(f, "Write Lookaround: {}", look_idx.as_u32())
17951798
}
17961799
State::CheckLookaround { look_idx, positive, next } => {
17971800
write!(
17981801
f,
17991802
"Check Lookaround {} is {} => {}",
1800-
look_idx,
1803+
look_idx.as_u32(),
18011804
positive,
18021805
next.as_usize()
18031806
)

regex-automata/src/nfa/thompson/pikevm.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1216,8 +1216,8 @@ impl PikeVM {
12161216
}
12171217

12181218
impl PikeVM {
1219-
fn look_count(&self) -> usize {
1220-
self.nfa.look_count()
1219+
fn lookaround_count(&self) -> SmallIndex {
1220+
self.nfa.lookaround_count()
12211221
}
12221222

12231223
/// The implementation of standard leftmost search.
@@ -1984,7 +1984,7 @@ impl Cache {
19841984
next: ActiveStates::new(re),
19851985
lookaround: {
19861986
let mut res = Vec::new();
1987-
res.resize(re.look_count(), false);
1987+
res.resize(re.lookaround_count().as_usize(), false);
19881988
res
19891989
},
19901990
}

0 commit comments

Comments
 (0)