Skip to content

Commit cccfc23

Browse files
Multimodcraftershilangyu
authored and committed
Implement look-around index generation
1 parent 519d13d commit cccfc23

File tree

2 files changed

+34
-2
lines changed

2 files changed

+34
-2
lines changed

regex-automata/src/nfa/thompson/compiler.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,13 @@ pub struct Compiler {
711711
/// State used for caching common suffixes when compiling reverse UTF-8
712712
/// automata (for Unicode character classes).
713713
utf8_suffix: RefCell<Utf8SuffixMap>,
714+
/// Top level alternation state which is used to run all look-around
715+
/// assertion checks in lockstep with the main expression. Each look-around
716+
/// expression is compiled to a set of states that is patched into this
717+
/// state, and this state is updated on each new pattern being compiled.
714718
lookaround_alt: RefCell<Option<StateID>>,
719+
/// The next index to use for a look-around expression.
720+
lookaround_index: RefCell<SmallIndex>,
715721
}
716722

717723
impl Compiler {
@@ -725,6 +731,7 @@ impl Compiler {
725731
trie_state: RefCell::new(RangeTrie::new()),
726732
utf8_suffix: RefCell::new(Utf8SuffixMap::new(1000)),
727733
lookaround_alt: RefCell::new(None),
734+
lookaround_index: RefCell::new(SmallIndex::ZERO),
728735
}
729736
}
730737

@@ -1046,7 +1053,11 @@ impl Compiler {
10461053
LookAround::NegativeLookBehind(_) => false,
10471054
LookAround::PositiveLookBehind(_) => true,
10481055
};
1049-
let idx = todo!("get index");
1056+
let idx = *self.lookaround_index.borrow();
1057+
*self.lookaround_index.borrow_mut() = SmallIndex::new(idx.one_more())
1058+
.map_err(|e| {
1059+
BuildError::too_many_lookarounds(e.attempted() as usize)
1060+
})?;
10501061
let check = self.add_check_lookaround(idx, pos)?;
10511062
let write = self.add_write_lookaround(idx)?;
10521063
self.patch(sub.end, write)?;

regex-automata/src/nfa/thompson/error.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::util::{
22
captures, look,
3-
primitives::{PatternID, StateID},
3+
primitives::{PatternID, SmallIndex, StateID},
44
};
55

66
/// An error that can occur during the construction of a Thompson NFA.
@@ -55,6 +55,14 @@ enum BuildErrorKind {
5555
/// The limit on the number of states.
5656
limit: usize,
5757
},
58+
/// An error that occurs if too many indices need to be generated for
59+
/// look-around sub-expressions while building an NFA.
60+
TooManyLookArounds {
61+
/// The number of sub-expressions that exceeded the limit.
62+
given: usize,
63+
/// The limit on the number of sub-expressions.
64+
limit: usize,
65+
},
5866
/// An error that occurs when NFA compilation exceeds a configured heap
5967
/// limit.
6068
ExceededSizeLimit {
@@ -115,6 +123,13 @@ impl BuildError {
115123
BuildError { kind: BuildErrorKind::TooManyStates { given, limit } }
116124
}
117125

126+
pub(crate) fn too_many_lookarounds(given: usize) -> BuildError {
127+
let limit = SmallIndex::LIMIT;
128+
BuildError {
129+
kind: BuildErrorKind::TooManyLookArounds { given, limit },
130+
}
131+
}
132+
118133
pub(crate) fn exceeded_size_limit(limit: usize) -> BuildError {
119134
BuildError { kind: BuildErrorKind::ExceededSizeLimit { limit } }
120135
}
@@ -164,6 +179,12 @@ impl core::fmt::Display for BuildError {
164179
which exceeds the limit of {}",
165180
given, limit,
166181
),
182+
BuildErrorKind::TooManyLookArounds { given, limit } => write!(
183+
f,
184+
"attempted to compile {} look-around expressions, \
185+
which exceeds the limit of {}",
186+
given, limit,
187+
),
167188
BuildErrorKind::ExceededSizeLimit { limit } => write!(
168189
f,
169190
"heap usage during NFA compilation exceeded limit of {}",

0 commit comments

Comments
 (0)