Skip to content

Commit 9f1a5e1

Browse files
Multimodcrafter shilangyu
authored and committed
Add lookaround expressions to HIR
This is the first step to supporting captureless lookbehind assertions
1 parent 2695e29 commit 9f1a5e1

File tree

5 files changed

+70
-2
lines changed

5 files changed

+70
-2
lines changed

regex-automata/src/meta/reverse_inner.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ fn top_concat(mut hir: &Hir) -> Option<Vec<Hir>> {
170170
| HirKind::Literal(_)
171171
| HirKind::Class(_)
172172
| HirKind::Look(_)
173+
| HirKind::Lookaround(_)
173174
| HirKind::Repetition(_)
174175
| HirKind::Alternation(_) => return None,
175176
HirKind::Capture(hir::Capture { ref sub, .. }) => sub,
@@ -206,6 +207,7 @@ fn flatten(hir: &Hir) -> Hir {
206207
HirKind::Literal(hir::Literal(ref x)) => Hir::literal(x.clone()),
207208
HirKind::Class(ref x) => Hir::class(x.clone()),
208209
HirKind::Look(ref x) => Hir::look(x.clone()),
210+
HirKind::Lookaround(ref x) => Hir::lookaround(x.clone()),
209211
HirKind::Repetition(ref x) => Hir::repetition(x.with(flatten(&x.sub))),
210212
// This is the interesting case. We just drop the group information
211213
// entirely and use the child HIR itself.

regex-automata/src/nfa/thompson/compiler.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,7 @@ impl Compiler {
10031003
Class(Class::Bytes(ref c)) => self.c_byte_class(c),
10041004
Class(Class::Unicode(ref c)) => self.c_unicode_class(c),
10051005
Look(ref look) => self.c_look(look),
1006+
Lookaround(_) => todo!("implement lookaround NFA compilation"),
10061007
Repetition(ref rep) => self.c_repetition(rep),
10071008
Capture(ref c) => self.c_cap(c.index, c.name.as_deref(), &c.sub),
10081009
Concat(ref es) => self.c_concat(es.iter().map(|e| self.c(e))),

regex-syntax/src/hir/literal.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,9 @@ impl Extractor {
172172
use crate::hir::HirKind::*;
173173

174174
match *hir.kind() {
175-
Empty | Look(_) => Seq::singleton(self::Literal::exact(vec![])),
175+
Empty | Look(_) | Lookaround(_) => {
176+
Seq::singleton(self::Literal::exact(vec![]))
177+
}
176178
Literal(hir::Literal(ref bytes)) => {
177179
let mut seq =
178180
Seq::singleton(self::Literal::exact(bytes.to_vec()));

regex-syntax/src/hir/mod.rs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,13 @@ impl Hir {
373373
Hir { kind: HirKind::Look(look), props }
374374
}
375375

376+
/// Creates a look-around subexpression HIR expression.
377+
#[inline]
378+
pub fn lookaround(lookaround: Lookaround) -> Hir {
379+
let props = Properties::lookaround(&lookaround);
380+
Hir { kind: HirKind::Lookaround(lookaround), props }
381+
}
382+
376383
/// Creates a repetition HIR expression.
377384
#[inline]
378385
pub fn repetition(mut rep: Repetition) -> Hir {
@@ -728,6 +735,8 @@ pub enum HirKind {
728735
Class(Class),
729736
/// A look-around assertion. A look-around match always has zero length.
730737
Look(Look),
738+
/// A look-around subexpression, e.g. a positive or negative lookbehind.
739+
Lookaround(Lookaround),
731740
/// A repetition operation applied to a sub-expression.
732741
Repetition(Repetition),
733742
/// A capturing group, which contains a sub-expression.
@@ -761,6 +770,7 @@ impl HirKind {
761770
| HirKind::Literal(_)
762771
| HirKind::Class(_)
763772
| HirKind::Look(_) => &[],
773+
HirKind::Lookaround(ref lookaround) => from_ref(lookaround.sub()),
764774
HirKind::Repetition(Repetition { ref sub, .. }) => from_ref(sub),
765775
HirKind::Capture(Capture { ref sub, .. }) => from_ref(sub),
766776
HirKind::Concat(ref subs) => subs,
@@ -1786,6 +1796,37 @@ impl Look {
17861796
}
17871797
}
17881798

1799+
/// Represents a general lookaround assertion
1800+
///
1801+
/// Currently, only lookbehind assertions are supported.
1802+
/// Furthermore, capture groups inside assertions are not supported.
1803+
#[derive(Clone, Debug, Eq, PartialEq)]
1804+
pub enum Lookaround {
1805+
/// A positive lookbehind assertion
1806+
PositiveLookBehind(Box<Hir>),
1807+
/// A negative lookbehind assertion
1808+
NegativeLookBehind(Box<Hir>),
1809+
}
1810+
1811+
impl Lookaround {
1812+
/// Returns a reference to the inner expression that must match for this
1813+
/// lookaround assertion to hold.
1814+
pub fn sub(&self) -> &Hir {
1815+
match self {
1816+
Lookaround::PositiveLookBehind(sub)
1817+
| Lookaround::NegativeLookBehind(sub) => sub,
1818+
}
1819+
}
1820+
1821+
/// Returns a mutable reference to the inner expression
1822+
pub fn sub_mut(&mut self) -> &mut Hir {
1823+
match self {
1824+
Lookaround::PositiveLookBehind(sub)
1825+
| Lookaround::NegativeLookBehind(sub) => sub,
1826+
}
1827+
}
1828+
}
1829+
17891830
/// The high-level intermediate representation for a capturing group.
17901831
///
17911832
/// A capturing group always has an index and a child expression. It may
@@ -1935,6 +1976,9 @@ impl Drop for Hir {
19351976
| HirKind::Literal(_)
19361977
| HirKind::Class(_)
19371978
| HirKind::Look(_) => {}
1979+
HirKind::Lookaround(ref mut x) => {
1980+
stack.push(mem::replace(x.sub_mut(), Hir::empty()));
1981+
}
19381982
HirKind::Capture(ref mut x) => {
19391983
stack.push(mem::replace(&mut x.sub, Hir::empty()));
19401984
}
@@ -2499,6 +2543,18 @@ impl Properties {
24992543
Properties(Box::new(inner))
25002544
}
25012545

2546+
fn lookaround(lookaround: &Lookaround) -> Properties {
2547+
let sub_p = lookaround.sub().properties();
2548+
let inner = PropertiesI {
2549+
minimum_len: Some(0),
2550+
maximum_len: Some(0),
2551+
literal: false,
2552+
alternation_literal: false,
2553+
..*sub_p.0.clone()
2554+
};
2555+
Properties(Box::new(inner))
2556+
}
2557+
25022558
/// Create a new set of HIR properties for a repetition.
25032559
fn repetition(rep: &Repetition) -> Properties {
25042560
let p = rep.sub.properties();

regex-syntax/src/hir/print.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,12 @@ impl<W: fmt::Write> Visitor for Writer<W> {
227227
self.wtr.write_str(r"\b{end-half}")?;
228228
}
229229
},
230+
HirKind::Lookaround(hir::Lookaround::PositiveLookBehind(_)) => {
231+
self.wtr.write_str(r"(?<=)")?;
232+
}
233+
HirKind::Lookaround(hir::Lookaround::NegativeLookBehind(_)) => {
234+
self.wtr.write_str(r"(?<!)")?;
235+
}
230236
HirKind::Capture(hir::Capture { ref name, .. }) => {
231237
self.wtr.write_str("(")?;
232238
if let Some(ref name) = *name {
@@ -293,7 +299,8 @@ impl<W: fmt::Write> Visitor for Writer<W> {
293299
}
294300
HirKind::Capture(_)
295301
| HirKind::Concat(_)
296-
| HirKind::Alternation(_) => {
302+
| HirKind::Alternation(_)
303+
| HirKind::Lookaround(_) => {
297304
self.wtr.write_str(r")")?;
298305
}
299306
}

0 commit comments

Comments
 (0)