Skip to content

Commit dd0ce23

Browse files
committed
Add LookAround to Ast
1 parent 2c2c6bd commit dd0ce23

File tree

5 files changed

+74
-0
lines changed

5 files changed

+74
-0
lines changed

regex-syntax/src/ast/mod.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,8 @@ pub enum Ast {
479479
Dot(Box<Span>),
480480
/// A single zero-width assertion.
481481
Assertion(Box<Assertion>),
482+
/// A single look-around regular expression.
483+
LookAround(Box<LookAround>),
482484
/// A single Unicode character class, e.g., `\pL` or `\p{Greek}`.
483485
ClassUnicode(Box<ClassUnicode>),
484486
/// A single perl character class, e.g., `\d` or `\W`.
@@ -523,6 +525,11 @@ impl Ast {
523525
Ast::Assertion(Box::new(e))
524526
}
525527

528+
/// Create a "look-around" AST item.
529+
pub fn look_around(e: LookAround) -> Ast {
530+
Ast::LookAround(Box::new(e))
531+
}
532+
526533
/// Create a "Unicode class" AST item.
527534
pub fn class_unicode(e: ClassUnicode) -> Ast {
528535
Ast::ClassUnicode(Box::new(e))
@@ -566,6 +573,7 @@ impl Ast {
566573
Ast::Literal(ref x) => &x.span,
567574
Ast::Dot(ref span) => span,
568575
Ast::Assertion(ref x) => &x.span,
576+
Ast::LookAround(ref x) => &x.span,
569577
Ast::ClassUnicode(ref x) => &x.span,
570578
Ast::ClassPerl(ref x) => &x.span,
571579
Ast::ClassBracketed(ref x) => &x.span,
@@ -598,6 +606,7 @@ impl Ast {
598606
Ast::ClassBracketed(_)
599607
| Ast::Repetition(_)
600608
| Ast::Group(_)
609+
| Ast::LookAround(_)
601610
| Ast::Alternation(_)
602611
| Ast::Concat(_) => true,
603612
}
@@ -1344,6 +1353,28 @@ pub enum AssertionKind {
13441353
WordBoundaryEndHalf,
13451354
}
13461355

1356+
/// A single zero-width look-around.
1357+
#[derive(Clone, Debug, Eq, PartialEq)]
1358+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
1359+
pub struct LookAround {
1360+
/// The span of this look-around.
1361+
pub span: Span,
1362+
/// The look-around kind, e.g. negative/positive look-behind.
1363+
pub kind: LookAroundKind,
1364+
/// The regular expression inside the look-around.
1365+
pub ast: Box<Ast>,
1366+
}
1367+
1368+
/// The form that a [`LookAround`] takes.
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum LookAroundKind {
    /// A positive look-behind, written `(?<=...)`.
    PositiveLookBehind,
    /// A negative look-behind, written `(?<!...)`.
    NegativeLookBehind,
}
1377+
13471378
/// A repetition operation applied to a regular expression.
13481379
#[derive(Clone, Debug, Eq, PartialEq)]
13491380
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
@@ -1649,6 +1680,7 @@ impl Drop for Ast {
16491680
| Ast::ClassBracketed(_) => return,
16501681
Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
16511682
Ast::Group(ref x) if !x.ast.has_subexprs() => return,
1683+
Ast::LookAround(ref x) if !x.ast.has_subexprs() => return,
16521684
Ast::Alternation(ref x) if x.asts.is_empty() => return,
16531685
Ast::Concat(ref x) if x.asts.is_empty() => return,
16541686
_ => {}
@@ -1675,6 +1707,9 @@ impl Drop for Ast {
16751707
Ast::Group(ref mut x) => {
16761708
stack.push(mem::replace(&mut x.ast, empty_ast()));
16771709
}
1710+
Ast::LookAround(ref mut x) => {
1711+
stack.push(mem::replace(&mut x.ast, empty_ast()));
1712+
}
16781713
Ast::Alternation(ref mut x) => {
16791714
stack.extend(x.asts.drain(..));
16801715
}

regex-syntax/src/ast/parse.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2328,6 +2328,7 @@ impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
23282328
Ast::ClassBracketed(ref x) => &x.span,
23292329
Ast::Repetition(ref x) => &x.span,
23302330
Ast::Group(ref x) => &x.span,
2331+
Ast::LookAround(ref x) => &x.span,
23312332
Ast::Alternation(ref x) => &x.span,
23322333
Ast::Concat(ref x) => &x.span,
23332334
};
@@ -2349,6 +2350,7 @@ impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
23492350
Ast::ClassBracketed(_)
23502351
| Ast::Repetition(_)
23512352
| Ast::Group(_)
2353+
| Ast::LookAround(_)
23522354
| Ast::Alternation(_)
23532355
| Ast::Concat(_) => {
23542356
self.decrement_depth();
@@ -3753,6 +3755,11 @@ bar
37533755
);
37543756
}
37553757

3758+
// Placeholder for the look-behind parser tests.
//
// The body is still a bare `todo!()`, which panics unconditionally. The test
// is therefore marked `#[ignore]` (with a reason shown in the test report) so
// that the stub stays visible without failing every test run. Drop the
// `#[ignore]` attribute once real assertions replace the stub.
#[test]
#[ignore = "look-behind parser tests are not written yet"]
fn parse_lookbehinds() {
    todo!()
}
3762+
37563763
#[test]
37573764
fn parse_unsupported_capture_in_lookbehind() {
37583765
assert_eq!(

regex-syntax/src/ast/print.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ impl<W: fmt::Write> Visitor for Writer<W> {
8080
fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
8181
match *ast {
8282
Ast::Group(ref x) => self.fmt_group_pre(x),
83+
Ast::LookAround(ref x) => self.fmt_lookaround_pre(x),
8384
Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_pre(x),
8485
_ => Ok(()),
8586
}
@@ -92,6 +93,7 @@ impl<W: fmt::Write> Visitor for Writer<W> {
9293
Ast::Literal(ref x) => self.fmt_literal(x),
9394
Ast::Dot(_) => self.wtr.write_str("."),
9495
Ast::Assertion(ref x) => self.fmt_assertion(x),
96+
Ast::LookAround(ref x) => self.fmt_lookaround_post(x),
9597
Ast::ClassPerl(ref x) => self.fmt_class_perl(x),
9698
Ast::ClassUnicode(ref x) => self.fmt_class_unicode(x),
9799
Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_post(x),
@@ -174,6 +176,18 @@ impl<W: fmt::Write> Writer<W> {
174176
self.wtr.write_str(")")
175177
}
176178

179+
fn fmt_lookaround_pre(&mut self, ast: &ast::LookAround) -> fmt::Result {
180+
use crate::ast::LookAroundKind::*;
181+
match ast.kind {
182+
PositiveLookBehind => self.wtr.write_str("(?<="),
183+
NegativeLookBehind => self.wtr.write_str("(?<!"),
184+
}
185+
}
186+
187+
/// Write the closing delimiter of a look-around.
///
/// Every look-around kind closes with the same `)`, so the node itself is
/// not inspected.
fn fmt_lookaround_post(&mut self, _ast: &ast::LookAround) -> fmt::Result {
    let closer = ")";
    self.wtr.write_str(closer)
}
190+
177191
fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
178192
use crate::ast::RepetitionKind::*;
179193
match ast.op.kind {
@@ -511,6 +525,12 @@ mod tests {
511525
roundtrip("(a)");
512526
}
513527

528+
// Round-trip both look-behind forms through the printer.
#[test]
fn print_lookaround() {
    // Positive look-behind is spelled `(?<=...)`, matching what
    // `fmt_lookaround_pre` writes for `PositiveLookBehind`. The previous
    // input `(?<a)` dropped the `=` and is not a look-behind at all.
    roundtrip("(?<=a)");
    // Negative look-behind is spelled `(?<!...)`.
    roundtrip("(?<!a)");
}
533+
514534
#[test]
515535
fn print_class() {
516536
roundtrip(r"[abc]");

regex-syntax/src/ast/visitor.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,9 @@ enum Frame<'a> {
140140
/// A stack frame allocated just before descending into a group's child
141141
/// node.
142142
Group(&'a ast::Group),
143+
/// A stack frame allocated just before descending into a look-around's
144+
/// child node.
145+
LookAround(&'a ast::LookAround),
143146
/// The stack frame used while visiting every child node of a concatenation
144147
/// of expressions.
145148
Concat {
@@ -270,6 +273,7 @@ impl<'a> HeapVisitor<'a> {
270273
}
271274
Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
272275
Ast::Group(ref x) => Some(Frame::Group(x)),
276+
Ast::LookAround(ref x) => Some(Frame::LookAround(x)),
273277
Ast::Concat(ref x) if x.asts.is_empty() => None,
274278
Ast::Concat(ref x) => {
275279
Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] })
@@ -289,6 +293,7 @@ impl<'a> HeapVisitor<'a> {
289293
match induct {
290294
Frame::Repetition(_) => None,
291295
Frame::Group(_) => None,
296+
Frame::LookAround(_) => None,
292297
Frame::Concat { tail, .. } => {
293298
if tail.is_empty() {
294299
None
@@ -444,6 +449,7 @@ impl<'a> Frame<'a> {
444449
match *self {
445450
Frame::Repetition(rep) => &rep.ast,
446451
Frame::Group(group) => &group.ast,
452+
Frame::LookAround(look) => &look.ast,
447453
Frame::Concat { head, .. } => head,
448454
Frame::Alternation { head, .. } => head,
449455
}

regex-syntax/src/hir/translate.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,9 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
354354
.unwrap_or_else(|| self.flags());
355355
self.push(HirFrame::Group { old_flags });
356356
}
357+
Ast::LookAround(ref x) => {
358+
todo!("translation from AST to HIR");
359+
}
357360
Ast::Concat(_) => {
358361
self.push(HirFrame::Concat);
359362
}
@@ -446,6 +449,9 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
446449
self.trans().flags.set(old_flags);
447450
self.push(HirFrame::Expr(self.hir_capture(x, expr)));
448451
}
452+
Ast::LookAround(_) => {
453+
todo!("translation from AST to HIR");
454+
}
449455
Ast::Concat(_) => {
450456
let mut exprs = vec![];
451457
while let Some(expr) = self.pop_concat_expr() {

0 commit comments

Comments
 (0)