Skip to content

Commit cd070f8

Browse files
committed
Detect capture groups in lookarounds more cheaply
1 parent c1f7bb1 commit cd070f8

File tree

1 file changed

+42
-36
lines changed

1 file changed

+42
-36
lines changed

regex-syntax/src/ast/parse.rs

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ impl ParserBuilder {
159159
stack_class: RefCell::new(vec![]),
160160
capture_names: RefCell::new(vec![]),
161161
scratch: RefCell::new(String::new()),
162+
lookaround_depth: Cell::new(0),
162163
}
163164
}
164165

@@ -280,6 +281,9 @@ pub struct Parser {
280281
/// A scratch buffer used in various places. Mostly this is used to
281282
/// accumulate relevant characters from parts of a pattern.
282283
scratch: RefCell<String>,
284+
/// The number of look-arounds the parser is currently nested in. This is used to
285+
/// detect capture groups within look-arounds, which are not supported.
286+
lookaround_depth: Cell<usize>,
283287
}
284288

285289
/// ParserI is the internal parser implementation.
@@ -392,6 +396,7 @@ impl Parser {
392396
self.comments.borrow_mut().clear();
393397
self.stack_group.borrow_mut().clear();
394398
self.stack_class.borrow_mut().clear();
399+
self.lookaround_depth.set(0);
395400
}
396401
}
397402

@@ -477,6 +482,11 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
477482
self.parser().ignore_whitespace.get()
478483
}
479484

485+
/// Return whether the parser is currently in a look-around.
486+
fn in_lookaround(&self) -> bool {
487+
self.parser().lookaround_depth.get() != 0
488+
}
489+
480490
/// Return the character at the current position of the parser.
481491
///
482492
/// This panics if the current position does not point to a valid char.
@@ -737,6 +747,9 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
737747
.stack_group
738748
.borrow_mut()
739749
.push(GroupState::LookAround { concat, lookaround });
750+
self.parser()
751+
.lookaround_depth
752+
.set(self.parser().lookaround_depth.get() + 1);
740753
Ok(ast::Concat { span: self.span(), asts: vec![] })
741754
}
742755
}
@@ -770,7 +783,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
770783
Some(LookAround { concat, lookaround }) => (
771784
concat,
772785
Either::Right(lookaround),
773-
self.parser().ignore_whitespace.get(),
786+
self.ignore_whitespace(),
774787
None,
775788
),
776789
Some(Alternation(alt)) => match stack.pop() {
@@ -783,7 +796,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
783796
Some(LookAround { concat, lookaround }) => (
784797
concat,
785798
Either::Right(lookaround),
786-
self.parser().ignore_whitespace.get(),
799+
self.ignore_whitespace(),
787800
Some(alt),
788801
),
789802
None | Some(Alternation(_)) => {
@@ -830,15 +843,20 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
830843
},
831844
}
832845
prior_concat.asts.push(match grouping {
833-
Either::Left(group) => Ast::group(group),
834-
Either::Right(lookaround) => {
835-
if let Some(span) = first_capture_group_span(&lookaround.ast) {
846+
Either::Left(group) => {
847+
if group.is_capturing() && self.in_lookaround() {
836848
return Err(self.error(
837-
span,
849+
group.span,
838850
ast::ErrorKind::UnsupportedCaptureInLookBehind,
839851
));
840852
}
841853

854+
Ast::group(group)
855+
}
856+
Either::Right(lookaround) => {
857+
self.parser()
858+
.lookaround_depth
859+
.set(self.parser().lookaround_depth.get() - 1);
842860
Ast::lookaround(lookaround)
843861
}
844862
});
@@ -2522,36 +2540,6 @@ fn specialize_err<T>(
25222540
}
25232541
}
25242542

2525-
/// Returns the span of the first capture group found. Returns None in case there are no capture groups.
2526-
fn first_capture_group_span(ast: &Ast) -> Option<Span> {
2527-
struct CaptureGroupSearcher;
2528-
2529-
impl ast::Visitor for CaptureGroupSearcher {
2530-
type Output = ();
2531-
type Err = Span;
2532-
2533-
fn finish(self) -> core::result::Result<Self::Output, Self::Err> {
2534-
Ok(())
2535-
}
2536-
2537-
fn visit_pre(&mut self, ast: &Ast) -> std::result::Result<(), Span> {
2538-
match ast {
2539-
Ast::Group(group)
2540-
if !matches!(
2541-
group.kind,
2542-
ast::GroupKind::NonCapturing(_)
2543-
) =>
2544-
{
2545-
Err(group.span)
2546-
}
2547-
_ => Ok(()),
2548-
}
2549-
}
2550-
}
2551-
2552-
ast::visit(ast, CaptureGroupSearcher).err()
2553-
}
2554-
25552543
#[cfg(test)]
25562544
mod tests {
25572545
use core::ops::Range;
@@ -3882,6 +3870,24 @@ bar
38823870
kind: ast::LookAroundKind::PositiveLookBehind
38833871
}))
38843872
);
3873+
assert_eq!(
3874+
parser(r"(?<=(?<=))(a)").parse(),
3875+
Ok(concat(
3876+
0..13,
3877+
vec![
3878+
Ast::lookaround(ast::LookAround {
3879+
span: span(0..10),
3880+
ast: Box::new(Ast::lookaround(ast::LookAround {
3881+
span: span(4..9),
3882+
ast: Box::new(Ast::empty(span(8..8))),
3883+
kind: ast::LookAroundKind::PositiveLookBehind
3884+
})),
3885+
kind: ast::LookAroundKind::PositiveLookBehind
3886+
}),
3887+
group(10..13, 1, lit('a', 11)),
3888+
]
3889+
))
3890+
);
38853891
assert_eq!(
38863892
parser(r"(?<=a)").parse(),
38873893
Ok(Ast::lookaround(ast::LookAround {

0 commit comments

Comments
 (0)