Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Left recursive checks #371

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ nightly = []
# Allows deeper recursion by dynamically spilling stack state on to the heap.
spill-stack = ["stacker", "std"]

# Allows parser memoisation, speeding up heavily back-tracking parsers and allowing left recursion.
# Allows parser memoization, speeding up heavily back-tracking parsers and allowing left recursion.
memoization = []

# Allows extending chumsky by writing your own parser implementations.
Expand Down
6 changes: 3 additions & 3 deletions src/combinator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -682,15 +682,15 @@ where
go_extra!(O);
}

/// See [`Parser::memoised`].
/// See [`Parser::memoized`].
#[cfg(feature = "memoization")]
#[derive(Copy, Clone)]
pub struct Memoised<A> {
pub struct Memoized<A> {
pub(crate) parser: A,
}

#[cfg(feature = "memoization")]
impl<'a, I, E, A, O> ParserSealed<'a, I, O, E> for Memoised<A>
impl<'a, I, E, A, O> ParserSealed<'a, I, O, E> for Memoized<A>
where
I: Input<'a>,
E: ParserExtra<'a, I>,
Expand Down
10 changes: 10 additions & 0 deletions src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,8 @@ where
errors: &mut self.errors,
state: &mut self.state,
ctx: &self.ctx,
#[cfg(debug_assertions)]
rec_data: None,
#[cfg(feature = "memoization")]
memos: &mut self.memos,
}
Expand All @@ -666,6 +668,8 @@ where
errors: &mut self.errors,
state: &mut self.state,
ctx: &self.ctx,
#[cfg(debug_assertions)]
rec_data: None,
#[cfg(feature = "memoization")]
memos: &mut self.memos,
}
Expand All @@ -687,6 +691,8 @@ pub struct InputRef<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> {
pub(crate) errors: &'parse mut Errors<I::Offset, E::Error>,
pub(crate) state: &'parse mut E::State,
pub(crate) ctx: &'parse E::Context,
#[cfg(debug_assertions)]
pub(crate) rec_data: Option<(I::Offset, usize)>,
#[cfg(feature = "memoization")]
pub(crate) memos: &'parse mut HashMap<(I::Offset, usize), Option<Located<I::Offset, E::Error>>>,
}
Expand All @@ -708,6 +714,8 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E>
state: self.state,
ctx: new_ctx,
errors: self.errors,
#[cfg(debug_assertions)]
rec_data: self.rec_data,
#[cfg(feature = "memoization")]
memos: self.memos,
};
Expand Down Expand Up @@ -735,6 +743,8 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E>
state: self.state,
ctx: self.ctx,
errors: self.errors,
#[cfg(debug_assertions)]
rec_data: None,
#[cfg(feature = "memoization")]
memos,
};
Expand Down
82 changes: 54 additions & 28 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -635,11 +635,11 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>:
/// [left recursion](https://en.wikipedia.org/wiki/Left_recursion).
// TODO: Example
#[cfg(feature = "memoization")]
fn memoised(self) -> Memoised<Self>
fn memoized(self) -> Memoized<Self>
where
Self: Sized,
{
Memoised { parser: self }
Memoized { parser: self }
}

/// Transform all outputs of this parser to a pretermined value.
Expand Down Expand Up @@ -2262,7 +2262,7 @@ mod tests {
.then_ignore(just('+'))
.then(atom.clone())
.map(|(a, b)| format!("{}{}", a, b))
.memoised()
.memoized()
.or(atom)
})
.then_ignore(end())
Expand Down Expand Up @@ -2292,7 +2292,7 @@ mod tests {
.then_ignore(just('+'))
.then(expr)
.map(|(a, b)| format!("{}{}", a, b))
.memoised();
.memoized();

sum.or(atom)
})
Expand All @@ -2306,28 +2306,56 @@ mod tests {
mod debug_asserts {
use super::prelude::*;

// TODO panic when left recursive parser is detected
// #[test]
// #[should_panic]
// fn debug_assert_left_recursive() {
// recursive(|expr| {
// let atom = any::<&str, extra::Default>()
// .filter(|c: &char| c.is_alphabetic())
// .repeated()
// .at_least(1)
// .collect();

// let sum = expr
// .clone()
// .then_ignore(just('+'))
// .then(expr)
// .map(|(a, b)| format!("{}{}", a, b));

// sum.or(atom)
// })
// .then_ignore(end())
// .parse("a+b+c");
// }
#[test]
#[should_panic]
fn debug_assert_double_left_recursive() {
recursive::<&str, char, extra::Default, _, _>(|a| recursive(move |_| a.or(just('a'))))
.parse("a");
}

#[test]
#[should_panic]
fn debug_assert_left_recursive() {
recursive(|expr| {
let atom = any::<&str, extra::Default>()
.filter(|c: &char| c.is_alphabetic())
.repeated()
.at_least(1)
.collect();

let sum = expr
.clone()
.then_ignore(just('+'))
.then(expr)
.map(|(a, b)| format!("{}{}", a, b));

sum.or(atom)
})
.parse("a+b+c");
}

#[test]
#[should_panic]
fn debug_assert_left_recursive_delc() {
let mut expr = Recursive::declare();
expr.define({
let atom = any::<&str, extra::Default>()
.filter(|c: &char| c.is_alphabetic())
.repeated()
.at_least(1)
.collect();

let sum = expr
.clone()
.then_ignore(just('+'))
.then(expr.clone())
.map(|(a, b)| format!("{}{}", a, b));

sum.or(atom)
});

expr.parse("a+b+c");
}

#[test]
#[should_panic]
Expand Down Expand Up @@ -2375,7 +2403,5 @@ mod tests {
.repeated()
.parse("a+b+c");
}

// TODO what about IterConfigure and TryIterConfigure?
}
}
60 changes: 50 additions & 10 deletions src/recursive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ pub struct Indirect<'a, 'b, I: Input<'a>, O, Extra: ParserExtra<'a, I>> {
/// Prefer to use [`recursive()`], which exists as a convenient wrapper around both operations, if possible.
pub struct Recursive<P: ?Sized> {
inner: RecursiveInner<P>,
location: Location<'static>,
}

impl<'a, 'b, I: Input<'a>, O, E: ParserExtra<'a, I>> Recursive<Indirect<'a, 'b, I, O, E>> {
Expand Down Expand Up @@ -123,11 +124,13 @@ impl<'a, 'b, I: Input<'a>, O, E: ParserExtra<'a, I>> Recursive<Indirect<'a, 'b,
/// Ok(Chain::Link('+', Box::new(Chain::Link('+', Box::new(Chain::End))))),
/// );
/// ```
#[track_caller]
pub fn declare() -> Self {
Recursive {
inner: RecursiveInner::Owned(RefC::new(Indirect {
inner: OnceCell::new(),
})),
location: *Location::caller(),
}
}

Expand All @@ -151,6 +154,36 @@ impl<P: ?Sized> Recursive<P> {
.expect("Recursive parser used before being defined"),
}
}

#[inline(always)]
fn with_left_recursive_check<'a, I, E, R>(
&self,
inp: &mut InputRef<'a, '_, I, E>,
f: impl FnOnce(&mut InputRef<'a, '_, I, E>) -> R,
) -> R
where
I: Input<'a>,
E: ParserExtra<'a, I>,
{
// TODO: Don't use address, since this might not be constant?
#[cfg(debug_assertions)]
let key = (
inp.offset().offset,
RefC::as_ptr(&self.parser()) as *const () as usize,
);

#[cfg(debug_assertions)]
if inp.memos.insert(key, None).is_some() {
panic!("Recursive parser defined at {} is left-recursive. Consider using `.memoized()` or restructuring this parser to be right-recursive.", self.location);
}

let res = f(inp);

#[cfg(debug_assertions)]
inp.memos.remove(&key);

res
}
}

impl<P: ?Sized> Clone for Recursive<P> {
Expand All @@ -160,6 +193,7 @@ impl<P: ?Sized> Clone for Recursive<P> {
RecursiveInner::Owned(x) => RecursiveInner::Owned(x.clone()),
RecursiveInner::Unowned(x) => RecursiveInner::Unowned(x.clone()),
},
location: self.location,
}
}
}
Expand All @@ -182,15 +216,17 @@ where
{
#[inline]
fn go<M: Mode>(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult<M, O> {
recurse(move || {
M::invoke(
self.parser()
.inner
.get()
.expect("Recursive parser used before being defined")
.as_ref(),
inp,
)
self.with_left_recursive_check(inp, |inp| {
recurse(move || {
M::invoke(
self.parser()
.inner
.get()
.expect("Recursive parser used before being defined")
.as_ref(),
inp,
)
})
})
}

Expand All @@ -204,7 +240,7 @@ where
{
#[inline]
fn go<M: Mode>(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult<M, O> {
recurse(move || M::invoke(&*self.parser(), inp))
self.with_left_recursive_check(inp, |inp| recurse(move || M::invoke(&*self.parser(), inp)))
}

go_extra!(O);
Expand Down Expand Up @@ -260,23 +296,27 @@ where
/// ])));
/// ```
// INFO: Clone bound not actually needed, but good to be safe for future compat
#[track_caller]
pub fn recursive<'a, 'b, I, O, E, A, F>(f: F) -> Recursive<Direct<'a, 'b, I, O, E>>
where
I: Input<'a>,
E: ParserExtra<'a, I>,
A: Parser<'a, I, O, E> + Clone + MaybeSync + 'b,
F: FnOnce(Recursive<Direct<'a, 'b, I, O, E>>) -> A,
{
let location = *Location::caller();
let rc = RefC::new_cyclic(|rc| {
let rc: RefW<DynParser<'a, 'b, I, O, E>> = rc.clone() as _;
let parser = Recursive {
inner: RecursiveInner::Unowned(rc.clone()),
location,
};

f(parser)
});

Recursive {
inner: RecursiveInner::Owned(rc),
location,
}
}