From df522af09cdc8bb1ec5f8e179e60f8346ad3bd06 Mon Sep 17 00:00:00 2001 From: stefnotch Date: Thu, 17 Aug 2023 23:39:33 +0200 Subject: [PATCH 01/64] Implement choice parser for vec --- src/primitive.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/primitive.rs b/src/primitive.rs index 8d310fad..e31352fb 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -903,6 +903,37 @@ where go_extra!(O); } +impl<'a, A, I, O, E> ParserSealed<'a, I, O, E> for Choice> +where + A: Parser<'a, I, O, E>, + I: Input<'a>, + E: ParserExtra<'a, I>, +{ + #[inline] + fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { + if self.parsers.is_empty() { + let offs = inp.offset(); + let err_span = inp.span_since(offs); + inp.add_alt(offs.offset, None, None, err_span); + Err(()) + } else { + let before = inp.save(); + match self.parsers.iter().find_map(|parser| { + inp.rewind(before); + match parser.go::(inp) { + Ok(out) => Some(out), + Err(()) => None, + } + }) { + Some(out) => Ok(out), + None => Err(()), + } + } + } + + go_extra!(O); +} + /// See [`group`]. #[derive(Copy, Clone)] pub struct Group { From 53ed5c6f7d41c31ce9951ab3b87a3b436ec0a92c Mon Sep 17 00:00:00 2001 From: stefnotch Date: Fri, 18 Aug 2023 20:51:14 +0200 Subject: [PATCH 02/64] Share ParserSealed impl for Choice --- src/primitive.rs | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/src/primitive.rs b/src/primitive.rs index e31352fb..1c1461f7 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -872,7 +872,7 @@ macro_rules! 
impl_choice_for_tuple { impl_choice_for_tuple!(A_ B_ C_ D_ E_ F_ G_ H_ I_ J_ K_ L_ M_ N_ O_ P_ Q_ R_ S_ T_ U_ V_ W_ X_ Y_ Z_); -impl<'a, A, I, O, E, const N: usize> ParserSealed<'a, I, O, E> for Choice<[A; N]> +impl<'a, 'b, A, I, O, E> ParserSealed<'a, I, O, E> for Choice<&'b [A]> where A: Parser<'a, I, O, E>, I: Input<'a>, @@ -880,7 +880,7 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - if N == 0 { + if self.parsers.is_empty() { let offs = inp.offset(); let err_span = inp.span_since(offs); inp.add_alt(offs.offset, None, None, err_span); @@ -903,7 +903,7 @@ where go_extra!(O); } -impl<'a, A, I, O, E> ParserSealed<'a, I, O, E> for Choice> +impl<'a, 'b, A, I, O, E> ParserSealed<'a, I, O, E> for Choice> where A: Parser<'a, I, O, E>, I: Input<'a>, @@ -911,26 +911,21 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - if self.parsers.is_empty() { - let offs = inp.offset(); - let err_span = inp.span_since(offs); - inp.add_alt(offs.offset, None, None, err_span); - Err(()) - } else { - let before = inp.save(); - match self.parsers.iter().find_map(|parser| { - inp.rewind(before); - match parser.go::(inp) { - Ok(out) => Some(out), - Err(()) => None, - } - }) { - Some(out) => Ok(out), - None => Err(()), - } - } + choice(&self.parsers[..]).go::(inp) } + go_extra!(O); +} +impl<'a, 'b, A, I, O, E, const N: usize> ParserSealed<'a, I, O, E> for Choice<[A; N]> +where + A: Parser<'a, I, O, E>, + I: Input<'a>, + E: ParserExtra<'a, I>, +{ + #[inline] + fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { + choice(&self.parsers[..]).go::(inp) + } go_extra!(O); } From 9843461a74398512afa58a4ba2f278cb96a916b8 Mon Sep 17 00:00:00 2001 From: stefnotch Date: Fri, 18 Aug 2023 21:05:15 +0200 Subject: [PATCH 03/64] Remove unused lifetime --- src/primitive.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/primitive.rs b/src/primitive.rs index 1c1461f7..65dba9c1 100644 --- a/src/primitive.rs +++ 
b/src/primitive.rs @@ -903,7 +903,7 @@ where go_extra!(O); } -impl<'a, 'b, A, I, O, E> ParserSealed<'a, I, O, E> for Choice> +impl<'a, A, I, O, E> ParserSealed<'a, I, O, E> for Choice> where A: Parser<'a, I, O, E>, I: Input<'a>, @@ -916,7 +916,7 @@ where go_extra!(O); } -impl<'a, 'b, A, I, O, E, const N: usize> ParserSealed<'a, I, O, E> for Choice<[A; N]> +impl<'a, A, I, O, E, const N: usize> ParserSealed<'a, I, O, E> for Choice<[A; N]> where A: Parser<'a, I, O, E>, I: Input<'a>, From 01b96cd643e4b47b36ee76baa57294d2153965f7 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Sun, 20 Aug 2023 21:53:11 +0100 Subject: [PATCH 04/64] Added caching capabilities --- src/lib.rs | 147 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 131 insertions(+), 16 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5b2d2737..fd4c4b17 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2856,12 +2856,12 @@ macro_rules! select_ref { #[cfg(test)] mod tests { - use super::*; + use crate::prelude::*; #[test] fn zero_copy() { - use self::input::WithContext; - use self::prelude::*; + use crate::input::WithContext; + use crate::prelude::*; #[derive(PartialEq, Debug)] enum Token<'a> { @@ -2909,8 +2909,8 @@ mod tests { #[test] fn zero_copy_map_span() { - use self::input::MappedSpan; - use self::prelude::*; + use crate::input::MappedSpan; + use crate::prelude::*; #[derive(PartialEq, Debug)] enum Token<'a> { @@ -2965,7 +2965,7 @@ mod tests { #[test] fn zero_copy_repetition() { - use self::prelude::*; + use crate::prelude::*; fn parser<'a>() -> impl Parser<'a, &'a str, Vec> { any() @@ -3001,7 +3001,7 @@ mod tests { #[test] fn zero_copy_group() { - use self::prelude::*; + use crate::prelude::*; fn parser<'a>() -> impl Parser<'a, &'a str, (&'a str, u64, char)> { group(( @@ -3041,7 +3041,7 @@ mod tests { #[test] fn zero_copy_group_array() { - use self::prelude::*; + use crate::prelude::*; fn parser<'a>() -> impl Parser<'a, &'a str, [char; 3]> { group([just('a'), just('b'), 
just('c')]) @@ -3054,7 +3054,7 @@ mod tests { #[test] fn unicode_str() { let input = "πŸ„―πŸ„šπŸΉ πŸ΄ŽπŸ„πŸ‹πŸ°πŸ„‚πŸ¬―πŸˆ¦gπŸΈ΅πŸ©πŸ•”πŸˆ³2πŸ¬™πŸ¨žπŸ…’πŸ­³πŸŽ…h🡚🧿🏩🰬kπŸ ‘πŸ€”πŸˆ†πŸΉπŸ€ŸπŸ‰—πŸ΄ŸπŸ“΅πŸ°„πŸ€ΏπŸœπŸ™˜πŸΉ„5πŸ »πŸ‘‰πŸ±–πŸ “"; - let mut own = InputOwn::<_, extra::Default>::new(input); + let mut own = crate::input::InputOwn::<_, extra::Default>::new(input); let mut inp = own.as_ref_start(); while let Some(_c) = inp.next() {} @@ -3062,7 +3062,7 @@ mod tests { #[test] fn iter() { - use self::prelude::*; + use crate::prelude::*; fn parser<'a>() -> impl IterParser<'a, &'a str, char> { any().repeated() @@ -3079,7 +3079,7 @@ mod tests { #[test] #[cfg(feature = "memoization")] fn exponential() { - use self::prelude::*; + use crate::prelude::*; fn parser<'a>() -> impl Parser<'a, &'a str, String> { recursive(|expr| { @@ -3109,7 +3109,7 @@ mod tests { #[test] #[cfg(feature = "memoization")] fn left_recursive() { - use self::prelude::*; + use crate::prelude::*; fn parser<'a>() -> impl Parser<'a, &'a str, String> { recursive(|expr| { @@ -3136,7 +3136,7 @@ mod tests { #[cfg(debug_assertions)] mod debug_asserts { - use super::prelude::*; + use crate::prelude::*; // TODO panic when left recursive parser is detected // #[test] @@ -3288,6 +3288,8 @@ mod tests { #[test] fn arc_impl() { + use alloc::sync::Arc; + fn parser<'a>() -> impl Parser<'a, &'a str, Vec> { Arc::new( any() @@ -3352,6 +3354,8 @@ mod tests { #[test] fn rc_impl() { + use alloc::rc::Rc; + fn parser<'a>() -> impl Parser<'a, &'a str, Vec> { Rc::new( any() @@ -3385,13 +3389,13 @@ mod tests { #[derive(Copy, Clone, Debug, PartialEq, Eq)] struct MyErr(&'static str); - impl<'a, I> Error<'a, I> for MyErr + impl<'a, I> crate::Error<'a, I> for MyErr where I: Input<'a>, { - fn expected_found>>>( + fn expected_found>>>( _expected: E, - _found: Option>, + _found: Option>, _span: I::Span, ) -> Self { MyErr("expected found") @@ -3446,4 +3450,115 @@ mod tests { assert_eq!(parser().parse("aaa").into_result().unwrap(), 
()); } + + #[test] + #[cfg(feature = "unstable")] + fn cached() { + fn my_parser<'a>() -> impl Parser<'a, &'a str, &'a str, extra::Default> { + any().repeated().exactly(5).slice() + } + + struct MyCache; + + impl crate::cache::Cached for MyCache { + type Input<'src> = &'src str; + type Output<'src> = &'src str; + type Extra<'src> = extra::Default; + + fn make_parser<'src>( + self, + ) -> Boxed<'src, 'src, Self::Input<'src>, Self::Output<'src>, Self::Extra<'src>> + { + Parser::boxed(my_parser()) + } + } + + // usage < definition + { + let parser = crate::cache::Cache::new(MyCache); + + for _ in 0..2 { + let s = "hello".to_string(); + + assert_eq!(parser.get().parse(&s).into_result(), Ok("hello")); + assert!(matches!( + parser.get().parse("goodbye").into_result(), + Err(_) + )); + } + } + + // usage > definition + { + let s = "hello".to_string(); + + for _ in 0..2 { + let parser = crate::cache::Cache::new(MyCache); + + assert_eq!(parser.get().parse(&s).into_result(), Ok("hello")); + assert!(matches!( + parser.get().parse("goodbye").into_result(), + Err(_) + )); + } + } + } +} + +/// Traits and types that allow parsers to be cached between invocations. +#[cfg(feature = "unstable")] +pub mod cache { + use super::*; + + /// Implementing this trait allows you to cache parser for use with multiple inputs. + pub trait Cached { + /// The input type of the parser (for example, `&'a str`). + type Input<'src>: Input<'src>; + + /// The output type of the parser. + type Output<'src>; + + /// The extra type of the parser (see [`extra`]). + type Extra<'src>: ParserExtra<'src, Self::Input<'src>>; + + // /// The type of the parser to be cached. + // type Parser<'a>: Parser<'a, Self::Input<'a>, Self::Output<'a>, Self::Extra<'a>>; + + /// Create an instance of the parser + fn make_parser<'src>( + self, + ) -> Boxed<'src, 'src, Self::Input<'src>, Self::Output<'src>, Self::Extra<'src>>; + } + + /// Allows a parser to be cached for use with inputs and outputs of difference lifetimes. 
+ pub struct Cache { + parser: Boxed<'static, 'static, C::Input<'static>, C::Output<'static>, C::Extra<'static>>, + #[allow(dead_code)] + phantom: EmptyPhantom, + } + + impl Cache { + /// Create a new cached parser. + pub fn new(cacher: C) -> Self { + Self { + parser: cacher.make_parser(), + phantom: EmptyPhantom::new(), + } + } + + /// Get a reference to the cached parser. + /// + /// Because this function is generic over an input lifetime, the + pub fn get<'src>( + &self, + ) -> &Boxed<'src, 'src, C::Input<'src>, C::Output<'src>, C::Extra<'src>> { + // SAFETY: This is safe because the API of `Cache` requires that the parser we store is bound by an arbitrary + // lifetime variable (see `Cached::make_parser`). Therefore, the implementor of `Cached` has no way to + // 'discover' the lifetime and so, because lifetimes are entirely removed during monomorphisation, the parser + // must be valid for arbitrary lifetimes. + unsafe { + &*(&self.parser as *const Boxed, C::Output<'_>, C::Extra<'_>>).cast() + } + } + } } From 60d96cd7339a8084ae081964604a17d303db1081 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Tue, 22 Aug 2023 17:53:17 +0100 Subject: [PATCH 05/64] Added cache docs --- src/cache.rs | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/either.rs | 2 +- src/lib.rs | 73 +++---------------------------------------- src/text.rs | 3 ++ 4 files changed, 95 insertions(+), 69 deletions(-) create mode 100644 src/cache.rs diff --git a/src/cache.rs b/src/cache.rs new file mode 100644 index 00000000..a1e89e81 --- /dev/null +++ b/src/cache.rs @@ -0,0 +1,86 @@ +//! Traits and types that allow parsers to be cached between invocations. +//! +//! # Example +//! +//! ``` +//! #![feature(lazy_cell)] +//! use std::sync::{LazyLock, Arc}; +//! use chumsky::{prelude::*, cache::{Cache, Cached}}; +//! +//! #[derive(Debug, PartialEq)] +//! enum Token<'a> { Ident(&'a str), Int(u64) } +//! +//! #[derive(Default)] +//! struct TokenParser; +//! 
impl Cached for TokenParser { +//! type Parser<'a> = Arc, extra::Default> + Send + Sync + 'a>; +//! +//! fn make_parser<'a>(self) -> Self::Parser<'a> { +//! let ident = text::ident().map(Token::Ident); +//! let num = text::int(10).from_str().unwrapped().map(Token::Int); +//! Arc::new(ident.or(num)) +//! } +//! } +//! +//! // The parser cache doesn't have a lifetime and so can be stored pretty much anywhere: +//! static PARSER: LazyLock> = LazyLock::new(Cache::default); +//! +//! // The parser can be used from any context simply by calling `.get()` on the cache +//! assert_eq!(PARSER.get().parse("42").into_result(), Ok(Token::Int(42))); +//! assert_eq!(PARSER.get().parse("hello").into_result(), Ok(Token::Ident("hello"))); +//! ``` + +use super::*; + +/// Implementing this trait allows you to cache parsers for use with inputs of different lifetimes, avoiding the +/// need to recreate the parser for each input lifetime. +pub trait Cached { + /// The type of the parser to be cached. + /// + /// Because parsers tend to have unwieldy types, it is recommended to perform type erasure here. For example, + /// a parser with input type `&'src str` and output type `Token<'src>` might have one of the following types. + /// + /// ```ignore + /// Boxed<'src, 'src, &'src str, Token<'src>, extra::Default> + /// Arc, extra::Default> + Send + Sync + 'src> + /// ``` + type Parser<'src>; + + /// Create an instance of the parser + fn make_parser<'src>(self) -> Self::Parser<'src>; +} + +/// Allows a parser to be cached for use with inputs and outputs of difference lifetimes. +pub struct Cache { + parser: C::Parser<'static>, + #[allow(dead_code)] + phantom: EmptyPhantom, +} + +impl Default for Cache { + fn default() -> Self { + Self::new(C::default()) + } +} + +impl Cache { + /// Create a new cached parser. + pub fn new(cacher: C) -> Self { + Self { + parser: cacher.make_parser(), + phantom: EmptyPhantom::new(), + } + } + + /// Get a reference to the cached parser. 
+ /// + /// Because this function is generic over an input lifetime, the returned parser can be used in many + /// difference contexts. + pub fn get<'src>(&self) -> &C::Parser<'src> { + // SAFETY: This is safe because the API of `Cache` requires that the parser we store is bound by an arbitrary + // lifetime variable (see `Cached::make_parser`). Therefore, the implementor of `Cached` has no way to + // 'discover' the lifetime and so, because lifetimes are entirely removed during monomorphisation, the parser + // must be valid for arbitrary lifetimes. + unsafe { &*(&self.parser as *const C::Parser<'_>).cast() } + } +} diff --git a/src/either.rs b/src/either.rs index b6c89b53..20f59e15 100644 --- a/src/either.rs +++ b/src/either.rs @@ -1,4 +1,4 @@ -//! A small module that implements the [`Parser`](Parser) trait for the +//! A small module that implements the [`Parser`] trait for the //! [`either::Either`](https://docs.rs/either/latest/either/enum.Either.html) type. use either::Either; diff --git a/src/lib.rs b/src/lib.rs index fd4c4b17..dc106bb6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -69,6 +69,8 @@ macro_rules! go_cfg_extra { } mod blanket; +#[cfg(feature = "unstable")] +pub mod cache; pub mod combinator; pub mod container; #[cfg(feature = "either")] @@ -347,7 +349,6 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it. fn parse(&self, input: I) -> ParseResult where - Self: Sized, I: Input<'a>, E::State: Default, E::Context: Default, @@ -365,7 +366,6 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it. 
fn parse_with_state(&self, input: I, state: &mut E::State) -> ParseResult where - Self: Sized, I: Input<'a>, E::Context: Default, { @@ -3461,14 +3461,9 @@ mod tests { struct MyCache; impl crate::cache::Cached for MyCache { - type Input<'src> = &'src str; - type Output<'src> = &'src str; - type Extra<'src> = extra::Default; - - fn make_parser<'src>( - self, - ) -> Boxed<'src, 'src, Self::Input<'src>, Self::Output<'src>, Self::Extra<'src>> - { + type Parser<'src> = Boxed<'src, 'src, &'src str, &'src str, extra::Default>; + + fn make_parser<'src>(self) -> Self::Parser<'src> { Parser::boxed(my_parser()) } } @@ -3504,61 +3499,3 @@ mod tests { } } } - -/// Traits and types that allow parsers to be cached between invocations. -#[cfg(feature = "unstable")] -pub mod cache { - use super::*; - - /// Implementing this trait allows you to cache parser for use with multiple inputs. - pub trait Cached { - /// The input type of the parser (for example, `&'a str`). - type Input<'src>: Input<'src>; - - /// The output type of the parser. - type Output<'src>; - - /// The extra type of the parser (see [`extra`]). - type Extra<'src>: ParserExtra<'src, Self::Input<'src>>; - - // /// The type of the parser to be cached. - // type Parser<'a>: Parser<'a, Self::Input<'a>, Self::Output<'a>, Self::Extra<'a>>; - - /// Create an instance of the parser - fn make_parser<'src>( - self, - ) -> Boxed<'src, 'src, Self::Input<'src>, Self::Output<'src>, Self::Extra<'src>>; - } - - /// Allows a parser to be cached for use with inputs and outputs of difference lifetimes. - pub struct Cache { - parser: Boxed<'static, 'static, C::Input<'static>, C::Output<'static>, C::Extra<'static>>, - #[allow(dead_code)] - phantom: EmptyPhantom, - } - - impl Cache { - /// Create a new cached parser. - pub fn new(cacher: C) -> Self { - Self { - parser: cacher.make_parser(), - phantom: EmptyPhantom::new(), - } - } - - /// Get a reference to the cached parser. 
- /// - /// Because this function is generic over an input lifetime, the - pub fn get<'src>( - &self, - ) -> &Boxed<'src, 'src, C::Input<'src>, C::Output<'src>, C::Extra<'src>> { - // SAFETY: This is safe because the API of `Cache` requires that the parser we store is bound by an arbitrary - // lifetime variable (see `Cached::make_parser`). Therefore, the implementor of `Cached` has no way to - // 'discover' the lifetime and so, because lifetimes are entirely removed during monomorphisation, the parser - // must be valid for arbitrary lifetimes. - unsafe { - &*(&self.parser as *const Boxed, C::Output<'_>, C::Extra<'_>>).cast() - } - } - } -} diff --git a/src/text.rs b/src/text.rs index 688609ae..11817f3b 100644 --- a/src/text.rs +++ b/src/text.rs @@ -438,6 +438,9 @@ pub mod ascii { } } +// Unicode is the default +pub use unicode::*; + /// Parsers and utilities for working with unicode inputs. pub mod unicode { use super::*; From a9b67f609d690612278608a27aa71c3d7723bac0 Mon Sep 17 00:00:00 2001 From: stefnotch Date: Sat, 26 Aug 2023 19:57:07 +0200 Subject: [PATCH 06/64] Impl Ord for Offset struct --- src/input.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/input.rs b/src/input.rs index 19c970e8..a41eb31c 100644 --- a/src/input.rs +++ b/src/input.rs @@ -935,12 +935,24 @@ impl<'a, 'parse, I: Input<'a>> Clone for Offset<'a, 'parse, I> { } } +impl<'a, 'parse, I: Input<'a>> Eq for Offset<'a, 'parse, I> {} impl<'a, 'parse, I: Input<'a>> PartialEq for Offset<'a, 'parse, I> { fn eq(&self, other: &Self) -> bool { self.offset == other.offset } } +impl<'a, 'parse, I: Input<'a>> PartialOrd for Offset<'a, 'parse, I> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} +impl<'a, 'parse, I: Input<'a>> Ord for Offset<'a, 'parse, I> { + fn cmp(&self, other: &Self) -> Ordering { + self.offset.cmp(&other.offset) + } +} + pub(crate) struct Errors { pub(crate) alt: Option>, pub(crate) secondary: Vec>, From 
56762fe5c965880066a068038a18b5ac07afd75c Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Sun, 27 Aug 2023 18:43:21 +0100 Subject: [PATCH 07/64] Improved docs --- src/cache.rs | 4 ++-- src/lib.rs | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index a1e89e81..4d1b798a 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -50,7 +50,7 @@ pub trait Cached { fn make_parser<'src>(self) -> Self::Parser<'src>; } -/// Allows a parser to be cached for use with inputs and outputs of difference lifetimes. +/// Allows a parser to be cached for reuse with inputs and outputs of different lifetimes. pub struct Cache { parser: C::Parser<'static>, #[allow(dead_code)] @@ -75,7 +75,7 @@ impl Cache { /// Get a reference to the cached parser. /// /// Because this function is generic over an input lifetime, the returned parser can be used in many - /// difference contexts. + /// different contexts. pub fn get<'src>(&self) -> &C::Parser<'src> { // SAFETY: This is safe because the API of `Cache` requires that the parser we store is bound by an arbitrary // lifetime variable (see `Cached::make_parser`). Therefore, the implementor of `Cached` has no way to diff --git a/src/lib.rs b/src/lib.rs index dc106bb6..666ee1b0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1420,9 +1420,10 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// Parse a pattern zero or more times (analog to Regex's `*`). /// /// Input is eagerly parsed. Be aware that the parser will accept no occurrences of the pattern too. Consider using - /// [`Repeated::at_least`] instead if it better suits your use-case. + /// [`Repeated::at_least`] instead if you wish to parse a minimum number of elements. /// - /// The output type of this parser can be any [`Container`]. + /// The output type of this parser is, by default, `()`. If you want to collect the items into a [`Container`] + /// (such as a [`Vec`]), use [`IterParser::collect`]. 
/// /// # Examples /// @@ -2729,7 +2730,7 @@ where /// Internally, [`select!`] is very similar to [`Parser::try_map`] and thinking of it as such might make it less /// confusing. /// -/// `select!` requires that tokens implement [`Clone`]. +/// `select!` requires that tokens implement [`Clone`] and the input type implements [`ValueInput`]. /// /// If you're trying to access tokens referentially (for the sake of nested parsing, or simply because you want to /// avoid cloning the token), see [`select_ref!`]. From 652353df6dbd955344c6c2b8f869f7a21962c2c7 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Fri, 1 Sep 2023 10:42:41 +0100 Subject: [PATCH 08/64] Added Span::to_end, Span::union --- src/lib.rs | 31 +++++++++++++++++++------------ src/span.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 666ee1b0..b4606034 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -219,9 +219,14 @@ mod sync { use sync::{DynParser, MaybeSync, RefC, RefW}; -/// The result of running a [`Parser`]. Can be converted into a [`Result`] via -/// [`ParseResult::into_result`] for when you only care about success or failure, or into distinct -/// error and output via [`ParseResult::into_output_errors`] +/// The result of performing a parse on an input with [`Parser`]. +/// +/// Unlike `Result`, this type is designed to express the fact that generating outputs and errors are not +/// mutually-exclusive operations: it is possible for a parse to produce non-terminal errors (see +/// [`Parse::recover_with`] while still producing useful output). +/// +/// If you don't care for recovered outputs and you with to treat success/failure as a binary, you may use +/// [`ParseResult::into_result`]. 
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ParseResult { output: Option, @@ -248,8 +253,9 @@ impl ParseResult { self.output.as_ref() } - /// Get a slice containing the parse errors for this result. The slice will be empty if there are no errors. - pub fn errors(&self) -> impl ExactSizeIterator { + /// Get an iterator over the parse errors for this result. The iterator will produce no items if there were no + /// errors. + pub fn errors(&self) -> impl ExactSizeIterator + DoubleEndedIterator { self.errs.iter() } @@ -280,21 +286,22 @@ impl ParseResult { } } - /// If the parse succeeded (i.e: no errors were produced), this function returns the output value, `T`. + /// Convert this `ParseResult` into the output. If any errors were generated (including non-fatal errors!), a + /// panic will occur instead. /// - /// If parsing generated errors, this function panics (even if these errors were non-fatal). + /// The use of this function is discouraged in user-facing code. However, it may be convenient for use in tests. #[track_caller] pub fn unwrap(self) -> T where E: fmt::Debug, { - if self.errs.is_empty() { - self.output.expect("parser generated no errors or output") - } else { + if self.has_errors() { panic!( - "called `ParseResult::unwrap()` on a parse result with errors: {:?}", - self.errs + "called `ParseResult::unwrap` on a parse result containing errors: {:?}", + &self.errs ) + } else { + self.output.expect("parser generated no errors or output") } } } diff --git a/src/span.rs b/src/span.rs index dfa6cdb1..3b73cf2e 100644 --- a/src/span.rs +++ b/src/span.rs @@ -43,6 +43,46 @@ pub trait Span { /// Return the end offset of the span. fn end(&self) -> Self::Offset; + + /// Turn this span into a zero-width span that starts and ends at the end of the original. + /// + /// For example, an original span like `3..7` will result in a new span of `7..7`. 
+ /// + /// This may be convenient in various circumstances, such as when specifying the 'end of input' span in + /// [`Input::spanned`]. + fn to_end(&self) -> Self + where + Self: Sized, + { + Self::new(self.context(), self.end()..self.end()) + } + + /// Combine two assumed-contiguous spans together into a larger span that encompasses both (and anything between). + /// + /// For example, spans like `3..5` and `7..8` will result in a unioned span of `3..8`. + /// + /// The spans may overlap one-another, but the start offset must come before the end offset for each span (i.e: + /// each span must be 'well-formed'). If this is not the case, the result is unspecified. + /// + /// # Panics + /// + /// Panics if the [`Self::Context`]s of both spans are not equal. + fn union(&self, other: Self) -> Self + where + Self::Context: PartialEq + fmt::Debug, + Self::Offset: Ord, + Self: Sized, + { + assert_eq!( + self.context(), + other.context(), + "tried to union two spans with different contexts" + ); + Self::new( + self.context(), + self.start().min(other.start())..self.end().max(other.end()), + ) + } } /// The most basic implementor of `Span` - akin to `Range`, but `Copy` since it's not also From 97c7f68dbbbcd9680579136d184e8d7a03e6e60c Mon Sep 17 00:00:00 2001 From: wackbyte Date: Wed, 13 Sep 2023 15:21:10 -0400 Subject: [PATCH 09/64] Bind span earlier in `select{_ref}!` so that it may be used in the `if` guard --- src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b4606034..e71cf4f6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2837,8 +2837,8 @@ where macro_rules! select { ($($p:pat $(= $span:ident)? $(if $guard:expr)? $(=> $out:expr)?),+ $(,)?) => ({ $crate::primitive::select( - move |x, span| match x { - $($p $(if $guard)? => ::core::option::Option::Some({ $(let $span = span;)? () $(;$out)? })),+, + move |x, span| match (x, span) { + $(($p $(,$span)?, ..) $(if $guard)? 
=> ::core::option::Option::Some({ () $(;$out)? })),+, _ => ::core::option::Option::None, } ) @@ -2854,8 +2854,8 @@ macro_rules! select { macro_rules! select_ref { ($($p:pat $(= $span:ident)? $(if $guard:expr)? $(=> $out:expr)?),+ $(,)?) => ({ $crate::primitive::select_ref( - move |x, span| match x { - $($p $(if $guard)? => ::core::option::Option::Some({ $(let $span = span;)? () $(;$out)? })),+, + move |x, span| match (x, span) { + $(($p $(,$span)?, ..) $(if $guard)? => ::core::option::Option::Some({ () $(;$out)? })),+, _ => ::core::option::Option::None, } ) From b4c857db320f404b78800b3f8a71bb064f0f4bd0 Mon Sep 17 00:00:00 2001 From: Simon Guest Date: Wed, 20 Sep 2023 16:51:35 +1200 Subject: [PATCH 10/64] SkipUntil and SkipThenRetryUntil only need Input Removing ValueInput constraint allows to use these for e.g. BorrowInput. --- src/recovery.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/recovery.rs b/src/recovery.rs index d8a9bc89..784aace8 100644 --- a/src/recovery.rs +++ b/src/recovery.rs @@ -98,7 +98,7 @@ pub struct SkipThenRetryUntil { impl Sealed for SkipThenRetryUntil {} impl<'a, I, O, E, S, U> Strategy<'a, I, O, E> for SkipThenRetryUntil where - I: ValueInput<'a>, + I: Input<'a>, S: Parser<'a, I, (), E>, U: Parser<'a, I, (), E>, E: ParserExtra<'a, I>, @@ -157,7 +157,7 @@ pub struct SkipUntil { impl Sealed for SkipUntil {} impl<'a, I, O, E, S, U, F> Strategy<'a, I, O, E> for SkipUntil where - I: ValueInput<'a>, + I: Input<'a>, S: Parser<'a, I, (), E>, U: Parser<'a, I, (), E>, F: Fn() -> O, From 7718eed763e4c7850eba443ae61a8e5f6b7ef6a7 Mon Sep 17 00:00:00 2001 From: Simon Guest Date: Wed, 20 Sep 2023 16:58:11 +1200 Subject: [PATCH 11/64] Add any_ref() Works just like any() but for BorrowInput rather than ValueInput. 
--- src/lib.rs | 4 +++- src/primitive.rs | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index e71cf4f6..06d60be3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -114,7 +114,9 @@ pub mod prelude { error::{Cheap, EmptyErr, Error as _, Rich, Simple}, extra, input::Input, - primitive::{any, choice, custom, empty, end, group, just, map_ctx, none_of, one_of, todo}, + primitive::{ + any, any_ref, choice, custom, empty, end, group, just, map_ctx, none_of, one_of, todo, + }, recovery::{nested_delimiters, skip_then_retry_until, skip_until, via_parser}, recursive::{recursive, Recursive}, span::{SimpleSpan, Span as _}, diff --git a/src/primitive.rs b/src/primitive.rs index 65dba9c1..64201de4 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -591,6 +591,62 @@ pub const fn any<'a, I: Input<'a>, E: ParserExtra<'a, I>>() -> Any { } } +/// See [`any_ref`]. +pub struct AnyRef { + #[allow(dead_code)] + phantom: EmptyPhantom<(E, I)>, +} + +impl Copy for AnyRef {} +impl Clone for AnyRef { + fn clone(&self) -> Self { + *self + } +} + +impl<'a, I, E> ParserSealed<'a, I, &'a I::Token, E> for AnyRef +where + I: BorrowInput<'a>, + E: ParserExtra<'a, I>, +{ + #[inline] + fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { + let before = inp.offset(); + match inp.next_ref_inner() { + (_, Some(tok)) => Ok(M::bind(|| tok)), + (at, found) => { + let err_span = inp.span_since(before); + inp.add_alt(at, None, found.map(|f| f.into()), err_span); + Err(()) + } + } + } + + go_extra!(&'a I::Token); +} + +/// A parser that accepts any input (but not the end of input). +/// +/// The output type of this parser is `I`, the input that was found. 
+/// +/// # Examples +/// +/// ``` +/// # use chumsky::{prelude::*, error::Simple}; +/// let any_ref_0 = any_ref::<_, extra::Err>>(); +/// let any_ref_1 = any_ref::<_, extra::Err>>(); +/// +/// assert_eq!(any_ref_1.parse(&['a'; 1]).into_result(), Ok(&'a')); +/// assert_eq!(any_ref_1.parse(&['7'; 1]).into_result(), Ok(&'7')); +/// assert_eq!(any_ref_1.parse(&['\t'; 1]).into_result(), Ok(&'\t')); +/// assert!(any_ref_0.parse(&[]).has_errors()); +/// ``` +pub const fn any_ref<'a, I: BorrowInput<'a>, E: ParserExtra<'a, I>>() -> AnyRef { + AnyRef { + phantom: EmptyPhantom::new(), + } +} + /// See [`map_ctx`]. pub struct MapCtx { pub(crate) parser: A, From 38a6e47cadc358b6fdc62c53728a29c090852d81 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Wed, 20 Sep 2023 12:58:24 +0100 Subject: [PATCH 12/64] Appease clippy --- src/private.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/private.rs b/src/private.rs index 2907c11f..4c2182f5 100644 --- a/src/private.rs +++ b/src/private.rs @@ -288,9 +288,7 @@ impl MaybeUninitExt for MaybeUninit { } unsafe fn array_assume_init(uninit: [Self; N]) -> [T; N] { - let out = (&uninit as *const [Self; N] as *const [T; N]).read(); - core::mem::forget(uninit); - out + (&uninit as *const [Self; N] as *const [T; N]).read() } } From 325a98642ee469de637429ab82f936cbeabda657 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Wed, 20 Sep 2023 13:36:24 +0100 Subject: [PATCH 13/64] Fix doc links --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index e71cf4f6..7060fc99 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -223,7 +223,7 @@ use sync::{DynParser, MaybeSync, RefC, RefW}; /// /// Unlike `Result`, this type is designed to express the fact that generating outputs and errors are not /// mutually-exclusive operations: it is possible for a parse to produce non-terminal errors (see -/// [`Parse::recover_with`] while still producing useful output). 
+/// [`Parser::recover_with`] while still producing useful output). /// /// If you don't care for recovered outputs and you with to treat success/failure as a binary, you may use /// [`ParseResult::into_result`]. From 0f146c8e88908b7ee664a9b8e71c05349b2144b3 Mon Sep 17 00:00:00 2001 From: Simon Guest Date: Thu, 21 Sep 2023 09:39:32 +1200 Subject: [PATCH 14/64] Fix doc for any_ref --- src/primitive.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/primitive.rs b/src/primitive.rs index 64201de4..798c5914 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -627,7 +627,9 @@ where /// A parser that accepts any input (but not the end of input). /// -/// The output type of this parser is `I`, the input that was found. +/// The output type of this parser is `&'a I::Token`, the input that was found. +/// +/// This function is the borrowing equivalent of [any]. Where possible, it's recommended to use [any] instead. /// /// # Examples /// From f5a78d243b8377092c5639d4d7e7bf3ca77d6d50 Mon Sep 17 00:00:00 2001 From: Simon Guest Date: Thu, 21 Sep 2023 09:42:20 +1200 Subject: [PATCH 15/64] Tiny fix to doc comment for any --- src/primitive.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/primitive.rs b/src/primitive.rs index 65dba9c1..77c61164 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -572,7 +572,7 @@ where /// A parser that accepts any input (but not the end of input). /// -/// The output type of this parser is `I`, the input that was found. +/// The output type of this parser is `I::Token`, the input that was found. 
/// /// # Examples /// From 78512d9265b639507fc64ba7082478940e436c71 Mon Sep 17 00:00:00 2001 From: Jess Date: Wed, 6 Sep 2023 09:53:19 -0600 Subject: [PATCH 16/64] Add map_with_ctx --- src/combinator.rs | 68 +++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 41 ++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) diff --git a/src/combinator.rs b/src/combinator.rs index cb2cf24d..2f4e45db 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -320,6 +320,74 @@ where go_extra!(O); } +/// See [`Parser::map_with_ctx`]. +pub struct MapWithContext { + pub(crate) parser: A, + pub(crate) mapper: F, + #[allow(dead_code)] + pub(crate) phantom: EmptyPhantom, +} + +impl Copy for MapWithContext {} +impl Clone for MapWithContext { + fn clone(&self) -> Self { + Self { + parser: self.parser.clone(), + mapper: self.mapper.clone(), + phantom: EmptyPhantom::new(), + } + } +} + +impl<'a, I, O, E, A, OA, F> ParserSealed<'a, I, O, E> for MapWithContext +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, OA, E>, + F: Fn(OA, &E::Context) -> O, +{ + #[inline(always)] + fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { + let out = self.parser.go::(inp)?; + Ok(M::map(out, move |x| (self.mapper)(x, inp.ctx))) + } + + go_extra!(O); +} + +impl<'a, I, O, E, A, OA, F> IterParserSealed<'a, I, O, E> for MapWithContext +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: IterParser<'a, I, OA, E>, + F: Fn(OA, &E::Context) -> O, +{ + type IterState = A::IterState + where + I: 'a; + + #[inline(always)] + fn make_iter( + &self, + inp: &mut InputRef<'a, '_, I, E>, + ) -> PResult> { + self.parser.make_iter(inp) + } + + #[inline(always)] + fn next( + &self, + inp: &mut InputRef<'a, '_, I, E>, + state: &mut Self::IterState, + ) -> IPResult { + match self.parser.next::(inp, state) { + Ok(Some(o)) => Ok(Some(M::map(o, move |x| (self.mapper)(x, inp.ctx)))), + Ok(None) => Ok(None), + Err(()) => Err(()), + } + } +} + /// See [`Parser::map`]. 
pub struct Map { pub(crate) parser: A, diff --git a/src/lib.rs b/src/lib.rs index 33489920..c5fd11b0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -534,6 +534,47 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: } } + /// Works the same as [`Parser::map`], but the second argument for the mapper F is the parser's + /// current context. + /// + /// Primarily used to modify existing context using the result of a parser, before passing to a + /// `*_with_ctx` method. Also useful when the output of a parser is dependent on the currenct + /// context. + /// + /// # Examples + /// + /// ``` + /// # use chumsky::{prelude::*, error::Simple}; + /// + /// fn palindrome_parser<'a>() -> impl Parser<'a, &'a str, String> { + /// recursive(|chain| { + /// choice(( + /// just(String::new()) + /// .configure(|cfg, ctx: &String| cfg.seq(ctx.clone())) + /// .then_ignore(end()), + /// any() + /// .map_with_ctx(|x, ctx| format!("{x}{ctx}")) + /// .ignore_with_ctx(chain), + /// )) + /// }) + /// .with_ctx(String::new()) + /// } + /// + /// assert_eq!(palindrome_parser().parse("abccba").into_result().as_deref(), Ok("cba")); + /// assert_eq!(palindrome_parser().parse("hello olleh").into_result().as_deref(), Ok(" olleh")); + /// assert!(palindrome_parser().parse("abccb").into_result().is_err()); + /// ``` + fn map_with_ctx U>(self, f: F) -> MapWithContext + where + Self: Sized, + { + MapWithContext { + parser: self, + mapper: f, + phantom: EmptyPhantom::new(), + } + } + /// Map the output of this parser to another value. /// If the output of this parser isn't a tuple, use [`Parser::map`]. 
/// From a7ecae426e40ac84bbf159a27cebae5d7c52cec1 Mon Sep 17 00:00:00 2001 From: stefnotch Date: Tue, 26 Sep 2023 21:35:06 +0200 Subject: [PATCH 17/64] Only enable vergen on docsrs --- Cargo.toml | 5 ++++- build.rs | 12 ++++++++++++ src/lib.rs | 11 +++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 35c1968e..72a28abc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,6 +59,9 @@ regex = ["dep:regex-automata"] # Enable serde serialization support serde = ["dep:serde"] +# Enable dependencies only needed for generation of documentation on docs.rs +docsrs = ["dep:vergen"] + # An alias of all features that work with the stable compiler. # Do not use this feature, its removal is not considered a breaking change and its behaviour may change. # If you're working on chumsky and you're adding a feature that does not require nightly support, please add it to this list. @@ -94,7 +97,7 @@ lasso = "0.7" slotmap = "1.0" [build-dependencies] -vergen = { version = "=8.1.1", features = ["git", "gitoxide"] } +vergen = { version = "=8.1.1", optional = true, features = ["git", "gitoxide"] } [target.'cfg(unix)'.dev-dependencies] pprof = { version = "0.11", features = ["flamegraph", "criterion"] } diff --git a/build.rs b/build.rs index 35aa44a6..fecd361b 100644 --- a/build.rs +++ b/build.rs @@ -1,8 +1,20 @@ use std::error::Error; +#[cfg(feature = "docsrs")] use vergen::EmitBuilder; fn main() -> Result<(), Box> { + emit_git_metadata()?; + Ok(()) +} + +#[cfg(feature = "docsrs")] +fn emit_git_metadata() -> Result<(), Box> { // Emit the instructions EmitBuilder::builder().all_git().emit()?; Ok(()) } + +#[cfg(not(feature = "docsrs"))] +fn emit_git_metadata() -> Result<(), Box> { + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index c5fd11b0..0c718e9e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,6 +25,7 @@ extern crate alloc; extern crate core; +#[cfg(feature = "docsrs")] macro_rules! 
blob_url_prefix { () => { concat!( @@ -34,6 +35,16 @@ macro_rules! blob_url_prefix { }; } +#[cfg(not(feature = "docsrs"))] +macro_rules! blob_url_prefix { + () => { + concat!( + "https://github.com/zesterer/chumsky/blob/", + env!("CARGO_PKG_VERSION") + ) + }; +} + macro_rules! go_extra { ( $O :ty ) => { #[inline(always)] From 87ff69ac7ed1b6e01e0bab4bdd7e994b01e7f5a2 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Wed, 27 Sep 2023 20:37:55 +0100 Subject: [PATCH 18/64] Tie map_ctx ParserExtra parameters together --- src/primitive.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/primitive.rs b/src/primitive.rs index 955cd230..fc6c9ce0 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -650,24 +650,25 @@ pub const fn any_ref<'a, I: BorrowInput<'a>, E: ParserExtra<'a, I>>() -> AnyRef< } /// See [`map_ctx`]. -pub struct MapCtx { +pub struct MapCtx { pub(crate) parser: A, pub(crate) mapper: F, - pub(crate) extra_phantom: PhantomData, + #[allow(dead_code)] + pub(crate) phantom: EmptyPhantom<(AE, E)>, } -impl Copy for MapCtx {} -impl Clone for MapCtx { +impl Copy for MapCtx {} +impl Clone for MapCtx { fn clone(&self) -> Self { MapCtx { parser: self.parser.clone(), mapper: self.mapper.clone(), - extra_phantom: PhantomData, + phantom: EmptyPhantom::new(), } } } -impl<'a, I, O, E, EI, A, F> ParserSealed<'a, I, O, E> for MapCtx +impl<'a, I, O, E, EI, A, F> ParserSealed<'a, I, O, E> for MapCtx where I: Input<'a>, E: ParserExtra<'a, I>, @@ -737,7 +738,7 @@ where /// } /// assert!(!specific_usize(10).parse("10").has_errors()); /// ``` -pub const fn map_ctx<'a, P, OP, I, E, EP, F>(mapper: F, parser: P) -> MapCtx +pub const fn map_ctx<'a, P, OP, I, E, EP, F>(mapper: F, parser: P) -> MapCtx where F: Fn(&E::Context) -> EP::Context, I: Input<'a>, @@ -749,7 +750,7 @@ where MapCtx { parser, mapper, - extra_phantom: PhantomData, + phantom: EmptyPhantom::new(), } } From d637d35fe0c539c151885d47e0571c0a6bbb325e Mon Sep 17 00:00:00 2001 
From: Joshua Barretto Date: Thu, 28 Sep 2023 10:28:45 +0100 Subject: [PATCH 19/64] Simplify example --- examples/nested_spans.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/nested_spans.rs b/examples/nested_spans.rs index fa83c034..6371dd9f 100644 --- a/examples/nested_spans.rs +++ b/examples/nested_spans.rs @@ -17,7 +17,7 @@ fn parser<'a>() -> impl Parser<'a, TokenTreeInput<'a>, i64> { let parens = expr // Here we specify how the parser should come up with the nested tokens .nested_in(select_ref! { - Token::Parens(xs) = span => xs.as_slice().spanned(SimpleSpan::splat(SimpleSpan::end(&span))), + Token::Parens(xs) = span => xs.as_slice().spanned(SimpleSpan::to_end(&span)), }); let atom = num.or(parens); From 920daef0a82b3997d1b4dd82e37b78ce9bce3474 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Sat, 7 Oct 2023 10:28:04 +0100 Subject: [PATCH 20/64] Create guide README.md --- guide/README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 guide/README.md diff --git a/guide/README.md b/guide/README.md new file mode 100644 index 00000000..810fb01c --- /dev/null +++ b/guide/README.md @@ -0,0 +1,13 @@ +# Guide + +Chumsky's guide is intended to be viewed through [docs.rs](https://docs.rs/chumsky/1.0.0-alpha.4/chumsky/guide/index.html). + +## For contributors + +When modifying the guide, please remember to test the docs via rustdoc. You can do this via this command: + +``` +RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features +``` + +Appending `--open` will cause the docs to open in your web browser when built. From af231f25a505703be7e7cf4597fa3e72dc3fd210 Mon Sep 17 00:00:00 2001 From: zyansheep Date: Sat, 7 Oct 2023 15:53:24 -0400 Subject: [PATCH 21/64] add map_with_state and improve select! 
--- src/combinator.rs | 57 +++++++++++++++++++++++++++++++++++++++++++++++ src/error.rs | 2 ++ src/label.rs | 10 +++++++++ src/lib.rs | 29 +++++++++++++++++++----- src/primitive.rs | 15 ++++++++----- 5 files changed, 101 insertions(+), 12 deletions(-) diff --git a/src/combinator.rs b/src/combinator.rs index cb2cf24d..6de595c6 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -185,6 +185,63 @@ where } } +/// See [`Parser::map_with_slice`]. +pub struct MapWithSlice<'a, A, I, O, E, F, U> +where + I: SliceInput<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, O, E>, + F: Fn(O, I::Slice) -> U, +{ + pub(crate) parser: A, + pub(crate) mapper: F, + #[allow(dead_code)] + pub(crate) phantom: EmptyPhantom<(I::Slice, O, E)>, +} + +impl<'a, A: Copy, I, O, E, F: Copy, U> Copy for MapWithSlice<'a, A, I, O, E, F, U> +where + I: SliceInput<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, O, E>, + F: Fn(O, I::Slice) -> U, +{ +} +impl<'a, A: Clone, I, O, E, F: Clone, U> Clone for MapWithSlice<'a, A, I, O, E, F, U> +where + I: Input<'a> + SliceInput<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, O, E>, + F: Fn(O, I::Slice) -> U, +{ + fn clone(&self) -> Self { + Self { + parser: self.parser.clone(), + mapper: self.mapper.clone(), + phantom: EmptyPhantom::new(), + } + } +} + +impl<'a, I, O, E, A, F, U> ParserSealed<'a, I, U, E> for MapWithSlice<'a, A, I, O, E, F, U> +where + I: SliceInput<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, O, E>, + F: Fn(O, I::Slice) -> U, +{ + #[inline(always)] + fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { + let before = inp.offset().offset; + let gen = self.parser.go::(inp)?; + let after = inp.offset().offset; + + Ok(M::bind(|| (self.mapper)(gen, inp.slice_inner(before..after)))) + } + + go_extra!(U); +} + /// See [`Parser::map_slice`]. 
pub struct MapSlice<'a, A, I, O, E, F, U> where diff --git a/src/error.rs b/src/error.rs index b08584c3..2120768c 100644 --- a/src/error.rs +++ b/src/error.rs @@ -777,7 +777,9 @@ where #[inline] fn in_context(&mut self, label: L, span: I::Span) { + // label is not yet in context if self.context.iter().all(|(l, _)| l != &label) { + // add it to context self.context.push((label, span)); } } diff --git a/src/label.rs b/src/label.rs index d0879bfc..e4bee920 100644 --- a/src/label.rs +++ b/src/label.rs @@ -48,15 +48,23 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { + // save potential alternate errors generated before this parser has run let old_alt = inp.errors.alt.take(); + // note position in source let before = inp.save(); + // run labelled parser let res = self.parser.go::(inp); // TODO: Label secondary errors too? + + // note any alternate errors generated by labelled parser let new_alt = inp.errors.alt.take(); + // reset input's alternate error back to old one inp.errors.alt = old_alt; + // if we have any new alt errors generated if let Some(mut new_alt) = new_alt { + let before_next = before.offset.into() + 1; if new_alt.pos.into() == before_next { new_alt.err.label_with(self.label.clone()); @@ -65,6 +73,8 @@ where let span = unsafe { inp.input.span(before.offset..new_alt.pos) }; new_alt.err.in_context(self.label.clone(), span); } + // new_alt.err.label_with(self.label.clone()); + // record new_alt as alternative error inp.add_alt_err(new_alt.pos, new_alt.err); } diff --git a/src/lib.rs b/src/lib.rs index e71cf4f6..9608e9d5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -436,10 +436,27 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: ParseResult::new(out, errs) } + /// Map from the output of the current parser and a slice of the input based on the current parser's span to a value. 
+ /// + /// The returned value may borrow data from the input slice, making this function very useful + /// for creating zero-copy AST output values + fn map_with_slice U>(self, f: F) -> MapWithSlice<'a, Self, I, O, E, F, U> + where + Self: Sized, + I: SliceInput<'a>, + { + MapWithSlice { + parser: self, + mapper: f, + phantom: EmptyPhantom::new(), + } + } + /// Map from a slice of the input based on the current parser's span to a value. /// /// The returned value may borrow data from the input slice, making this function very useful /// for creating zero-copy AST output values + /// This is effectively a special case of [`map_with_slice`](Parser::map_with_slice)`(|o, s|f(s))` fn map_slice U>(self, f: F) -> MapSlice<'a, Self, I, O, E, F, U> where Self: Sized, @@ -2835,10 +2852,10 @@ where /// ``` #[macro_export] macro_rules! select { - ($($p:pat $(= $span:ident)? $(if $guard:expr)? $(=> $out:expr)?),+ $(,)?) => ({ + ($($p:pat $(= $span:ident)? $(, $state:ident)? $(if $guard:expr)? $(=> $out:expr)?),+ $(,)?) => ({ $crate::primitive::select( - move |x, span| match (x, span) { - $(($p $(,$span)?, ..) $(if $guard)? => ::core::option::Option::Some({ () $(;$out)? })),+, + move |x, span, state| match (x, span, state) { + $(($p $(,$span)? $(,$state)?, ..) $(if $guard)? => ::core::option::Option::Some({ () $(;$out)? })),+, _ => ::core::option::Option::None, } ) @@ -2852,10 +2869,10 @@ macro_rules! select { /// `select_ref` requires that the parser input implements [`BorrowInput`]. #[macro_export] macro_rules! select_ref { - ($($p:pat $(= $span:ident)? $(if $guard:expr)? $(=> $out:expr)?),+ $(,)?) => ({ + ($($p:pat $(= $span:ident)? $(, $state:ident)? $(if $guard:expr)? $(=> $out:expr)?),+ $(,)?) => ({ $crate::primitive::select_ref( - move |x, span| match (x, span) { - $(($p $(,$span)?, ..) $(if $guard)? => ::core::option::Option::Some({ () $(;$out)? })),+, + move |x, span, state| match (x, span, state) { + $(($p $(,$span)? $(,$state)?, ..) $(if $guard)? 
=> ::core::option::Option::Some({ () $(;$out)? })),+, _ => ::core::option::Option::None, } ) diff --git a/src/primitive.rs b/src/primitive.rs index 65dba9c1..6b845345 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -444,7 +444,7 @@ where I: Input<'a>, I::Token: Clone + 'a, E: ParserExtra<'a, I>, - F: Fn(I::Token, I::Span) -> Option, + F: Fn(I::Token, I::Span, &mut E::State) -> Option, { Select { filter, @@ -457,15 +457,17 @@ where I: ValueInput<'a>, I::Token: Clone + 'a, E: ParserExtra<'a, I>, - F: Fn(I::Token, I::Span) -> Option, + F: Fn(I::Token, I::Span, &mut E::State) -> Option, { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { let before = inp.offset(); let next = inp.next_inner(); let err_span = inp.span_since(before); + let span_since = inp.span_since(before); + let state = inp.state(); let (at, found) = match next { - (at, Some(tok)) => match (self.filter)(tok.clone(), inp.span_since(before)) { + (at, Some(tok)) => match (self.filter)(tok.clone(), span_since, state) { Some(out) => return Ok(M::bind(|| out)), None => (at, Some(tok.into())), }, @@ -501,7 +503,7 @@ where I: BorrowInput<'a>, I::Token: 'a, E: ParserExtra<'a, I>, - F: Fn(&'a I::Token, I::Span) -> Option, + F: Fn(&'a I::Token, I::Span, &mut E::State) -> Option, { SelectRef { filter, @@ -514,7 +516,7 @@ where I: BorrowInput<'a>, I::Token: 'a, E: ParserExtra<'a, I>, - F: Fn(&'a I::Token, I::Span) -> Option, + F: Fn(&'a I::Token, I::Span, &mut E::State) -> Option, { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { @@ -522,8 +524,9 @@ where let next = inp.next_ref_inner(); let span = inp.span_since(before); let err_span = inp.span_since(before); + let state = inp.state(); let (at, found) = match next { - (at, Some(tok)) => match (self.filter)(tok, span) { + (at, Some(tok)) => match (self.filter)(tok, span, state) { Some(out) => return Ok(M::bind(|| out)), None => (at, Some(tok.into())), }, From 062b39fdd1b1ee3e46f5a25b72d963a9b8c33621 Mon Sep 17 
00:00:00 2001 From: Joshua Barretto Date: Sun, 8 Oct 2023 21:16:12 +0100 Subject: [PATCH 22/64] Added map_with and try_map_with to replace existing mapper functions --- examples/json.rs | 9 ++- examples/zero-copy.rs | 8 +- src/combinator.rs | 183 +++++++++++++++++++++++++++++++++++++++--- src/input.rs | 9 +++ src/lib.rs | 106 ++++++++++++++++++------ src/primitive.rs | 6 +- src/recovery.rs | 2 +- src/text.rs | 12 +-- 8 files changed, 282 insertions(+), 53 deletions(-) diff --git a/examples/json.rs b/examples/json.rs index b91044fe..bfe96b85 100644 --- a/examples/json.rs +++ b/examples/json.rs @@ -19,7 +19,7 @@ enum Json { fn parser<'a>() -> impl Parser<'a, &'a str, Json, extra::Err>> { recursive(|value| { - let digits = text::digits(10).slice(); + let digits = text::digits(10).to_slice(); let frac = just('.').then(digits.clone()); @@ -33,7 +33,8 @@ fn parser<'a>() -> impl Parser<'a, &'a str, Json, extra::Err>> { .then(text::int(10)) .then(frac.or_not()) .then(exp.or_not()) - .map_slice(|s: &str| s.parse().unwrap()) + .to_slice() + .map(|s: &str| s.parse().unwrap()) .boxed(); let escape = just('\\') @@ -46,7 +47,7 @@ fn parser<'a>() -> impl Parser<'a, &'a str, Json, extra::Err>> { just('n').to('\n'), just('r').to('\r'), just('t').to('\t'), - just('u').ignore_then(text::digits(16).exactly(4).slice().validate( + just('u').ignore_then(text::digits(16).exactly(4).to_slice().validate( |digits, span, emitter| { char::from_u32(u32::from_str_radix(digits, 16).unwrap()).unwrap_or_else( || { @@ -64,7 +65,7 @@ fn parser<'a>() -> impl Parser<'a, &'a str, Json, extra::Err>> { .ignored() .or(escape) .repeated() - .slice() + .to_slice() .map(ToString::to_string) .delimited_by(just('"'), just('"')) .boxed(); diff --git a/examples/zero-copy.rs b/examples/zero-copy.rs index e92f7513..dc9a8ea1 100644 --- a/examples/zero-copy.rs +++ b/examples/zero-copy.rs @@ -12,16 +12,18 @@ fn parser<'a>() -> impl Parser<'a, &'a str, [(SimpleSpan, Token<'a>); 6]> .filter(|c: &char| 
c.is_alphanumeric()) .repeated() .at_least(1) - .map_slice(Token::Ident); + .to_slice() + .map(Token::Ident); let string = just('"') .then(any().filter(|c: &char| *c != '"').repeated()) .then(just('"')) - .map_slice(Token::String); + .to_slice() + .map(Token::String); ident .or(string) - .map_with_span(|token, span| (span, token)) + .map_with(|token, e| (e.span(), token)) .padded() .repeated() .collect_exactly() diff --git a/src/combinator.rs b/src/combinator.rs index 2f4e45db..328a2ad8 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -185,6 +185,7 @@ where } } +/* /// See [`Parser::map_slice`]. pub struct MapSlice<'a, A, I, O, E, F, U> where @@ -232,34 +233,34 @@ where { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset().offset; + let before = inp.offset(); self.parser.go::(inp)?; - let after = inp.offset().offset; - Ok(M::bind(|| (self.mapper)(inp.slice_inner(before..after)))) + Ok(M::bind(|| (self.mapper)(inp.slice_since(before..)))) } go_extra!(U); } +*/ /// See [`Parser::slice`] -pub struct Slice { +pub struct ToSlice { pub(crate) parser: A, #[allow(dead_code)] pub(crate) phantom: EmptyPhantom, } -impl Copy for Slice {} -impl Clone for Slice { +impl Copy for ToSlice {} +impl Clone for ToSlice { fn clone(&self) -> Self { - Slice { + Self { parser: self.parser.clone(), phantom: EmptyPhantom::new(), } } } -impl<'a, A, I, O, E> ParserSealed<'a, I, I::Slice, E> for Slice +impl<'a, A, I, O, E> ParserSealed<'a, I, I::Slice, E> for ToSlice where A: Parser<'a, I, O, E>, I: SliceInput<'a>, @@ -270,11 +271,10 @@ where where Self: Sized, { - let before = inp.offset().offset; + let before = inp.offset(); self.parser.go::(inp)?; - let after = inp.offset().offset; - Ok(M::bind(|| inp.slice_inner(before..after))) + Ok(M::bind(|| inp.slice_since(before..))) } go_extra!(I::Slice); @@ -320,6 +320,7 @@ where go_extra!(O); } +/* /// See [`Parser::map_with_ctx`]. 
pub struct MapWithContext { pub(crate) parser: A, @@ -387,6 +388,7 @@ where } } } +*/ /// See [`Parser::map`]. pub struct Map { @@ -449,7 +451,116 @@ where state: &mut Self::IterState, ) -> IPResult { match self.parser.next::(inp, state) { - Ok(Some(o)) => Ok(Some(M::map(o, |o| (self.mapper)(o)))), + Ok(Some(o)) => Ok(Some(M::map(o, &self.mapper))), + Ok(None) => Ok(None), + Err(()) => Err(()), + } + } +} + +/// See [`Parser::map_with`]. +pub struct MapExtra<'a, 'b, 'inv, I: Input<'a>, E: ParserExtra<'a, I>> { + before: Offset<'a, 'inv, I>, + inp: &'b mut InputRef<'a, 'inv, I, E>, +} + +impl<'a, 'b, 'inv, I: Input<'a>, E: ParserExtra<'a, I>> MapExtra<'a, 'b, 'inv, I, E> { + /// Get the span corresponding to the output. + #[inline(always)] + pub fn span(&self) -> I::Span { + self.inp.span_since(self.before) + } + + /// Get the slice corresponding to the output. + #[inline(always)] + pub fn slice(&self) -> I::Slice + where + I: SliceInput<'a>, + { + self.inp.slice_since(self.before..) + } + + /// Get the parser state. + #[inline(always)] + pub fn state(&mut self) -> &mut E::State { + self.inp.state() + } + + /// Get the current parser context. + #[inline(always)] + pub fn ctx(&self) -> &E::Context { + self.inp.ctx() + } +} + +/// See [`Parser::map_with`]. 
+pub struct MapWith { + pub(crate) parser: A, + pub(crate) mapper: F, + #[allow(dead_code)] + pub(crate) phantom: EmptyPhantom, +} + +impl Copy for MapWith {} +impl Clone for MapWith { + fn clone(&self) -> Self { + Self { + parser: self.parser.clone(), + mapper: self.mapper.clone(), + phantom: EmptyPhantom::new(), + } + } +} + +impl<'a, I, O, E, A, OA, F> ParserSealed<'a, I, O, E> for MapWith +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, OA, E>, + F: Fn(OA, &mut MapExtra<'a, '_, '_, I, E>) -> O, +{ + #[inline(always)] + fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { + let before = inp.offset(); + let out = self.parser.go::(inp)?; + Ok(M::map(out, |out| { + (self.mapper)(out, &mut MapExtra { before, inp }) + })) + } + + go_extra!(O); +} + +impl<'a, I, O, E, A, OA, F> IterParserSealed<'a, I, O, E> for MapWith +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: IterParser<'a, I, OA, E>, + F: Fn(OA, &mut MapExtra<'a, '_, '_, I, E>) -> O, +{ + type IterState = A::IterState + where + I: 'a; + + #[inline(always)] + fn make_iter( + &self, + inp: &mut InputRef<'a, '_, I, E>, + ) -> PResult> { + self.parser.make_iter(inp) + } + + #[inline(always)] + fn next( + &self, + inp: &mut InputRef<'a, '_, I, E>, + state: &mut Self::IterState, + ) -> IPResult { + let before = inp.offset(); + match self.parser.next::(inp, state) { + Ok(Some(o)) => Ok(Some(M::map(o, |o| { + (self.mapper)(o, &mut MapExtra { before, inp }) + }))), Ok(None) => Ok(None), Err(()) => Err(()), } @@ -531,6 +642,7 @@ where } } +/* /// See [`Parser::map_with_span`]. pub struct MapWithSpan { pub(crate) parser: A, @@ -569,6 +681,7 @@ where go_extra!(O); } +*/ /// See [`Parser::to_span`]. pub struct ToSpan { @@ -603,6 +716,7 @@ where go_extra!(I::Span); } +/* /// See [`Parser::map_with_state`]. pub struct MapWithState { pub(crate) parser: A, @@ -642,6 +756,7 @@ where go_extra!(O); } +*/ /// See [`Parser::try_map`]. 
pub struct TryMap { @@ -686,6 +801,7 @@ where go_extra!(O); } +/* /// See [`Parser::try_map_with_state`]. pub struct TryMapWithState { pub(crate) parser: A, @@ -728,6 +844,49 @@ where go_extra!(O); } +*/ + +/// See [`Parser::try_map_with`]. +pub struct TryMapWith { + pub(crate) parser: A, + pub(crate) mapper: F, + #[allow(dead_code)] + pub(crate) phantom: EmptyPhantom, +} + +impl Copy for TryMapWith {} +impl Clone for TryMapWith { + fn clone(&self) -> Self { + Self { + parser: self.parser.clone(), + mapper: self.mapper.clone(), + phantom: EmptyPhantom::new(), + } + } +} + +impl<'a, I, O, E, A, OA, F> ParserSealed<'a, I, O, E> for TryMapWith +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, OA, E>, + F: Fn(OA, &mut MapExtra<'a, '_, '_, I, E>) -> Result, +{ + #[inline(always)] + fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { + let before = inp.offset(); + let out = self.parser.go::(inp)?; + match (self.mapper)(out, &mut MapExtra { before, inp }) { + Ok(out) => Ok(M::bind(|| out)), + Err(err) => { + inp.add_alt_err(inp.offset().offset, err); + Err(()) + } + } + } + + go_extra!(O); +} /// See [`Parser::to`]. pub struct To { diff --git a/src/input.rs b/src/input.rs index a41eb31c..f02c4543 100644 --- a/src/input.rs +++ b/src/input.rs @@ -1373,6 +1373,15 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> self.slice_from_inner(range.start.offset..) } + /// Get a slice of the input that covers the given offset range. 
+ #[inline] + pub fn slice_since(&self, range: RangeFrom>) -> I::Slice + where + I: SliceInput<'a>, + { + self.slice_inner(range.start.offset..self.offset) + } + // TODO: Unofy with `InputRef::slice` #[inline(always)] pub(crate) fn slice_inner(&self, range: Range) -> I::Slice diff --git a/src/lib.rs b/src/lib.rs index c5fd11b0..4d456cef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -152,7 +152,9 @@ use self::{ container::*, error::Error, extra::ParserExtra, - input::{BorrowInput, Emitter, ExactSizeInput, InputRef, SliceInput, StrInput, ValueInput}, + input::{ + BorrowInput, Emitter, ExactSizeInput, InputRef, Offset, SliceInput, StrInput, ValueInput, + }, prelude::*, primitive::Any, private::{ @@ -438,6 +440,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: ParseResult::new(out, errs) } + /* /// Map from a slice of the input based on the current parser's span to a value. /// /// The returned value may borrow data from the input slice, making this function very useful @@ -453,16 +456,15 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: phantom: EmptyPhantom::new(), } } + */ /// Convert the output of this parser into a slice of the input, based on the current parser's /// span. - /// - /// This is effectively a special case of [`map_slice`](Parser::map_slice)`(|x| x)` - fn slice(self) -> Slice + fn to_slice(self) -> ToSlice where Self: Sized, { - Slice { + ToSlice { parser: self, phantom: EmptyPhantom::new(), } @@ -534,6 +536,24 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: } } + /// Map the output of this parser to another value, with the opportunity to get extra metadata. + /// + /// The output type of this parser is `U`, the same as the function's output. 
+ fn map_with) -> U>( + self, + f: F, + ) -> MapWith + where + Self: Sized, + { + MapWith { + parser: self, + mapper: f, + phantom: EmptyPhantom::new(), + } + } + + /* /// Works the same as [`Parser::map`], but the second argument for the mapper F is the parser's /// current context. /// @@ -574,6 +594,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: phantom: EmptyPhantom::new(), } } + */ /// Map the output of this parser to another value. /// If the output of this parser isn't a tuple, use [`Parser::map`]. @@ -628,6 +649,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: } } + /* /// Map the output of this parser to another value, making use of the pattern's span when doing so. /// /// This is very useful when generating an AST that attaches a span to each AST node. @@ -660,6 +682,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: phantom: EmptyPhantom::new(), } } + */ /// Transform the output of this parser to the pattern's span. /// @@ -682,8 +705,8 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// } /// /// let int = text::int::<_, _, extra::Err>>(10) - /// .slice() - /// .map_with_span(Expr::Int) + /// .to_slice() + /// .map_with(|int, e| Expr::Int(int, e.span())) /// .padded(); /// /// let add_op = just('+').to_span().padded(); @@ -712,6 +735,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: } } + /* /// Map the output of this parser to another value, making use of the parser's state when doing so. /// /// This is very useful for parsing non context-free grammars. @@ -772,6 +796,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: phantom: EmptyPhantom::new(), } } + */ /// After a successful parse, apply a fallible function to the output. If the function produces an error, treat it /// as a parsing error. 
@@ -805,6 +830,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: } } + /* /// After a successful parse, apply a fallible function to the output, making use of the parser's state when /// doing so. If the function produces an error, treat it as a parsing error. /// @@ -825,6 +851,28 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: phantom: EmptyPhantom::new(), } } + */ + + /// After a successful parse, apply a fallible function to the output, with the opportunity to get extra metadata. + /// If the function produces an error, treat it as a parsing error. + /// + /// If you wish parsing of this pattern to continue when an error is generated instead of halting, consider using + /// [`Parser::validate`] instead. + /// + /// The output type of this parser is `U`, the [`Ok`] return value of the function. + fn try_map_with) -> Result>( + self, + f: F, + ) -> TryMapWith + where + Self: Sized, + { + TryMapWith { + parser: self, + mapper: f, + phantom: EmptyPhantom::new(), + } + } /// Ignore the output of this parser, yielding `()` as an output instead. 
/// @@ -1427,7 +1475,8 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// .and_is(one_of("{}").not()) /// .repeated() /// .at_least(1) - /// .map_slice(Tree::Text); + /// .to_slice() + /// .map(Tree::Text); /// /// let group = tree /// .repeated() @@ -1635,9 +1684,9 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// // Now, define our parser /// let int = text::int::<&str, _, extra::Full, NodeArena, ()>>(10) /// .padded() - /// .map_with_state(|s, _, state: &mut NodeArena| + /// .map_with(|s, e| /// // Return the ID of the new integer node - /// state.insert(Expr::Int(s.parse().unwrap())) + /// e.state().insert(Expr::Int(s.parse().unwrap())) /// ); /// /// let sum = int.foldl_with_state( @@ -1689,7 +1738,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// ``` /// # use chumsky::prelude::*; /// let just_numbers = text::digits::<_, _, extra::Err>>(10) - /// .slice() + /// .to_slice() /// .padded() /// .then_ignore(none_of("+-*/").rewind()) /// .separated_by(just(',')) @@ -2928,16 +2977,18 @@ mod tests { .filter(|c: &char| c.is_alphanumeric()) .repeated() .at_least(1) - .map_slice(Token::Ident); + .to_slice() + .map(Token::Ident); let string = just('"') .then(any().filter(|c: &char| *c != '"').repeated()) .then(just('"')) - .map_slice(Token::String); + .to_slice() + .map(Token::String); ident .or(string) - .map_with_span(|token, span| (span, token)) + .map_with(|token, e| (e.span(), token)) .padded() .repeated() .collect_exactly() @@ -2978,16 +3029,18 @@ mod tests { .filter(|c: &char| c.is_alphanumeric()) .repeated() .at_least(1) - .map_slice(Token::Ident); + .to_slice() + .map(Token::Ident); let string = just('"') .then(any().filter(|c: &char| *c != '"').repeated()) .then(just('"')) - .map_slice(Token::String); + .to_slice() + .map(Token::String); ident .or(string) - .map_with_span(|token, span| (span, token)) + .map_with(|token, e| (e.span(), token)) .padded() 
.repeated() .collect_exactly() @@ -3024,7 +3077,8 @@ mod tests { .repeated() .at_least(1) .at_most(3) - .map_slice(|b: &str| b.parse::().unwrap()) + .to_slice() + .map(|b: &str| b.parse::().unwrap()) .padded() .separated_by(just(',').padded()) .allow_trailing() @@ -3060,13 +3114,14 @@ mod tests { .filter(|c: &char| c.is_ascii_alphabetic()) .repeated() .at_least(1) - .slice() + .to_slice() .padded(), any() .filter(|c: &char| c.is_ascii_digit()) .repeated() .at_least(1) - .map_slice(|s: &str| s.parse::().unwrap()) + .to_slice() + .map(|s: &str| s.parse::().unwrap()) .padded(), any().filter(|c: &char| !c.is_whitespace()).padded(), )) @@ -3348,7 +3403,8 @@ mod tests { .repeated() .at_least(1) .at_most(3) - .map_slice(|b: &str| b.parse::().unwrap()) + .to_slice() + .map(|b: &str| b.parse::().unwrap()) .padded() .separated_by(just(',').padded()) .allow_trailing() @@ -3380,7 +3436,8 @@ mod tests { .repeated() .at_least(1) .at_most(3) - .map_slice(|b: &str| b.parse::().unwrap()) + .to_slice() + .map(|b: &str| b.parse::().unwrap()) .padded() .separated_by(just(',').padded()) .allow_trailing() @@ -3414,7 +3471,8 @@ mod tests { .repeated() .at_least(1) .at_most(3) - .map_slice(|b: &str| b.parse::().unwrap()) + .to_slice() + .map(|b: &str| b.parse::().unwrap()) .padded() .separated_by(just(',').padded()) .allow_trailing() @@ -3506,7 +3564,7 @@ mod tests { #[cfg(feature = "unstable")] fn cached() { fn my_parser<'a>() -> impl Parser<'a, &'a str, &'a str, extra::Default> { - any().repeated().exactly(5).slice() + any().repeated().exactly(5).to_slice() } struct MyCache; diff --git a/src/primitive.rs b/src/primitive.rs index fc6c9ce0..0219cbcc 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -704,7 +704,7 @@ where /// /// let inc = one_of::<_, _, extra::Default>(b'a'..=b'z') /// .ignore_with_ctx(map_ctx::<_, u8, &[u8], extra::Context, extra::Context, _>(|c: &u8| c.to_ascii_uppercase(), upper)) -/// .slice() +/// .to_slice() /// .repeated() /// .at_least(1) /// .collect::>(); 
@@ -783,8 +783,8 @@ impl Clone for Todo { /// ```should_panic /// # use chumsky::prelude::*; /// let int = just::<_, _, extra::Err>>("0x").ignore_then(todo()) -/// .or(just("0b").ignore_then(text::digits(2).slice())) -/// .or(text::int(10).slice()); +/// .or(just("0b").ignore_then(text::digits(2).to_slice())) +/// .or(text::int(10).to_slice()); /// /// // Decimal numbers are parsed /// assert_eq!(int.parse("12").into_result(), Ok("12")); diff --git a/src/recovery.rs b/src/recovery.rs index 784aace8..da918d19 100644 --- a/src/recovery.rs +++ b/src/recovery.rs @@ -245,5 +245,5 @@ where } }) .delimited_by(just(start), just(end)) - .map_with_span(move |_, span| fallback(span)) + .map_with(move |_, e| fallback(e.span())) } diff --git a/src/text.rs b/src/text.rs index 11817f3b..91d32179 100644 --- a/src/text.rs +++ b/src/text.rs @@ -270,7 +270,7 @@ where /// /// ``` /// # use chumsky::prelude::*; -/// let digits = text::digits::<_, _, extra::Err>>(10).slice(); +/// let digits = text::digits::<_, _, extra::Err>>(10).to_slice(); /// /// assert_eq!(digits.parse("0").into_result(), Ok("0")); /// assert_eq!(digits.parse("1").into_result(), Ok("1")); @@ -347,7 +347,7 @@ pub fn int<'a, I: ValueInput<'a> + StrInput<'a, C>, C: Char, E: ParserExtra<'a, .then(any().filter(move |c: &C| c.is_digit(radix)).repeated()) .ignored() .or(just(C::digit_zero()).ignored()) - .slice() + .to_slice() } /// Parsers and utilities for working with ASCII inputs. @@ -379,7 +379,7 @@ pub mod ascii { .filter(|c: &C| c.to_char().is_ascii_alphanumeric() || c.to_char() == '_') .repeated(), ) - .slice() + .to_slice() } /// Like [`ident`], but only accepts a specific identifier while rejecting trailing identifier characters. 
@@ -434,7 +434,7 @@ pub mod ascii { Err(Error::expected_found(None, None, span)) } }) - .slice() + .to_slice() } } @@ -469,7 +469,7 @@ pub mod unicode { .filter(|c: &C| c.is_ident_continue()) .repeated(), ) - .slice() + .to_slice() } /// Like [`ident`], but only accepts a specific identifier while rejecting trailing identifier characters. @@ -528,7 +528,7 @@ pub mod unicode { Err(Error::expected_found(None, None, span)) } }) - .slice() + .to_slice() } } From 2c03fc3d7bbe1f05e46c654cf10fd1ea8494aa67 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Sun, 8 Oct 2023 21:42:27 +0100 Subject: [PATCH 23/64] Added foldl_with, foldr_with, switched validate to use MapExtra --- examples/json.rs | 4 +- src/combinator.rs | 307 +++++----------------------------------------- src/lib.rs | 269 +++++++++++++--------------------------- 3 files changed, 115 insertions(+), 465 deletions(-) diff --git a/examples/json.rs b/examples/json.rs index bfe96b85..7f5dc68e 100644 --- a/examples/json.rs +++ b/examples/json.rs @@ -48,10 +48,10 @@ fn parser<'a>() -> impl Parser<'a, &'a str, Json, extra::Err>> { just('r').to('\r'), just('t').to('\t'), just('u').ignore_then(text::digits(16).exactly(4).to_slice().validate( - |digits, span, emitter| { + |digits, e, emitter| { char::from_u32(u32::from_str_radix(digits, 16).unwrap()).unwrap_or_else( || { - emitter.emit(Rich::custom(span, "invalid unicode character")); + emitter.emit(Rich::custom(e.span(), "invalid unicode character")); '\u{FFFD}' // unicode replacement character }, ) diff --git a/src/combinator.rs b/src/combinator.rs index 328a2ad8..54aa8a63 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -185,64 +185,6 @@ where } } -/* -/// See [`Parser::map_slice`]. 
-pub struct MapSlice<'a, A, I, O, E, F, U> -where - I: SliceInput<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, O, E>, - F: Fn(I::Slice) -> U, -{ - pub(crate) parser: A, - pub(crate) mapper: F, - #[allow(dead_code)] - pub(crate) phantom: EmptyPhantom<(I::Slice, O, E)>, -} - -impl<'a, A: Copy, I, O, E, F: Copy, U> Copy for MapSlice<'a, A, I, O, E, F, U> -where - I: SliceInput<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, O, E>, - F: Fn(I::Slice) -> U, -{ -} -impl<'a, A: Clone, I, O, E, F: Clone, U> Clone for MapSlice<'a, A, I, O, E, F, U> -where - I: Input<'a> + SliceInput<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, O, E>, - F: Fn(I::Slice) -> U, -{ - fn clone(&self) -> Self { - Self { - parser: self.parser.clone(), - mapper: self.mapper.clone(), - phantom: EmptyPhantom::new(), - } - } -} - -impl<'a, I, O, E, A, F, U> ParserSealed<'a, I, U, E> for MapSlice<'a, A, I, O, E, F, U> -where - I: SliceInput<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, O, E>, - F: Fn(I::Slice) -> U, -{ - #[inline(always)] - fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); - self.parser.go::(inp)?; - - Ok(M::bind(|| (self.mapper)(inp.slice_since(before..)))) - } - - go_extra!(U); -} -*/ - /// See [`Parser::slice`] pub struct ToSlice { pub(crate) parser: A, @@ -320,76 +262,6 @@ where go_extra!(O); } -/* -/// See [`Parser::map_with_ctx`]. 
-pub struct MapWithContext { - pub(crate) parser: A, - pub(crate) mapper: F, - #[allow(dead_code)] - pub(crate) phantom: EmptyPhantom, -} - -impl Copy for MapWithContext {} -impl Clone for MapWithContext { - fn clone(&self) -> Self { - Self { - parser: self.parser.clone(), - mapper: self.mapper.clone(), - phantom: EmptyPhantom::new(), - } - } -} - -impl<'a, I, O, E, A, OA, F> ParserSealed<'a, I, O, E> for MapWithContext -where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, OA, E>, - F: Fn(OA, &E::Context) -> O, -{ - #[inline(always)] - fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let out = self.parser.go::(inp)?; - Ok(M::map(out, move |x| (self.mapper)(x, inp.ctx))) - } - - go_extra!(O); -} - -impl<'a, I, O, E, A, OA, F> IterParserSealed<'a, I, O, E> for MapWithContext -where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: IterParser<'a, I, OA, E>, - F: Fn(OA, &E::Context) -> O, -{ - type IterState = A::IterState - where - I: 'a; - - #[inline(always)] - fn make_iter( - &self, - inp: &mut InputRef<'a, '_, I, E>, - ) -> PResult> { - self.parser.make_iter(inp) - } - - #[inline(always)] - fn next( - &self, - inp: &mut InputRef<'a, '_, I, E>, - state: &mut Self::IterState, - ) -> IPResult { - match self.parser.next::(inp, state) { - Ok(Some(o)) => Ok(Some(M::map(o, move |x| (self.mapper)(x, inp.ctx)))), - Ok(None) => Ok(None), - Err(()) => Err(()), - } - } -} -*/ - /// See [`Parser::map`]. pub struct Map { pub(crate) parser: A, @@ -642,47 +514,6 @@ where } } -/* -/// See [`Parser::map_with_span`]. 
-pub struct MapWithSpan { - pub(crate) parser: A, - pub(crate) mapper: F, - #[allow(dead_code)] - pub(crate) phantom: EmptyPhantom, -} - -impl Copy for MapWithSpan {} -impl Clone for MapWithSpan { - fn clone(&self) -> Self { - Self { - parser: self.parser.clone(), - mapper: self.mapper.clone(), - phantom: EmptyPhantom::new(), - } - } -} - -impl<'a, I, O, E, A, OA, F> ParserSealed<'a, I, O, E> for MapWithSpan -where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, OA, E>, - F: Fn(OA, I::Span) -> O, -{ - #[inline(always)] - fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); - let out = self.parser.go::(inp)?; - Ok(M::map(out, |out| { - let span = inp.span_since(before); - (self.mapper)(out, span) - })) - } - - go_extra!(O); -} -*/ - /// See [`Parser::to_span`]. pub struct ToSpan { pub(crate) parser: A, @@ -716,48 +547,6 @@ where go_extra!(I::Span); } -/* -/// See [`Parser::map_with_state`]. -pub struct MapWithState { - pub(crate) parser: A, - pub(crate) mapper: F, - #[allow(dead_code)] - pub(crate) phantom: EmptyPhantom, -} - -impl Copy for MapWithState {} -impl Clone for MapWithState { - fn clone(&self) -> Self { - Self { - parser: self.parser.clone(), - mapper: self.mapper.clone(), - phantom: EmptyPhantom::new(), - } - } -} - -impl<'a, I, O, E, A, OA, F> ParserSealed<'a, I, O, E> for MapWithState -where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, OA, E>, - F: Fn(OA, I::Span, &mut E::State) -> O, -{ - #[inline(always)] - fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); - let out = self.parser.go::(inp)?; - Ok(M::bind(|| { - let span = inp.span_since(before); - let state = inp.state(); - (self.mapper)(out, span, state) - })) - } - - go_extra!(O); -} -*/ - /// See [`Parser::try_map`]. pub struct TryMap { pub(crate) parser: A, @@ -801,51 +590,6 @@ where go_extra!(O); } -/* -/// See [`Parser::try_map_with_state`]. 
-pub struct TryMapWithState { - pub(crate) parser: A, - pub(crate) mapper: F, - #[allow(dead_code)] - pub(crate) phantom: EmptyPhantom, -} - -impl Copy for TryMapWithState {} -impl Clone for TryMapWithState { - fn clone(&self) -> Self { - Self { - parser: self.parser.clone(), - mapper: self.mapper.clone(), - phantom: EmptyPhantom::new(), - } - } -} - -impl<'a, I, O, E, A, OA, F> ParserSealed<'a, I, O, E> for TryMapWithState -where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, OA, E>, - F: Fn(OA, I::Span, &mut E::State) -> Result, -{ - #[inline(always)] - fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - let before = inp.offset(); - let out = self.parser.go::(inp)?; - let span = inp.span_since(before); - match (self.mapper)(out, span, inp.state()) { - Ok(out) => Ok(M::bind(|| out)), - Err(err) => { - inp.add_alt_err(inp.offset().offset, err); - Err(()) - } - } - } - - go_extra!(O); -} -*/ - /// See [`Parser::try_map_with`]. pub struct TryMapWith { pub(crate) parser: A, @@ -2505,8 +2249,8 @@ where go_extra!(O); } -/// See [`IterParser::foldr_with_state`]. -pub struct FoldrWithState { +/// See [`IterParser::foldr_with`]. 
+pub struct FoldrWith { pub(crate) parser_a: A, pub(crate) parser_b: B, pub(crate) folder: F, @@ -2516,8 +2260,8 @@ pub struct FoldrWithState { pub(crate) phantom: EmptyPhantom<(OA, E)>, } -impl Copy for FoldrWithState {} -impl Clone for FoldrWithState { +impl Copy for FoldrWith {} +impl Clone for FoldrWith { fn clone(&self) -> Self { Self { parser_a: self.parser_a.clone(), @@ -2530,13 +2274,13 @@ impl Clone for FoldrWithState ParserSealed<'a, I, O, E> for FoldrWithState +impl<'a, I, F, A, B, O, OA, E> ParserSealed<'a, I, O, E> for FoldrWith where I: Input<'a>, A: IterParser<'a, I, OA, E>, B: Parser<'a, I, O, E>, E: ParserExtra<'a, I>, - F: Fn(OA, O, &mut E::State) -> O, + F: Fn(OA, O, &mut MapExtra<'a, '_, '_, I, E>) -> O, { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult @@ -2550,7 +2294,7 @@ where let before = inp.offset(); match self.parser_a.next::(inp, &mut iter_state) { Ok(Some(out)) => { - M::combine_mut(&mut a_out, out, |a_out, item| a_out.push(item)); + M::combine_mut(&mut a_out, out, |a_out, item| a_out.push((item, before))); } Ok(None) => break, Err(()) => return Err(()), @@ -2568,10 +2312,9 @@ where let b_out = self.parser_b.go::(inp)?; Ok(M::combine(a_out, b_out, |a_out, b_out| { - let state = inp.state(); - a_out - .into_iter() - .rfold(b_out, |b, a| (self.folder)(a, b, state)) + a_out.into_iter().rfold(b_out, |b, (a, before)| { + (self.folder)(a, b, &mut MapExtra { before, inp }) + }) })) } @@ -2642,8 +2385,8 @@ where go_extra!(O); } -/// See [`Parser::foldl_with_state`]. -pub struct FoldlWithState { +/// See [`Parser::foldl_with`]. 
+pub struct FoldlWith { pub(crate) parser_a: A, pub(crate) parser_b: B, pub(crate) folder: F, @@ -2653,8 +2396,8 @@ pub struct FoldlWithState { pub(crate) phantom: EmptyPhantom<(OB, E)>, } -impl Copy for FoldlWithState {} -impl Clone for FoldlWithState { +impl Copy for FoldlWith {} +impl Clone for FoldlWith { fn clone(&self) -> Self { Self { parser_a: self.parser_a.clone(), @@ -2667,19 +2410,20 @@ impl Clone for FoldlWithState ParserSealed<'a, I, O, E> for FoldlWithState +impl<'a, I, F, A, B, O, OB, E> ParserSealed<'a, I, O, E> for FoldlWith where I: Input<'a>, A: Parser<'a, I, O, E>, B: IterParser<'a, I, OB, E>, E: ParserExtra<'a, I>, - F: Fn(O, OB, &mut E::State) -> O, + F: Fn(O, OB, &mut MapExtra<'a, '_, '_, I, E>) -> O, { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult where Self: Sized, { + let before_all = inp.offset(); let mut out = self.parser_a.go::(inp)?; let mut iter_state = self.parser_b.make_iter::(inp)?; loop { @@ -2687,8 +2431,16 @@ where let before = inp.offset(); match self.parser_b.next::(inp, &mut iter_state) { Ok(Some(b_out)) => { - let state = inp.state(); - out = M::combine(out, b_out, |out, b_out| (self.folder)(out, b_out, state)); + out = M::combine(out, b_out, |out, b_out| { + (self.folder)( + out, + b_out, + &mut MapExtra { + before: before_all, + inp, + }, + ) + }) } Ok(None) => break Ok(out), Err(()) => break Err(()), @@ -2862,7 +2614,7 @@ where I: Input<'a>, E: ParserExtra<'a, I>, A: Parser<'a, I, OA, E>, - F: Fn(OA, I::Span, &mut Emitter) -> U, + F: Fn(OA, &mut MapExtra<'a, '_, '_, I, E>, &mut Emitter) -> U, { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult @@ -2872,9 +2624,8 @@ where let before = inp.offset(); let out = self.parser.go::(inp)?; - let span = inp.span_since(before); let mut emitter = Emitter::new(); - let out = (self.validator)(out, span, &mut emitter); + let out = (self.validator)(out, &mut MapExtra { before, inp }, &mut emitter); for err in emitter.errors() { 
inp.emit(inp.offset, err); } diff --git a/src/lib.rs b/src/lib.rs index 4d456cef..88fcd46e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -440,24 +440,6 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: ParseResult::new(out, errs) } - /* - /// Map from a slice of the input based on the current parser's span to a value. - /// - /// The returned value may borrow data from the input slice, making this function very useful - /// for creating zero-copy AST output values - fn map_slice U>(self, f: F) -> MapSlice<'a, Self, I, O, E, F, U> - where - Self: Sized, - I: SliceInput<'a>, - { - MapSlice { - parser: self, - mapper: f, - phantom: EmptyPhantom::new(), - } - } - */ - /// Convert the output of this parser into a slice of the input, based on the current parser's /// span. fn to_slice(self) -> ToSlice @@ -539,30 +521,66 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// Map the output of this parser to another value, with the opportunity to get extra metadata. /// /// The output type of this parser is `U`, the same as the function's output. - fn map_with) -> U>( - self, - f: F, - ) -> MapWith - where - Self: Sized, - { - MapWith { - parser: self, - mapper: f, - phantom: EmptyPhantom::new(), - } - } - - /* - /// Works the same as [`Parser::map`], but the second argument for the mapper F is the parser's - /// current context. - /// - /// Primarily used to modify existing context using the result of a parser, before passing to a - /// `*_with_ctx` method. Also useful when the output of a parser is dependent on the currenct - /// context. 
/// /// # Examples /// + /// Using the span of the output in the mapping function: + /// + /// ``` + /// # use chumsky::prelude::*; + /// + /// // It's common for AST nodes to use a wrapper type that allows attaching span information to them + /// #[derive(Debug, PartialEq)] + /// pub struct Spanned(T, SimpleSpan); + /// + /// let ident = text::ascii::ident::<_, _, extra::Err>>() + /// .map_with(|ident, e| Spanned(ident, e.span())) // Equivalent to `.map_with_span(|ident, span| Spanned(ident, span))` + /// .padded(); + /// + /// assert_eq!(ident.parse("hello").into_result(), Ok(Spanned("hello", (0..5).into()))); + /// assert_eq!(ident.parse(" hello ").into_result(), Ok(Spanned("hello", (7..12).into()))); + /// ``` + /// + /// Using the parser state in the mapping function to intern strings: + /// + /// ``` + /// # use chumsky::prelude::*; + /// use std::ops::Range; + /// use lasso::{Rodeo, Spur}; + /// + /// // It's common for AST nodes to use interned versions of identifiers + /// // Keys are generally smaller, faster to compare, and can be `Copy` + /// #[derive(Copy, Clone)] + /// pub struct Ident(Spur); + /// + /// let ident = text::ascii::ident::<_, _, extra::Full, Rodeo, ()>>() + /// .map_with(|ident, e| Ident(e.state().get_or_intern(ident))) + /// .padded() + /// .repeated() + /// .at_least(1) + /// .collect::>(); + /// + /// // Test out parser + /// + /// let mut interner = Rodeo::new(); + /// + /// match ident.parse_with_state("hello", &mut interner).into_result() { + /// Ok(idents) => { + /// assert_eq!(interner.resolve(&idents[0].0), "hello"); + /// } + /// Err(e) => panic!("Parsing Failed: {:?}", e), + /// } + /// + /// match ident.parse_with_state("hello hello", &mut interner).into_result() { + /// Ok(idents) => { + /// assert_eq!(idents[0].0, idents[1].0); + /// } + /// Err(e) => panic!("Parsing Failed: {:?}", e), + /// } + /// ``` + /// + /// Using the parse context in the mapping function: + /// /// ``` /// # use chumsky::{prelude::*, error::Simple}; 
/// @@ -573,7 +591,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// .configure(|cfg, ctx: &String| cfg.seq(ctx.clone())) /// .then_ignore(end()), /// any() - /// .map_with_ctx(|x, ctx| format!("{x}{ctx}")) + /// .map_with(|x, e| format!("{x}{}", e.ctx())) /// .ignore_with_ctx(chain), /// )) /// }) @@ -584,17 +602,19 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// assert_eq!(palindrome_parser().parse("hello olleh").into_result().as_deref(), Ok(" olleh")); /// assert!(palindrome_parser().parse("abccb").into_result().is_err()); /// ``` - fn map_with_ctx U>(self, f: F) -> MapWithContext + fn map_with) -> U>( + self, + f: F, + ) -> MapWith where Self: Sized, { - MapWithContext { + MapWith { parser: self, mapper: f, phantom: EmptyPhantom::new(), } } - */ /// Map the output of this parser to another value. /// If the output of this parser isn't a tuple, use [`Parser::map`]. @@ -649,41 +669,6 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: } } - /* - /// Map the output of this parser to another value, making use of the pattern's span when doing so. - /// - /// This is very useful when generating an AST that attaches a span to each AST node. - /// - /// The output type of this parser is `U`, the same as the function's output. 
- /// - /// # Examples - /// - /// ``` - /// # use chumsky::prelude::*; - /// - /// // It's common for AST nodes to use a wrapper type that allows attaching span information to them - /// #[derive(Debug, PartialEq)] - /// pub struct Spanned(T, SimpleSpan); - /// - /// let ident = text::ascii::ident::<_, _, extra::Err>>() - /// .map_with_span(Spanned) // Equivalent to `.map_with_span(|ident, span| Spanned(ident, span))` - /// .padded(); - /// - /// assert_eq!(ident.parse("hello").into_result(), Ok(Spanned("hello", (0..5).into()))); - /// assert_eq!(ident.parse(" hello ").into_result(), Ok(Spanned("hello", (7..12).into()))); - /// ``` - fn map_with_span U>(self, f: F) -> MapWithSpan - where - Self: Sized, - { - MapWithSpan { - parser: self, - mapper: f, - phantom: EmptyPhantom::new(), - } - } - */ - /// Transform the output of this parser to the pattern's span. /// /// This is commonly used when you know what pattern you've parsed and are only interested in the span of the @@ -735,69 +720,6 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: } } - /* - /// Map the output of this parser to another value, making use of the parser's state when doing so. - /// - /// This is very useful for parsing non context-free grammars. - /// - /// The output type of this parser is `U`, the same as the function's output. 
- /// - /// # Examples - /// - /// ## General - /// - /// ``` - /// # use chumsky::prelude::*; - /// use std::ops::Range; - /// use lasso::{Rodeo, Spur}; - /// - /// // It's common for AST nodes to use interned versions of identifiers - /// // Keys are generally smaller, faster to compare, and can be `Copy` - /// #[derive(Copy, Clone)] - /// pub struct Ident(Spur); - /// - /// let ident = text::ascii::ident::<_, _, extra::Full, Rodeo, ()>>() - /// .map_with_state(|ident, span, state| Ident(state.get_or_intern(ident))) - /// .padded() - /// .repeated() - /// .at_least(1) - /// .collect::>(); - /// - /// // Test out parser - /// - /// let mut interner = Rodeo::new(); - /// - /// match ident.parse_with_state("hello", &mut interner).into_result() { - /// Ok(idents) => { - /// assert_eq!(interner.resolve(&idents[0].0), "hello"); - /// } - /// Err(e) => panic!("Parsing Failed: {:?}", e), - /// } - /// - /// match ident.parse_with_state("hello hello", &mut interner).into_result() { - /// Ok(idents) => { - /// assert_eq!(idents[0].0, idents[1].0); - /// } - /// Err(e) => panic!("Parsing Failed: {:?}", e), - /// } - /// ``` - /// - /// See [`Parser::foldl_with_state`] for an example showing arena allocation via parser state. - fn map_with_state U>( - self, - f: F, - ) -> MapWithState - where - Self: Sized, - { - MapWithState { - parser: self, - mapper: f, - phantom: EmptyPhantom::new(), - } - } - */ - /// After a successful parse, apply a fallible function to the output. If the function produces an error, treat it /// as a parsing error. /// @@ -830,29 +752,6 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: } } - /* - /// After a successful parse, apply a fallible function to the output, making use of the parser's state when - /// doing so. If the function produces an error, treat it as a parsing error. 
- /// - /// If you wish parsing of this pattern to continue when an error is generated instead of halting, consider using - /// [`Parser::validate`] instead. - /// - /// The output type of this parser is `U`, the [`Ok`] return value of the function. - fn try_map_with_state Result>( - self, - f: F, - ) -> TryMapWithState - where - Self: Sized, - { - TryMapWithState { - parser: self, - mapper: f, - phantom: EmptyPhantom::new(), - } - } - */ - /// After a successful parse, apply a fallible function to the output, with the opportunity to get extra metadata. /// If the function produces an error, treat it as a parsing error. /// @@ -1652,7 +1551,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// /// let sum = int /// .clone() - /// .foldl_with_state(just('+').ignore_then(int).repeated(), |a, b, state| (a + b) * *state); + /// .foldl_with(just('+').ignore_then(int).repeated(), |a, b, e| (a + b) * *e.state()); /// /// let mut multiplier = 2i32; /// assert_eq!(sum.parse_with_state("1+12+3+9", &mut multiplier).into_result(), Ok(134)); @@ -1689,11 +1588,11 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// e.state().insert(Expr::Int(s.parse().unwrap())) /// ); /// - /// let sum = int.foldl_with_state( + /// let sum = int.foldl_with( /// just('+').padded().ignore_then(int).repeated(), - /// |a: NodeId, b: NodeId, state: &mut NodeArena| { + /// |a: NodeId, b: NodeId, e| { /// // Inserting an item into the arena returns its ID - /// state.insert(Expr::Add(a, b)) + /// e.state().insert(Expr::Add(a, b)) /// } /// ); /// @@ -1708,13 +1607,13 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// } /// ``` #[cfg_attr(debug_assertions, track_caller)] - fn foldl_with_state(self, other: B, f: F) -> FoldlWithState + fn foldl_with(self, other: B, f: F) -> FoldlWith where - F: Fn(O, OB, &mut E::State) -> O, + F: Fn(O, OB, &mut MapExtra<'a, '_, '_, I, E>) -> O, B: IterParser<'a, I, OB, 
E>, Self: Sized, { - FoldlWithState { + FoldlWith { parser_a: self, parser_b: other, folder: f, @@ -1953,8 +1852,8 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// let large_int = text::int::<_, _, extra::Err>>(10) /// .from_str() /// .unwrapped() - /// .validate(|x: u32, span, emitter| { - /// if x < 256 { emitter.emit(Rich::custom(span, format!("{} must be 256 or higher.", x))) } + /// .validate(|x: u32, e, emitter| { + /// if x < 256 { emitter.emit(Rich::custom(e.span(), format!("{} must be 256 or higher.", x))) } /// x /// }); /// @@ -1972,8 +1871,8 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// let large_int_val = text::int::<_, _, extra::Err>>(10) /// .from_str() /// .unwrapped() - /// .validate(|x: u32, span, emitter| { - /// if x < 256 { emitter.emit(Rich::custom(span, format!("{} must be 256 or higher", x))) } + /// .validate(|x: u32, e, emitter| { + /// if x < 256 { emitter.emit(Rich::custom(e.span(), format!("{} must be 256 or higher", x))) } /// x /// }); /// @@ -2026,7 +1925,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: fn validate(self, f: F) -> Validate where Self: Sized, - F: Fn(O, I::Span, &mut Emitter) -> U, + F: Fn(O, &mut MapExtra<'a, '_, '_, I, E>, &mut Emitter) -> U, { Validate { parser: self, @@ -2618,9 +2517,9 @@ where /// let signed = just('+').to(1) /// .or(just('-').to(-1)) /// .repeated() - /// .foldr_with_state(int, |a, b, state| { - /// (*state) += 1; - /// (a * b) + /// .foldr_with(int, |a, b, e| { + /// *e.state() += 1; + /// a * b /// }); /// /// // Test our parser @@ -2632,13 +2531,13 @@ where /// /// #[cfg_attr(debug_assertions, track_caller)] - fn foldr_with_state(self, other: B, f: F) -> FoldrWithState + fn foldr_with(self, other: B, f: F) -> FoldrWith where - F: Fn(O, OA, &mut E::State) -> OA, + F: Fn(O, OA, &mut MapExtra<'a, '_, '_, I, E>) -> OA, B: Parser<'a, I, OA, E>, Self: Sized, { - FoldrWithState { + 
FoldrWith { parser_a: self, parser_b: other, folder: f, @@ -3320,10 +3219,10 @@ mod tests { #[test] #[should_panic] #[cfg(debug_assertions)] - fn debug_assert_foldl_with_state() { + fn debug_assert_foldl_with() { let mut state = 100; empty::<&str, extra::Full>() - .foldl_with_state(empty().to(()).repeated(), |_, _, _| ()) + .foldl_with(empty().to(()).repeated(), |_, _, _| ()) .parse_with_state("a+b+c", &mut state); } @@ -3345,7 +3244,7 @@ mod tests { empty::<&str, extra::Default>() .to(()) .repeated() - .foldr_with_state(empty(), |_, _, _| ()) + .foldr_with(empty(), |_, _, _| ()) .parse_with_state("a+b+c", &mut ()); } From fd788929d1b8b1f957dc78a5bf583827a186bc63 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Sun, 8 Oct 2023 22:54:33 +0100 Subject: [PATCH 24/64] Use new features in examples --- examples/nano_rust.rs | 103 +++++++++++++++++++++--------------------- src/either.rs | 3 +- 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/examples/nano_rust.rs b/examples/nano_rust.rs index 0548b2dc..9dd4d8e3 100644 --- a/examples/nano_rust.rs +++ b/examples/nano_rust.rs @@ -49,7 +49,7 @@ fn lexer<'src>( // A parser for numbers let num = text::int(10) .then(just('.').then(text::digits(10)).or_not()) - .slice() + .to_slice() .from_str() .unwrapped() .map(Token::Num); @@ -58,10 +58,15 @@ fn lexer<'src>( let str_ = just('"') .ignore_then(none_of('"').repeated()) .then_ignore(just('"')) - .map_slice(Token::Str); + .to_slice() + .map(Token::Str); // A parser for operators - let op = one_of("+*-/!=").repeated().at_least(1).map_slice(Token::Op); + let op = one_of("+*-/!=") + .repeated() + .at_least(1) + .to_slice() + .map(Token::Op); // A parser for control characters (delimiters, semicolons, etc.) 
let ctrl = one_of("()[]{};,").map(Token::Ctrl); @@ -87,7 +92,7 @@ fn lexer<'src>( .padded(); token - .map_with_span(|tok, span| (tok, span)) + .map_with(|tok, e| (tok, e.span())) .padded_by(comment.repeated()) .padded() // If we encounter an error, skip and attempt to lex the next character as a token instead @@ -161,7 +166,7 @@ enum Expr<'src> { Let(&'src str, Box>, Box>), Then(Box>, Box>), Binary(Box>, BinaryOp, Box>), - Call(Box>, Vec>), + Call(Box>, Spanned>>), If(Box>, Box>, Box>), Print(Box>), } @@ -208,7 +213,7 @@ fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser< } .labelled("value"); - let ident = select! { Token::Ident(ident) => ident.clone() }.labelled("identifier"); + let ident = select! { Token::Ident(ident) => ident }.labelled("identifier"); // A list of expressions let items = expr @@ -243,7 +248,7 @@ fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser< .delimited_by(just(Token::Ctrl('(')), just(Token::Ctrl(')'))), ) .map(|expr| Expr::Print(Box::new(expr)))) - .map_with_span(|expr, span| (expr, span)) + .map_with(|expr, e| (expr, e.span())) // Atoms can also just be normal expressions, but surrounded with parentheses .or(expr .clone() @@ -271,25 +276,23 @@ fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser< .boxed(); // Function calls have very high precedence so we prioritise them - let call = atom.foldl( + let call = atom.foldl_with( items .delimited_by(just(Token::Ctrl('(')), just(Token::Ctrl(')'))) - .map_with_span(|args, span: Span| (args, span)) + .map_with(|args, e| (args, e.span())) .repeated(), - |f, args| { - let span = f.1.start..args.1.end; - (Expr::Call(Box::new(f), args.0), span.into()) - }, + |f, args, e| (Expr::Call(Box::new(f), args), e.span()), ); // Product ops (multiply and divide) have equal precedence let op = just(Token::Op("*")) .to(BinaryOp::Mul) .or(just(Token::Op("/")).to(BinaryOp::Div)); - let product = call.clone().foldl(op.then(call).repeated(), |a, (op, b)| { - let span = a.1.start..b.1.end; - 
(Expr::Binary(Box::new(a), op, Box::new(b)), span.into()) - }); + let product = call + .clone() + .foldl_with(op.then(call).repeated(), |a, (op, b), e| { + (Expr::Binary(Box::new(a), op, Box::new(b)), e.span()) + }); // Sum ops (add and subtract) have equal precedence let op = just(Token::Op("+")) @@ -297,19 +300,19 @@ fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser< .or(just(Token::Op("-")).to(BinaryOp::Sub)); let sum = product .clone() - .foldl(op.then(product).repeated(), |a, (op, b)| { - let span = a.1.start..b.1.end; - (Expr::Binary(Box::new(a), op, Box::new(b)), span.into()) + .foldl_with(op.then(product).repeated(), |a, (op, b), e| { + (Expr::Binary(Box::new(a), op, Box::new(b)), e.span()) }); // Comparison ops (equal, not-equal) have equal precedence let op = just(Token::Op("==")) .to(BinaryOp::Eq) .or(just(Token::Op("!=")).to(BinaryOp::NotEq)); - let compare = sum.clone().foldl(op.then(sum).repeated(), |a, (op, b)| { - let span = a.1.start..b.1.end; - (Expr::Binary(Box::new(a), op, Box::new(b)), span.into()) - }); + let compare = sum + .clone() + .foldl_with(op.then(sum).repeated(), |a, (op, b), e| { + (Expr::Binary(Box::new(a), op, Box::new(b)), e.span()) + }); compare.labelled("expression").as_context() }); @@ -338,15 +341,15 @@ fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser< .ignore_then(block.clone().or(if_)) .or_not(), ) - .map_with_span(|((cond, a), b), span: Span| { + .map_with(|((cond, a), b), e| { ( Expr::If( Box::new(cond), Box::new(a), // If an `if` expression has no trailing `else` block, we magic up one that just produces null - Box::new(b.unwrap_or_else(|| (Expr::Value(Value::Null), span.clone()))), + Box::new(b.unwrap_or_else(|| (Expr::Value(Value::Null), e.span()))), ), - span, + e.span(), ) }) }); @@ -356,9 +359,8 @@ fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser< let block_chain = block_expr .clone() - .foldl(block_expr.clone().repeated(), |a, b| { - let span = a.1.start..b.1.end; - (Expr::Then(Box::new(a), 
Box::new(b)), span.into()) + .foldl_with(block_expr.clone().repeated(), |a, b, e| { + (Expr::Then(Box::new(a), Box::new(b)), e.span()) }); let block_recovery = nested_delimiters( @@ -385,22 +387,19 @@ fn expr_parser<'tokens, 'src: 'tokens>() -> impl Parser< ]) .ignored(), )) - .foldl( + .foldl_with( just(Token::Ctrl(';')).ignore_then(expr.or_not()).repeated(), - |a, b| { - // This allows creating a span that covers the entire Then expression. - // b_end is the end of b if it exists, otherwise it is the end of a. - let a_start = a.1.start; - let b_end = b.as_ref().map(|b| b.1.end).unwrap_or(a.1.end); + |a, b, e| { + let span: Span = e.span(); ( Expr::Then( Box::new(a), - // If there is no b expression then its span is empty. - Box::new(b.unwrap_or_else(|| { - (Expr::Value(Value::Null), (b_end..b_end).into()) - })), + // If there is no b expression then its span is the end of the statement/block. + Box::new( + b.unwrap_or_else(|| (Expr::Value(Value::Null), span.to_end())), + ), ), - (a_start..b_end).into(), + span, ) }, ) @@ -413,7 +412,7 @@ fn funcs_parser<'tokens, 'src: 'tokens>() -> impl Parser< HashMap<&'src str, Func<'src>>, extra::Err, Span>>, > + Clone { - let ident = select! { Token::Ident(ident) => ident.clone() }; + let ident = select! 
{ Token::Ident(ident) => ident }; // Argument lists are just identifiers separated by commas, surrounded by parentheses let args = ident @@ -426,11 +425,11 @@ fn funcs_parser<'tokens, 'src: 'tokens>() -> impl Parser< let func = just(Token::Fn) .ignore_then( ident - .map_with_span(|name, span| (name, span)) + .map_with(|name, e| (name, e.span())) .labelled("function name"), ) .then(args) - .map_with_span(|start, span| (start, span)) + .map_with(|start, e| (start, e.span())) .then( expr_parser() .delimited_by(just(Token::Ctrl('{')), just(Token::Ctrl('}'))) @@ -453,7 +452,7 @@ fn funcs_parser<'tokens, 'src: 'tokens>() -> impl Parser< .validate(|fs, _, emitter| { let mut funcs = HashMap::new(); for ((name, name_span), f) in fs { - if funcs.insert(name.clone(), f).is_some() { + if funcs.insert(name, f).is_some() { emitter.emit(Rich::custom( name_span.clone(), format!("Function '{}' already exists", name), @@ -488,14 +487,14 @@ fn eval_expr<'src>( .rev() .find(|(l, _)| l == name) .map(|(_, v)| v.clone()) - .or_else(|| Some(Value::Func(name.clone())).filter(|_| funcs.contains_key(name))) + .or_else(|| Some(Value::Func(name)).filter(|_| funcs.contains_key(name))) .ok_or_else(|| Error { span: expr.1.clone(), msg: format!("No such variable '{}' in scope", name), })?, Expr::Let(local, val, body) => { let val = eval_expr(val, funcs, stack)?; - stack.push((local.clone(), val)); + stack.push((local, val)); let res = eval_expr(body, funcs, stack)?; stack.pop(); res @@ -531,16 +530,16 @@ fn eval_expr<'src>( match f { Value::Func(name) => { let f = &funcs[&name]; - let mut stack = if f.args.len() != args.len() { + let mut stack = if f.args.len() != args.0.len() { return Err(Error { span: expr.1.clone(), - msg: format!("'{}' called with wrong number of arguments (expected {}, found {})", name, f.args.len(), args.len()), + msg: format!("'{}' called with wrong number of arguments (expected {}, found {})", name, f.args.len(), args.0.len()), }); } else { f.args .iter() - 
.zip(args.iter()) - .map(|(name, arg)| Ok((name.clone(), eval_expr(arg, funcs, stack)?))) + .zip(args.0.iter()) + .map(|(name, arg)| Ok((*name, eval_expr(arg, funcs, stack)?))) .collect::>()? }; eval_expr(&f.body, funcs, &mut stack)? @@ -583,7 +582,7 @@ fn main() { let parse_errs = if let Some(tokens) = &tokens { let (ast, parse_errs) = funcs_parser() - .map_with_span(|ast, span| (ast, span)) + .map_with(|ast, e| (ast, e.span())) .parse(tokens.as_slice().spanned((src.len()..src.len()).into())) .into_output_errors(); diff --git a/src/either.rs b/src/either.rs index 20f59e15..a24f91fc 100644 --- a/src/either.rs +++ b/src/either.rs @@ -45,7 +45,8 @@ mod tests { .repeated() .at_least(1) .at_most(3) - .map_slice(|b: &str| b.parse::().unwrap()) + .to_slice() + .map(|b: &str| b.parse::().unwrap()) .padded() .separated_by(just(',').padded()) .allow_trailing() From c9582e5f32e515de7ea7c2de930fc36814e5578a Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Sun, 8 Oct 2023 22:58:20 +0100 Subject: [PATCH 25/64] Make clippy happy --- src/combinator.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/combinator.rs b/src/combinator.rs index 54aa8a63..c712d14c 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -2287,7 +2287,7 @@ where where Self: Sized, { - let mut a_out = M::bind(|| Vec::new()); + let mut a_out = M::bind(Vec::new); let mut iter_state = self.parser_a.make_iter::(inp)?; loop { #[cfg(debug_assertions)] From 52d098cd0387dfa3cd7569819a9a2ef198c7d69e Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Sun, 8 Oct 2023 23:07:34 +0100 Subject: [PATCH 26/64] Fix docs --- guide/key_concepts.md | 8 ++++---- guide/meet_the_parsers.md | 8 +++----- guide/technical_notes.md | 6 +++--- src/combinator.rs | 2 +- src/extra.rs | 4 ++-- src/lib.rs | 4 ++-- 6 files changed, 15 insertions(+), 17 deletions(-) diff --git a/guide/key_concepts.md b/guide/key_concepts.md index 114204b4..b1fb2481 100644 --- a/guide/key_concepts.md +++ b/guide/key_concepts.md @@ 
-144,11 +144,11 @@ error types that have different tradeoffs: erroneously found - [`Rich`]: a very information-rich error type that records: - + - The span that triggered the error - + - The token that was erroneously found instead - + - A list of tokens or patterns that were expected at the span location instead [`Rich`] also supports many additional features such as custom error messages, labelling (see [`Parser::labelled`]) and @@ -172,4 +172,4 @@ the [`Span`] trait. Additionally, chumsky comes with a built-in span type, [`Sim implementations for types in Rust's standard library such as [`std::ops::Range`]. Chumsky will use its internal knowledge of your parser to generate spans for you whenever you need them, such as for -attaching to nodes of an abstract syntax tree. See [`Parser::map_with_span`] for more information. \ No newline at end of file +attaching to nodes of an abstract syntax tree. See [`Parser::map_with`] for more information. diff --git a/guide/meet_the_parsers.md b/guide/meet_the_parsers.md index a1c7e441..a563b869 100644 --- a/guide/meet_the_parsers.md +++ b/guide/meet_the_parsers.md @@ -78,10 +78,8 @@ Combinators that manipulate, generate, or combine the output of parsers in some | Name | Example | Description | |---------------------------------|---------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | [`Parser::map`] | `a.map(...)` | Map the output of a parser using the given mapping function. | -| [`Parser::map_with_span`] | `a.map_with_span(...)` | Map the output of a parser using the given mapping function, with access to the span that the output corresponds to. | -| [`Parser::map_with_state`] | `a.map_with_state(...)` | Map the output of a parser using the given mapping function, with access to the parser state (see [`Parser::parse_with_state`] for more information). 
| -| [`Parser::map_slice`] | `a.map_slice(...)` | Parse a pattern. Discard the output of the pattern and map a slice of the input that the pattern corresponds to. Requires input types that implement [`SliceInput`]. | -| [`Parser::slice`] | `a.slice()` | Parse a pattern. Discard the output of the pattern and instead use a slice of the input that the pattern corresponds to as the output. Requires inputs that implement [`SliceInput`]. | +| [`Parser::map_with`] | `a.map_with(...)` | Map the output of a parser using the given mapping function, with access to metadata associated with the output. | +| [`Parser::to_slice`] | `a.to_slice()` | Parse a pattern. Discard the output of the pattern and instead use a slice of the input that the pattern corresponds to as the output. Requires inputs that implement [`SliceInput`]. | | [`Parser::to`] | `a.to(x)` | Parse a pattern, ignoring the output value and using a constant value as the output value instead. | | [`Parser::ignored`] | `a.ignored()` | Parse a pattern, ignoring the output value and using [`()`] as the output value instead. | | [`IterParser::collect`] | `a.repeated().collect::>()` | Collects elements of an [`IterParser`] into a type implementing [`Container`]. | @@ -98,7 +96,7 @@ Combinators that manipulate or emit errors, along with fallibly validating parse | [`Parser::map_err`] | `a.map_err(...)` | Parse a pattern. On failure, map the parser error to another value. Often used to customise error messages or add extra information to them. | | [`Parser::map_err_with_state`] | `a.lazy()` | Like [`Parser::map_err`], but provides access to the parser state (see [`Parser::parse_with_state`] for more information). | | [`Parser::try_map`] | `a.try_map(...)` | Map the output of a parser using the given fallible mapping function. If the function produces an error, the parser fails with that error. 
| -| [`Parser::try_map_with_state`] | `a.try_map_with_state(...)` | Map the output of a parser using the given fallible mapping function, with access to the parser state. If the function produces an error, the parser fails with that error. | +| [`Parser::try_map_with`] | `a.try_map_with(...)` | | Map the output of a parser using the given fallible mapping function, with access to output metadata. If the function produces an error, the parser fails with that error. | | [`Parser::validate`] | `a.validate(...)` | Parse a pattern. On success, map the output to another value with the opportunity to emit extra secondary errors. Commonly used to check the validity of patterns in the parser. | | [`Parser::filter`] | `any().filter(char::is_lowercase)` | Parse a pattern and apply the given filtering function to the output. If the filter function returns [`false`], the parser fails. | | [`Parser::labelled`] | `a.labelled("a")` | Parse a pattern, labelling it. What exactly this does depends on the error type, but it is generally used to give a pattern a more general name (for example, "expression"). | diff --git a/guide/technical_notes.md b/guide/technical_notes.md index 261ab5ae..9a884974 100644 --- a/guide/technical_notes.md +++ b/guide/technical_notes.md @@ -34,6 +34,6 @@ For this reason, unless otherwise specified, any closures/functions used inline *semantically* [pure](https://en.wikipedia.org/wiki/Purely_functional_programming): that is, you should not assume that they are called any specific number of times. This does not mean that they are not permitted to have side effects, but that those side effects should be irrelevant to the correct functioning of the parser. 
For example, -[string interning](https://en.wikipedia.org/wiki/String_interning) within [`Parser::map_with_state`] is an impure -operation, but this impurity does not affect the correct functioning of the parser: interning a string that goes unused -can be done any number of times or not at all without resulting in bad behaviour. +[string interning](https://en.wikipedia.org/wiki/String_interning) within [`Parser::map_with`] is an impure operation, +but this impurity does not affect the correct functioning of the parser: interning a string that goes unused can be done +any number of times or not at all without resulting in bad behaviour. diff --git a/src/combinator.rs b/src/combinator.rs index c712d14c..79182573 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -185,7 +185,7 @@ where } } -/// See [`Parser::slice`] +/// See [`Parser::to_slice`] pub struct ToSlice { pub(crate) parser: A, #[allow(dead_code)] diff --git a/src/extra.rs b/src/extra.rs index 169139c8..e95cd890 100644 --- a/src/extra.rs +++ b/src/extra.rs @@ -24,7 +24,7 @@ where /// such as interned identifiers or position-dependent name resolution, however *cannot* influence /// the actual progress of the parser - for that, use [`Self::Context`]. /// - /// For examples of using this type, see [`Parser::map_with_state`] or [`Parser::foldl_with_state`]. + /// For examples of using this type, see [`Parser::map_with`] or [`Parser::foldl_with`]. type State: 'a; /// Context used for parser configuration. This is used to provide context-sensitive parsing of *input*. /// Context-sensitive parsing in chumsky is always left-hand sensitive - context for the parse must originate @@ -46,7 +46,7 @@ pub type Err = Full; /// Use `State` or `Full` as the `Extra` type parameter of a parser to use a custom state type. /// You can then use `parser().parse_with_state(&mut S)` to parse with a custom state. /// -/// See [`Parser::map_with_state`] for examples. +/// See [`Parser::map_with`] for examples. 
pub type State = Full; /// Use specified context type, but default other types. See [`ParserExtra`] for more details. diff --git a/src/lib.rs b/src/lib.rs index 88fcd46e..613c3b17 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -368,7 +368,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: } /// Parse a stream of tokens, yielding an output if possible, and any errors encountered along the way. - /// The provided state will be passed on to parsers that expect it, such as [`map_with_state`](Parser::map_with_state). + /// The provided state will be passed on to parsers that expect it, such as [`map_with`](Parser::map_with). /// /// If `None` is returned (i.e: parsing failed) then there will *always* be at least one item in the error `Vec`. /// If you want to just use a default state value, use [`Parser::parse`] instead. @@ -2246,7 +2246,7 @@ where /// /// Chumsky distinguishes 'state' from 'context'. State is not able to change what input a parser /// accepts, but may be used to change the contents of the type it emits. In this way state is expected -/// to be idempotent - combinators such as [`Parser::map_with_state`] are allowed to not call the +/// to be idempotent - combinators such as [`Parser::map_with`] are allowed to not call the /// provided closure at all if they don't emit any output. Context and configuration, on the other hand, /// is used to change what kind of input a parser may accept, and thus must always be evaluated. 
Context /// isn't usable in any map combinator however - while it may affect accepted input, it is not expected From ca4d85ae6b65ab26436323ae40ddf234c54aee6c Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 28 Aug 2023 19:04:33 +0100 Subject: [PATCH 27/64] Pratt parsing with Fn traits --- src/lib.rs | 11 ++ src/pratt2.rs | 296 +++++++++++++++++++++++++++++++++++++++++++++++ src/primitive.rs | 5 - 3 files changed, 307 insertions(+), 5 deletions(-) create mode 100644 src/pratt2.rs diff --git a/src/lib.rs b/src/lib.rs index 613c3b17..b25fa355 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -88,6 +88,8 @@ pub mod label; pub mod number; #[cfg(feature = "pratt")] pub mod pratt; +#[cfg(feature = "pratt")] +pub mod pratt2; pub mod primitive; mod private; pub mod recovery; @@ -2225,6 +2227,15 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: _phantom: EmptyPhantom::new(), } } + + /// TODO + #[cfg(feature = "pratt")] + fn pratt2(self, ops: Ops) -> pratt2::Pratt + where + Self: Sized, + { + pratt2::Pratt { atom: self, ops } + } } #[cfg(feature = "nightly")] diff --git a/src/pratt2.rs b/src/pratt2.rs new file mode 100644 index 00000000..9a517039 --- /dev/null +++ b/src/pratt2.rs @@ -0,0 +1,296 @@ +#![allow(missing_docs)] + +use super::*; + +trait Operator<'a, I, O, E> +where + I: Input<'a>, + E: ParserExtra<'a, I>, +{ + type Op; + type OpParser: Parser<'a, I, Self::Op, E>; + const INFIX: bool = false; + const PREFIX: bool = false; + + fn op_parser(&self) -> &Self::OpParser; + fn associativity(&self) -> Associativity; + fn fold_infix(&self, lhs: O, op: Self::Op, rhs: O) -> O { + unreachable!() + } + fn fold_prefix(&self, op: Self::Op, rhs: O) -> O { + unreachable!() + } +} + +#[derive(Copy, Clone, PartialEq)] +pub enum Associativity { + Left(u16), + Right(u16), +} + +impl Associativity { + fn left_power(&self) -> u16 { + match self { + Self::Left(x) => x * 2, + Self::Right(x) => x * 2 + 1, + } + } + + fn right_power(&self) -> u16 { + match 
self { + Self::Left(x) => x * 2 + 1, + Self::Right(x) => x * 2, + } + } +} + +pub struct Infix { + op_parser: A, + f: F, + associativity: Associativity, + #[allow(dead_code)] + phantom: EmptyPhantom<(Op, Args)>, +} + +pub fn left(binding_power: u16) -> Associativity { + Associativity::Left(binding_power) +} +pub fn right(binding_power: u16) -> Associativity { + Associativity::Right(binding_power) +} + +pub const fn binary( + associativity: Associativity, + op_parser: A, + f: F, +) -> Infix { + Infix { + op_parser, + f, + associativity, + phantom: EmptyPhantom::new(), + } +} + +impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Infix +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn(O, O) -> O, +{ + type Op = Op; + type OpParser = A; + const INFIX: bool = true; + + fn op_parser(&self) -> &Self::OpParser { + &self.op_parser + } + fn associativity(&self) -> Associativity { + self.associativity + } + fn fold_infix(&self, lhs: O, _op: Self::Op, rhs: O) -> O { + (self.f)(lhs, rhs) + } +} + +impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Infix +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn(O, Op, O) -> O, +{ + type Op = Op; + type OpParser = A; + const INFIX: bool = true; + + fn op_parser(&self) -> &Self::OpParser { + &self.op_parser + } + fn associativity(&self) -> Associativity { + self.associativity + } + fn fold_infix(&self, lhs: O, op: Self::Op, rhs: O) -> O { + (self.f)(lhs, op, rhs) + } +} + +pub struct Prefix { + op_parser: A, + f: F, + binding_power: u16, + #[allow(dead_code)] + phantom: EmptyPhantom<(Op, Args)>, +} + +pub const fn unary_prefix( + binding_power: u16, + op_parser: A, + f: F, +) -> Prefix { + Prefix { + op_parser, + f, + binding_power, + phantom: EmptyPhantom::new(), + } +} + +impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Prefix +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn(Op, O) -> O, +{ + type Op = Op; + type OpParser = A; 
+ const PREFIX: bool = true; + + fn op_parser(&self) -> &Self::OpParser { + &self.op_parser + } + fn associativity(&self) -> Associativity { + Associativity::Left(self.binding_power) + } + fn fold_prefix(&self, op: Self::Op, rhs: O) -> O { + (self.f)(op, rhs) + } +} + +pub struct Pratt { + pub(crate) atom: Atom, + pub(crate) ops: Ops, +} + +macro_rules! impl_pratt_for_tuple { + () => {}; + ($head:ident $($X:ident)*) => { + impl_pratt_for_tuple!($($X)*); + impl_pratt_for_tuple!(~ $head $($X)*); + }; + (~ $($X:ident)+) => { + #[allow(unused_variables, non_snake_case)] + impl<'a, Atom, $($X),*> Pratt { + fn pratt_go(&self, inp: &mut InputRef<'a, '_, I, E>, min_power: u16) -> PResult + where + I: Input<'a>, + E: ParserExtra<'a, I>, + Atom: Parser<'a, I, O, E>, + $($X: Operator<'a, I, O, E>),* + { + let pre_op = inp.save(); + let mut lhs = 'choice: { + let ($($X,)*) = &self.ops; + + $( + if $X::PREFIX { + match $X.op_parser().go::(inp) { + Ok(op) => { + match self.pratt_go::(inp, $X.associativity().right_power()) { + Ok(rhs) => break 'choice M::combine(op, rhs, |op, rhs| $X.fold_prefix(op, rhs)), + Err(()) => inp.rewind(pre_op), + } + }, + Err(()) => inp.rewind(pre_op), + } + } + )* + + self.atom.go::(inp)? 
+ }; + + 'luup: loop { + enum Op<$($X),*> { $($X($X)),* } + + let ($($X,)*) = &self.ops; + + let pre_op = inp.save(); + let op = 'choice: { + $( + if $X::INFIX { + match $X.op_parser().go::(inp) { + Ok(out) => break 'choice Op::$X(out), + Err(()) => inp.rewind(pre_op), + } + } + )* + + inp.rewind(pre_op); + break 'luup; + }; + + match op { + $( + Op::$X(op) => { + let assoc = $X.associativity(); + if assoc.left_power() < min_power { + inp.rewind(pre_op); + break + } else { + lhs = match self.pratt_go::(inp, assoc.right_power()) { + Ok(rhs) => M::combine( + M::combine(lhs, rhs, |lhs, rhs| (lhs, rhs)), + op, + |(lhs, rhs), op| $X.fold_infix(lhs, op, rhs), + ), + Err(()) => { inp.rewind(pre_op); break }, + } + } + }, + )* + } + } + + Ok(lhs) + } + } + + #[allow(unused_variables, non_snake_case)] + impl<'a, I, O, E, Atom, $($X),*> ParserSealed<'a, I, O, E> for Pratt + where + I: Input<'a>, + E: ParserExtra<'a, I>, + Atom: Parser<'a, I, O, E>, + $($X: Operator<'a, I, O, E>),* + { + fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { + self.pratt_go::(inp, 0) + } + + go_extra!(O); + } + }; +} + +impl_pratt_for_tuple!(A_ B_ C_ D_ E_ F_ G_ H_ I_ J_ K_ L_ M_ N_ O_ P_ Q_ R_ S_ T_ U_ V_ W_ X_ Y_ Z_); + +#[cfg(test)] +mod tests { + use super::*; + + fn parser<'a>() -> impl Parser<'a, &'a str, i64> { + let atom = text::int(10).padded().from_str::().unwrapped(); + + atom.pratt2(( + unary_prefix(2, just('-'), |_, x: i64| -x), + binary(left(0), just('+'), |l, r| l + r), + binary(left(0), just('-'), |l, r| l - r), + binary(left(1), just('*'), |l, r| l * r), + binary(left(1), just('/'), |l, _, r| l / r), + )) + } + + #[test] + fn precedence() { + assert_eq!(parser().parse("1 + 2 * 3").into_result(), Ok(7)); + assert_eq!(parser().parse("2 * 3 + 1").into_result(), Ok(7)); + } + + #[test] + fn unary() { + assert_eq!(parser().parse("-2").into_result(), Ok(-2)); + assert_eq!(parser().parse("-2 + 2").into_result(), Ok(0)); + } +} diff --git a/src/primitive.rs b/src/primitive.rs 
index 0219cbcc..67113894 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -84,11 +84,6 @@ where go_extra!(()); } -// impl<'b, T, C: Container> Container for &'b C { -// type Iter<'a> = C::Iter<'a>; -// fn iter(&self) -> Self::Iter<'_> { (*self).iter() } -// } - /// Configuration for [`just`], used in [`ConfigParser::configure`] pub struct JustCfg { seq: Option, From 69fe8a3ca0ad4953b0b71825ba6e4ea3a293f99e Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 28 Aug 2023 21:23:09 +0100 Subject: [PATCH 28/64] Added support for spans --- src/pratt2.rs | 70 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/src/pratt2.rs b/src/pratt2.rs index 9a517039..188e6400 100644 --- a/src/pratt2.rs +++ b/src/pratt2.rs @@ -14,10 +14,10 @@ where fn op_parser(&self) -> &Self::OpParser; fn associativity(&self) -> Associativity; - fn fold_infix(&self, lhs: O, op: Self::Op, rhs: O) -> O { + fn fold_infix(&self, lhs: O, op: Self::Op, rhs: O, span: I::Span) -> O { unreachable!() } - fn fold_prefix(&self, op: Self::Op, rhs: O) -> O { + fn fold_prefix(&self, op: Self::Op, rhs: O, span: I::Span) -> O { unreachable!() } } @@ -89,7 +89,7 @@ where fn associativity(&self) -> Associativity { self.associativity } - fn fold_infix(&self, lhs: O, _op: Self::Op, rhs: O) -> O { + fn fold_infix(&self, lhs: O, _op: Self::Op, rhs: O, _span: I::Span) -> O { (self.f)(lhs, rhs) } } @@ -111,11 +111,33 @@ where fn associativity(&self) -> Associativity { self.associativity } - fn fold_infix(&self, lhs: O, op: Self::Op, rhs: O) -> O { + fn fold_infix(&self, lhs: O, op: Self::Op, rhs: O, _span: I::Span) -> O { (self.f)(lhs, op, rhs) } } +impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Infix +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn(O, Op, O, I::Span) -> O, +{ + type Op = Op; + type OpParser = A; + const INFIX: bool = true; + + fn op_parser(&self) -> &Self::OpParser { + &self.op_parser + } + 
fn associativity(&self) -> Associativity { + self.associativity + } + fn fold_infix(&self, lhs: O, op: Self::Op, rhs: O, span: I::Span) -> O { + (self.f)(lhs, op, rhs, span) + } +} + pub struct Prefix { op_parser: A, f: F, @@ -154,11 +176,33 @@ where fn associativity(&self) -> Associativity { Associativity::Left(self.binding_power) } - fn fold_prefix(&self, op: Self::Op, rhs: O) -> O { + fn fold_prefix(&self, op: Self::Op, rhs: O, _span: I::Span) -> O { (self.f)(op, rhs) } } +impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Prefix +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn(Op, O, I::Span) -> O, +{ + type Op = Op; + type OpParser = A; + const PREFIX: bool = true; + + fn op_parser(&self) -> &Self::OpParser { + &self.op_parser + } + fn associativity(&self) -> Associativity { + Associativity::Left(self.binding_power) + } + fn fold_prefix(&self, op: Self::Op, rhs: O, span: I::Span) -> O { + (self.f)(op, rhs, span) + } +} + pub struct Pratt { pub(crate) atom: Atom, pub(crate) ops: Ops, @@ -180,7 +224,7 @@ macro_rules! impl_pratt_for_tuple { Atom: Parser<'a, I, O, E>, $($X: Operator<'a, I, O, E>),* { - let pre_op = inp.save(); + let pre_expr = inp.save(); let mut lhs = 'choice: { let ($($X,)*) = &self.ops; @@ -189,11 +233,14 @@ macro_rules! impl_pratt_for_tuple { match $X.op_parser().go::(inp) { Ok(op) => { match self.pratt_go::(inp, $X.associativity().right_power()) { - Ok(rhs) => break 'choice M::combine(op, rhs, |op, rhs| $X.fold_prefix(op, rhs)), - Err(()) => inp.rewind(pre_op), + Ok(rhs) => break 'choice M::combine(op, rhs, |op, rhs| { + let span = inp.span_since(pre_expr.offset()); + $X.fold_prefix(op, rhs, span) + }), + Err(()) => inp.rewind(pre_expr), } }, - Err(()) => inp.rewind(pre_op), + Err(()) => inp.rewind(pre_expr), } } )* @@ -233,7 +280,10 @@ macro_rules! 
impl_pratt_for_tuple { Ok(rhs) => M::combine( M::combine(lhs, rhs, |lhs, rhs| (lhs, rhs)), op, - |(lhs, rhs), op| $X.fold_infix(lhs, op, rhs), + |(lhs, rhs), op| { + let span = inp.span_since(pre_expr.offset()); + $X.fold_infix(lhs, op, rhs, span) + }, ), Err(()) => { inp.rewind(pre_op); break }, } From 189a78218fee66194e06606437f5fd56ef00d523 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Tue, 29 Aug 2023 14:41:09 +0100 Subject: [PATCH 29/64] Simplified logic --- src/pratt2.rs | 56 +++++++++++++++++++-------------------------------- 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/src/pratt2.rs b/src/pratt2.rs index 188e6400..17659e68 100644 --- a/src/pratt2.rs +++ b/src/pratt2.rs @@ -232,7 +232,7 @@ macro_rules! impl_pratt_for_tuple { if $X::PREFIX { match $X.op_parser().go::(inp) { Ok(op) => { - match self.pratt_go::(inp, $X.associativity().right_power()) { + match self.pratt_go::(inp, $X.associativity().left_power()) { Ok(rhs) => break 'choice M::combine(op, rhs, |op, rhs| { let span = inp.span_since(pre_expr.offset()); $X.fold_prefix(op, rhs, span) @@ -248,49 +248,35 @@ macro_rules! impl_pratt_for_tuple { self.atom.go::(inp)? 
}; - 'luup: loop { - enum Op<$($X),*> { $($X($X)),* } - + loop { let ($($X,)*) = &self.ops; let pre_op = inp.save(); - let op = 'choice: { - $( - if $X::INFIX { - match $X.op_parser().go::(inp) { - Ok(out) => break 'choice Op::$X(out), - Err(()) => inp.rewind(pre_op), - } - } - )* - - inp.rewind(pre_op); - break 'luup; - }; - - match op { - $( - Op::$X(op) => { - let assoc = $X.associativity(); - if assoc.left_power() < min_power { - inp.rewind(pre_op); - break - } else { - lhs = match self.pratt_go::(inp, assoc.right_power()) { - Ok(rhs) => M::combine( + $( + let assoc = $X.associativity(); + if $X::INFIX && assoc.left_power() >= min_power { + match $X.op_parser().go::(inp) { + Ok(op) => match self.pratt_go::(inp, assoc.right_power()) { + Ok(rhs) => { + lhs = M::combine( M::combine(lhs, rhs, |lhs, rhs| (lhs, rhs)), op, |(lhs, rhs), op| { let span = inp.span_since(pre_expr.offset()); $X.fold_infix(lhs, op, rhs, span) }, - ), - Err(()) => { inp.rewind(pre_op); break }, - } - } - }, - )* - } + ); + continue + }, + Err(()) => inp.rewind(pre_op), + }, + Err(()) => inp.rewind(pre_op), + } + } + )* + + inp.rewind(pre_op); + break; } Ok(lhs) From b9468aaeafa6925b30c140022450cbc9809f0d89 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Tue, 29 Aug 2023 15:09:24 +0100 Subject: [PATCH 30/64] Added support for postfix operators --- src/pratt2.rs | 154 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 149 insertions(+), 5 deletions(-) diff --git a/src/pratt2.rs b/src/pratt2.rs index 17659e68..8cad6b6e 100644 --- a/src/pratt2.rs +++ b/src/pratt2.rs @@ -11,6 +11,7 @@ where type OpParser: Parser<'a, I, Self::Op, E>; const INFIX: bool = false; const PREFIX: bool = false; + const POSTFIX: bool = false; fn op_parser(&self) -> &Self::OpParser; fn associativity(&self) -> Associativity; @@ -20,6 +21,9 @@ where fn fold_prefix(&self, op: Self::Op, rhs: O, span: I::Span) -> O { unreachable!() } + fn fold_postfix(&self, lhs: O, op: Self::Op, span: I::Span) -> O { + 
unreachable!() + } } #[derive(Copy, Clone, PartialEq)] @@ -146,7 +150,7 @@ pub struct Prefix { phantom: EmptyPhantom<(Op, Args)>, } -pub const fn unary_prefix( +pub const fn prefix( binding_power: u16, op_parser: A, f: F, @@ -159,6 +163,28 @@ pub const fn unary_prefix( } } +impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Prefix +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn(O) -> O, +{ + type Op = Op; + type OpParser = A; + const PREFIX: bool = true; + + fn op_parser(&self) -> &Self::OpParser { + &self.op_parser + } + fn associativity(&self) -> Associativity { + Associativity::Left(self.binding_power) + } + fn fold_prefix(&self, _op: Self::Op, rhs: O, _span: I::Span) -> O { + (self.f)(rhs) + } +} + impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Prefix where I: Input<'a>, @@ -203,6 +229,93 @@ where } } +pub struct Postfix { + op_parser: A, + f: F, + binding_power: u16, + #[allow(dead_code)] + phantom: EmptyPhantom<(Op, Args)>, +} + +pub const fn postfix( + binding_power: u16, + op_parser: A, + f: F, +) -> Postfix { + Postfix { + op_parser, + f, + binding_power, + phantom: EmptyPhantom::new(), + } +} + +impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Postfix +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn(O) -> O, +{ + type Op = Op; + type OpParser = A; + const POSTFIX: bool = true; + + fn op_parser(&self) -> &Self::OpParser { + &self.op_parser + } + fn associativity(&self) -> Associativity { + Associativity::Left(self.binding_power) + } + fn fold_postfix(&self, lhs: O, _op: Self::Op, _span: I::Span) -> O { + (self.f)(lhs) + } +} + +impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Postfix +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn(O, Op) -> O, +{ + type Op = Op; + type OpParser = A; + const POSTFIX: bool = true; + + fn op_parser(&self) -> &Self::OpParser { + &self.op_parser + } + fn associativity(&self) -> Associativity { + 
Associativity::Left(self.binding_power) + } + fn fold_postfix(&self, lhs: O, op: Self::Op, _span: I::Span) -> O { + (self.f)(lhs, op) + } +} + +impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Postfix +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn(O, Op, I::Span) -> O, +{ + type Op = Op; + type OpParser = A; + const POSTFIX: bool = true; + + fn op_parser(&self) -> &Self::OpParser { + &self.op_parser + } + fn associativity(&self) -> Associativity { + Associativity::Left(self.binding_power) + } + fn fold_postfix(&self, lhs: O, op: Self::Op, span: I::Span) -> O { + (self.f)(lhs, op, span) + } +} + pub struct Pratt { pub(crate) atom: Atom, pub(crate) ops: Ops, @@ -228,6 +341,7 @@ macro_rules! impl_pratt_for_tuple { let mut lhs = 'choice: { let ($($X,)*) = &self.ops; + // Prefix unary operators $( if $X::PREFIX { match $X.op_parser().go::(inp) { @@ -252,7 +366,26 @@ macro_rules! impl_pratt_for_tuple { let ($($X,)*) = &self.ops; let pre_op = inp.save(); - $( + + // Postfix unary operators + $( + let assoc = $X.associativity(); + if $X::POSTFIX && assoc.right_power() >= min_power { + match $X.op_parser().go::(inp) { + Ok(op) => { + lhs = M::combine(lhs, op, |lhs, op| { + let span = inp.span_since(pre_expr.offset()); + $X.fold_postfix(lhs, op, span) + }); + continue + }, + Err(()) => inp.rewind(pre_op), + } + } + )* + + // Infix binary operators + $( let assoc = $X.associativity(); if $X::INFIX && assoc.left_power() >= min_power { match $X.op_parser().go::(inp) { @@ -306,11 +439,20 @@ impl_pratt_for_tuple!(A_ B_ C_ D_ E_ F_ G_ H_ I_ J_ K_ L_ M_ N_ O_ P_ Q_ R_ S_ T mod tests { use super::*; + fn factorial(x: i64) -> i64 { + if x == 0 { + 1 + } else { + x * factorial(x - 1) + } + } + fn parser<'a>() -> impl Parser<'a, &'a str, i64> { let atom = text::int(10).padded().from_str::().unwrapped(); atom.pratt2(( - unary_prefix(2, just('-'), |_, x: i64| -x), + prefix(2, just('-'), |_, x: i64| -x), + postfix(2, just('!'), factorial), 
binary(left(0), just('+'), |l, r| l + r), binary(left(0), just('-'), |l, r| l - r), binary(left(1), just('*'), |l, r| l * r), @@ -320,13 +462,15 @@ mod tests { #[test] fn precedence() { - assert_eq!(parser().parse("1 + 2 * 3").into_result(), Ok(7)); - assert_eq!(parser().parse("2 * 3 + 1").into_result(), Ok(7)); + assert_eq!(parser().parse("2 + 3 * 4").into_result(), Ok(14)); + assert_eq!(parser().parse("2 * 3 + 4").into_result(), Ok(10)); } #[test] fn unary() { assert_eq!(parser().parse("-2").into_result(), Ok(-2)); + assert_eq!(parser().parse("4!").into_result(), Ok(24)); + assert_eq!(parser().parse("2 + 4!").into_result(), Ok(26)); assert_eq!(parser().parse("-2 + 2").into_result(), Ok(0)); } } From 75574a2900a68033da045503eed06efd70d6eb68 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Tue, 29 Aug 2023 15:21:08 +0100 Subject: [PATCH 31/64] Clippy fixes --- src/pratt2.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pratt2.rs b/src/pratt2.rs index 8cad6b6e..a0c70747 100644 --- a/src/pratt2.rs +++ b/src/pratt2.rs @@ -15,13 +15,13 @@ where fn op_parser(&self) -> &Self::OpParser; fn associativity(&self) -> Associativity; - fn fold_infix(&self, lhs: O, op: Self::Op, rhs: O, span: I::Span) -> O { + fn fold_infix(&self, _lhs: O, _op: Self::Op, _rhs: O, _span: I::Span) -> O { unreachable!() } - fn fold_prefix(&self, op: Self::Op, rhs: O, span: I::Span) -> O { + fn fold_prefix(&self, _op: Self::Op, _rhs: O, _span: I::Span) -> O { unreachable!() } - fn fold_postfix(&self, lhs: O, op: Self::Op, span: I::Span) -> O { + fn fold_postfix(&self, _lhs: O, _op: Self::Op, _span: I::Span) -> O { unreachable!() } } @@ -451,7 +451,7 @@ mod tests { let atom = text::int(10).padded().from_str::().unwrapped(); atom.pratt2(( - prefix(2, just('-'), |_, x: i64| -x), + prefix(2, just('-'), |x: i64| -x), postfix(2, just('!'), factorial), binary(left(0), just('+'), |l, r| l + r), binary(left(0), just('-'), |l, r| l - r), From 
e99ab6ac92fe4b05a9cbc35566366851961f3137 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Wed, 30 Aug 2023 15:44:03 +0100 Subject: [PATCH 32/64] Used macros to simplify operator closure impls --- src/pratt2.rs | 268 ++++++++++++++------------------------------------ 1 file changed, 74 insertions(+), 194 deletions(-) diff --git a/src/pratt2.rs b/src/pratt2.rs index a0c70747..b261fde8 100644 --- a/src/pratt2.rs +++ b/src/pratt2.rs @@ -63,7 +63,7 @@ pub fn right(binding_power: u16) -> Associativity { Associativity::Right(binding_power) } -pub const fn binary( +pub const fn infix( associativity: Associativity, op_parser: A, f: F, @@ -76,71 +76,31 @@ pub const fn binary( } } -impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Infix -where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, Op, E>, - F: Fn(O, O) -> O, -{ - type Op = Op; - type OpParser = A; - const INFIX: bool = true; - - fn op_parser(&self) -> &Self::OpParser { - &self.op_parser - } - fn associativity(&self) -> Associativity { - self.associativity - } - fn fold_infix(&self, lhs: O, _op: Self::Op, rhs: O, _span: I::Span) -> O { - (self.f)(lhs, rhs) - } -} - -impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Infix -where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, Op, E>, - F: Fn(O, Op, O) -> O, -{ - type Op = Op; - type OpParser = A; - const INFIX: bool = true; - - fn op_parser(&self) -> &Self::OpParser { - &self.op_parser - } - fn associativity(&self) -> Associativity { - self.associativity - } - fn fold_infix(&self, lhs: O, op: Self::Op, rhs: O, _span: I::Span) -> O { - (self.f)(lhs, op, rhs) - } +macro_rules! 
infix_op { + (|$f:ident : Fn($($Arg:ty),*) -> O, $lhs:ident, $op:ident, $rhs:ident, $span:ident| $invoke:expr) => { + impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Infix + where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn($($Arg),*) -> O, + { + type Op = Op; + type OpParser = A; + const INFIX: bool = true; + fn op_parser(&self) -> &Self::OpParser { &self.op_parser } + fn associativity(&self) -> Associativity { self.associativity } + fn fold_infix(&self, $lhs: O, $op: Self::Op, $rhs: O, $span: I::Span) -> O { let $f = &self.f; $invoke } + } + }; } -impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Infix -where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, Op, E>, - F: Fn(O, Op, O, I::Span) -> O, -{ - type Op = Op; - type OpParser = A; - const INFIX: bool = true; - - fn op_parser(&self) -> &Self::OpParser { - &self.op_parser - } - fn associativity(&self) -> Associativity { - self.associativity - } - fn fold_infix(&self, lhs: O, op: Self::Op, rhs: O, span: I::Span) -> O { - (self.f)(lhs, op, rhs, span) - } -} +// Allow `|lhs, rhs| ` to be used as a fold closure for infix operators +infix_op!(|f: Fn(O, O) -> O, lhs, _op, rhs, _span| f(lhs, rhs)); +// Allow `|lhs, op, rhs| ` to be used as a fold closure for infix operators +infix_op!(|f: Fn(O, Op, O) -> O, lhs, op, rhs, _span| f(lhs, op, rhs)); +// Allow `|lhs, op, rhs, span| ` to be used as a fold closure for infix operators +infix_op!(|f: Fn(O, Op, O, I::Span) -> O, lhs, op, rhs, span| f(lhs, op, rhs, span)); pub struct Prefix { op_parser: A, @@ -163,71 +123,31 @@ pub const fn prefix( } } -impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Prefix -where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, Op, E>, - F: Fn(O) -> O, -{ - type Op = Op; - type OpParser = A; - const PREFIX: bool = true; - - fn op_parser(&self) -> &Self::OpParser { - &self.op_parser - } - fn associativity(&self) -> Associativity { - Associativity::Left(self.binding_power) - 
} - fn fold_prefix(&self, _op: Self::Op, rhs: O, _span: I::Span) -> O { - (self.f)(rhs) - } -} - -impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Prefix -where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, Op, E>, - F: Fn(Op, O) -> O, -{ - type Op = Op; - type OpParser = A; - const PREFIX: bool = true; - - fn op_parser(&self) -> &Self::OpParser { - &self.op_parser - } - fn associativity(&self) -> Associativity { - Associativity::Left(self.binding_power) - } - fn fold_prefix(&self, op: Self::Op, rhs: O, _span: I::Span) -> O { - (self.f)(op, rhs) - } +macro_rules! prefix_op { + (|$f:ident : Fn($($Arg:ty),*) -> O, $op:ident, $rhs:ident, $span:ident| $invoke:expr) => { + impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Prefix + where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn($($Arg),*) -> O, + { + type Op = Op; + type OpParser = A; + const PREFIX: bool = true; + fn op_parser(&self) -> &Self::OpParser { &self.op_parser } + fn associativity(&self) -> Associativity { Associativity::Left(self.binding_power) } + fn fold_prefix(&self, $op: Self::Op, $rhs: O, $span: I::Span) -> O { let $f = &self.f; $invoke } + } + }; } -impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Prefix -where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, Op, E>, - F: Fn(Op, O, I::Span) -> O, -{ - type Op = Op; - type OpParser = A; - const PREFIX: bool = true; - - fn op_parser(&self) -> &Self::OpParser { - &self.op_parser - } - fn associativity(&self) -> Associativity { - Associativity::Left(self.binding_power) - } - fn fold_prefix(&self, op: Self::Op, rhs: O, span: I::Span) -> O { - (self.f)(op, rhs, span) - } -} +// Allow `|rhs| ` to be used as a fold closure for prefix operators +prefix_op!(|f: Fn(O) -> O, _op, rhs, _span| f(rhs)); +// Allow `|op, rhs| ` to be used as a fold closure for prefix operators +prefix_op!(|f: Fn(Op, O) -> O, op, rhs, _span| f(op, rhs)); +// Allow `|op, rhs, span| ` to be used as a fold closure for 
prefix operators +prefix_op!(|f: Fn(Op, O, I::Span) -> O, op, rhs, span| f(op, rhs, span)); pub struct Postfix { op_parser: A, @@ -250,71 +170,31 @@ pub const fn postfix( } } -impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Postfix -where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, Op, E>, - F: Fn(O) -> O, -{ - type Op = Op; - type OpParser = A; - const POSTFIX: bool = true; - - fn op_parser(&self) -> &Self::OpParser { - &self.op_parser - } - fn associativity(&self) -> Associativity { - Associativity::Left(self.binding_power) - } - fn fold_postfix(&self, lhs: O, _op: Self::Op, _span: I::Span) -> O { - (self.f)(lhs) - } -} - -impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Postfix -where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, Op, E>, - F: Fn(O, Op) -> O, -{ - type Op = Op; - type OpParser = A; - const POSTFIX: bool = true; - - fn op_parser(&self) -> &Self::OpParser { - &self.op_parser - } - fn associativity(&self) -> Associativity { - Associativity::Left(self.binding_power) - } - fn fold_postfix(&self, lhs: O, op: Self::Op, _span: I::Span) -> O { - (self.f)(lhs, op) - } +macro_rules! 
postfix_op { + (|$f:ident : Fn($($Arg:ty),*) -> O, $lhs:ident, $op:ident, $span:ident| $invoke:expr) => { + impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Postfix + where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn($($Arg),*) -> O, + { + type Op = Op; + type OpParser = A; + const POSTFIX: bool = true; + fn op_parser(&self) -> &Self::OpParser { &self.op_parser } + fn associativity(&self) -> Associativity { Associativity::Left(self.binding_power) } + fn fold_postfix(&self, $lhs: O, $op: Self::Op, $span: I::Span) -> O { let $f = &self.f; $invoke } + } + }; } -impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Postfix -where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, Op, E>, - F: Fn(O, Op, I::Span) -> O, -{ - type Op = Op; - type OpParser = A; - const POSTFIX: bool = true; - - fn op_parser(&self) -> &Self::OpParser { - &self.op_parser - } - fn associativity(&self) -> Associativity { - Associativity::Left(self.binding_power) - } - fn fold_postfix(&self, lhs: O, op: Self::Op, span: I::Span) -> O { - (self.f)(lhs, op, span) - } -} +// Allow `|lhs| ` to be used as a fold closure for postfix operators +postfix_op!(|f: Fn(O) -> O, lhs, _op, _span| f(lhs)); +// Allow `|lhs, op| ` to be used as a fold closure for postfix operators +postfix_op!(|f: Fn(O, Op) -> O, lhs, op, _span| f(lhs, op)); +// Allow `|lhs, op, span| ` to be used as a fold closure for postfix operators +postfix_op!(|f: Fn(O, Op, I::Span) -> O, lhs, op, span| f(lhs, op, span)); pub struct Pratt { pub(crate) atom: Atom, @@ -453,10 +333,10 @@ mod tests { atom.pratt2(( prefix(2, just('-'), |x: i64| -x), postfix(2, just('!'), factorial), - binary(left(0), just('+'), |l, r| l + r), - binary(left(0), just('-'), |l, r| l - r), - binary(left(1), just('*'), |l, r| l * r), - binary(left(1), just('/'), |l, _, r| l / r), + infix(left(0), just('+'), |l, r| l + r), + infix(left(0), just('-'), |l, r| l - r), + infix(left(1), just('*'), |l, r| l * r), + infix(left(1), 
just('/'), |l, _, r| l / r), )) } From 512faf5609255b4a00dfa445c7a0fa8cece08f36 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Wed, 30 Aug 2023 18:34:10 +0100 Subject: [PATCH 33/64] Ported old tests --- src/pratt2.rs | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) diff --git a/src/pratt2.rs b/src/pratt2.rs index b261fde8..fed9102f 100644 --- a/src/pratt2.rs +++ b/src/pratt2.rs @@ -318,6 +318,7 @@ impl_pratt_for_tuple!(A_ B_ C_ D_ E_ F_ G_ H_ I_ J_ K_ L_ M_ N_ O_ P_ Q_ R_ S_ T #[cfg(test)] mod tests { use super::*; + use crate::{extra::Err, prelude::*}; fn factorial(x: i64) -> i64 { if x == 0 { @@ -353,4 +354,209 @@ mod tests { assert_eq!(parser().parse("2 + 4!").into_result(), Ok(26)); assert_eq!(parser().parse("-2 + 2").into_result(), Ok(0)); } + + enum Expr { + Literal(i64), + Not(Box), + Negate(Box), + Confusion(Box), + Factorial(Box), + Value(Box), + Add(Box, Box), + Sub(Box, Box), + Mul(Box, Box), + Div(Box, Box), + } + + impl std::fmt::Display for Expr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Literal(literal) => write!(f, "{literal}"), + Self::Not(right) => write!(f, "(~{right})"), + Self::Negate(right) => write!(f, "(-{right})"), + Self::Confusion(right) => write!(f, "(Β§{right})"), + Self::Factorial(right) => write!(f, "({right}!)"), + Self::Value(right) => write!(f, "({right}$)"), + Self::Add(left, right) => write!(f, "({left} + {right})"), + Self::Sub(left, right) => write!(f, "({left} - {right})"), + Self::Mul(left, right) => write!(f, "({left} * {right})"), + Self::Div(left, right) => write!(f, "({left} / {right})"), + } + } + } + + fn u(e: fn(Box) -> Expr, r: Expr) -> Expr { + e(Box::new(r)) + } + fn i(e: fn(Box, Box) -> Expr, l: Expr, r: Expr) -> Expr { + e(Box::new(l), Box::new(r)) + } + + fn expr_parser<'a>() -> impl Parser<'a, &'a str, String, Err>> { + let atom = text::int(10).from_str().unwrapped().map(Expr::Literal); + + atom.pratt2(( + 
infix(left(0), just('+'), |l, r| i(Expr::Add, l, r)), + infix(left(0), just('-'), |l, r| i(Expr::Sub, l, r)), + infix(right(1), just('*'), |l, r| i(Expr::Mul, l, r)), + infix(right(1), just('/'), |l, r| i(Expr::Div, l, r)), + )) + .map(|x| x.to_string()) + } + + fn complete_parser<'a>() -> impl Parser<'a, &'a str, String, Err>> { + expr_parser().then_ignore(end()) + } + + fn parse(input: &str) -> ParseResult> { + complete_parser().parse(input) + } + + fn parse_partial(input: &str) -> ParseResult> { + expr_parser().lazy().parse(input) + } + + fn unexpected<'a, C: Into>>, S: Into>( + c: C, + span: S, + ) -> Simple<'a, char> { + as Error<'_, &'_ str>>::expected_found(None, c.into(), span.into()) + } + + #[test] + fn missing_first_expression() { + assert_eq!(parse("").into_result(), Err(vec![unexpected(None, 0..0)])) + } + + #[test] + fn missing_later_expression() { + assert_eq!(parse("1+").into_result(), Err(vec![unexpected(None, 2..2)]),); + } + + #[test] + fn invalid_first_expression() { + assert_eq!( + parse("?").into_result(), + Err(vec![unexpected(Some('?'.into()), 0..1)]), + ); + } + + #[test] + fn invalid_later_expression() { + assert_eq!( + parse("1+?").into_result(), + Err(vec![dbg!(unexpected(Some('?'.into()), 2..3))]), + ); + } + + #[test] + fn invalid_operator() { + assert_eq!( + parse("1?").into_result(), + Err(vec![unexpected(Some('?'.into()), 1..2)]), + ); + } + + #[test] + fn invalid_operator_incomplete() { + assert_eq!(parse_partial("1?").into_result(), Ok("1".to_string()),); + } + + #[test] + fn complex_nesting() { + assert_eq!( + parse_partial("1+2*3/4*5-6*7+8-9+10").into_result(), + Ok("(((((1 + (2 * (3 / (4 * 5)))) - (6 * 7)) + 8) - 9) + 10)".to_string()), + ); + } + + #[test] + fn with_prefix_ops() { + let atom = text::int::<_, _, Err>>(10) + .from_str() + .unwrapped() + .map(Expr::Literal); + + let parser = atom + .pratt2(( + // -- Prefix + // Because we defined '*' and '/' as right associative operators, + // in order to get these to function 
as expected, their strength + // must be higher + prefix(2, just('-'), |r| u(Expr::Negate, r)), + prefix(2, just('~'), |r| u(Expr::Not, r)), + // This is what happens when not + prefix(1, just('Β§'), |r| u(Expr::Confusion, r)), + // -- Infix + infix(left(0), just('+'), |l, r| i(Expr::Add, l, r)), + infix(left(0), just('-'), |l, r| i(Expr::Sub, l, r)), + infix(right(1), just('*'), |l, r| i(Expr::Mul, l, r)), + infix(right(1), just('/'), |l, r| i(Expr::Div, l, r)), + )) + .map(|x| x.to_string()); + + assert_eq!( + parser.parse("-1+Β§~2*3").into_result(), + Ok("((-1) + (Β§((~2) * 3)))".to_string()), + ) + } + + #[test] + fn with_postfix_ops() { + let atom = text::int::<_, _, Err>>(10) + .from_str() + .unwrapped() + .map(Expr::Literal); + + let parser = atom + .pratt2(( + // -- Postfix + // Because we defined '*' and '/' as right associative operators, + // in order to get these to function as expected, their strength + // must be higher + postfix(2, just('!'), |l| u(Expr::Factorial, l)), + // This is what happens when not + postfix(0, just('$'), |l| u(Expr::Value, l)), + // -- Infix + infix(left(1), just('+'), |l, r| i(Expr::Add, l, r)), + infix(left(1), just('-'), |l, r| i(Expr::Sub, l, r)), + infix(right(2), just('*'), |l, r| i(Expr::Mul, l, r)), + infix(right(2), just('/'), |l, r| i(Expr::Div, l, r)), + )) + .map(|x| x.to_string()); + + assert_eq!( + parser.parse("1+2!$*3").into_result(), + Ok("(((1 + (2!))$) * 3)".to_string()), + ) + } + + #[test] + fn with_pre_and_postfix_ops() { + let atom = text::int::<_, _, Err>>(10) + .from_str() + .unwrapped() + .map(Expr::Literal); + + let parser = atom + .pratt2(( + // -- Prefix + prefix(4, just('-'), |r| u(Expr::Negate, r)), + prefix(4, just('~'), |r| u(Expr::Not, r)), + prefix(1, just('Β§'), |r| u(Expr::Confusion, r)), + // -- Postfix + postfix(5, just('!'), |l| u(Expr::Factorial, l)), + postfix(0, just('$'), |l| u(Expr::Value, l)), + // -- Infix + infix(left(1), just('+'), |l, r| i(Expr::Add, l, r)), + infix(left(1), 
just('-'), |l, r| i(Expr::Sub, l, r)), + infix(right(2), just('*'), |l, r| i(Expr::Mul, l, r)), + infix(right(2), just('/'), |l, r| i(Expr::Div, l, r)), + )) + .map(|x| x.to_string()); + assert_eq!( + parser.parse("Β§1+-~2!$*3").into_result(), + Ok("(((Β§(1 + (-(~(2!)))))$) * 3)".to_string()), + ) + } } From d8a5e8689b33e76ba110386f7bff68816a27b827 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Fri, 1 Sep 2023 10:15:23 +0100 Subject: [PATCH 34/64] Replaced old pratt implementation, added extensive docs --- src/lib.rs | 122 ++---- src/pratt.rs | 1049 ++++++++++++++++++++++------------------------ src/pratt/ops.rs | 372 ---------------- src/pratt2.rs | 562 ------------------------- 4 files changed, 527 insertions(+), 1578 deletions(-) delete mode 100644 src/pratt/ops.rs delete mode 100644 src/pratt2.rs diff --git a/src/lib.rs b/src/lib.rs index b25fa355..01668b31 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -88,8 +88,6 @@ pub mod label; pub mod number; #[cfg(feature = "pratt")] pub mod pratt; -#[cfg(feature = "pratt")] -pub mod pratt2; pub mod primitive; mod private; pub mod recovery; @@ -108,8 +106,6 @@ pub mod util; pub mod prelude { #[cfg(feature = "lexical-numbers")] pub use super::number::number; - #[cfg(feature = "pratt")] - pub use super::pratt::{InfixOp, Pratt}; #[cfg(feature = "regex")] pub use super::regex::regex; pub use super::{ @@ -2129,112 +2125,48 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: ParserSealed::boxed(self) } - /// Use pratt-parsing to efficiently parse expressions separated by - /// operators of different associativity and precedence. + /// Use [Pratt parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser#Pratt_parsing) to ergonomically + /// parse this pattern separated by prefix, postfix, and infix operators of various associativites and precedence. /// - /// The pratt-parsing algorithm uses recursion to efficiently parse - /// arbitrarily nested expressions. 
+ /// Pratt parsing is a powerful technique and is recommended when writing parsers for expressions. /// /// # Example /// + /// See the documentation in [`pratt`] for more extensive examples and details. + /// /// ``` - /// use chumsky::prelude::*; + /// # use chumsky::prelude::*; /// use chumsky::pratt::*; - /// use chumsky::extra; - /// - /// enum Expr { - /// Add(Box, Box), - /// Sub(Box, Box), - /// Pow(Box, Box), - /// Neg(Box), - /// Fact(Box), - /// Deref(Box), - /// Literal(i32), - /// } - /// - /// impl std::fmt::Display for Expr { - /// fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - /// match self { - /// Self::Literal(literal) => write!(f, "{literal}"), - /// Self::Fact(left) => write!(f, "({left}!)"), - /// Self::Deref(left) => write!(f, "(*{left})"), - /// Self::Neg(right) => write!(f, "(-{right})"), - /// Self::Add(left, right) => write!(f, "({left} + {right})"), - /// Self::Sub(left, right) => write!(f, "({left} - {right})"), - /// Self::Pow(left, right) => write!(f, "({left} ^ {right})"), - /// } - /// } - /// } + /// use std::ops::{Neg, Mul, Div, Add, Sub}; /// - /// let atom = text::int::<_, _, extra::Err>>(10) + /// let int = text::int::<_, _, extra::Err>>(10) /// .from_str() /// .unwrapped() - /// .map(Expr::Literal); - /// - /// let operator = choice(( - /// // Our `-` and `+` bind the weakest, meaning that even if they occur - /// // first in an expression, they will be the last executed - /// left_infix(just('+'), 1, |l, r| Expr::Add(Box::new(l), Box::new(r))), - /// left_infix(just('-'), 1, |l, r| Expr::Sub(Box::new(l), Box::new(r))), - /// // Just like in math, we want that if we write -x^2, that our parser - /// // parses that as -(x^2), so we need it to bind tighter than our - /// // prefix operators - /// right_infix(just('^'), 3, |l, r| Expr::Pow(Box::new(l), Box::new(r))), - /// )) - /// .padded(); - /// - /// let prefix_ops = choice(( - /// // Notice the conflict with our `Expr::Sub`. 
This will still - /// // parse correctly. We want negation to happen before `+` and `-`, - /// // so we set it's precedence higher. - /// prefix(just('-'), 2, |rhs| Expr::Neg(Box::new(rhs))), - /// prefix(just('*'), 2, |rhs| Expr::Deref(Box::new(rhs))), - /// )) - /// .padded(); - /// - /// // We want factorial to happen before any negation, so we need it's - /// // precedence to be higher than `Expr::Neg`. - /// let factorial = postfix(just('!'), 4, |lhs| Expr::Fact(Box::new(lhs))).padded(); - /// - /// let pratt = atom - /// .pratt(operator) - /// .with_prefix_ops(prefix_ops) - /// .with_postfix_ops(factorial) - /// .map(|x| x.to_string()); + /// .padded(); /// - /// assert_eq!( - /// pratt.parse("*1 + -2! - -3^2").into_result(), - /// Ok("(((*1) + (-(2!))) - (-(3 ^ 2)))".to_string()), - /// ); + /// let op = |c| just(c).padded(); + /// + /// let expr = int.pratt(( + /// prefix(2, op('-'), i64::neg), + /// infix(left(1), op('*'), i64::mul), + /// infix(left(1), op('/'), i64::div), + /// infix(left(0), op('+'), i64::add), + /// infix(left(0), op('-'), i64::sub), + /// )); + /// + /// // Pratt parsing can handle unary operators... + /// assert_eq!(expr.parse("-7").into_result(), Ok(-7)); + /// // ...and infix binary operators... + /// assert_eq!(expr.parse("6 + 3").into_result(), Ok(9)); + /// // ...and arbitrary precedence levels between them. 
+ /// assert_eq!(expr.parse("2 + 3 * -4").into_result(), Ok(-10)); /// ``` #[cfg(feature = "pratt")] - fn pratt( - self, - ops: InfixOps, - ) -> Pratt> - where - I: Input<'a>, - E: ParserExtra<'a, I>, - InfixOps: Parser<'a, I, InfixOpsOut, E>, - Self: Sized, - { - Pratt { - atom: self, - ops: pratt::Infix { - infix: ops, - _phantom: EmptyPhantom::new(), - }, - _phantom: EmptyPhantom::new(), - } - } - - /// TODO - #[cfg(feature = "pratt")] - fn pratt2(self, ops: Ops) -> pratt2::Pratt + fn pratt(self, ops: Ops) -> pratt::Pratt where Self: Sized, { - pratt2::Pratt { atom: self, ops } + pratt::Pratt { atom: self, ops } } } diff --git a/src/pratt.rs b/src/pratt.rs index a7d0bd09..e4fbcd3e 100644 --- a/src/pratt.rs +++ b/src/pratt.rs @@ -1,530 +1,467 @@ -//! Pratt parser for efficiently parsing operators while respecting -//! operator precedence. +//! Utilities for parsing expressions using +//! [Pratt parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser#Pratt_parsing). //! -//! Pratt parsing is an algorithm that allows efficient parsing of -//! expressions using recursion. +//! *β€œWho am I? What is my purpose in life? Does it really, cosmically speaking, matter if I don’t get up and go to work?”* //! -//! The [`Parser::pratt`] method creates a [`Pratt`] parser. See the -//! method's documentation for an example of how it can be used. - -mod ops; -use ops::Strength; -pub use ops::{InfixOp, PostfixOp, PrefixOp}; - -use core::cmp::{self, Ordering}; - -use crate::{ - extra::ParserExtra, - input::InputRef, - prelude::Input, - private::{Check, Emit, Mode, PResult, ParserSealed}, - EmptyPhantom, Parser, -}; - -pub(super) use ops::{Infix, InfixPostfix, InfixPrefix, InfixPrefixPostfix, PrattOpOutput}; +//! Pratt parsing is a powerful technique for defining and parsing operators of varying arity, precedence, and +//! associativity. Unlike [precedence climbing](https://en.wikipedia.org/wiki/Operator-precedence_parser), which +//! 
defines operator precedence by structurally composing parsers of decreasing precedence, Pratt parsing defines +//! precedence through a numerical +//! ['binding power'](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html#From-Precedence-to-Binding-Power) +//! that determines how strongly operators should bind to the operands around them. +//! +//! Pratt parsers are defined with the [`Parser::pratt`] method. +//! +//! When writing pratt parsers, it is necessary to first define an 'atomic' operand used by the parser for building up +//! expressions. In most languages, atoms are simple, self-delimiting patterns such as numeric and string literals, +//! identifiers, or parenthesised expressions. Once an atom has been defined, operators can also be defined that +//! operate upon said atoms. +//! +//! # Fold functions +//! +//! Because operators bind atoms together, pratt parsers require you to specify, for each operator, a function that +//! combines its operands together into a syntax tree. These functions are given as the last arguments of [`infix`], +//! [`prefix`], and [`postfix`]. +//! +//! Fold functions have several overloads, allowing you to make use of only the operands, the operands and the +//! operators, and even additionally a [`Span`] that covers the entire operation. See the documentation for each +//! function to see which fold signatures can be used. +//! +//! # Examples +//! +//! ``` +//! use chumsky::prelude::*; +//! use chumsky::pratt::*; +//! use chumsky::extra; +//! +//! enum Expr { +//! Add(Box, Box), +//! Sub(Box, Box), +//! Pow(Box, Box), +//! Neg(Box), +//! Factorial(Box), +//! Deref(Box), +//! Literal(i32), +//! } +//! +//! impl std::fmt::Display for Expr { +//! fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { +//! match self { +//! Self::Literal(literal) => write!(f, "{literal}"), +//! Self::Factorial(left) => write!(f, "({left}!)"), +//! Self::Deref(left) => write!(f, "(*{left})"), +//! 
Self::Neg(right) => write!(f, "(-{right})"), +//! Self::Add(left, right) => write!(f, "({left} + {right})"), +//! Self::Sub(left, right) => write!(f, "({left} - {right})"), +//! Self::Pow(left, right) => write!(f, "({left} ^ {right})"), +//! } +//! } +//! } +//! +//! let atom = text::int::<_, _, extra::Err>>(10) +//! .from_str() +//! .unwrapped() +//! .map(Expr::Literal) +//! .padded(); +//! +//! let op = |c| just(c).padded(); +//! +//! let expr = atom.pratt(( +//! // We want factorial to happen before any negation, so we need its precedence to be higher than `Expr::Neg`. +//! postfix(4, op('!'), |lhs| Expr::Factorial(Box::new(lhs))), +//! // Just like in math, we want that if we write -x^2, our parser parses that as -(x^2), so we need it to have +//! // exponents bind tighter than our prefix operators +//! infix(right(3), op('^'), |l, r| Expr::Pow(Box::new(l), Box::new(r))), +//! // Notice the conflict with our `Expr::Sub`. This will still parse correctly. We want negation to happen before +//! // `+` and `-`, so we set it's precedence higher. +//! prefix(2, op('-'), |rhs| Expr::Neg(Box::new(rhs))), +//! prefix(2, op('*'), |rhs| Expr::Deref(Box::new(rhs))), +//! // Our `-` and `+` bind the weakest, meaning that even if they occur first in an expression, they will be the +//! // last executed +//! infix(left(1), op('+'), |l, r| Expr::Add(Box::new(l), Box::new(r))), +//! infix(left(1), op('-'), |l, r| Expr::Sub(Box::new(l), Box::new(r))), +//! )) +//! .map(|x| x.to_string()); +//! +//! assert_eq!( +//! expr.parse("*1 + -2! - -3^2").into_result(), +//! Ok("(((*1) + (-(2!))) - (-(3 ^ 2)))".to_string()), +//! ); +//! ``` -/// Shorthand for [`InfixOp::new_left`]. -/// -/// Creates a left associative infix operator that is parsed with the -/// parser `P`, and a function which is used to `build` a value `E`. -/// The operator's precedence is determined by `strength`. The higher -/// the value, the higher the precedence. 
-pub fn left_infix(parser: P, strength: u8, build: InfixBuilder) -> InfixOp { - InfixOp::new_left(parser, strength, build) -} +use super::*; -/// Shorthand for [`InfixOp::new_right`]. -/// -/// Creates a right associative infix operator that is parsed with the -/// parser `P`, and a function which is used to `build` a value `E`. -/// The operator's precedence is determined by `strength`. The higher -/// the value, the higher the precedence. -pub fn right_infix(parser: P, strength: u8, build: InfixBuilder) -> InfixOp { - InfixOp::new_right(parser, strength, build) +trait Operator<'a, I, O, E> +where + I: Input<'a>, + E: ParserExtra<'a, I>, +{ + type Op; + type OpParser: Parser<'a, I, Self::Op, E>; + const IS_INFIX: bool = false; + const IS_PREFIX: bool = false; + const IS_POSTFIX: bool = false; + + fn op_parser(&self) -> &Self::OpParser; + fn associativity(&self) -> Associativity; + fn fold_infix(&self, _lhs: O, _op: Self::Op, _rhs: O, _span: I::Span) -> O { + unreachable!() + } + fn fold_prefix(&self, _op: Self::Op, _rhs: O, _span: I::Span) -> O { + unreachable!() + } + fn fold_postfix(&self, _lhs: O, _op: Self::Op, _span: I::Span) -> O { + unreachable!() + } } -/// Shorthand for [`PrefixOp::new`]. +/// Defines the [associativity](https://en.wikipedia.org/wiki/Associative_property) and binding power of an [`infix`] +/// operator (see [`left`] and [`right`]). /// -/// Creates a prefix operator (a right-associative unary operator) -/// that is parsed with the parser `P`, and a function which is used -/// to `build` a value `E`. The operator's precedence is determined -/// by `strength`. The higher the value, the higher the precedence. -pub fn prefix(parser: P, strength: u8, build: PrefixBuilder) -> PrefixOp { - PrefixOp::new(parser, strength, build) +/// Higher binding powers should be used for higher precedence operators. 
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Associativity { + /// Specifies that the operator should be left-associative, with the given binding power (see [`left`]). + Left(u16), + /// Specifies that the operator should be right-associative, with the given binding power (see [`right`]). + Right(u16), } -/// Shorthand for [`PostfixOp::new`]. +/// Specifies a left [`Associativity`] with the given binding power. /// -/// Creates a postfix operator (a left-associative unary operator) -/// that is parsed with the parser `P`, and a function which is used -/// to `build` a value `E`. The operator's precedence is determined -/// by `strength`. The higher the value, the higher the precedence. -pub fn postfix(parser: P, strength: u8, build: PostfixBuilder) -> PostfixOp { - PostfixOp::new(parser, strength, build) +/// Left-associative operators are evaluated from the left-most terms, moving rightward. For example, the expression +/// `a + b + c + d` will be evaluated as `((a + b) + c) + d` because addition is conventionally left-associative. +pub fn left(binding_power: u16) -> Associativity { + Associativity::Left(binding_power) } -/// A struct which represents a parser capable of using pratt-parsing. -/// -/// This parser contains a parser of type `Atom`, which parses expressions that -/// are separated by a set of operators of parsed by a parser of type `Ops`. -/// The operators may have varying precedence levels, as well as associativity. -/// For those unfamiliar with operator precedence and/or associativity, it may -/// be helpful to read [this documentation](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence) -/// -/// This struct offers two methods: -/// * `with_prefix_ops`: Attaches prefix operators to the parser -/// * `with_postfix_ops`: Attaches postfix operators to the parser +/// Specifies a right [`Associativity`] with the given binding power. 
/// -/// Once one of the methods has been used, it will become unavailable -/// due to the use of the type-state pattern to prevent accidental -/// resetting of the operators. -/// See [`Parser::pratt`] for an example of how to use these methods. -pub struct Pratt { - pub(crate) atom: Atom, - pub(crate) ops: Ops, - pub(crate) _phantom: EmptyPhantom<(I, O, E)>, +/// Right-associative operators are evaluated from the right-most terms, moving leftward. For example, the expression +/// `a ^ b ^ c ^ d` will be evaluated as `a ^ (b ^ (c ^ d))` because exponents are conventionally right-associative. +pub fn right(binding_power: u16) -> Associativity { + Associativity::Right(binding_power) } -impl Copy for Pratt -where - Atom: Copy, - Ops: Copy, -{ -} - -impl Clone for Pratt -where - Atom: Clone, - Ops: Clone, -{ - fn clone(&self) -> Self { - Self { - atom: self.atom.clone(), - ops: self.ops.clone(), - _phantom: EmptyPhantom::new(), +impl Associativity { + fn left_power(&self) -> u32 { + match self { + Self::Left(x) => *x as u32 * 2, + Self::Right(x) => *x as u32 * 2 + 1, } } -} -impl<'a, I, O, E, Atom, InfixOps, InfixOpsOut> Pratt> { - /// Extend a `Pratt` parser by setting prefix operators. - /// See [`Parser::pratt`] for an example of how to use this methods. - pub fn with_prefix_ops( - self, - prefix_ops: PrefixOps, - ) -> Pratt> - where - I: Input<'a>, - E: ParserExtra<'a, I>, - InfixOps: Parser<'a, I, InfixOpsOut, E>, - PrefixOps: Parser<'a, I, PrefixOpsOut, E>, - Pratt>: - PrattParser<'a, I, O, E>, - { - Pratt { - atom: self.atom, - ops: InfixPrefix { - infix: self.ops.infix, - prefix: prefix_ops, - _phantom: EmptyPhantom::new(), - }, - _phantom: EmptyPhantom::new(), + fn right_power(&self) -> u32 { + match self { + Self::Left(x) => *x as u32 * 2 + 1, + Self::Right(x) => *x as u32 * 2, } } +} - /// Extend a `Pratt` parser by setting postfix operators - /// See [`Parser::pratt`] for an example of how to use this method. 
- pub fn with_postfix_ops( - self, - postfix_ops: PostfixOps, - ) -> Pratt> - where - I: Input<'a>, - E: ParserExtra<'a, I>, - InfixOps: Parser<'a, I, InfixOpsOut, E>, - PostfixOps: Parser<'a, I, PostfixOpsOut, E>, - Pratt>: - PrattParser<'a, I, O, E>, - { - Pratt { - atom: self.atom, - ops: InfixPostfix { - infix: self.ops.infix, - postfix: postfix_ops, - _phantom: EmptyPhantom::new(), - }, - _phantom: EmptyPhantom::new(), - } - } +/// See [`infix`]. +pub struct Infix { + op_parser: A, + fold: F, + associativity: Associativity, + #[allow(dead_code)] + phantom: EmptyPhantom<(Op, Args)>, } -impl<'a, I, O, E, Atom, InfixOps, InfixOpsOut, PrefixOps, PrefixOpsOut> - Pratt> -{ - /// Extend a `Pratt` parser by setting postfix operators - pub fn with_postfix_ops( - self, - postfix_ops: PostfixOps, - ) -> Pratt< - I, - O, - E, - Atom, - InfixPrefixPostfix< - InfixOps, - InfixOpsOut, - PrefixOps, - PrefixOpsOut, - PostfixOps, - PostfixOpsOut, - >, - > - where - I: Input<'a>, - E: ParserExtra<'a, I>, - InfixOps: Parser<'a, I, InfixOpsOut, E>, - PrefixOps: Parser<'a, I, PrefixOpsOut, E>, - PostfixOps: Parser<'a, I, PostfixOpsOut, E>, - Pratt< - I, - O, - E, - Atom, - InfixPrefixPostfix< - InfixOps, - InfixOpsOut, - PrefixOps, - PrefixOpsOut, - PostfixOps, - PostfixOpsOut, - >, - >: PrattParser<'a, I, O, E>, - { - Pratt { - atom: self.atom, - ops: InfixPrefixPostfix { - infix: self.ops.infix, - prefix: self.ops.prefix, - postfix: postfix_ops, - _phantom: EmptyPhantom::new(), - }, - _phantom: EmptyPhantom::new(), - } +/// Specify a binary infix operator for a pratt parser with the given associativity, binding power, and +/// [fold function](crate::pratt#fold-functions). +/// +/// Operators like addition, subtraction, multiplication, division, remainder, exponentiation, etc. are infix binary +/// operators in most languages. +/// +/// See [`left`] and [`right`] for information about associativity. 
+/// +/// The fold function (the last argument) must have one of the following signatures: +/// +/// ```ignore +/// // Combine the left and right operands +/// impl Fn(O, O) -> O +/// // Combine the left operand, the operator itself, and the right operand +/// impl Fn(O, Op, O) -> O +/// // Combine the left operand, the operator itself, the right operand, and the span that covers the whole operation +/// impl Fn(O, Op, O, I::Span) -> O +/// ``` +pub const fn infix( + associativity: Associativity, + op_parser: A, + fold: F, +) -> Infix { + Infix { + op_parser, + fold, + associativity, + phantom: EmptyPhantom::new(), } } -impl<'a, I, O, E, Atom, InfixOps, InfixOpsOut, PostfixOps, PostfixOpsOut> - Pratt> -{ - /// Extend a `Pratt` parser by setting prefix operators - pub fn with_prefix_ops( - self, - prefix_ops: PrefixOps, - ) -> Pratt< - I, - O, - E, - Atom, - InfixPrefixPostfix< - InfixOps, - InfixOpsOut, - PrefixOps, - PrefixOpsOut, - PostfixOps, - PostfixOpsOut, - >, - > - where - I: Input<'a>, - E: ParserExtra<'a, I>, - PrefixOps: Parser<'a, I, PrefixOpsOut, E>, - { - Pratt { - atom: self.atom, - ops: InfixPrefixPostfix { - infix: self.ops.infix, - prefix: prefix_ops, - postfix: self.ops.postfix, - _phantom: EmptyPhantom::new(), - }, - _phantom: EmptyPhantom::new(), +macro_rules! 
infix_op { + (|$f:ident : Fn($($Arg:ty),*) -> O, $lhs:ident, $op:ident, $rhs:ident, $span:ident| $invoke:expr) => { + impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Infix + where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn($($Arg),*) -> O, + { + type Op = Op; + type OpParser = A; + const IS_INFIX: bool = true; + fn op_parser(&self) -> &Self::OpParser { &self.op_parser } + fn associativity(&self) -> Associativity { self.associativity } + fn fold_infix(&self, $lhs: O, $op: Self::Op, $rhs: O, $span: I::Span) -> O { let $f = &self.fold; $invoke } } - } + }; } -type InfixBuilder = fn(lhs: E, rhs: E) -> E; - -type PrefixBuilder = fn(rhs: E) -> E; - -type PostfixBuilder = fn(rhs: E) -> E; - -mod nameless_trait { - use super::*; +// Allow `|lhs, rhs| ` to be used as a fold closure for infix operators +infix_op!(|f: Fn(O, O) -> O, lhs, _op, rhs, _span| f(lhs, rhs)); +// Allow `|lhs, op, rhs| ` to be used as a fold closure for infix operators +infix_op!(|f: Fn(O, Op, O) -> O, lhs, op, rhs, _span| f(lhs, op, rhs)); +// Allow `|lhs, op, rhs, span| ` to be used as a fold closure for infix operators +infix_op!(|f: Fn(O, Op, O, I::Span) -> O, lhs, op, rhs, span| f(lhs, op, rhs, span)); + +/// See [`prefix`]. +pub struct Prefix { + op_parser: A, + fold: F, + binding_power: u16, + #[allow(dead_code)] + phantom: EmptyPhantom<(Op, Args)>, +} - pub trait PrattParser<'a, I, Expr, E> - where - I: Input<'a>, - E: ParserExtra<'a, I>, - { - fn pratt_parse( - &self, - inp: &mut InputRef<'a, '_, I, E>, - min_strength: Option, - ) -> PResult; +/// Specify a unary prefix operator for a pratt parser with the given binding power and +/// [fold function](crate::pratt#fold-functions). +/// +/// Operators like negation, not, dereferencing, etc. are prefix unary operators in most languages. 
+/// +/// The fold function (the last argument) must have one of the following signatures: +/// +/// ```ignore +/// // Transform the operand +/// impl Fn(O) -> O +/// // Combine the operator itself and the operand +/// impl Fn(Op, O) -> O +/// // Combine the operator itself, the operand, and the span that covers the whole operation +/// impl Fn(Op, O, I::Span) -> O +/// ``` +pub const fn prefix( + binding_power: u16, + op_parser: A, + fold: F, +) -> Prefix { + Prefix { + op_parser, + fold, + binding_power, + phantom: EmptyPhantom::new(), } } -use nameless_trait::PrattParser; - -impl<'a, I, O, E, Atom, InfixOps, InfixOpsOut> PrattParser<'a, I, O, E> - for Pratt> -where - I: Input<'a>, - E: ParserExtra<'a, I>, - Atom: Parser<'a, I, O, E>, - InfixOps: Parser<'a, I, PrattOpOutput>, E>, -{ - fn pratt_parse( - &self, - inp: &mut InputRef<'a, '_, I, E>, - min_strength: Option, - ) -> PResult - where - M: Mode, - { - let mut left = self.atom.go::(inp)?; - loop { - let pre_op = inp.save(); - let (op, prec) = match self.ops.infix.go::(inp) { - Ok(PrattOpOutput(prec, build)) => { - if prec.strength_left().is_lt(&min_strength) { - inp.rewind(pre_op); - return Ok(left); - } - (build, prec) - } - Err(_) => { - inp.rewind(pre_op); - return Ok(left); - } - }; - - let right = self.pratt_parse::(inp, Some(prec.strength_right()))?; - left = M::combine(left, right, op); +macro_rules! 
prefix_op { + (|$f:ident : Fn($($Arg:ty),*) -> O, $op:ident, $rhs:ident, $span:ident| $invoke:expr) => { + impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Prefix + where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn($($Arg),*) -> O, + { + type Op = Op; + type OpParser = A; + const IS_PREFIX: bool = true; + fn op_parser(&self) -> &Self::OpParser { &self.op_parser } + fn associativity(&self) -> Associativity { Associativity::Left(self.binding_power) } + fn fold_prefix(&self, $op: Self::Op, $rhs: O, $span: I::Span) -> O { let $f = &self.fold; $invoke } } - } + }; } -impl<'a, I, O, E, Atom, InfixOps, InfixOpsOut, PrefixOps, PrefixOpsOut> PrattParser<'a, I, O, E> - for Pratt> -where - I: Input<'a>, - E: ParserExtra<'a, I>, - Atom: Parser<'a, I, O, E>, - InfixOps: Parser<'a, I, PrattOpOutput>, E>, - PrefixOps: Parser<'a, I, PrattOpOutput>, E>, -{ - fn pratt_parse( - &self, - inp: &mut InputRef<'a, '_, I, E>, - min_strength: Option, - ) -> PResult - where - M: Mode, - { - let pre_op = inp.save(); - let mut left = match self.ops.prefix.go::(inp) { - Ok(PrattOpOutput(prec, build)) => { - let right = self.pratt_parse::(inp, Some(prec.strength_right()))?; - M::map(right, build) - } - Err(_) => { - inp.rewind(pre_op); - self.atom.go::(inp)? 
- } - }; - - loop { - let pre_op = inp.save(); - let (op, prec) = match self.ops.infix.go::(inp) { - Ok(PrattOpOutput(prec, build)) => { - if prec.strength_left().is_lt(&min_strength) { - inp.rewind(pre_op); - return Ok(left); - } - (build, prec) - } - Err(_) => { - inp.rewind(pre_op); - return Ok(left); - } - }; +// Allow `|rhs| ` to be used as a fold closure for prefix operators +prefix_op!(|f: Fn(O) -> O, _op, rhs, _span| f(rhs)); +// Allow `|op, rhs| ` to be used as a fold closure for prefix operators +prefix_op!(|f: Fn(Op, O) -> O, op, rhs, _span| f(op, rhs)); +// Allow `|op, rhs, span| ` to be used as a fold closure for prefix operators +prefix_op!(|f: Fn(Op, O, I::Span) -> O, op, rhs, span| f(op, rhs, span)); + +/// See [`postfix`]. +pub struct Postfix { + op_parser: A, + fold: F, + binding_power: u16, + #[allow(dead_code)] + phantom: EmptyPhantom<(Op, Args)>, +} - let right = self.pratt_parse::(inp, Some(prec.strength_right()))?; - left = M::combine(left, right, op); - } +/// Specify a unary postfix operator for a pratt parser with the given binding power and +/// [fold function](crate::pratt#fold-functions). +/// +/// Operators like factorial, field access, function composition, etc. are postfix unary operators in most languages. 
+/// +/// The fold function (the last argument) must have one of the following signatures: +/// +/// ```ignore +/// // Transform the operand +/// impl Fn(O) -> O +/// // Combine the operand and the operator itself +/// impl Fn(O, Op) -> O +/// // Combine the operand, the operator itself, and the span that covers the whole operation +/// impl Fn(Op, O, I::Span) -> O +/// ``` +pub const fn postfix( + binding_power: u16, + op_parser: A, + fold: F, +) -> Postfix { + Postfix { + op_parser, + fold, + binding_power, + phantom: EmptyPhantom::new(), } } -impl<'a, I, O, E, Atom, InfixOps, InfixOpsOut, PostfixOps, PostfixOpsOut> PrattParser<'a, I, O, E> - for Pratt> -where - I: Input<'a>, - E: ParserExtra<'a, I>, - Atom: Parser<'a, I, O, E>, - InfixOps: Parser<'a, I, PrattOpOutput>, E>, - PostfixOps: Parser<'a, I, PrattOpOutput>, E>, -{ - fn pratt_parse( - &self, - inp: &mut InputRef<'a, '_, I, E>, - min_strength: Option, - ) -> PResult - where - M: Mode, - { - let mut left = self.atom.go::(inp)?; - loop { - let pre_op = inp.save(); - match self.ops.postfix.go::(inp) { - Ok(PrattOpOutput(prec, build)) => { - if prec.strength_left().is_lt(&min_strength) { - inp.rewind(pre_op); - return Ok(left); - } - left = M::map(left, build); - continue; - } - Err(_) => { - inp.rewind(pre_op); - } - } +macro_rules! 
postfix_op { + (|$f:ident : Fn($($Arg:ty),*) -> O, $lhs:ident, $op:ident, $span:ident| $invoke:expr) => { + impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Postfix + where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, Op, E>, + F: Fn($($Arg),*) -> O, + { + type Op = Op; + type OpParser = A; + const IS_POSTFIX: bool = true; + fn op_parser(&self) -> &Self::OpParser { &self.op_parser } + fn associativity(&self) -> Associativity { Associativity::Left(self.binding_power) } + fn fold_postfix(&self, $lhs: O, $op: Self::Op, $span: I::Span) -> O { let $f = &self.fold; $invoke } + } + }; +} - let (op, prec) = match self.ops.infix.go::(inp) { - Ok(PrattOpOutput(prec, build)) => { - if prec.strength_left().is_lt(&min_strength) { - inp.rewind(pre_op); - return Ok(left); - } - (build, prec) - } - Err(_) => { - inp.rewind(pre_op); - return Ok(left); - } - }; +// Allow `|lhs| ` to be used as a fold closure for postfix operators +postfix_op!(|f: Fn(O) -> O, lhs, _op, _span| f(lhs)); +// Allow `|lhs, op| ` to be used as a fold closure for postfix operators +postfix_op!(|f: Fn(O, Op) -> O, lhs, op, _span| f(lhs, op)); +// Allow `|lhs, op, span| ` to be used as a fold closure for postfix operators +postfix_op!(|f: Fn(O, Op, I::Span) -> O, lhs, op, span| f(lhs, op, span)); - let right = self.pratt_parse::(inp, Some(prec.strength_right()))?; - left = M::combine(left, right, op); - } - } +/// See [`Parser::pratt`]. 
+pub struct Pratt { + pub(crate) atom: Atom, + pub(crate) ops: Ops, } -impl< - 'a, - I, - O, - E, - Atom, - InfixOps, - InfixOpsOut, - PrefixOps, - PrefixOpsOut, - PostfixOps, - PostfixOpsOut, - > PrattParser<'a, I, O, E> - for Pratt< - I, - O, - E, - Atom, - InfixPrefixPostfix< - InfixOps, - InfixOpsOut, - PrefixOps, - PrefixOpsOut, - PostfixOps, - PostfixOpsOut, - >, - > -where - I: Input<'a>, - E: ParserExtra<'a, I>, - Atom: Parser<'a, I, O, E>, - InfixOps: Parser<'a, I, PrattOpOutput>, E>, - PrefixOps: Parser<'a, I, PrattOpOutput>, E>, - PostfixOps: Parser<'a, I, PrattOpOutput>, E>, -{ - fn pratt_parse( - &self, - inp: &mut InputRef<'a, '_, I, E>, - min_strength: Option, - ) -> PResult - where - M: Mode, - { - let pre_op = inp.save(); - let mut left = match self.ops.prefix.go::(inp) { - Ok(PrattOpOutput(prec, build)) => { - let right = self.pratt_parse::(inp, Some(prec.strength_right()))?; - M::map(right, build) - } - Err(_) => { - inp.rewind(pre_op); - self.atom.go::(inp)? - } - }; - - loop { - let pre_op = inp.save(); - match self.ops.postfix.go::(inp) { - Ok(PrattOpOutput(prec, build)) => { - if prec.strength_left().is_lt(&min_strength) { - inp.rewind(pre_op); - return Ok(left); - } - left = M::map(left, build); - continue; - } - Err(_) => { - inp.rewind(pre_op); - } - } +macro_rules! 
impl_pratt_for_tuple { + () => {}; + ($head:ident $($X:ident)*) => { + impl_pratt_for_tuple!($($X)*); + impl_pratt_for_tuple!(~ $head $($X)*); + }; + (~ $($X:ident)+) => { + #[allow(unused_variables, non_snake_case)] + impl<'a, Atom, $($X),*> Pratt { + fn pratt_go(&self, inp: &mut InputRef<'a, '_, I, E>, min_power: u32) -> PResult + where + I: Input<'a>, + E: ParserExtra<'a, I>, + Atom: Parser<'a, I, O, E>, + $($X: Operator<'a, I, O, E>),* + { + let pre_expr = inp.save(); + let mut lhs = 'choice: { + let ($($X,)*) = &self.ops; + + // Prefix unary operators + $( + if $X::IS_PREFIX { + match $X.op_parser().go::(inp) { + Ok(op) => { + match self.pratt_go::(inp, $X.associativity().left_power()) { + Ok(rhs) => break 'choice M::combine(op, rhs, |op, rhs| { + let span = inp.span_since(pre_expr.offset()); + $X.fold_prefix(op, rhs, span) + }), + Err(()) => inp.rewind(pre_expr), + } + }, + Err(()) => inp.rewind(pre_expr), + } + } + )* + + self.atom.go::(inp)? + }; + + loop { + let ($($X,)*) = &self.ops; + + let pre_op = inp.save(); + + // Postfix unary operators + $( + let assoc = $X.associativity(); + if $X::IS_POSTFIX && assoc.right_power() >= min_power { + match $X.op_parser().go::(inp) { + Ok(op) => { + lhs = M::combine(lhs, op, |lhs, op| { + let span = inp.span_since(pre_expr.offset()); + $X.fold_postfix(lhs, op, span) + }); + continue + }, + Err(()) => inp.rewind(pre_op), + } + } + )* + + // Infix binary operators + $( + let assoc = $X.associativity(); + if $X::IS_INFIX && assoc.left_power() >= min_power { + match $X.op_parser().go::(inp) { + Ok(op) => match self.pratt_go::(inp, assoc.right_power()) { + Ok(rhs) => { + lhs = M::combine( + M::combine(lhs, rhs, |lhs, rhs| (lhs, rhs)), + op, + |(lhs, rhs), op| { + let span = inp.span_since(pre_expr.offset()); + $X.fold_infix(lhs, op, rhs, span) + }, + ); + continue + }, + Err(()) => inp.rewind(pre_op), + }, + Err(()) => inp.rewind(pre_op), + } + } + )* - let (op, prec) = match self.ops.infix.go::(inp) { - 
Ok(PrattOpOutput(prec, build)) => { - if prec.strength_left().is_lt(&min_strength) { - inp.rewind(pre_op); - return Ok(left); - } - (build, prec) - } - Err(_) => { inp.rewind(pre_op); - return Ok(left); + break; } - }; - let right = self.pratt_parse::(inp, Some(prec.strength_right()))?; - left = M::combine(left, right, op); + Ok(lhs) + } } - } -} -macro_rules! impl_parse { - ($Parser:ident < $($Gen:ident),+ $(,)?>) => { - impl<'a, I, O, E, Atom, $($Gen),+> ParserSealed<'a, I, O, E> for Pratt> + #[allow(unused_variables, non_snake_case)] + impl<'a, I, O, E, Atom, $($X),*> ParserSealed<'a, I, O, E> for Pratt where I: Input<'a>, E: ParserExtra<'a, I>, Atom: Parser<'a, I, O, E>, - Self: PrattParser<'a, I, O, E>, + $($X: Operator<'a, I, O, E>),* { - fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult - where - Self: Sized, - { - self.pratt_parse::(inp, None) + fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { + self.pratt_go::(inp, 0) } go_extra!(O); @@ -532,23 +469,47 @@ macro_rules! 
impl_parse { }; } -impl_parse!(Infix); +impl_pratt_for_tuple!(A_ B_ C_ D_ E_ F_ G_ H_ I_ J_ K_ L_ M_ N_ O_ P_ Q_ R_ S_ T_ U_ V_ W_ X_ Y_ Z_); -impl_parse!(InfixPrefix); +#[cfg(test)] +mod tests { + use super::*; + use crate::{extra::Err, prelude::*}; -impl_parse!(InfixPostfix); + fn factorial(x: i64) -> i64 { + if x == 0 { + 1 + } else { + x * factorial(x - 1) + } + } -impl_parse!(InfixPrefixPostfix); + fn parser<'a>() -> impl Parser<'a, &'a str, i64> { + let atom = text::int(10).padded().from_str::().unwrapped(); + + atom.pratt(( + prefix(2, just('-'), |x: i64| -x), + postfix(2, just('!'), factorial), + infix(left(0), just('+'), |l, r| l + r), + infix(left(0), just('-'), |l, r| l - r), + infix(left(1), just('*'), |l, r| l * r), + infix(left(1), just('/'), |l, _, r| l / r), + )) + } -#[cfg(test)] -mod tests { - use crate::error::Error; - use crate::extra::Err; - use crate::prelude::{choice, end, just, Simple, SimpleSpan}; - use crate::util::MaybeRef; - use crate::{text, ParseResult}; + #[test] + fn precedence() { + assert_eq!(parser().parse("2 + 3 * 4").into_result(), Ok(14)); + assert_eq!(parser().parse("2 * 3 + 4").into_result(), Ok(10)); + } - use super::*; + #[test] + fn unary() { + assert_eq!(parser().parse("-2").into_result(), Ok(-2)); + assert_eq!(parser().parse("4!").into_result(), Ok(24)); + assert_eq!(parser().parse("2 + 4!").into_result(), Ok(26)); + assert_eq!(parser().parse("-2 + 2").into_result(), Ok(0)); + } enum Expr { Literal(i64), @@ -580,21 +541,27 @@ mod tests { } } - fn parser<'a>() -> impl Parser<'a, &'a str, String, Err>> { - let atom = text::int(10).from_str().unwrapped().map(Expr::Literal); + fn u(e: fn(Box) -> Expr, r: Expr) -> Expr { + e(Box::new(r)) + } + fn i(e: fn(Box, Box) -> Expr, l: Expr, r: Expr) -> Expr { + e(Box::new(l), Box::new(r)) + } - let operator = choice(( - left_infix(just('+'), 0, |l, r| Expr::Add(Box::new(l), Box::new(r))), - left_infix(just('-'), 0, |l, r| Expr::Sub(Box::new(l), Box::new(r))), - right_infix(just('*'), 
1, |l, r| Expr::Mul(Box::new(l), Box::new(r))), - right_infix(just('/'), 1, |l, r| Expr::Div(Box::new(l), Box::new(r))), - )); + fn expr_parser<'a>() -> impl Parser<'a, &'a str, String, Err>> { + let atom = text::int(10).from_str().unwrapped().map(Expr::Literal); - atom.pratt(operator).map(|x| x.to_string()) + atom.pratt(( + infix(left(0), just('+'), |l, r| i(Expr::Add, l, r)), + infix(left(0), just('-'), |l, r| i(Expr::Sub, l, r)), + infix(right(1), just('*'), |l, r| i(Expr::Mul, l, r)), + infix(right(1), just('/'), |l, r| i(Expr::Div, l, r)), + )) + .map(|x| x.to_string()) } fn complete_parser<'a>() -> impl Parser<'a, &'a str, String, Err>> { - parser().then_ignore(end()) + expr_parser().then_ignore(end()) } fn parse(input: &str) -> ParseResult> { @@ -602,7 +569,7 @@ mod tests { } fn parse_partial(input: &str) -> ParseResult> { - parser().lazy().parse(input) + expr_parser().lazy().parse(input) } fn unexpected<'a, C: Into>>, S: Into>( @@ -666,24 +633,22 @@ mod tests { .unwrapped() .map(Expr::Literal); - let operator = choice(( - left_infix(just('+'), 0, |l, r| Expr::Add(Box::new(l), Box::new(r))), - left_infix(just('-'), 0, |l, r| Expr::Sub(Box::new(l), Box::new(r))), - right_infix(just('*'), 1, |l, r| Expr::Mul(Box::new(l), Box::new(r))), - right_infix(just('/'), 1, |l, r| Expr::Div(Box::new(l), Box::new(r))), - )); - let parser = atom - .pratt(operator) - .with_prefix_ops(choice(( + .pratt(( + // -- Prefix // Because we defined '*' and '/' as right associative operators, // in order to get these to function as expected, their strength // must be higher - prefix(just('-'), 2, |rhs| Expr::Negate(Box::new(rhs))), - prefix(just('~'), 2, |rhs| Expr::Not(Box::new(rhs))), + prefix(2, just('-'), |r| u(Expr::Negate, r)), + prefix(2, just('~'), |r| u(Expr::Not, r)), // This is what happens when not - prefix(just('§'), 1, |rhs| Expr::Confusion(Box::new(rhs))), - ))) + prefix(1, just('§'), |r| u(Expr::Confusion, r)), + // -- Infix + infix(left(0), just('+'), |l, r| 
i(Expr::Add, l, r)), + infix(left(0), just('-'), |l, r| i(Expr::Sub, l, r)), + infix(right(1), just('*'), |l, r| i(Expr::Mul, l, r)), + infix(right(1), just('/'), |l, r| i(Expr::Div, l, r)), + )) .map(|x| x.to_string()); assert_eq!( @@ -699,23 +664,21 @@ mod tests { .unwrapped() .map(Expr::Literal); - let operator = choice(( - left_infix(just('+'), 1, |l, r| Expr::Add(Box::new(l), Box::new(r))), - left_infix(just('-'), 1, |l, r| Expr::Sub(Box::new(l), Box::new(r))), - right_infix(just('*'), 2, |l, r| Expr::Mul(Box::new(l), Box::new(r))), - right_infix(just('/'), 2, |l, r| Expr::Div(Box::new(l), Box::new(r))), - )); - let parser = atom - .pratt(operator) - .with_postfix_ops(choice(( - // Because we defined '+' and '-' as left associative operators, + .pratt(( + // -- Postfix + // Because we defined '*' and '/' as right associative operators, // in order to get these to function as expected, their strength - // must be higher, i.e. they must bind tighter - postfix(just('!'), 2, |lhs| Expr::Factorial(Box::new(lhs))), - // Or weirdness happens - postfix(just('$'), 0, |lhs| Expr::Value(Box::new(lhs))), - ))) + // must be higher + postfix(2, just('!'), |l| u(Expr::Factorial, l)), + // This is what happens when not + postfix(0, just('$'), |l| u(Expr::Value, l)), + // -- Infix + infix(left(1), just('+'), |l, r| i(Expr::Add, l, r)), + infix(left(1), just('-'), |l, r| i(Expr::Sub, l, r)), + infix(right(2), just('*'), |l, r| i(Expr::Mul, l, r)), + infix(right(2), just('/'), |l, r| i(Expr::Div, l, r)), + )) .map(|x| x.to_string()); assert_eq!( @@ -731,34 +694,22 @@ mod tests { .unwrapped() .map(Expr::Literal); - let operator = choice(( - left_infix(just('+'), 1, |l, r| Expr::Add(Box::new(l), Box::new(r))), - left_infix(just('-'), 1, |l, r| Expr::Sub(Box::new(l), Box::new(r))), - right_infix(just('*'), 2, |l, r| Expr::Mul(Box::new(l), Box::new(r))), - right_infix(just('/'), 2, |l, r| Expr::Div(Box::new(l), Box::new(r))), - )); - let parser = atom - .pratt(operator) - 
.with_prefix_ops(choice(( - // Because we defined '*' and '/' as right associative operators, - // in order to get these to function as expected, their strength - // must be higher - prefix(just('-'), 4, |rhs| Expr::Negate(Box::new(rhs))), - prefix(just('~'), 4, |rhs| Expr::Not(Box::new(rhs))), - // This is what happens when not - prefix(just('§'), 1, |rhs| Expr::Confusion(Box::new(rhs))), - ))) - .with_postfix_ops(choice(( - // Because we defined '+' and '-' as left associative operators, - // in order to get these to function as expected, their strength - // must be higher, i.e. they must bind tighter - postfix(just('!'), 5, |lhs| Expr::Factorial(Box::new(lhs))), - // Or weirdness happens - postfix(just('$'), 0, |lhs| Expr::Value(Box::new(lhs))), - ))) + .pratt(( + // -- Prefix + prefix(4, just('-'), |r| u(Expr::Negate, r)), + prefix(4, just('~'), |r| u(Expr::Not, r)), + prefix(1, just('§'), |r| u(Expr::Confusion, r)), + // -- Postfix + postfix(5, just('!'), |l| u(Expr::Factorial, l)), + postfix(0, just('$'), |l| u(Expr::Value, l)), + // -- Infix + infix(left(1), just('+'), |l, r| i(Expr::Add, l, r)), + infix(left(1), just('-'), |l, r| i(Expr::Sub, l, r)), + infix(right(2), just('*'), |l, r| i(Expr::Mul, l, r)), + infix(right(2), just('/'), |l, r| i(Expr::Div, l, r)), + )) .map(|x| x.to_string()); - assert_eq!( parser.parse("§1+-~2!$*3").into_result(), Ok("(((§(1 + (-(~(2!)))))$) * 3)".to_string()), diff --git a/src/pratt/ops.rs b/src/pratt/ops.rs deleted file mode 100644 index e3abe137..00000000 --- a/src/pratt/ops.rs +++ /dev/null @@ -1,372 +0,0 @@ -use super::*; -use crate::EmptyPhantom; - -pub struct PrattOpOutput(pub(super) Precedence, pub(super) Builder); - -pub struct Infix { - pub(crate) infix: P, - pub(crate) _phantom: EmptyPhantom, -} - -pub struct InfixPrefix { - pub(crate) infix: P1, - pub(crate) prefix: P2, - pub(crate) _phantom: EmptyPhantom<(P1O, P2O)>, -} - -pub struct InfixPostfix { - pub(crate) infix: P1, - pub(crate) postfix: P2, - 
pub(crate) _phantom: EmptyPhantom<(P1O, P2O)>, -} - -pub struct InfixPrefixPostfix { - pub(crate) infix: P1, - pub(crate) prefix: P2, - pub(crate) postfix: P3, - pub(crate) _phantom: EmptyPhantom<(P1O, P2O, P3O)>, -} - -impl Clone for Infix -where - P: Clone, -{ - fn clone(&self) -> Self { - Self { - infix: self.infix.clone(), - _phantom: EmptyPhantom::new(), - } - } -} - -impl Clone for InfixPrefix -where - P1: Clone, - P2: Clone, -{ - fn clone(&self) -> Self { - Self { - infix: self.infix.clone(), - prefix: self.prefix.clone(), - _phantom: EmptyPhantom::new(), - } - } -} - -impl Clone for InfixPostfix -where - P1: Clone, - P2: Clone, -{ - fn clone(&self) -> Self { - Self { - infix: self.infix.clone(), - postfix: self.postfix.clone(), - _phantom: EmptyPhantom::new(), - } - } -} - -impl Clone for InfixPrefixPostfix -where - P1: Clone, - P2: Clone, - P3: Clone, -{ - fn clone(&self) -> Self { - Self { - infix: self.infix.clone(), - prefix: self.prefix.clone(), - postfix: self.postfix.clone(), - _phantom: EmptyPhantom::new(), - } - } -} - -/// A representation of an infix operator to be used in combination with -/// [`Parser::pratt`](super::Parser::pratt). -pub struct InfixOp { - strength: u8, - assoc: Assoc, - parser: P, - build: InfixBuilder, - _phantom: EmptyPhantom<(PO,)>, -} - -impl Clone for InfixOp { - fn clone(&self) -> Self { - Self { - strength: self.strength, - assoc: self.assoc, - parser: self.parser.clone(), - build: self.build, - _phantom: EmptyPhantom::new(), - } - } -} - -impl InfixOp { - /// Creates a left associative infix operator that is parsed with the - /// parser `P`, and a function which is used to `build` a value `E`. - /// The operator's precedence is determined by `strength`. The higher - /// the value, the higher the precedence. 
- pub fn new_left(parser: P, strength: u8, build: InfixBuilder) -> Self { - Self { - strength, - assoc: Assoc::Left, - parser, - build, - _phantom: EmptyPhantom::new(), - } - } - - /// Creates a right associative infix operator that is parsed with the - /// parser `P`, and a function which is used to `build` a value `E`. - /// The operator's precedence is determined by `strength`. The higher - /// the value, the higher the precedence. - pub fn new_right(parser: P, strength: u8, build: InfixBuilder) -> Self { - Self { - strength, - assoc: Assoc::Right, - parser, - build, - _phantom: EmptyPhantom::new(), - } - } -} - -impl<'a, P, Expr, I, O, E> ParserSealed<'a, I, PrattOpOutput>, E> - for InfixOp -where - I: Input<'a>, - E: ParserExtra<'a, I>, - P: Parser<'a, I, O, E>, -{ - fn go( - &self, - inp: &mut InputRef<'a, '_, I, E>, - ) -> PResult>> - where - Self: Sized, - { - match self.parser.go::(inp) { - Ok(()) => Ok(M::bind(|| { - PrattOpOutput(Precedence::new(self.strength, self.assoc), self.build) - })), - Err(()) => Err(()), - } - } - - go_extra!(PrattOpOutput>); -} - -/// A representation of a prefix operator to be used in combination with -/// [`Parser::pratt`](super::Parser::pratt). -pub struct PrefixOp { - strength: u8, - parser: Parser, - build: PrefixBuilder, - _phantom: EmptyPhantom<(ParserOut,)>, -} - -impl Clone for PrefixOp { - fn clone(&self) -> Self { - Self { - strength: self.strength, - parser: self.parser.clone(), - build: self.build, - _phantom: EmptyPhantom::new(), - } - } -} - -impl PrefixOp { - /// Creates a prefix operator (a right-associative unary operator) - /// that is parsed with the parser `P`, and a function which is used - /// to `build` a value `E`. The operator's precedence is determined - /// by `strength`. The higher the value, the higher the precedence. 
- pub fn new(parser: Parser, strength: u8, build: PrefixBuilder) -> Self { - Self { - strength, - parser, - build, - _phantom: EmptyPhantom::new(), - } - } -} - -impl<'a, P, Expr, I, O, E> ParserSealed<'a, I, PrattOpOutput>, E> - for PrefixOp -where - I: Input<'a>, - E: ParserExtra<'a, I>, - P: Parser<'a, I, O, E>, -{ - fn go( - &self, - inp: &mut InputRef<'a, '_, I, E>, - ) -> PResult>> - where - Self: Sized, - { - match self.parser.go::(inp) { - Ok(()) => Ok(M::bind(|| { - PrattOpOutput(Precedence::new(self.strength, Assoc::Right), self.build) - })), - Err(()) => Err(()), - } - } - - go_extra!(PrattOpOutput>); -} - -/// A representation of a postfix operator to be used in combination with -/// [`Parser::pratt`](super::Parser::pratt). -pub struct PostfixOp { - strength: u8, - parser: Parser, - build: PostfixBuilder, - _phantom: EmptyPhantom<(ParserOut,)>, -} - -impl Clone for PostfixOp { - fn clone(&self) -> Self { - Self { - strength: self.strength, - parser: self.parser.clone(), - build: self.build, - _phantom: EmptyPhantom::new(), - } - } -} - -impl PostfixOp { - /// Creates a postfix operator (a left-associative unary operator) - /// that is parsed with the parser `P`, and a function which is used - /// to `build` a value `E`. The operator's precedence is determined - /// by `strength`. The higher the value, the higher the precedence. 
- pub fn new(parser: Parser, strength: u8, build: PostfixBuilder) -> Self { - Self { - strength, - parser, - build, - _phantom: EmptyPhantom::new(), - } - } -} - -impl<'a, P, Expr, I, O, E> ParserSealed<'a, I, PrattOpOutput>, E> - for PostfixOp -where - I: Input<'a>, - E: ParserExtra<'a, I>, - P: Parser<'a, I, O, E>, -{ - fn go( - &self, - inp: &mut InputRef<'a, '_, I, E>, - ) -> PResult>> - where - Self: Sized, - { - match self.parser.go::(inp) { - Ok(()) => Ok(M::bind(|| { - PrattOpOutput(Precedence::new(self.strength, Assoc::Right), self.build) - })), - Err(()) => Err(()), - } - } - - go_extra!(PrattOpOutput>); -} - -/// Indicates which argument binds more strongly with a binary infix operator. -#[derive(Clone, Copy, PartialEq, Eq, Debug)] -pub(super) enum Assoc { - /// The operator binds more strongly with the argument to the left. - /// - /// For example `a + b + c` is parsed as `(a + b) + c`. - Left, - - /// The operator binds more strongly with the argument to the right. - /// - /// For example `a + b + c` is parsed as `a + (b + c)`. - Right, -} - -/// Indicates the binding strength of an operator to an argument. -#[derive(Clone, Copy, PartialEq, Eq, Debug)] -pub enum Strength { - /// This is the strongly associated side of the operator. - Strong(u8), - - /// This is the weakly associated side of the operator. - Weak(u8), -} - -impl Strength { - /// Get the binding strength, ignoring associativity. - pub fn strength(&self) -> &u8 { - match self { - Self::Strong(strength) => strength, - Self::Weak(strength) => strength, - } - } - - /// Compare two strengths. - /// - /// `None` is considered less strong than any `Some(Strength)`, - /// as it's used to indicate the lack of an operator - /// to the left of the first expression and cannot bind. 
- pub fn is_lt(&self, other: &Option) -> bool { - match (self, other) { - (x, Some(y)) => x < y, - (_, None) => false, - } - } -} - -impl PartialOrd for Strength { - fn partial_cmp(&self, other: &Self) -> Option { - match self.strength().partial_cmp(other.strength()) { - Some(Ordering::Equal) => match (self, other) { - (Self::Strong(_), Self::Weak(_)) => Some(cmp::Ordering::Greater), - (Self::Weak(_), Self::Strong(_)) => Some(cmp::Ordering::Less), - _ => Some(cmp::Ordering::Equal), - }, - ord => ord, - } - } -} - -#[derive(Clone, Copy, PartialEq, Eq, Debug)] -pub(super) struct Precedence { - strength: u8, - associativity: Assoc, -} - -impl Precedence { - /// Create a new precedence value. - pub fn new(strength: u8, associativity: Assoc) -> Self { - Self { - strength, - associativity, - } - } - - /// Get the binding power of this operator with an argument on the left. - pub fn strength_left(&self) -> Strength { - match self.associativity { - Assoc::Left => Strength::Weak(self.strength), - Assoc::Right => Strength::Strong(self.strength), - } - } - - /// Get the binding power of this operator with an argument on the right. 
- pub fn strength_right(&self) -> Strength { - match self.associativity { - Assoc::Left => Strength::Strong(self.strength), - Assoc::Right => Strength::Weak(self.strength), - } - } -} diff --git a/src/pratt2.rs b/src/pratt2.rs deleted file mode 100644 index fed9102f..00000000 --- a/src/pratt2.rs +++ /dev/null @@ -1,562 +0,0 @@ -#![allow(missing_docs)] - -use super::*; - -trait Operator<'a, I, O, E> -where - I: Input<'a>, - E: ParserExtra<'a, I>, -{ - type Op; - type OpParser: Parser<'a, I, Self::Op, E>; - const INFIX: bool = false; - const PREFIX: bool = false; - const POSTFIX: bool = false; - - fn op_parser(&self) -> &Self::OpParser; - fn associativity(&self) -> Associativity; - fn fold_infix(&self, _lhs: O, _op: Self::Op, _rhs: O, _span: I::Span) -> O { - unreachable!() - } - fn fold_prefix(&self, _op: Self::Op, _rhs: O, _span: I::Span) -> O { - unreachable!() - } - fn fold_postfix(&self, _lhs: O, _op: Self::Op, _span: I::Span) -> O { - unreachable!() - } -} - -#[derive(Copy, Clone, PartialEq)] -pub enum Associativity { - Left(u16), - Right(u16), -} - -impl Associativity { - fn left_power(&self) -> u16 { - match self { - Self::Left(x) => x * 2, - Self::Right(x) => x * 2 + 1, - } - } - - fn right_power(&self) -> u16 { - match self { - Self::Left(x) => x * 2 + 1, - Self::Right(x) => x * 2, - } - } -} - -pub struct Infix { - op_parser: A, - f: F, - associativity: Associativity, - #[allow(dead_code)] - phantom: EmptyPhantom<(Op, Args)>, -} - -pub fn left(binding_power: u16) -> Associativity { - Associativity::Left(binding_power) -} -pub fn right(binding_power: u16) -> Associativity { - Associativity::Right(binding_power) -} - -pub const fn infix( - associativity: Associativity, - op_parser: A, - f: F, -) -> Infix { - Infix { - op_parser, - f, - associativity, - phantom: EmptyPhantom::new(), - } -} - -macro_rules! 
infix_op { - (|$f:ident : Fn($($Arg:ty),*) -> O, $lhs:ident, $op:ident, $rhs:ident, $span:ident| $invoke:expr) => { - impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Infix - where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, Op, E>, - F: Fn($($Arg),*) -> O, - { - type Op = Op; - type OpParser = A; - const INFIX: bool = true; - fn op_parser(&self) -> &Self::OpParser { &self.op_parser } - fn associativity(&self) -> Associativity { self.associativity } - fn fold_infix(&self, $lhs: O, $op: Self::Op, $rhs: O, $span: I::Span) -> O { let $f = &self.f; $invoke } - } - }; -} - -// Allow `|lhs, rhs| ` to be used as a fold closure for infix operators -infix_op!(|f: Fn(O, O) -> O, lhs, _op, rhs, _span| f(lhs, rhs)); -// Allow `|lhs, op, rhs| ` to be used as a fold closure for infix operators -infix_op!(|f: Fn(O, Op, O) -> O, lhs, op, rhs, _span| f(lhs, op, rhs)); -// Allow `|lhs, op, rhs, span| ` to be used as a fold closure for infix operators -infix_op!(|f: Fn(O, Op, O, I::Span) -> O, lhs, op, rhs, span| f(lhs, op, rhs, span)); - -pub struct Prefix { - op_parser: A, - f: F, - binding_power: u16, - #[allow(dead_code)] - phantom: EmptyPhantom<(Op, Args)>, -} - -pub const fn prefix( - binding_power: u16, - op_parser: A, - f: F, -) -> Prefix { - Prefix { - op_parser, - f, - binding_power, - phantom: EmptyPhantom::new(), - } -} - -macro_rules! 
prefix_op { - (|$f:ident : Fn($($Arg:ty),*) -> O, $op:ident, $rhs:ident, $span:ident| $invoke:expr) => { - impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Prefix - where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, Op, E>, - F: Fn($($Arg),*) -> O, - { - type Op = Op; - type OpParser = A; - const PREFIX: bool = true; - fn op_parser(&self) -> &Self::OpParser { &self.op_parser } - fn associativity(&self) -> Associativity { Associativity::Left(self.binding_power) } - fn fold_prefix(&self, $op: Self::Op, $rhs: O, $span: I::Span) -> O { let $f = &self.f; $invoke } - } - }; -} - -// Allow `|rhs| ` to be used as a fold closure for prefix operators -prefix_op!(|f: Fn(O) -> O, _op, rhs, _span| f(rhs)); -// Allow `|op, rhs| ` to be used as a fold closure for prefix operators -prefix_op!(|f: Fn(Op, O) -> O, op, rhs, _span| f(op, rhs)); -// Allow `|op, rhs, span| ` to be used as a fold closure for prefix operators -prefix_op!(|f: Fn(Op, O, I::Span) -> O, op, rhs, span| f(op, rhs, span)); - -pub struct Postfix { - op_parser: A, - f: F, - binding_power: u16, - #[allow(dead_code)] - phantom: EmptyPhantom<(Op, Args)>, -} - -pub const fn postfix( - binding_power: u16, - op_parser: A, - f: F, -) -> Postfix { - Postfix { - op_parser, - f, - binding_power, - phantom: EmptyPhantom::new(), - } -} - -macro_rules! 
postfix_op { - (|$f:ident : Fn($($Arg:ty),*) -> O, $lhs:ident, $op:ident, $span:ident| $invoke:expr) => { - impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Postfix - where - I: Input<'a>, - E: ParserExtra<'a, I>, - A: Parser<'a, I, Op, E>, - F: Fn($($Arg),*) -> O, - { - type Op = Op; - type OpParser = A; - const POSTFIX: bool = true; - fn op_parser(&self) -> &Self::OpParser { &self.op_parser } - fn associativity(&self) -> Associativity { Associativity::Left(self.binding_power) } - fn fold_postfix(&self, $lhs: O, $op: Self::Op, $span: I::Span) -> O { let $f = &self.f; $invoke } - } - }; -} - -// Allow `|lhs| ` to be used as a fold closure for postfix operators -postfix_op!(|f: Fn(O) -> O, lhs, _op, _span| f(lhs)); -// Allow `|lhs, op| ` to be used as a fold closure for postfix operators -postfix_op!(|f: Fn(O, Op) -> O, lhs, op, _span| f(lhs, op)); -// Allow `|lhs, op, span| ` to be used as a fold closure for postfix operators -postfix_op!(|f: Fn(O, Op, I::Span) -> O, lhs, op, span| f(lhs, op, span)); - -pub struct Pratt { - pub(crate) atom: Atom, - pub(crate) ops: Ops, -} - -macro_rules! 
impl_pratt_for_tuple { - () => {}; - ($head:ident $($X:ident)*) => { - impl_pratt_for_tuple!($($X)*); - impl_pratt_for_tuple!(~ $head $($X)*); - }; - (~ $($X:ident)+) => { - #[allow(unused_variables, non_snake_case)] - impl<'a, Atom, $($X),*> Pratt { - fn pratt_go(&self, inp: &mut InputRef<'a, '_, I, E>, min_power: u16) -> PResult - where - I: Input<'a>, - E: ParserExtra<'a, I>, - Atom: Parser<'a, I, O, E>, - $($X: Operator<'a, I, O, E>),* - { - let pre_expr = inp.save(); - let mut lhs = 'choice: { - let ($($X,)*) = &self.ops; - - // Prefix unary operators - $( - if $X::PREFIX { - match $X.op_parser().go::(inp) { - Ok(op) => { - match self.pratt_go::(inp, $X.associativity().left_power()) { - Ok(rhs) => break 'choice M::combine(op, rhs, |op, rhs| { - let span = inp.span_since(pre_expr.offset()); - $X.fold_prefix(op, rhs, span) - }), - Err(()) => inp.rewind(pre_expr), - } - }, - Err(()) => inp.rewind(pre_expr), - } - } - )* - - self.atom.go::(inp)? - }; - - loop { - let ($($X,)*) = &self.ops; - - let pre_op = inp.save(); - - // Postfix unary operators - $( - let assoc = $X.associativity(); - if $X::POSTFIX && assoc.right_power() >= min_power { - match $X.op_parser().go::(inp) { - Ok(op) => { - lhs = M::combine(lhs, op, |lhs, op| { - let span = inp.span_since(pre_expr.offset()); - $X.fold_postfix(lhs, op, span) - }); - continue - }, - Err(()) => inp.rewind(pre_op), - } - } - )* - - // Infix binary operators - $( - let assoc = $X.associativity(); - if $X::INFIX && assoc.left_power() >= min_power { - match $X.op_parser().go::(inp) { - Ok(op) => match self.pratt_go::(inp, assoc.right_power()) { - Ok(rhs) => { - lhs = M::combine( - M::combine(lhs, rhs, |lhs, rhs| (lhs, rhs)), - op, - |(lhs, rhs), op| { - let span = inp.span_since(pre_expr.offset()); - $X.fold_infix(lhs, op, rhs, span) - }, - ); - continue - }, - Err(()) => inp.rewind(pre_op), - }, - Err(()) => inp.rewind(pre_op), - } - } - )* - - inp.rewind(pre_op); - break; - } - - Ok(lhs) - } - } - - 
#[allow(unused_variables, non_snake_case)] - impl<'a, I, O, E, Atom, $($X),*> ParserSealed<'a, I, O, E> for Pratt - where - I: Input<'a>, - E: ParserExtra<'a, I>, - Atom: Parser<'a, I, O, E>, - $($X: Operator<'a, I, O, E>),* - { - fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - self.pratt_go::(inp, 0) - } - - go_extra!(O); - } - }; -} - -impl_pratt_for_tuple!(A_ B_ C_ D_ E_ F_ G_ H_ I_ J_ K_ L_ M_ N_ O_ P_ Q_ R_ S_ T_ U_ V_ W_ X_ Y_ Z_); - -#[cfg(test)] -mod tests { - use super::*; - use crate::{extra::Err, prelude::*}; - - fn factorial(x: i64) -> i64 { - if x == 0 { - 1 - } else { - x * factorial(x - 1) - } - } - - fn parser<'a>() -> impl Parser<'a, &'a str, i64> { - let atom = text::int(10).padded().from_str::().unwrapped(); - - atom.pratt2(( - prefix(2, just('-'), |x: i64| -x), - postfix(2, just('!'), factorial), - infix(left(0), just('+'), |l, r| l + r), - infix(left(0), just('-'), |l, r| l - r), - infix(left(1), just('*'), |l, r| l * r), - infix(left(1), just('/'), |l, _, r| l / r), - )) - } - - #[test] - fn precedence() { - assert_eq!(parser().parse("2 + 3 * 4").into_result(), Ok(14)); - assert_eq!(parser().parse("2 * 3 + 4").into_result(), Ok(10)); - } - - #[test] - fn unary() { - assert_eq!(parser().parse("-2").into_result(), Ok(-2)); - assert_eq!(parser().parse("4!").into_result(), Ok(24)); - assert_eq!(parser().parse("2 + 4!").into_result(), Ok(26)); - assert_eq!(parser().parse("-2 + 2").into_result(), Ok(0)); - } - - enum Expr { - Literal(i64), - Not(Box), - Negate(Box), - Confusion(Box), - Factorial(Box), - Value(Box), - Add(Box, Box), - Sub(Box, Box), - Mul(Box, Box), - Div(Box, Box), - } - - impl std::fmt::Display for Expr { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Literal(literal) => write!(f, "{literal}"), - Self::Not(right) => write!(f, "(~{right})"), - Self::Negate(right) => write!(f, "(-{right})"), - Self::Confusion(right) => write!(f, "(§{right})"), - Self::Factorial(right) =>
write!(f, "({right}!)"), - Self::Value(right) => write!(f, "({right}$)"), - Self::Add(left, right) => write!(f, "({left} + {right})"), - Self::Sub(left, right) => write!(f, "({left} - {right})"), - Self::Mul(left, right) => write!(f, "({left} * {right})"), - Self::Div(left, right) => write!(f, "({left} / {right})"), - } - } - } - - fn u(e: fn(Box) -> Expr, r: Expr) -> Expr { - e(Box::new(r)) - } - fn i(e: fn(Box, Box) -> Expr, l: Expr, r: Expr) -> Expr { - e(Box::new(l), Box::new(r)) - } - - fn expr_parser<'a>() -> impl Parser<'a, &'a str, String, Err>> { - let atom = text::int(10).from_str().unwrapped().map(Expr::Literal); - - atom.pratt2(( - infix(left(0), just('+'), |l, r| i(Expr::Add, l, r)), - infix(left(0), just('-'), |l, r| i(Expr::Sub, l, r)), - infix(right(1), just('*'), |l, r| i(Expr::Mul, l, r)), - infix(right(1), just('/'), |l, r| i(Expr::Div, l, r)), - )) - .map(|x| x.to_string()) - } - - fn complete_parser<'a>() -> impl Parser<'a, &'a str, String, Err>> { - expr_parser().then_ignore(end()) - } - - fn parse(input: &str) -> ParseResult> { - complete_parser().parse(input) - } - - fn parse_partial(input: &str) -> ParseResult> { - expr_parser().lazy().parse(input) - } - - fn unexpected<'a, C: Into>>, S: Into>( - c: C, - span: S, - ) -> Simple<'a, char> { - as Error<'_, &'_ str>>::expected_found(None, c.into(), span.into()) - } - - #[test] - fn missing_first_expression() { - assert_eq!(parse("").into_result(), Err(vec![unexpected(None, 0..0)])) - } - - #[test] - fn missing_later_expression() { - assert_eq!(parse("1+").into_result(), Err(vec![unexpected(None, 2..2)]),); - } - - #[test] - fn invalid_first_expression() { - assert_eq!( - parse("?").into_result(), - Err(vec![unexpected(Some('?'.into()), 0..1)]), - ); - } - - #[test] - fn invalid_later_expression() { - assert_eq!( - parse("1+?").into_result(), - Err(vec![dbg!(unexpected(Some('?'.into()), 2..3))]), - ); - } - - #[test] - fn invalid_operator() { - assert_eq!( - parse("1?").into_result(), - 
Err(vec![unexpected(Some('?'.into()), 1..2)]), - ); - } - - #[test] - fn invalid_operator_incomplete() { - assert_eq!(parse_partial("1?").into_result(), Ok("1".to_string()),); - } - - #[test] - fn complex_nesting() { - assert_eq!( - parse_partial("1+2*3/4*5-6*7+8-9+10").into_result(), - Ok("(((((1 + (2 * (3 / (4 * 5)))) - (6 * 7)) + 8) - 9) + 10)".to_string()), - ); - } - - #[test] - fn with_prefix_ops() { - let atom = text::int::<_, _, Err>>(10) - .from_str() - .unwrapped() - .map(Expr::Literal); - - let parser = atom - .pratt2(( - // -- Prefix - // Because we defined '*' and '/' as right associative operators, - // in order to get these to function as expected, their strength - // must be higher - prefix(2, just('-'), |r| u(Expr::Negate, r)), - prefix(2, just('~'), |r| u(Expr::Not, r)), - // This is what happens when not - prefix(1, just('§'), |r| u(Expr::Confusion, r)), - // -- Infix - infix(left(0), just('+'), |l, r| i(Expr::Add, l, r)), - infix(left(0), just('-'), |l, r| i(Expr::Sub, l, r)), - infix(right(1), just('*'), |l, r| i(Expr::Mul, l, r)), - infix(right(1), just('/'), |l, r| i(Expr::Div, l, r)), - )) - .map(|x| x.to_string()); - - assert_eq!( - parser.parse("-1+§~2*3").into_result(), - Ok("((-1) + (§((~2) * 3)))".to_string()), - ) - } - - #[test] - fn with_postfix_ops() { - let atom = text::int::<_, _, Err>>(10) - .from_str() - .unwrapped() - .map(Expr::Literal); - - let parser = atom - .pratt2(( - // -- Postfix - // Because we defined '*' and '/' as right associative operators, - // in order to get these to function as expected, their strength - // must be higher - postfix(2, just('!'), |l| u(Expr::Factorial, l)), - // This is what happens when not - postfix(0, just('$'), |l| u(Expr::Value, l)), - // -- Infix - infix(left(1), just('+'), |l, r| i(Expr::Add, l, r)), - infix(left(1), just('-'), |l, r| i(Expr::Sub, l, r)), - infix(right(2), just('*'), |l, r| i(Expr::Mul, l, r)), - infix(right(2), just('/'), |l, r| i(Expr::Div, l, r)), - )) - .map(|x|
x.to_string()); - - assert_eq!( - parser.parse("1+2!$*3").into_result(), - Ok("(((1 + (2!))$) * 3)".to_string()), - ) - } - - #[test] - fn with_pre_and_postfix_ops() { - let atom = text::int::<_, _, Err>>(10) - .from_str() - .unwrapped() - .map(Expr::Literal); - - let parser = atom - .pratt2(( - // -- Prefix - prefix(4, just('-'), |r| u(Expr::Negate, r)), - prefix(4, just('~'), |r| u(Expr::Not, r)), - prefix(1, just('§'), |r| u(Expr::Confusion, r)), - // -- Postfix - postfix(5, just('!'), |l| u(Expr::Factorial, l)), - postfix(0, just('$'), |l| u(Expr::Value, l)), - // -- Infix - infix(left(1), just('+'), |l, r| i(Expr::Add, l, r)), - infix(left(1), just('-'), |l, r| i(Expr::Sub, l, r)), - infix(right(2), just('*'), |l, r| i(Expr::Mul, l, r)), - infix(right(2), just('/'), |l, r| i(Expr::Div, l, r)), - )) - .map(|x| x.to_string()); - assert_eq!( - parser.parse("§1+-~2!$*3").into_result(), - Ok("(((§(1 + (-(~(2!)))))$) * 3)".to_string()), - ) - } -} From 6632c1e4647480107364199a7d9f839a5ca2cc3c Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Wed, 20 Sep 2023 12:52:53 +0100 Subject: [PATCH 35/64] Use stacker recursion for pratt parser, added Copy/Clone impls --- src/pratt.rs | 74 +++++++++++++++++++++++++++++++++++++++++------- src/recursive.rs | 4 +-- 2 files changed, 65 insertions(+), 13 deletions(-) diff --git a/src/pratt.rs b/src/pratt.rs index e4fbcd3e..9756d74d 100644 --- a/src/pratt.rs +++ b/src/pratt.rs @@ -168,6 +168,18 @@ pub struct Infix { phantom: EmptyPhantom<(Op, Args)>, } +impl Copy for Infix {} +impl Clone for Infix { + fn clone(&self) -> Self { + Self { + op_parser: self.op_parser.clone(), + fold: self.fold.clone(), + associativity: self.associativity, + phantom: EmptyPhantom::new(), + } + } +} + /// Specify a binary infix operator for a pratt parser with the given associativity, binding power, and /// [fold function](crate::pratt#fold-functions). /// @@ -211,9 +223,9 @@ macro_rules!
infix_op { type Op = Op; type OpParser = A; const IS_INFIX: bool = true; - fn op_parser(&self) -> &Self::OpParser { &self.op_parser } - fn associativity(&self) -> Associativity { self.associativity } - fn fold_infix(&self, $lhs: O, $op: Self::Op, $rhs: O, $span: I::Span) -> O { let $f = &self.fold; $invoke } + #[inline(always)] fn op_parser(&self) -> &Self::OpParser { &self.op_parser } + #[inline(always)] fn associativity(&self) -> Associativity { self.associativity } + #[inline(always)] fn fold_infix(&self, $lhs: O, $op: Self::Op, $rhs: O, $span: I::Span) -> O { let $f = &self.fold; $invoke } } }; } @@ -234,6 +246,18 @@ pub struct Prefix { phantom: EmptyPhantom<(Op, Args)>, } +impl Copy for Prefix {} +impl Clone for Prefix { + fn clone(&self) -> Self { + Self { + op_parser: self.op_parser.clone(), + fold: self.fold.clone(), + binding_power: self.binding_power, + phantom: EmptyPhantom::new(), + } + } +} + /// Specify a unary prefix operator for a pratt parser with the given binding power and /// [fold function](crate::pratt#fold-functions). /// @@ -274,9 +298,9 @@ macro_rules! 
prefix_op { type Op = Op; type OpParser = A; const IS_PREFIX: bool = true; - fn op_parser(&self) -> &Self::OpParser { &self.op_parser } - fn associativity(&self) -> Associativity { Associativity::Left(self.binding_power) } - fn fold_prefix(&self, $op: Self::Op, $rhs: O, $span: I::Span) -> O { let $f = &self.fold; $invoke } + #[inline(always)] fn op_parser(&self) -> &Self::OpParser { &self.op_parser } + #[inline(always)] fn associativity(&self) -> Associativity { Associativity::Left(self.binding_power) } + #[inline(always)] fn fold_prefix(&self, $op: Self::Op, $rhs: O, $span: I::Span) -> O { let $f = &self.fold; $invoke } } }; } @@ -297,6 +321,18 @@ pub struct Postfix { phantom: EmptyPhantom<(Op, Args)>, } +impl Copy for Postfix {} +impl Clone for Postfix { + fn clone(&self) -> Self { + Self { + op_parser: self.op_parser.clone(), + fold: self.fold.clone(), + binding_power: self.binding_power, + phantom: EmptyPhantom::new(), + } + } +} + /// Specify a unary postfix operator for a pratt parser with the given binding power and /// [fold function](crate::pratt#fold-functions). /// @@ -337,9 +373,9 @@ macro_rules! postfix_op { type Op = Op; type OpParser = A; const IS_POSTFIX: bool = true; - fn op_parser(&self) -> &Self::OpParser { &self.op_parser } - fn associativity(&self) -> Associativity { Associativity::Left(self.binding_power) } - fn fold_postfix(&self, $lhs: O, $op: Self::Op, $span: I::Span) -> O { let $f = &self.fold; $invoke } + #[inline(always)] fn op_parser(&self) -> &Self::OpParser { &self.op_parser } + #[inline(always)] fn associativity(&self) -> Associativity { Associativity::Left(self.binding_power) } + #[inline(always)] fn fold_postfix(&self, $lhs: O, $op: Self::Op, $span: I::Span) -> O { let $f = &self.fold; $invoke } } }; } @@ -352,6 +388,7 @@ postfix_op!(|f: Fn(O, Op) -> O, lhs, op, _span| f(lhs, op)); postfix_op!(|f: Fn(O, Op, I::Span) -> O, lhs, op, span| f(lhs, op, span)); /// See [`Parser::pratt`]. 
+#[derive(Copy, Clone)] pub struct Pratt { pub(crate) atom: Atom, pub(crate) ops: Ops, @@ -366,6 +403,7 @@ macro_rules! impl_pratt_for_tuple { (~ $($X:ident)+) => { #[allow(unused_variables, non_snake_case)] impl<'a, Atom, $($X),*> Pratt { + #[inline] fn pratt_go(&self, inp: &mut InputRef<'a, '_, I, E>, min_power: u32) -> PResult where I: Input<'a>, @@ -382,7 +420,7 @@ macro_rules! impl_pratt_for_tuple { if $X::IS_PREFIX { match $X.op_parser().go::(inp) { Ok(op) => { - match self.pratt_go::(inp, $X.associativity().left_power()) { + match recursive::recurse(|| self.pratt_go::(inp, $X.associativity().left_power())) { Ok(rhs) => break 'choice M::combine(op, rhs, |op, rhs| { let span = inp.span_since(pre_expr.offset()); $X.fold_prefix(op, rhs, span) @@ -425,7 +463,7 @@ macro_rules! impl_pratt_for_tuple { let assoc = $X.associativity(); if $X::IS_INFIX && assoc.left_power() >= min_power { match $X.op_parser().go::(inp) { - Ok(op) => match self.pratt_go::(inp, assoc.right_power()) { + Ok(op) => match recursive::recurse(|| self.pratt_go::(inp, assoc.right_power())) { Ok(rhs) => { lhs = M::combine( M::combine(lhs, rhs, |lhs, rhs| (lhs, rhs)), @@ -511,6 +549,20 @@ mod tests { assert_eq!(parser().parse("-2 + 2").into_result(), Ok(0)); } + // TODO: Make this work + // fn parser_dynamic<'a>() -> impl Parser<'a, &'a str, i64> { + // let atom = text::int(10).padded().from_str::().unwrapped(); + + // atom.pratt(vec![ + // prefix(2, just('-'), |x: i64| -x).into(), + // postfix(2, just('!'), factorial).into(), + // infix(left(0), just('+'), |l, r| l + r).into(), + // infix(left(0), just('-'), |l, r| l - r).into(), + // infix(left(1), just('*'), |l, r| l * r).into(), + // infix(left(1), just('/'), |l, _, r| l / r).into(), + // ]) + // } + enum Expr { Literal(i64), Not(Box), diff --git a/src/recursive.rs b/src/recursive.rs index b494936a..6684ceaf 100644 --- a/src/recursive.rs +++ b/src/recursive.rs @@ -170,12 +170,12 @@ impl Clone for Recursive

{ #[cfg(feature = "stacker")] #[inline] -fn recurse R>(f: F) -> R { +pub(crate) fn recurse R>(f: F) -> R { stacker::maybe_grow(1024 * 64, 1024 * 1024, f) } #[cfg(not(feature = "stacker"))] #[inline] -fn recurse R>(f: F) -> R { +pub(crate) fn recurse R>(f: F) -> R { f() } From bee67ce0bf4e4384ea1c88c1723751a7b022684e Mon Sep 17 00:00:00 2001 From: zyansheep Date: Sun, 8 Oct 2023 19:12:55 -0400 Subject: [PATCH 36/64] add flake stuff --- .envrc | 1 + flake.lock | 288 ++++++++++++++++++++++++++++++++++++++++++++ flake.nix | 40 ++++++ rust-toolchain.toml | 3 + 4 files changed, 332 insertions(+) create mode 100644 .envrc create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 rust-toolchain.toml diff --git a/.envrc b/.envrc new file mode 100644 index 00000000..8392d159 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake \ No newline at end of file diff --git a/flake.lock b/flake.lock new file mode 100644 index 00000000..e88a9073 --- /dev/null +++ b/flake.lock @@ -0,0 +1,288 @@ +{ + "nodes": { + "crane": { + "flake": false, + "locked": { + "lastModified": 1681175776, + "narHash": "sha256-7SsUy9114fryHAZ8p1L6G6YSu7jjz55FddEwa2U8XZc=", + "owner": "ipetkov", + "repo": "crane", + "rev": "445a3d222947632b5593112bb817850e8a9cf737", + "type": "github" + }, + "original": { + "owner": "ipetkov", + "ref": "v0.12.1", + "repo": "crane", + "type": "github" + } + }, + "dream2nix": { + "inputs": { + "all-cabal-json": [ + "nci" + ], + "crane": "crane", + "devshell": [ + "nci" + ], + "drv-parts": "drv-parts", + "flake-compat": "flake-compat", + "flake-parts": [ + "nci", + "parts" + ], + "flake-utils-pre-commit": [ + "nci" + ], + "ghc-utils": [ + "nci" + ], + "gomod2nix": [ + "nci" + ], + "mach-nix": [ + "nci" + ], + "nix-pypi-fetcher": [ + "nci" + ], + "nixpkgs": [ + "nci", + "nixpkgs" + ], + "nixpkgsV1": "nixpkgsV1", + "poetry2nix": [ + "nci" + ], + "pre-commit-hooks": [ + "nci" + ], + "pruned-racket-catalog": [ + "nci" + ] + }, + "locked": { + "lastModified": 
1690660611, + "narHash": "sha256-nfDb1koAB/bD2pzENgVe+q4lwi9tgwR772dZgaGR4Io=", + "owner": "nix-community", + "repo": "dream2nix", + "rev": "ce7b3975b63062b9e440e48a75a5c12253231af5", + "type": "github" + }, + "original": { + "owner": "nix-community", + "ref": "legacy", + "repo": "dream2nix", + "type": "github" + } + }, + "drv-parts": { + "inputs": { + "flake-compat": [ + "nci", + "dream2nix", + "flake-compat" + ], + "flake-parts": [ + "nci", + "dream2nix", + "flake-parts" + ], + "nixpkgs": [ + "nci", + "dream2nix", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1680698112, + "narHash": "sha256-FgnobN/DvCjEsc0UAZEAdPLkL4IZi2ZMnu2K2bUaElc=", + "owner": "davhau", + "repo": "drv-parts", + "rev": "e8c2ec1157dc1edb002989669a0dbd935f430201", + "type": "github" + }, + "original": { + "owner": "davhau", + "repo": "drv-parts", + "type": "github" + } + }, + "flake-compat": { + "flake": false, + "locked": { + "lastModified": 1673956053, + "narHash": "sha256-4gtG9iQuiKITOjNQQeQIpoIB6b16fm+504Ch3sNKLd8=", + "owner": "edolstra", + "repo": "flake-compat", + "rev": "35bb57c0c8d8b62bbfd284272c928ceb64ddbde9", + "type": "github" + }, + "original": { + "owner": "edolstra", + "repo": "flake-compat", + "type": "github" + } + }, + "mk-naked-shell": { + "flake": false, + "locked": { + "lastModified": 1681286841, + "narHash": "sha256-3XlJrwlR0nBiREnuogoa5i1b4+w/XPe0z8bbrJASw0g=", + "owner": "yusdacra", + "repo": "mk-naked-shell", + "rev": "7612f828dd6f22b7fb332cc69440e839d7ffe6bd", + "type": "github" + }, + "original": { + "owner": "yusdacra", + "repo": "mk-naked-shell", + "type": "github" + } + }, + "nci": { + "inputs": { + "dream2nix": "dream2nix", + "mk-naked-shell": "mk-naked-shell", + "nixpkgs": [ + "nixpkgs" + ], + "parts": "parts", + "rust-overlay": "rust-overlay", + "treefmt": "treefmt" + }, + "locked": { + "lastModified": 1693807878, + "narHash": "sha256-mJ+Qsp1J9CLCCbHeQxe1y0bjvma1pI2/mJdz6LeVlEQ=", + "owner": "yusdacra", + "repo": "nix-cargo-integration", + "rev": 
"d498b79bf51b9da61f69b70e1aa319e39631c4d9", + "type": "github" + }, + "original": { + "owner": "yusdacra", + "repo": "nix-cargo-integration", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1693663421, + "narHash": "sha256-ImMIlWE/idjcZAfxKK8sQA7A1Gi/O58u5/CJA+mxvl8=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "e56990880811a451abd32515698c712788be5720", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgsV1": { + "locked": { + "lastModified": 1686501370, + "narHash": "sha256-G0WuM9fqTPRc2URKP9Lgi5nhZMqsfHGrdEbrLvAPJcg=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "75a5ebf473cd60148ba9aec0d219f72e5cf52519", + "type": "github" + }, + "original": { + "id": "nixpkgs", + "ref": "nixos-unstable", + "type": "indirect" + } + }, + "parts": { + "inputs": { + "nixpkgs-lib": [ + "nci", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1693611461, + "narHash": "sha256-aPODl8vAgGQ0ZYFIRisxYG5MOGSkIczvu2Cd8Gb9+1Y=", + "owner": "hercules-ci", + "repo": "flake-parts", + "rev": "7f53fdb7bdc5bb237da7fefef12d099e4fd611ca", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "flake-parts", + "type": "github" + } + }, + "parts_2": { + "inputs": { + "nixpkgs-lib": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1693611461, + "narHash": "sha256-aPODl8vAgGQ0ZYFIRisxYG5MOGSkIczvu2Cd8Gb9+1Y=", + "owner": "hercules-ci", + "repo": "flake-parts", + "rev": "7f53fdb7bdc5bb237da7fefef12d099e4fd611ca", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "flake-parts", + "type": "github" + } + }, + "root": { + "inputs": { + "nci": "nci", + "nixpkgs": "nixpkgs", + "parts": "parts_2" + } + }, + "rust-overlay": { + "flake": false, + "locked": { + "lastModified": 1693793487, + "narHash": "sha256-MS6CDyAC0sJMTE/pRYlfrhBnhlAPvEo43ipwf5ZNzHg=", + "owner": "oxalica", + "repo": "rust-overlay", + "rev": 
"f179280eed5eb93759c94bf3231fbbda28f894b7", + "type": "github" + }, + "original": { + "owner": "oxalica", + "repo": "rust-overlay", + "type": "github" + } + }, + "treefmt": { + "inputs": { + "nixpkgs": [ + "nci", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1693689099, + "narHash": "sha256-NuilTRYMH+DDR/uBWQjDbX5mWCA05lwo2Sg9iTkkEs4=", + "owner": "numtide", + "repo": "treefmt-nix", + "rev": "e3e0f9f6d47f8fc68aff15150eda1224fb46f4d4", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "treefmt-nix", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 00000000..ff04a2b0 --- /dev/null +++ b/flake.nix @@ -0,0 +1,40 @@ +{ + inputs.nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; + inputs.nci.url = "github:yusdacra/nix-cargo-integration"; + inputs.nci.inputs.nixpkgs.follows = "nixpkgs"; + inputs.parts.url = "github:hercules-ci/flake-parts"; + inputs.parts.inputs.nixpkgs-lib.follows = "nixpkgs"; + + outputs = inputs @ { + parts, + nci, + ... 
+ }: + parts.lib.mkFlake {inherit inputs;} { + systems = ["x86_64-linux"]; + imports = [nci.flakeModule]; + perSystem = {config, ...}: let + # TODO: change this to your crate's name + crateName = "disp"; + # shorthand for accessing this crate's outputs + # you can access crate outputs under `config.nci.outputs.` (see documentation) + crateOutputs = config.nci.outputs.${crateName}; + in { + # declare projects + # relPath is the relative path of a project to the flake root + # TODO: change this to your crate's path + nci.projects.${crateName}.relPath = ""; + # configure crates + nci.crates.${crateName} = { + # export crate (packages and devshell) in flake outputs + # alternatively you can access the outputs and export them yourself (see below) + export = true; + # look at documentation for more options + }; + # export the crate devshell as the default devshell + devShells.default = crateOutputs.devShell; + # export the release package of the crate as default package + packages.default = crateOutputs.packages.release; + }; + }; +} diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 00000000..0bd9afd8 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "nightly" # or whatever channel you are using +components = [ "rust-src" ] \ No newline at end of file From dcdeb8144dd58ed18c8d66b8c513a9c0ddb05e9f Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 9 Oct 2023 00:55:32 +0100 Subject: [PATCH 37/64] Bumped version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 35c1968e..178ea16d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "chumsky" -version = "1.0.0-alpha.4" +version = "1.0.0-alpha.5" description = "A parser library for humans with powerful error recovery" authors = ["Joshua Barretto ", "Elijah Hartvigsen "] repository = "https://github.com/zesterer/chumsky" From 3ea3820d26ddd6f5cbb609684368011bcab19a60 Mon Sep 17 
00:00:00 2001 From: zyansheep Date: Mon, 9 Oct 2023 10:28:33 -0400 Subject: [PATCH 38/64] select-extra feature --- src/combinator.rs | 4 ++-- src/lib.rs | 12 ++++++------ src/primitive.rs | 31 +++++++++++++++---------------- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/src/combinator.rs b/src/combinator.rs index 79182573..af2980d3 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -332,8 +332,8 @@ where /// See [`Parser::map_with`]. pub struct MapExtra<'a, 'b, 'inv, I: Input<'a>, E: ParserExtra<'a, I>> { - before: Offset<'a, 'inv, I>, - inp: &'b mut InputRef<'a, 'inv, I, E>, + pub(crate) before: Offset<'a, 'inv, I>, + pub(crate) inp: &'b mut InputRef<'a, 'inv, I, E>, } impl<'a, 'b, 'inv, I: Input<'a>, E: ParserExtra<'a, I>> MapExtra<'a, 'b, 'inv, I, E> { diff --git a/src/lib.rs b/src/lib.rs index 3f6c9d59..b4658e96 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2769,10 +2769,10 @@ where /// ``` #[macro_export] macro_rules! select { - ($($p:pat $(= $span:ident)? $(, $state:ident)? $(if $guard:expr)? $(=> $out:expr)?),+ $(,)?) => ({ + ($($p:pat $(= $extra:ident)? $(if $guard:expr)? $(=> $out:expr)?),+ $(,)?) => ({ $crate::primitive::select( - move |x, span, state| match (x, span, state) { - $(($p $(,$span)? $(,$state)?, ..) $(if $guard)? => ::core::option::Option::Some({ () $(;$out)? })),+, + move |x, extra| match (x, extra) { + $(($p $(,$extra)?, ..) $(if $guard)? => ::core::option::Option::Some({ () $(;$out)? })),+, _ => ::core::option::Option::None, } ) @@ -2786,10 +2786,10 @@ macro_rules! select { /// `select_ref` requires that the parser input implements [`BorrowInput`]. #[macro_export] macro_rules! select_ref { - ($($p:pat $(= $span:ident)? $(, $state:ident)? $(if $guard:expr)? $(=> $out:expr)?),+ $(,)?) => ({ + ($($p:pat $(= $extra:ident)? $(if $guard:expr)? $(=> $out:expr)?),+ $(,)?) => ({ $crate::primitive::select_ref( - move |x, span, state| match (x, span, state) { - $(($p $(,$span)? $(,$state)?, ..) $(if $guard)? 
=> ::core::option::Option::Some({ () $(;$out)? })),+, + move |x, extra| match (x, extra) { + $(($p $(,$extra)?, ..) $(if $guard)? => ::core::option::Option::Some({ () $(;$out)? })),+, _ => ::core::option::Option::None, } ) diff --git a/src/primitive.rs b/src/primitive.rs index 4814edc1..7ba32898 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -417,14 +417,17 @@ where } /// See [`select!`]. -pub struct Select { +pub struct Select<'a, F, I: Input<'a>, O, E: ParserExtra<'a, I>> +where + F: Fn(I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option +{ filter: F, #[allow(dead_code)] - phantom: EmptyPhantom<(E, O, I)>, + phantom: EmptyPhantom<(E, O, &'a I)>, } -impl Copy for Select {} -impl Clone for Select { +impl<'a, F: Copy + Fn(I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, I: Input<'a>, O, E: ParserExtra<'a, I>> Copy for Select<'a, F, I, O, E> {} +impl<'a, F: Clone + Fn(I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, I: Input<'a>, O, E: ParserExtra<'a, I>> Clone for Select<'a, F, I, O, E> { fn clone(&self) -> Self { Self { filter: self.filter.clone(), @@ -434,12 +437,12 @@ impl Clone for Select { } /// See [`select!`]. 
-pub const fn select<'a, F, I, O, E>(filter: F) -> Select +pub const fn select<'a, F, I, O, E>(filter: F) -> Select<'a, F, I, O, E> where I: Input<'a>, I::Token: Clone + 'a, E: ParserExtra<'a, I>, - F: Fn(I::Token, I::Span, &mut E::State) -> Option, + F: Fn(I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, { Select { filter, @@ -447,22 +450,20 @@ where } } -impl<'a, I, O, E, F> ParserSealed<'a, I, O, E> for Select +impl<'a, I, O, E, F> ParserSealed<'a, I, O, E> for Select<'a, F, I, O, E> where I: ValueInput<'a>, I::Token: Clone + 'a, E: ParserExtra<'a, I>, - F: Fn(I::Token, I::Span, &mut E::State) -> Option, + F: Fn(I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { let before = inp.offset(); let next = inp.next_inner(); let err_span = inp.span_since(before); - let span_since = inp.span_since(before); - let state = inp.state(); let (at, found) = match next { - (at, Some(tok)) => match (self.filter)(tok.clone(), span_since, state) { + (at, Some(tok)) => match (self.filter)(tok.clone(), &mut MapExtra { before, inp }) { Some(out) => return Ok(M::bind(|| out)), None => (at, Some(tok.into())), }, @@ -498,7 +499,7 @@ where I: BorrowInput<'a>, I::Token: 'a, E: ParserExtra<'a, I>, - F: Fn(&'a I::Token, I::Span, &mut E::State) -> Option, + F: Fn(&'a I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, { SelectRef { filter, @@ -511,17 +512,15 @@ where I: BorrowInput<'a>, I::Token: 'a, E: ParserExtra<'a, I>, - F: Fn(&'a I::Token, I::Span, &mut E::State) -> Option, + F: Fn(&'a I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { let before = inp.offset(); let next = inp.next_ref_inner(); - let span = inp.span_since(before); let err_span = inp.span_since(before); - let state = inp.state(); let (at, found) = match next { - (at, Some(tok)) => match (self.filter)(tok, span, state) { + (at, Some(tok)) => match (self.filter)(tok, 
&mut MapExtra { before, inp }) { Some(out) => return Ok(M::bind(|| out)), None => (at, Some(tok.into())), }, From 524eb882d500078e43cf341e0dc57f36889ac065 Mon Sep 17 00:00:00 2001 From: zyansheep Date: Mon, 9 Oct 2023 10:34:16 -0400 Subject: [PATCH 39/64] remove uneeded comments --- src/error.rs | 2 -- src/label.rs | 10 ---------- 2 files changed, 12 deletions(-) diff --git a/src/error.rs b/src/error.rs index 2120768c..b08584c3 100644 --- a/src/error.rs +++ b/src/error.rs @@ -777,9 +777,7 @@ where #[inline] fn in_context(&mut self, label: L, span: I::Span) { - // label is not yet in context if self.context.iter().all(|(l, _)| l != &label) { - // add it to context self.context.push((label, span)); } } diff --git a/src/label.rs b/src/label.rs index e4bee920..d0879bfc 100644 --- a/src/label.rs +++ b/src/label.rs @@ -48,23 +48,15 @@ where { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { - // save potential alternate errors generated before this parser has run let old_alt = inp.errors.alt.take(); - // note position in source let before = inp.save(); - // run labelled parser let res = self.parser.go::(inp); // TODO: Label secondary errors too? 
- - // note any alternate errors generated by labelled parser let new_alt = inp.errors.alt.take(); - // reset input's alternate error back to old one inp.errors.alt = old_alt; - // if we have any new alt errors generated if let Some(mut new_alt) = new_alt { - let before_next = before.offset.into() + 1; if new_alt.pos.into() == before_next { new_alt.err.label_with(self.label.clone()); @@ -73,8 +65,6 @@ where let span = unsafe { inp.input.span(before.offset..new_alt.pos) }; new_alt.err.in_context(self.label.clone(), span); } - // new_alt.err.label_with(self.label.clone()); - // record new_alt as alternative error inp.add_alt_err(new_alt.pos, new_alt.err); } From e0bf004cdad9f4aff4700e8b615fbb1c0786e22b Mon Sep 17 00:00:00 2001 From: zyansheep Date: Mon, 9 Oct 2023 12:11:26 -0400 Subject: [PATCH 40/64] remove flake stuff for pr --- .envrc | 1 - flake.lock | 288 -------------------------------------------- flake.nix | 40 ------ rust-toolchain.toml | 3 - 4 files changed, 332 deletions(-) delete mode 100644 .envrc delete mode 100644 flake.lock delete mode 100644 flake.nix delete mode 100644 rust-toolchain.toml diff --git a/.envrc b/.envrc deleted file mode 100644 index 8392d159..00000000 --- a/.envrc +++ /dev/null @@ -1 +0,0 @@ -use flake \ No newline at end of file diff --git a/flake.lock b/flake.lock deleted file mode 100644 index e88a9073..00000000 --- a/flake.lock +++ /dev/null @@ -1,288 +0,0 @@ -{ - "nodes": { - "crane": { - "flake": false, - "locked": { - "lastModified": 1681175776, - "narHash": "sha256-7SsUy9114fryHAZ8p1L6G6YSu7jjz55FddEwa2U8XZc=", - "owner": "ipetkov", - "repo": "crane", - "rev": "445a3d222947632b5593112bb817850e8a9cf737", - "type": "github" - }, - "original": { - "owner": "ipetkov", - "ref": "v0.12.1", - "repo": "crane", - "type": "github" - } - }, - "dream2nix": { - "inputs": { - "all-cabal-json": [ - "nci" - ], - "crane": "crane", - "devshell": [ - "nci" - ], - "drv-parts": "drv-parts", - "flake-compat": "flake-compat", - "flake-parts": [ - 
"nci", - "parts" - ], - "flake-utils-pre-commit": [ - "nci" - ], - "ghc-utils": [ - "nci" - ], - "gomod2nix": [ - "nci" - ], - "mach-nix": [ - "nci" - ], - "nix-pypi-fetcher": [ - "nci" - ], - "nixpkgs": [ - "nci", - "nixpkgs" - ], - "nixpkgsV1": "nixpkgsV1", - "poetry2nix": [ - "nci" - ], - "pre-commit-hooks": [ - "nci" - ], - "pruned-racket-catalog": [ - "nci" - ] - }, - "locked": { - "lastModified": 1690660611, - "narHash": "sha256-nfDb1koAB/bD2pzENgVe+q4lwi9tgwR772dZgaGR4Io=", - "owner": "nix-community", - "repo": "dream2nix", - "rev": "ce7b3975b63062b9e440e48a75a5c12253231af5", - "type": "github" - }, - "original": { - "owner": "nix-community", - "ref": "legacy", - "repo": "dream2nix", - "type": "github" - } - }, - "drv-parts": { - "inputs": { - "flake-compat": [ - "nci", - "dream2nix", - "flake-compat" - ], - "flake-parts": [ - "nci", - "dream2nix", - "flake-parts" - ], - "nixpkgs": [ - "nci", - "dream2nix", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1680698112, - "narHash": "sha256-FgnobN/DvCjEsc0UAZEAdPLkL4IZi2ZMnu2K2bUaElc=", - "owner": "davhau", - "repo": "drv-parts", - "rev": "e8c2ec1157dc1edb002989669a0dbd935f430201", - "type": "github" - }, - "original": { - "owner": "davhau", - "repo": "drv-parts", - "type": "github" - } - }, - "flake-compat": { - "flake": false, - "locked": { - "lastModified": 1673956053, - "narHash": "sha256-4gtG9iQuiKITOjNQQeQIpoIB6b16fm+504Ch3sNKLd8=", - "owner": "edolstra", - "repo": "flake-compat", - "rev": "35bb57c0c8d8b62bbfd284272c928ceb64ddbde9", - "type": "github" - }, - "original": { - "owner": "edolstra", - "repo": "flake-compat", - "type": "github" - } - }, - "mk-naked-shell": { - "flake": false, - "locked": { - "lastModified": 1681286841, - "narHash": "sha256-3XlJrwlR0nBiREnuogoa5i1b4+w/XPe0z8bbrJASw0g=", - "owner": "yusdacra", - "repo": "mk-naked-shell", - "rev": "7612f828dd6f22b7fb332cc69440e839d7ffe6bd", - "type": "github" - }, - "original": { - "owner": "yusdacra", - "repo": "mk-naked-shell", - "type": 
"github" - } - }, - "nci": { - "inputs": { - "dream2nix": "dream2nix", - "mk-naked-shell": "mk-naked-shell", - "nixpkgs": [ - "nixpkgs" - ], - "parts": "parts", - "rust-overlay": "rust-overlay", - "treefmt": "treefmt" - }, - "locked": { - "lastModified": 1693807878, - "narHash": "sha256-mJ+Qsp1J9CLCCbHeQxe1y0bjvma1pI2/mJdz6LeVlEQ=", - "owner": "yusdacra", - "repo": "nix-cargo-integration", - "rev": "d498b79bf51b9da61f69b70e1aa319e39631c4d9", - "type": "github" - }, - "original": { - "owner": "yusdacra", - "repo": "nix-cargo-integration", - "type": "github" - } - }, - "nixpkgs": { - "locked": { - "lastModified": 1693663421, - "narHash": "sha256-ImMIlWE/idjcZAfxKK8sQA7A1Gi/O58u5/CJA+mxvl8=", - "owner": "nixos", - "repo": "nixpkgs", - "rev": "e56990880811a451abd32515698c712788be5720", - "type": "github" - }, - "original": { - "owner": "nixos", - "ref": "nixos-unstable", - "repo": "nixpkgs", - "type": "github" - } - }, - "nixpkgsV1": { - "locked": { - "lastModified": 1686501370, - "narHash": "sha256-G0WuM9fqTPRc2URKP9Lgi5nhZMqsfHGrdEbrLvAPJcg=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "75a5ebf473cd60148ba9aec0d219f72e5cf52519", - "type": "github" - }, - "original": { - "id": "nixpkgs", - "ref": "nixos-unstable", - "type": "indirect" - } - }, - "parts": { - "inputs": { - "nixpkgs-lib": [ - "nci", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1693611461, - "narHash": "sha256-aPODl8vAgGQ0ZYFIRisxYG5MOGSkIczvu2Cd8Gb9+1Y=", - "owner": "hercules-ci", - "repo": "flake-parts", - "rev": "7f53fdb7bdc5bb237da7fefef12d099e4fd611ca", - "type": "github" - }, - "original": { - "owner": "hercules-ci", - "repo": "flake-parts", - "type": "github" - } - }, - "parts_2": { - "inputs": { - "nixpkgs-lib": [ - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1693611461, - "narHash": "sha256-aPODl8vAgGQ0ZYFIRisxYG5MOGSkIczvu2Cd8Gb9+1Y=", - "owner": "hercules-ci", - "repo": "flake-parts", - "rev": "7f53fdb7bdc5bb237da7fefef12d099e4fd611ca", - "type": "github" - }, - 
"original": { - "owner": "hercules-ci", - "repo": "flake-parts", - "type": "github" - } - }, - "root": { - "inputs": { - "nci": "nci", - "nixpkgs": "nixpkgs", - "parts": "parts_2" - } - }, - "rust-overlay": { - "flake": false, - "locked": { - "lastModified": 1693793487, - "narHash": "sha256-MS6CDyAC0sJMTE/pRYlfrhBnhlAPvEo43ipwf5ZNzHg=", - "owner": "oxalica", - "repo": "rust-overlay", - "rev": "f179280eed5eb93759c94bf3231fbbda28f894b7", - "type": "github" - }, - "original": { - "owner": "oxalica", - "repo": "rust-overlay", - "type": "github" - } - }, - "treefmt": { - "inputs": { - "nixpkgs": [ - "nci", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1693689099, - "narHash": "sha256-NuilTRYMH+DDR/uBWQjDbX5mWCA05lwo2Sg9iTkkEs4=", - "owner": "numtide", - "repo": "treefmt-nix", - "rev": "e3e0f9f6d47f8fc68aff15150eda1224fb46f4d4", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "treefmt-nix", - "type": "github" - } - } - }, - "root": "root", - "version": 7 -} diff --git a/flake.nix b/flake.nix deleted file mode 100644 index ff04a2b0..00000000 --- a/flake.nix +++ /dev/null @@ -1,40 +0,0 @@ -{ - inputs.nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; - inputs.nci.url = "github:yusdacra/nix-cargo-integration"; - inputs.nci.inputs.nixpkgs.follows = "nixpkgs"; - inputs.parts.url = "github:hercules-ci/flake-parts"; - inputs.parts.inputs.nixpkgs-lib.follows = "nixpkgs"; - - outputs = inputs @ { - parts, - nci, - ... 
- }: - parts.lib.mkFlake {inherit inputs;} { - systems = ["x86_64-linux"]; - imports = [nci.flakeModule]; - perSystem = {config, ...}: let - # TODO: change this to your crate's name - crateName = "disp"; - # shorthand for accessing this crate's outputs - # you can access crate outputs under `config.nci.outputs.` (see documentation) - crateOutputs = config.nci.outputs.${crateName}; - in { - # declare projects - # relPath is the relative path of a project to the flake root - # TODO: change this to your crate's path - nci.projects.${crateName}.relPath = ""; - # configure crates - nci.crates.${crateName} = { - # export crate (packages and devshell) in flake outputs - # alternatively you can access the outputs and export them yourself (see below) - export = true; - # look at documentation for more options - }; - # export the crate devshell as the default devshell - devShells.default = crateOutputs.devShell; - # export the release package of the crate as default package - packages.default = crateOutputs.packages.release; - }; - }; -} diff --git a/rust-toolchain.toml b/rust-toolchain.toml deleted file mode 100644 index 0bd9afd8..00000000 --- a/rust-toolchain.toml +++ /dev/null @@ -1,3 +0,0 @@ -[toolchain] -channel = "nightly" # or whatever channel you are using -components = [ "rust-src" ] \ No newline at end of file From dd3ed15aac7d8fcee3352e1ccdc691a5a8d24a63 Mon Sep 17 00:00:00 2001 From: zyansheep Date: Mon, 9 Oct 2023 12:14:03 -0400 Subject: [PATCH 41/64] remove unnecessary bounds --- src/primitive.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/primitive.rs b/src/primitive.rs index 7ba32898..58ee2691 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -417,17 +417,14 @@ where } /// See [`select!`]. 
-pub struct Select<'a, F, I: Input<'a>, O, E: ParserExtra<'a, I>> -where - F: Fn(I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option -{ +pub struct Select { filter: F, #[allow(dead_code)] - phantom: EmptyPhantom<(E, O, &'a I)>, + phantom: EmptyPhantom<(E, O, I)>, } -impl<'a, F: Copy + Fn(I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, I: Input<'a>, O, E: ParserExtra<'a, I>> Copy for Select<'a, F, I, O, E> {} -impl<'a, F: Clone + Fn(I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, I: Input<'a>, O, E: ParserExtra<'a, I>> Clone for Select<'a, F, I, O, E> { +impl Copy for Select {} +impl Clone for Select { fn clone(&self) -> Self { Self { filter: self.filter.clone(), From a1f08bc43673019f649e5badbd9c7c2801732f51 Mon Sep 17 00:00:00 2001 From: zyansheep Date: Mon, 9 Oct 2023 12:14:03 -0400 Subject: [PATCH 42/64] remove unnecessary bounds --- src/primitive.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/primitive.rs b/src/primitive.rs index 7ba32898..4d9b29f5 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -417,17 +417,14 @@ where } /// See [`select!`]. -pub struct Select<'a, F, I: Input<'a>, O, E: ParserExtra<'a, I>> -where - F: Fn(I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option -{ +pub struct Select { filter: F, #[allow(dead_code)] - phantom: EmptyPhantom<(E, O, &'a I)>, + phantom: EmptyPhantom<(E, O, I)>, } -impl<'a, F: Copy + Fn(I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, I: Input<'a>, O, E: ParserExtra<'a, I>> Copy for Select<'a, F, I, O, E> {} -impl<'a, F: Clone + Fn(I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, I: Input<'a>, O, E: ParserExtra<'a, I>> Clone for Select<'a, F, I, O, E> { +impl Copy for Select {} +impl Clone for Select { fn clone(&self) -> Self { Self { filter: self.filter.clone(), @@ -437,7 +434,7 @@ impl<'a, F: Clone + Fn(I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, } /// See [`select!`]. 
-pub const fn select<'a, F, I, O, E>(filter: F) -> Select<'a, F, I, O, E> +pub const fn select<'a, F, I, O, E>(filter: F) -> Select where I: Input<'a>, I::Token: Clone + 'a, @@ -450,7 +447,7 @@ where } } -impl<'a, I, O, E, F> ParserSealed<'a, I, O, E> for Select<'a, F, I, O, E> +impl<'a, I, O, E, F> ParserSealed<'a, I, O, E> for Select where I: ValueInput<'a>, I::Token: Clone + 'a, From 36f8e8b406535ecd942e51507d4da32b2f6a3bed Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 9 Oct 2023 18:12:51 +0100 Subject: [PATCH 43/64] Fixed EoI reporting issue, made `either` private (since it didn't contain anything) --- src/lib.rs | 2 +- src/primitive.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4d2e742d..89215378 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -85,7 +85,7 @@ pub mod cache; pub mod combinator; pub mod container; #[cfg(feature = "either")] -pub mod either; +mod either; pub mod error; #[cfg(feature = "extension")] pub mod extension; diff --git a/src/primitive.rs b/src/primitive.rs index 67113894..f2bacfac 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -45,7 +45,7 @@ where match inp.next_maybe_inner() { (_, None) => Ok(M::bind(|| ())), (at, Some(tok)) => { - inp.add_alt(at, None, Some(tok.into()), inp.span_since(before)); + inp.add_alt(at, Some(None), Some(tok.into()), inp.span_since(before)); Err(()) } } From bb7aa255bc331e131bd24afb0ce0a24087ad2894 Mon Sep 17 00:00:00 2001 From: wackbyte Date: Mon, 9 Oct 2023 13:50:22 -0400 Subject: [PATCH 44/64] Pratt grammar fixes --- src/pratt.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pratt.rs b/src/pratt.rs index 9756d74d..3bdc7b6d 100644 --- a/src/pratt.rs +++ b/src/pratt.rs @@ -70,14 +70,14 @@ //! // We want factorial to happen before any negation, so we need its precedence to be higher than `Expr::Neg`. //! postfix(4, op('!'), |lhs| Expr::Factorial(Box::new(lhs))), //! 
// Just like in math, we want that if we write -x^2, our parser parses that as -(x^2), so we need it to have -//! // exponents bind tighter than our prefix operators +//! // exponents bind tighter than our prefix operators. //! infix(right(3), op('^'), |l, r| Expr::Pow(Box::new(l), Box::new(r))), //! // Notice the conflict with our `Expr::Sub`. This will still parse correctly. We want negation to happen before -//! // `+` and `-`, so we set it's precedence higher. +//! // `+` and `-`, so we set its precedence higher. //! prefix(2, op('-'), |rhs| Expr::Neg(Box::new(rhs))), //! prefix(2, op('*'), |rhs| Expr::Deref(Box::new(rhs))), //! // Our `-` and `+` bind the weakest, meaning that even if they occur first in an expression, they will be the -//! // last executed +//! // last executed. //! infix(left(1), op('+'), |l, r| Expr::Add(Box::new(l), Box::new(r))), //! infix(left(1), op('-'), |l, r| Expr::Sub(Box::new(l), Box::new(r))), //! )) From e0d48125acd2e80afe2e9c49f065ec1efdae56e5 Mon Sep 17 00:00:00 2001 From: zyansheep Date: Mon, 9 Oct 2023 22:02:58 -0400 Subject: [PATCH 45/64] fix examples --- examples/nested_spans.rs | 2 +- src/lib.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/nested_spans.rs b/examples/nested_spans.rs index 6371dd9f..47bbee36 100644 --- a/examples/nested_spans.rs +++ b/examples/nested_spans.rs @@ -17,7 +17,7 @@ fn parser<'a>() -> impl Parser<'a, TokenTreeInput<'a>, i64> { let parens = expr // Here we specify how the parser should come up with the nested tokens .nested_in(select_ref! { - Token::Parens(xs) = span => xs.as_slice().spanned(SimpleSpan::to_end(&span)), + Token::Parens(xs) = extra => xs.as_slice().spanned(SimpleSpan::to_end(&extra.span())), }); let atom = num.or(parens); diff --git a/src/lib.rs b/src/lib.rs index 9b218b69..5cd5f8a6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2726,8 +2726,8 @@ where /// /// # let _: chumsky::primitive::Select<_, &[Token], (Expr, Span), extra::Default> = /// select! 
{ -/// Token::Num(x) = span => Expr::Num(x).spanned(span), -/// Token::Str(s) = span => Expr::Str(s).spanned(span), +/// Token::Num(x) = extra => Expr::Num(x).spanned(extra.span()), +/// Token::Str(s) = extra => Expr::Str(s).spanned(extra.span()), /// } /// # ; /// ``` From 03771f59f0cd4158b377c60156b61f4d40ab82ac Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Tue, 10 Oct 2023 10:57:21 +0100 Subject: [PATCH 46/64] Improve docs --- Cargo.lock | 2 +- examples/nested_spans.rs | 2 +- src/lib.rs | 22 ++++++++++++---------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 16af2fba..cdb120b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -192,7 +192,7 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chumsky" -version = "1.0.0-alpha.4" +version = "1.0.0-alpha.5" dependencies = [ "ariadne", "ciborium", diff --git a/examples/nested_spans.rs b/examples/nested_spans.rs index 47bbee36..fec39292 100644 --- a/examples/nested_spans.rs +++ b/examples/nested_spans.rs @@ -17,7 +17,7 @@ fn parser<'a>() -> impl Parser<'a, TokenTreeInput<'a>, i64> { let parens = expr // Here we specify how the parser should come up with the nested tokens .nested_in(select_ref! { - Token::Parens(xs) = extra => xs.as_slice().spanned(SimpleSpan::to_end(&extra.span())), + Token::Parens(xs) = e => xs.as_slice().spanned(SimpleSpan::to_end(&e.span())), }); let atom = num.or(parens); diff --git a/src/lib.rs b/src/lib.rs index 50ded953..3f12ba5b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2679,13 +2679,12 @@ where /// used for parsing, although it can also generally be used to select inputs and map them to outputs. Any unmapped /// input patterns will become syntax errors, just as with [`Parser::filter`]. /// -/// Internally, [`select!`] is very similar to [`Parser::try_map`] and thinking of it as such might make it less -/// confusing. 
+/// Internally, [`select!`] is very similar to a single-token [`Parser::filter`] and thinking of it as such might make +/// it less confusing. /// -/// `select!` requires that tokens implement [`Clone`] and the input type implements [`ValueInput`]. -/// -/// If you're trying to access tokens referentially (for the sake of nested parsing, or simply because you want to -/// avoid cloning the token), see [`select_ref!`]. +/// `select!` requires that tokens implement [`Clone`] and the input type implements [`ValueInput`]. If you're trying +/// to access tokens referentially (for the sake of nested parsing, or simply because you want to avoid cloning the +/// token), see [`select_ref!`]. /// /// # Examples /// @@ -2709,7 +2708,8 @@ where /// # ; /// ``` /// -/// If you require access to the token's span, you may add an argument after a pattern to gain access to it: +/// If you require access to the token's span or other metadata, you may add an argument after a pattern to gain access +/// to it (see the docs for [`Parser::map_with`] and [`MapExtra`]): /// /// ``` /// # use chumsky::{prelude::*, error::Simple}; @@ -2726,8 +2726,8 @@ where /// /// # let _: chumsky::primitive::Select<_, &[Token], (Expr, Span), extra::Default> = /// select! { -/// Token::Num(x) = extra => Expr::Num(x).spanned(extra.span()), -/// Token::Str(s) = extra => Expr::Str(s).spanned(extra.span()), +/// Token::Num(x) = e => Expr::Num(x).spanned(e.span()), +/// Token::Str(s) = e => Expr::Str(s).spanned(e.span()), /// } /// # ; /// ``` @@ -2794,7 +2794,9 @@ macro_rules! select { /// /// Useful if you want to extract elements from a token in a zero-copy manner. /// -/// `select_ref` requires that the parser input implements [`BorrowInput`]. +/// See the docs for [`select!`] for more information. +/// +/// Requires that the parser input implements [`BorrowInput`]. #[macro_export] macro_rules! select_ref { ($($p:pat $(= $extra:ident)? $(if $guard:expr)? $(=> $out:expr)?),+ $(,)?) 
=> ({ From 70d018634438653ce6d3667cab6e71c9f6667ca3 Mon Sep 17 00:00:00 2001 From: Erich Gubler Date: Tue, 10 Oct 2023 12:49:27 -0400 Subject: [PATCH 47/64] fix: remove `debug_assertions` check causing `release` builds to fail --- src/combinator.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/combinator.rs b/src/combinator.rs index af2980d3..0cab264f 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -2290,7 +2290,6 @@ where let mut a_out = M::bind(Vec::new); let mut iter_state = self.parser_a.make_iter::(inp)?; loop { - #[cfg(debug_assertions)] let before = inp.offset(); match self.parser_a.next::(inp, &mut iter_state) { Ok(Some(out)) => { From bbd0393bf1053f7ed9906a8285cb879464b81d92 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Wed, 11 Oct 2023 15:00:07 +0100 Subject: [PATCH 48/64] Fixed MapExtra lifetime issue --- src/combinator.rs | 66 +++++++++-------------------------------------- src/input.rs | 55 +++++++++++++++++++++++++++++++++++++++ src/lib.rs | 15 +++++------ src/primitive.rs | 12 ++++----- src/recovery.rs | 2 +- 5 files changed, 80 insertions(+), 70 deletions(-) diff --git a/src/combinator.rs b/src/combinator.rs index 0cab264f..a8956cf4 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -330,41 +330,6 @@ where } } -/// See [`Parser::map_with`]. -pub struct MapExtra<'a, 'b, 'inv, I: Input<'a>, E: ParserExtra<'a, I>> { - pub(crate) before: Offset<'a, 'inv, I>, - pub(crate) inp: &'b mut InputRef<'a, 'inv, I, E>, -} - -impl<'a, 'b, 'inv, I: Input<'a>, E: ParserExtra<'a, I>> MapExtra<'a, 'b, 'inv, I, E> { - /// Get the span corresponding to the output. - #[inline(always)] - pub fn span(&self) -> I::Span { - self.inp.span_since(self.before) - } - - /// Get the slice corresponding to the output. - #[inline(always)] - pub fn slice(&self) -> I::Slice - where - I: SliceInput<'a>, - { - self.inp.slice_since(self.before..) - } - - /// Get the parser state. 
- #[inline(always)] - pub fn state(&mut self) -> &mut E::State { - self.inp.state() - } - - /// Get the current parser context. - #[inline(always)] - pub fn ctx(&self) -> &E::Context { - self.inp.ctx() - } -} - /// See [`Parser::map_with`]. pub struct MapWith { pub(crate) parser: A, @@ -389,14 +354,14 @@ where I: Input<'a>, E: ParserExtra<'a, I>, A: Parser<'a, I, OA, E>, - F: Fn(OA, &mut MapExtra<'a, '_, '_, I, E>) -> O, + F: Fn(OA, &mut MapExtra<'a, '_, I, E>) -> O, { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { let before = inp.offset(); let out = self.parser.go::(inp)?; Ok(M::map(out, |out| { - (self.mapper)(out, &mut MapExtra { before, inp }) + (self.mapper)(out, &mut MapExtra::new(before, inp)) })) } @@ -408,7 +373,7 @@ where I: Input<'a>, E: ParserExtra<'a, I>, A: IterParser<'a, I, OA, E>, - F: Fn(OA, &mut MapExtra<'a, '_, '_, I, E>) -> O, + F: Fn(OA, &mut MapExtra<'a, '_, I, E>) -> O, { type IterState = A::IterState where @@ -431,7 +396,7 @@ where let before = inp.offset(); match self.parser.next::(inp, state) { Ok(Some(o)) => Ok(Some(M::map(o, |o| { - (self.mapper)(o, &mut MapExtra { before, inp }) + (self.mapper)(o, &mut MapExtra::new(before, inp)) }))), Ok(None) => Ok(None), Err(()) => Err(()), @@ -614,13 +579,13 @@ where I: Input<'a>, E: ParserExtra<'a, I>, A: Parser<'a, I, OA, E>, - F: Fn(OA, &mut MapExtra<'a, '_, '_, I, E>) -> Result, + F: Fn(OA, &mut MapExtra<'a, '_, I, E>) -> Result, { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { let before = inp.offset(); let out = self.parser.go::(inp)?; - match (self.mapper)(out, &mut MapExtra { before, inp }) { + match (self.mapper)(out, &mut MapExtra::new(before, inp)) { Ok(out) => Ok(M::bind(|| out)), Err(err) => { inp.add_alt_err(inp.offset().offset, err); @@ -2280,7 +2245,7 @@ where A: IterParser<'a, I, OA, E>, B: Parser<'a, I, O, E>, E: ParserExtra<'a, I>, - F: Fn(OA, O, &mut MapExtra<'a, '_, '_, I, E>) -> O, + F: Fn(OA, O, &mut MapExtra<'a, 
'_, I, E>) -> O, { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult @@ -2312,7 +2277,7 @@ where Ok(M::combine(a_out, b_out, |a_out, b_out| { a_out.into_iter().rfold(b_out, |b, (a, before)| { - (self.folder)(a, b, &mut MapExtra { before, inp }) + (self.folder)(a, b, &mut MapExtra::new(before, inp)) }) })) } @@ -2415,7 +2380,7 @@ where A: Parser<'a, I, O, E>, B: IterParser<'a, I, OB, E>, E: ParserExtra<'a, I>, - F: Fn(O, OB, &mut MapExtra<'a, '_, '_, I, E>) -> O, + F: Fn(O, OB, &mut MapExtra<'a, '_, I, E>) -> O, { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult @@ -2431,14 +2396,7 @@ where match self.parser_b.next::(inp, &mut iter_state) { Ok(Some(b_out)) => { out = M::combine(out, b_out, |out, b_out| { - (self.folder)( - out, - b_out, - &mut MapExtra { - before: before_all, - inp, - }, - ) + (self.folder)(out, b_out, &mut MapExtra::new(before_all, inp)) }) } Ok(None) => break Ok(out), @@ -2613,7 +2571,7 @@ where I: Input<'a>, E: ParserExtra<'a, I>, A: Parser<'a, I, OA, E>, - F: Fn(OA, &mut MapExtra<'a, '_, '_, I, E>, &mut Emitter) -> U, + F: Fn(OA, &mut MapExtra<'a, '_, I, E>, &mut Emitter) -> U, { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult @@ -2624,7 +2582,7 @@ where let out = self.parser.go::(inp)?; let mut emitter = Emitter::new(); - let out = (self.validator)(out, &mut MapExtra { before, inp }, &mut emitter); + let out = (self.validator)(out, &mut MapExtra::new(before, inp), &mut emitter); for err in emitter.errors() { inp.emit(inp.offset, err); } diff --git a/src/input.rs b/src/input.rs index f02c4543..9fc17bfc 100644 --- a/src/input.rs +++ b/src/input.rs @@ -1512,3 +1512,58 @@ impl Emitter { self.emitted.push(err) } } + +/// See [`Parser::map_with`]. 
+pub struct MapExtra<'a, 'b, I: Input<'a>, E: ParserExtra<'a, I>> { + before: I::Offset, + after: I::Offset, + inp: &'b I, + state: &'b mut E::State, + ctx: &'b E::Context, +} + +impl<'a, 'b, I: Input<'a>, E: ParserExtra<'a, I>> MapExtra<'a, 'b, I, E> { + #[inline(always)] + pub(crate) fn new<'parse>( + before: Offset<'a, 'parse, I>, + inp: &'b mut InputRef<'a, 'parse, I, E>, + ) -> Self { + Self { + before: before.offset, + after: inp.offset, + ctx: inp.ctx, + state: inp.state, + inp: inp.input, + } + } + + /// Get the span corresponding to the output. + #[inline(always)] + pub fn span(&self) -> I::Span { + // SAFETY: The offsets both came from the same input + // TODO: Should this make `MapExtra::new` unsafe? Probably, but it's an internal API and we simply wouldn't + // ever abuse it in this way, even accidentally. + unsafe { self.inp.span(self.before..self.after) } + } + + /// Get the slice corresponding to the output. + #[inline(always)] + pub fn slice(&self) -> I::Slice + where + I: SliceInput<'a>, + { + self.inp.slice(self.before..self.after) + } + + /// Get the parser state. + #[inline(always)] + pub fn state(&mut self) -> &mut E::State { + self.state + } + + /// Get the current parser context. 
+ #[inline(always)] + pub fn ctx(&self) -> &E::Context { + self.ctx + } +} diff --git a/src/lib.rs b/src/lib.rs index 3f12ba5b..a4ca8cbd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -162,7 +162,7 @@ use self::{ error::Error, extra::ParserExtra, input::{ - BorrowInput, Emitter, ExactSizeInput, InputRef, Offset, SliceInput, StrInput, ValueInput, + BorrowInput, Emitter, ExactSizeInput, InputRef, MapExtra, SliceInput, StrInput, ValueInput, }, prelude::*, primitive::Any, @@ -611,10 +611,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// assert_eq!(palindrome_parser().parse("hello olleh").into_result().as_deref(), Ok(" olleh")); /// assert!(palindrome_parser().parse("abccb").into_result().is_err()); /// ``` - fn map_with) -> U>( - self, - f: F, - ) -> MapWith + fn map_with) -> U>(self, f: F) -> MapWith where Self: Sized, { @@ -768,7 +765,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// [`Parser::validate`] instead. /// /// The output type of this parser is `U`, the [`Ok`] return value of the function. 
- fn try_map_with) -> Result>( + fn try_map_with) -> Result>( self, f: F, ) -> TryMapWith @@ -1618,7 +1615,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: #[cfg_attr(debug_assertions, track_caller)] fn foldl_with(self, other: B, f: F) -> FoldlWith where - F: Fn(O, OB, &mut MapExtra<'a, '_, '_, I, E>) -> O, + F: Fn(O, OB, &mut MapExtra<'a, '_, I, E>) -> O, B: IterParser<'a, I, OB, E>, Self: Sized, { @@ -1934,7 +1931,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: fn validate(self, f: F) -> Validate where Self: Sized, - F: Fn(O, &mut MapExtra<'a, '_, '_, I, E>, &mut Emitter) -> U, + F: Fn(O, &mut MapExtra<'a, '_, I, E>, &mut Emitter) -> U, { Validate { parser: self, @@ -2487,7 +2484,7 @@ where #[cfg_attr(debug_assertions, track_caller)] fn foldr_with(self, other: B, f: F) -> FoldrWith where - F: Fn(O, OA, &mut MapExtra<'a, '_, '_, I, E>) -> OA, + F: Fn(O, OA, &mut MapExtra<'a, '_, I, E>) -> OA, B: Parser<'a, I, OA, E>, Self: Sized, { diff --git a/src/primitive.rs b/src/primitive.rs index 79bd3b08..89122b8b 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -439,7 +439,7 @@ where I: Input<'a>, I::Token: Clone + 'a, E: ParserExtra<'a, I>, - F: Fn(I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, + F: Fn(I::Token, &mut MapExtra<'a, '_, I, E>) -> Option, { Select { filter, @@ -452,7 +452,7 @@ where I: ValueInput<'a>, I::Token: Clone + 'a, E: ParserExtra<'a, I>, - F: Fn(I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, + F: Fn(I::Token, &mut MapExtra<'a, '_, I, E>) -> Option, { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { @@ -460,7 +460,7 @@ where let next = inp.next_inner(); let err_span = inp.span_since(before); let (at, found) = match next { - (at, Some(tok)) => match (self.filter)(tok.clone(), &mut MapExtra { before, inp }) { + (at, Some(tok)) => match (self.filter)(tok.clone(), &mut MapExtra::new(before, inp)) { Some(out) => return Ok(M::bind(|| out)), None 
=> (at, Some(tok.into())), }, @@ -496,7 +496,7 @@ where I: BorrowInput<'a>, I::Token: 'a, E: ParserExtra<'a, I>, - F: Fn(&'a I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, + F: Fn(&'a I::Token, &mut MapExtra<'a, '_, I, E>) -> Option, { SelectRef { filter, @@ -509,7 +509,7 @@ where I: BorrowInput<'a>, I::Token: 'a, E: ParserExtra<'a, I>, - F: Fn(&'a I::Token, &mut MapExtra<'a, '_, '_, I, E>) -> Option, + F: Fn(&'a I::Token, &mut MapExtra<'a, '_, I, E>) -> Option, { #[inline] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { @@ -517,7 +517,7 @@ where let next = inp.next_ref_inner(); let err_span = inp.span_since(before); let (at, found) = match next { - (at, Some(tok)) => match (self.filter)(tok, &mut MapExtra { before, inp }) { + (at, Some(tok)) => match (self.filter)(tok, &mut MapExtra::new(before, inp)) { Some(out) => return Ok(M::bind(|| out)), None => (at, Some(tok.into())), }, diff --git a/src/recovery.rs b/src/recovery.rs index da918d19..b3bbd8f3 100644 --- a/src/recovery.rs +++ b/src/recovery.rs @@ -212,7 +212,7 @@ pub fn nested_delimiters<'a, I, O, E, F, const N: usize>( fallback: F, ) -> impl Parser<'a, I, O, E> + Clone where - I: ValueInput<'a> + 'a, + I: ValueInput<'a>, I::Token: PartialEq + Clone + MaybeSync, E: extra::ParserExtra<'a, I> + MaybeSync, F: Fn(I::Span) -> O + Clone, From 51698149fc4f38bc8cc74af7b27617a946f9179c Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Wed, 11 Oct 2023 17:10:47 +0100 Subject: [PATCH 49/64] Add compile test for map_with --- src/lib.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index a4ca8cbd..67190ad8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3459,4 +3459,18 @@ mod tests { } } } + + #[test] + #[allow(dead_code)] + fn map_with_compiles() { + enum Token {} + enum Expr {} + + fn expr<'src, I>() -> impl Parser<'src, I, (Expr, SimpleSpan)> + 'src + where + I: Input<'src, Token = Token, Span = SimpleSpan> + 'src, + { + todo().map_with(|expr, e| (expr, 
e.span())) + } + } } From 09f1199c977c36f1a2328e986e3aac1698ea0c09 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Wed, 11 Oct 2023 21:22:59 +0100 Subject: [PATCH 50/64] README2 improvements --- README2.md | 47 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/README2.md b/README2.md index 6352fc10..22fbc11e 100644 --- a/README2.md +++ b/README2.md @@ -7,26 +7,28 @@ Chumsky is a parser combinator library for Rust that makes writing expressive, high-performance parsers easy. -Although chumsky is designed primarily for user-fancing parsers such as compilers, chumsky is just as much at home -parsing binary protocols in a networking layer, configuration files, or any other form of complex input validation that -you may need. - Example usage with my own language, Tao +Although chumsky is designed primarily for user-fancing parsers such as compilers, chumsky is just as much at home +parsing binary protocols at the networking layer, configuration files, or any other form of complex input validation that +you may need. It also has `no_std` support, making it suitable for embedded environments. 
+ ## Features - 🪄 **Expressive combinators** that make writing your parser a joy - 🎛️ **Fully generic** across input, token, output, span, and error types -- 📑 **Zero-copy parsing** minimises your parser's need to allocate +- 📑 **Zero-copy parsing** minimises allocation by having outputs hold references/slices of the input - 🚦 **Flexible error recovery** strategies out of the box - 🚀 **Internal optimiser** leverages the power of [GATs](https://smallcultfollowing.com/babysteps/blog/2022/06/27/many-modes-a-gats-pattern/) to optimise your parser for you - 📖 **Text-oriented parsers** for text inputs (i.e: `&[u8]` and `&str`) - 👁️‍🗨️ **Context-free grammars** are fully supported, with support for context-sensitivity - 🔄 **Left recursion and memoization** have opt-in support -- 🪺 **Nested inputs** such as token trees are fully supported +- 🪺 **Nested inputs** such as token trees are fully supported both as inputs and outputs - 🏷️ **Pattern labelling** for dynamic, user-friendly error messages +- 🗃️ **Caching** allows parsers to be created once and reused many times +- ↔️ **Pratt parsing** support for unary and binary operators *Note: Error diagnostic rendering is performed by [Ariadne](https://github.com/zesterer/ariadne)* @@ -39,19 +41,18 @@ See [`examples/brainfuck.rs`](https://github.com/zesterer/chumsky/blob/master/ex ```rust use chumsky::prelude::*; -/// Define out output AST (Abstract Syntax Tree) +/// An AST (Abstract Syntax Tree) for Brainfuck instructions #[derive(Clone)] enum Instr { Left, Right, Incr, Decr, Read, Write, - // In Brainfuck, `[...]` blocks are loops - Loop(Vec<Self>), + Loop(Vec<Self>), // In Brainfuck, `[...]` loops contain sub-blocks of instructions } /// A function that returns an instance of our Brainfuck parser fn parser<'a>() -> impl Parser<'a, &'a str, Vec<Instr>> { - // Our parser is recursive: each instruction can contain many instructions (via `[...]` blocks) + // Brainfuck syntax is recursive: each block can
contain many sub-blocks (via `[...]` loops) recursive(|bf| choice(( // All of the basic instructions are just single characters just('<').to(Instr::Left), @@ -87,6 +88,32 @@ Chumsky has [a tutorial](https://github.com/zesterer/chumsky/blob/master/tutoria parser and interpreter for a simple dynamic language with unary and binary operators, operator precedence, functions, let declarations, and calls. +## Cargo Features + +Chumsky contains several optional features that extend the crate's functionality. + +- `pratt`: enables the [pratt parsing](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html) combinator + +- `regex`: enables the regex combinator + +- `serde`: enables `serde` (de)serialization support for several types + +- `either`: implements `Parser` for `either::Either`, allowing dynamic configuration of parsers at runtime + +- `sync`: enables thread-safe features + +- `extension`: enables the extension API, allowing you to write your own first-class combinators that integrate with and extend chumsky + +- `memoization`: enables [memoization](https://en.wikipedia.org/wiki/Memoization#Parsers) features + +- `spill-stack` (enabled by default): avoid stack overflows by spilling stack data to the heap + +- `unstable`: enables experimental chumsky features + +- `std` (enabled by default): support for standard library features + +- `nightly`: enable support for features only supported by the nightly Rust compiler + ## *What* is a parser combinator? Parser combinators are a technique for implementing parsers by defining them in terms of other parsers. 
The resulting From 91a77041dcd92e3a8d382098d1ae84e93421c2ba Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Wed, 11 Oct 2023 21:23:29 +0100 Subject: [PATCH 51/64] Bumped version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 3eb1f26d..cbfb4937 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "chumsky" -version = "1.0.0-alpha.5" +version = "1.0.0-alpha.6" description = "A parser library for humans with powerful error recovery" authors = ["Joshua Barretto ", "Elijah Hartvigsen "] repository = "https://github.com/zesterer/chumsky" From 46a1188f4cbc1253bd86f41f4257dd5639fa2e4e Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 23 Oct 2023 14:37:37 +0100 Subject: [PATCH 52/64] Added MapExtra support to the pratt combinator --- Cargo.lock | 2 +- src/pratt.rs | 71 ++++++++++++++++++++++++++++------------------------ 2 files changed, 39 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cdb120b7..2861fe29 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -192,7 +192,7 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chumsky" -version = "1.0.0-alpha.5" +version = "1.0.0-alpha.6" dependencies = [ "ariadne", "ciborium", diff --git a/src/pratt.rs b/src/pratt.rs index 3bdc7b6d..cc617cf6 100644 --- a/src/pratt.rs +++ b/src/pratt.rs @@ -24,8 +24,8 @@ //! [`prefix`], and [`postfix`]. //! //! Fold functions have several overloads, allowing you to make use of only the operands, the operands and the -//! operators, and even additionally a [`Span`] that covers the entire operation. See the documentation for each -//! function to see which fold signatures can be used. +//! operators, and even additionally [`MapExtra`], providing access to the span, slice, and parser state. See the +//! documentation for each function to see which fold signatures can be used. //! //! # Examples //! 
@@ -104,13 +104,19 @@ where fn op_parser(&self) -> &Self::OpParser; fn associativity(&self) -> Associativity; - fn fold_infix(&self, _lhs: O, _op: Self::Op, _rhs: O, _span: I::Span) -> O { + fn fold_infix( + &self, + _lhs: O, + _op: Self::Op, + _rhs: O, + _extra: &mut MapExtra<'a, '_, I, E>, + ) -> O { unreachable!() } - fn fold_prefix(&self, _op: Self::Op, _rhs: O, _span: I::Span) -> O { + fn fold_prefix(&self, _op: Self::Op, _rhs: O, _extra: &mut MapExtra<'a, '_, I, E>) -> O { unreachable!() } - fn fold_postfix(&self, _lhs: O, _op: Self::Op, _span: I::Span) -> O { + fn fold_postfix(&self, _lhs: O, _op: Self::Op, _extra: &mut MapExtra<'a, '_, I, E>) -> O { unreachable!() } } @@ -195,8 +201,8 @@ impl Clone for Infix { /// impl Fn(O, O) -> O /// // Combine the left operand, the operator itself, and the right operand /// impl Fn(O, Op, O) -> O -/// // Combine the left operand, the operator itself, the right operand, and the span that covers the whole operation -/// impl Fn(O, Op, O, I::Span) -> O +/// // Combine the left operand, the operator itself, the right operand, and a [`MapExtra`] covering the whole operation +/// impl Fn(O, Op, O, &mut MapExtra<'a, '_, I, E>) -> O /// ``` pub const fn infix( associativity: Associativity, @@ -212,7 +218,7 @@ pub const fn infix( } macro_rules! infix_op { - (|$f:ident : Fn($($Arg:ty),*) -> O, $lhs:ident, $op:ident, $rhs:ident, $span:ident| $invoke:expr) => { + (|$f:ident : Fn($($Arg:ty),*) -> O, $lhs:ident, $op:ident, $rhs:ident, $extra:ident| $invoke:expr) => { impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Infix where I: Input<'a>, @@ -225,17 +231,19 @@ macro_rules! 
infix_op { const IS_INFIX: bool = true; #[inline(always)] fn op_parser(&self) -> &Self::OpParser { &self.op_parser } #[inline(always)] fn associativity(&self) -> Associativity { self.associativity } - #[inline(always)] fn fold_infix(&self, $lhs: O, $op: Self::Op, $rhs: O, $span: I::Span) -> O { let $f = &self.fold; $invoke } + #[inline(always)] fn fold_infix(&self, $lhs: O, $op: Self::Op, $rhs: O, $extra: &mut MapExtra<'a, '_, I, E>) -> O { let $f = &self.fold; $invoke } } }; } // Allow `|lhs, rhs| ` to be used as a fold closure for infix operators -infix_op!(|f: Fn(O, O) -> O, lhs, _op, rhs, _span| f(lhs, rhs)); +infix_op!(|f: Fn(O, O) -> O, lhs, _op, rhs, _extra| f(lhs, rhs)); // Allow `|lhs, op, rhs| ` to be used as a fold closure for infix operators -infix_op!(|f: Fn(O, Op, O) -> O, lhs, op, rhs, _span| f(lhs, op, rhs)); -// Allow `|lhs, op, rhs, span| ` to be used as a fold closure for infix operators -infix_op!(|f: Fn(O, Op, O, I::Span) -> O, lhs, op, rhs, span| f(lhs, op, rhs, span)); +infix_op!(|f: Fn(O, Op, O) -> O, lhs, op, rhs, _extra| f(lhs, op, rhs)); +// Allow `|lhs, op, rhs, extra| ` to be used as a fold closure for infix operators +infix_op!( + |f: Fn(O, Op, O, &mut MapExtra<'a, '_, I, E>) -> O, lhs, op, rhs, extra| f(lhs, op, rhs, extra) +); /// See [`prefix`]. pub struct Prefix { @@ -270,8 +278,8 @@ impl Clone for Prefix { /// impl Fn(O) -> O /// // Combine the operator itself and the operand /// impl Fn(Op, O) -> O -/// // Combine the operator itself, the operand, and the span that covers the whole operation -/// impl Fn(Op, O, I::Span) -> O +/// // Combine the operator itself, the operand, and a [`MapExtra`] covering the whole operation +/// impl Fn(Op, O, &mut MapExtra<'a, '_, I, E>) -> O /// ``` pub const fn prefix( binding_power: u16, @@ -287,7 +295,7 @@ pub const fn prefix( } macro_rules! 
prefix_op { - (|$f:ident : Fn($($Arg:ty),*) -> O, $op:ident, $rhs:ident, $span:ident| $invoke:expr) => { + (|$f:ident : Fn($($Arg:ty),*) -> O, $op:ident, $rhs:ident, $extra:ident| $invoke:expr) => { impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Prefix where I: Input<'a>, @@ -300,17 +308,17 @@ macro_rules! prefix_op { const IS_PREFIX: bool = true; #[inline(always)] fn op_parser(&self) -> &Self::OpParser { &self.op_parser } #[inline(always)] fn associativity(&self) -> Associativity { Associativity::Left(self.binding_power) } - #[inline(always)] fn fold_prefix(&self, $op: Self::Op, $rhs: O, $span: I::Span) -> O { let $f = &self.fold; $invoke } + #[inline(always)] fn fold_prefix(&self, $op: Self::Op, $rhs: O, $extra: &mut MapExtra<'a, '_, I, E>) -> O { let $f = &self.fold; $invoke } } }; } // Allow `|rhs| ` to be used as a fold closure for prefix operators -prefix_op!(|f: Fn(O) -> O, _op, rhs, _span| f(rhs)); +prefix_op!(|f: Fn(O) -> O, _op, rhs, _extra| f(rhs)); // Allow `|op, rhs| ` to be used as a fold closure for prefix operators -prefix_op!(|f: Fn(Op, O) -> O, op, rhs, _span| f(op, rhs)); +prefix_op!(|f: Fn(Op, O) -> O, op, rhs, _extra| f(op, rhs)); // Allow `|op, rhs, span| ` to be used as a fold closure for prefix operators -prefix_op!(|f: Fn(Op, O, I::Span) -> O, op, rhs, span| f(op, rhs, span)); +prefix_op!(|f: Fn(Op, O, &mut MapExtra<'a, '_, I, E>) -> O, op, rhs, extra| f(op, rhs, extra)); /// See [`postfix`]. pub struct Postfix { @@ -345,8 +353,8 @@ impl Clone for Postfix { /// impl Fn(O) -> O /// // Combine the operand and the operator itself /// impl Fn(O, Op) -> O -/// // Combine the operand, the operator itself, and the span that covers the whole operation -/// impl Fn(Op, O, I::Span) -> O +/// // Combine the operand, the operator itself, and a [`MapExtra`] covering the whole operation +/// impl Fn(Op, O, &mut MapExtra<'a, '_, I, E>) -> O /// ``` pub const fn postfix( binding_power: u16, @@ -362,7 +370,7 @@ pub const fn postfix( } macro_rules! 
postfix_op { - (|$f:ident : Fn($($Arg:ty),*) -> O, $lhs:ident, $op:ident, $span:ident| $invoke:expr) => { + (|$f:ident : Fn($($Arg:ty),*) -> O, $lhs:ident, $op:ident, $extra:ident| $invoke:expr) => { impl<'a, I, O, E, A, F, Op> Operator<'a, I, O, E> for Postfix where I: Input<'a>, @@ -375,17 +383,17 @@ macro_rules! postfix_op { const IS_POSTFIX: bool = true; #[inline(always)] fn op_parser(&self) -> &Self::OpParser { &self.op_parser } #[inline(always)] fn associativity(&self) -> Associativity { Associativity::Left(self.binding_power) } - #[inline(always)] fn fold_postfix(&self, $lhs: O, $op: Self::Op, $span: I::Span) -> O { let $f = &self.fold; $invoke } + #[inline(always)] fn fold_postfix(&self, $lhs: O, $op: Self::Op, $extra: &mut MapExtra<'a, '_, I, E>) -> O { let $f = &self.fold; $invoke } } }; } // Allow `|lhs| ` to be used as a fold closure for postfix operators -postfix_op!(|f: Fn(O) -> O, lhs, _op, _span| f(lhs)); +postfix_op!(|f: Fn(O) -> O, lhs, _op, _extra| f(lhs)); // Allow `|lhs, op| ` to be used as a fold closure for postfix operators -postfix_op!(|f: Fn(O, Op) -> O, lhs, op, _span| f(lhs, op)); +postfix_op!(|f: Fn(O, Op) -> O, lhs, op, _extra| f(lhs, op)); // Allow `|lhs, op, span| ` to be used as a fold closure for postfix operators -postfix_op!(|f: Fn(O, Op, I::Span) -> O, lhs, op, span| f(lhs, op, span)); +postfix_op!(|f: Fn(O, Op, &mut MapExtra<'a, '_, I, E>) -> O, lhs, op, extra| f(lhs, op, extra)); /// See [`Parser::pratt`]. #[derive(Copy, Clone)] @@ -422,8 +430,7 @@ macro_rules! impl_pratt_for_tuple { Ok(op) => { match recursive::recurse(|| self.pratt_go::(inp, $X.associativity().left_power())) { Ok(rhs) => break 'choice M::combine(op, rhs, |op, rhs| { - let span = inp.span_since(pre_expr.offset()); - $X.fold_prefix(op, rhs, span) + $X.fold_prefix(op, rhs, &mut MapExtra::new(pre_expr.offset(), inp)) }), Err(()) => inp.rewind(pre_expr), } @@ -448,8 +455,7 @@ macro_rules! 
impl_pratt_for_tuple { match $X.op_parser().go::(inp) { Ok(op) => { lhs = M::combine(lhs, op, |lhs, op| { - let span = inp.span_since(pre_expr.offset()); - $X.fold_postfix(lhs, op, span) + $X.fold_postfix(lhs, op, &mut MapExtra::new(pre_expr.offset(), inp)) }); continue }, @@ -469,8 +475,7 @@ macro_rules! impl_pratt_for_tuple { M::combine(lhs, rhs, |lhs, rhs| (lhs, rhs)), op, |(lhs, rhs), op| { - let span = inp.span_since(pre_expr.offset()); - $X.fold_infix(lhs, op, rhs, span) + $X.fold_infix(lhs, op, rhs, &mut MapExtra::new(pre_expr.offset(), inp)) }, ); continue From a1268f51d1e2ba9f4fd0ecd97dddab87f8b5cd8c Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Fri, 3 Nov 2023 16:36:36 +0000 Subject: [PATCH 53/64] Patched a problem with text filter errors --- src/error.rs | 6 +----- src/text.rs | 27 +++++++++------------------ 2 files changed, 10 insertions(+), 23 deletions(-) diff --git a/src/error.rs b/src/error.rs index b08584c3..c92cd01e 100644 --- a/src/error.rs +++ b/src/error.rs @@ -811,11 +811,7 @@ fn write_token( tok: Option<&T>, ) -> fmt::Result { match tok { - Some(tok) => { - write!(f, "'")?; - fmt_token(tok, f)?; - write!(f, "'") - } + Some(tok) => fmt_token(tok, f), None => write!(f, "end of input"), } } diff --git a/src/text.rs b/src/text.rs index 91d32179..c7aec836 100644 --- a/src/text.rs +++ b/src/text.rs @@ -170,8 +170,7 @@ pub fn whitespace<'a, C: Char, I: ValueInput<'a> + StrInput<'a, C>, E: ParserExt where I::Token: Char, { - any() - .filter(|c: &I::Token| c.is_whitespace()) + select! { c if (c as I::Token).is_whitespace() => () } .ignored() .repeated() } @@ -200,8 +199,7 @@ pub fn inline_whitespace<'a, C: Char, I: ValueInput<'a> + StrInput<'a, C>, E: Pa where I::Token: Char, { - any() - .filter(|c: &I::Token| c.is_inline_whitespace()) + select! 
{ c if (c as I::Token).is_inline_whitespace() => () } .ignored() .repeated() } @@ -245,8 +243,8 @@ where just(I::Token::from_ascii(b'\r')) .or_not() .ignore_then(just(I::Token::from_ascii(b'\n'))) - .or(any().filter(|c: &I::Token| { - [ + .or(select! { + c if [ '\r', // Carriage return '\x0B', // Vertical tab '\x0C', // Form feed @@ -254,8 +252,8 @@ where '\u{2028}', // Line separator '\u{2029}', // Paragraph separator ] - .contains(&c.to_char()) - })) + .contains(&(c as I::Token).to_char()) => c, + }) .ignored() } @@ -344,7 +342,7 @@ pub fn int<'a, I: ValueInput<'a> + StrInput<'a, C>, C: Char, E: ParserExtra<'a, } }) // This error never appears due to `repeated` so can use `filter` - .then(any().filter(move |c: &C| c.is_digit(radix)).repeated()) + .then(select! { c if (c as I::Token).is_digit(radix) => () }.repeated()) .ignored() .or(just(C::digit_zero()).ignored()) .to_slice() @@ -374,9 +372,7 @@ pub mod ascii { } }) .then( - any() - // This error never appears due to `repeated` so can use `filter` - .filter(|c: &C| c.to_char().is_ascii_alphanumeric() || c.to_char() == '_') + select! { c if (c as I::Token).to_char().is_ascii_alphanumeric() || (c as I::Token).to_char() == '_' => () } .repeated(), ) .to_slice() @@ -463,12 +459,7 @@ pub mod unicode { Err(Error::expected_found([], Some(MaybeRef::Val(c)), span)) } }) - .then( - any() - // This error never appears due to `repeated` so can use `filter` - .filter(|c: &C| c.is_ident_continue()) - .repeated(), - ) + .then(select! 
{ c if (c as C).is_ident_continue() => () }.repeated()) .to_slice() } From 1a235c5e998707e6ef61a0ddce15d30f171d6d71 Mon Sep 17 00:00:00 2001 From: morzel85 Date: Thu, 16 Nov 2023 22:01:14 +0100 Subject: [PATCH 54/64] Branch name updated to main in tutorial.md --- tutorial.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tutorial.md b/tutorial.md index af8efbc0..6d944f05 100644 --- a/tutorial.md +++ b/tutorial.md @@ -1,6 +1,6 @@ # Chumsky: A Tutorial -*Please note that this tutorial is kept up to date with the `master` branch and not the most stable release: small +*Please note that this tutorial is kept up to date with the `main` branch and not the most stable release: small details may differ!* In this tutorial, we'll develop a parser (and interpreter!) for a programming language called 'Foo'. @@ -20,7 +20,7 @@ By the end of this tutorial, you'll have an interpreter that will let you run th This tutorial should take somewhere between 30 and 100 minutes to complete, depending on factors such as knowledge of Rust and compiler theory. -*You can find the source code for the full interpreter in [`examples/foo.rs`](https://github.com/zesterer/chumsky/blob/master/examples/foo.rs) in the main repository.* +*You can find the source code for the full interpreter in [`examples/foo.rs`](https://github.com/zesterer/chumsky/blob/main/examples/foo.rs) in the main repository.* ## Assumptions @@ -34,7 +34,7 @@ This tutorial is here to show you how to use Chumsky: it's not a general-purpose As we go, we'll be encountering many functions and concepts from Chumsky. I strongly recommend you keep [Chumsky's documentation](https://docs.rs/chumsky/) open in another browser tab and use it to cross-reference your understanding or gain more insight into specific things that you'd like more clarification on. In particular, most of the functions we'll be using come from the [`Parser`](https://docs.rs/chumsky/latest/chumsky/trait.Parser.html) trait. 
Chumsky's docs include extensive doc examples for almost every function, so be sure to make use of them! -Chumsky also has [several longer examples](https://github.com/zesterer/chumsky/tree/master/examples) in the main repository: looking at these may help improve your understanding if you get stuck. +Chumsky also has [several longer examples](https://github.com/zesterer/chumsky/tree/main/examples) in the main repository: looking at these may help improve your understanding if you get stuck. ## A note on imperative vs declarative parsers From 712af66669ea6bb4d3c4fc222186f8776232af49 Mon Sep 17 00:00:00 2001 From: wackbyte Date: Mon, 27 Nov 2023 10:20:15 -0500 Subject: [PATCH 55/64] Fix benches and examples and check them in CI, satisfy lints This should prevent them from becoming out-of-date. I chose not to test them with the MSRV since it would be an unnecessary restriction. Note: Is the use of `#[allow(clippy::let_and_return)]` okay, should it be moved, or should it be removed and fixed? 
--- .github/workflows/rust.yml | 6 +++--- Cargo.lock | 4 ++-- Cargo.toml | 2 +- benches/cbor.rs | 11 +++++++---- benches/json.rs | 29 ++++++++++++++++------------- benches/lex.rs | 11 ++++++----- examples/foo.rs | 1 + examples/json.rs | 4 ++-- examples/nano_rust.rs | 28 ++++++++++++---------------- examples/nested.rs | 1 + examples/nested_spans.rs | 1 + src/container.rs | 2 ++ src/input.rs | 6 ++++-- src/lib.rs | 14 ++++---------- src/number.rs | 4 ++-- 15 files changed, 64 insertions(+), 60 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index f48ebe3d..027bae58 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -23,11 +23,11 @@ jobs: toolchain: nightly components: rustfmt, clippy - name: Run cargo check (all features) - run: cargo check --tests --verbose --all-features + run: cargo check --benches --examples --tests --verbose --all-features - name: Run cargo check (no features) - run: cargo check --tests --verbose --no-default-features + run: cargo check --benches --examples --tests --verbose --no-default-features - name: Run cargo clippy - run: cargo clippy --verbose --all-features -- -D warnings + run: cargo clippy --benches --examples --tests --verbose --all-features -- -D warnings - name: Run cargo fmt run: cargo fmt --verbose --check - name: Run cargo doc diff --git a/Cargo.lock b/Cargo.lock index 2861fe29..ec2987d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2411,9 +2411,9 @@ checksum = "d419259aba16b663966e29e6d7c6ecfa0bb8425818bb96f6f1f3c3eb71a6e7b9" [[package]] name = "winnow" -version = "0.5.11" +version = "0.5.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e461589e194280efaa97236b73623445efa195aa633fd7004f39805707a9d53" +checksum = "829846f3e3db426d4cee4510841b71a8e58aa2a76b1132579487ae430ccd9c7b" dependencies = [ "memchr", ] diff --git a/Cargo.toml b/Cargo.toml index cbfb4937..3aa410d1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,7 +85,7 @@ unicode-ident 
= "1.0.10" ariadne = "0.2" pom = "3.2" nom = "7.1" -winnow = "0.5.0" +winnow = "0.5.19" serde_json = { version = "1.0", features = ["preserve_order"] } ciborium = { version = "0.2" } criterion = "0.4.0" diff --git a/benches/cbor.rs b/benches/cbor.rs index 2000405d..683dbdb1 100644 --- a/benches/cbor.rs +++ b/benches/cbor.rs @@ -3,7 +3,7 @@ use std::hint::black_box; mod utils; -static CBOR: &'static [u8] = include_bytes!("samples/sample.cbor"); +static CBOR: &[u8] = include_bytes!("samples/sample.cbor"); fn bench_cbor(c: &mut Criterion) { // c.bench_function("cbor_nom", { @@ -125,7 +125,8 @@ mod chumsky_zero_copy { }; cfg.exactly(num) })) - .map_slice(int_out); + .to_slice() + .map(int_out); let uint = read_int.map(CborZero::Int); let nint = read_int.map(|i| CborZero::Int(-1 - i)); @@ -134,14 +135,16 @@ mod chumsky_zero_copy { any() .repeated() .configure(|cfg, ctx| cfg.exactly(*ctx as usize)) - .map_slice(CborZero::Bytes), + .to_slice() + .map(CborZero::Bytes), ); let str = read_int.ignore_with_ctx( any() .repeated() .configure(|cfg, ctx| cfg.exactly(*ctx as usize)) - .map_slice(|slice| CborZero::String(std::str::from_utf8(slice).unwrap())), + .to_slice() + .map(|slice| CborZero::String(std::str::from_utf8(slice).unwrap())), ); let array = read_int.ignore_with_ctx( diff --git a/benches/json.rs b/benches/json.rs index e63ead9d..06a65b80 100644 --- a/benches/json.rs +++ b/benches/json.rs @@ -1,3 +1,5 @@ +#![allow(clippy::result_large_err, clippy::type_complexity)] + use criterion::{black_box, criterion_group, criterion_main, Criterion}; mod utils; @@ -22,7 +24,7 @@ pub enum JsonZero<'a> { Object(Vec<(&'a [u8], JsonZero<'a>)>), } -static JSON: &'static [u8] = include_bytes!("samples/sample.json"); +static JSON: &[u8] = include_bytes!("samples/sample.json"); fn bench_json(c: &mut Criterion) { c.bench_function("json_nom", { @@ -142,7 +144,8 @@ mod chumsky_zero_copy { .then(int) .then(frac.or_not()) .then(exp.or_not()) - .map_slice(|bytes| 
str::from_utf8(bytes).unwrap().parse().unwrap()) + .to_slice() + .map(|bytes| str::from_utf8(bytes).unwrap().parse().unwrap()) .boxed(); let escape = just(b'\\').then_ignore(one_of(b"\\/\"bfnrt")); @@ -150,7 +153,7 @@ mod chumsky_zero_copy { let string = none_of(b"\\\"") .or(escape) .repeated() - .slice() + .to_slice() .delimited_by(just(b'"'), just(b'"')) .boxed(); @@ -197,14 +200,14 @@ mod pom { } fn number() -> Parser { - let integer = one_of(b"123456789") - one_of(b"0123456789").repeat(0..) | sym(b'0'); + let integer = (one_of(b"123456789") - one_of(b"0123456789").repeat(0..)) | sym(b'0'); let frac = sym(b'.') + one_of(b"0123456789").repeat(1..); let exp = one_of(b"eE") + one_of(b"+-").opt() + one_of(b"0123456789").repeat(1..); let number = sym(b'-').opt() + integer + frac.opt() + exp.opt(); number .collect() .convert(str::from_utf8) - .convert(|s| f64::from_str(&s)) + .convert(f64::from_str) } fn string() -> Parser { @@ -237,10 +240,10 @@ mod pom { (seq(b"null").map(|_| Json::Null) | seq(b"true").map(|_| Json::Bool(true)) | seq(b"false").map(|_| Json::Bool(false)) - | number().map(|num| Json::Num(num)) - | string().map(|text| Json::Str(text)) - | array().map(|arr| Json::Array(arr)) - | object().map(|obj| Json::Object(obj))) + | number().map(Json::Num) + | string().map(Json::Str) + | array().map(Json::Array) + | object().map(Json::Object)) - space() } @@ -344,7 +347,7 @@ mod nom { terminated(value, space)(i) } - pub fn json<'a>(i: &'a [u8]) -> IResult<&'a [u8], JsonZero, (&'a [u8], nom::error::ErrorKind)> { + pub fn json(i: &[u8]) -> IResult<&[u8], JsonZero, (&[u8], nom::error::ErrorKind)> { root(i) } } @@ -352,7 +355,7 @@ mod nom { mod winnow { use winnow::{ ascii::{digit0, digit1, escaped}, - combinator::separated0, + combinator::separated, combinator::{alt, dispatch}, combinator::{cut_err, fail, opt, peek}, combinator::{preceded, separated_pair, terminated}, @@ -403,7 +406,7 @@ mod winnow { preceded( '[', cut_err(terminated( - separated0(value, 
preceded(space, ',')), + separated(0.., value, preceded(space, ',')), preceded(space, ']'), )), ) @@ -427,7 +430,7 @@ mod winnow { preceded( '{', cut_err(terminated( - separated0(member, preceded(space, ',')), + separated(0.., member, preceded(space, ',')), preceded(space, '}'), )), ) diff --git a/benches/lex.rs b/benches/lex.rs index 7c51eb42..e8f22a27 100644 --- a/benches/lex.rs +++ b/benches/lex.rs @@ -26,7 +26,7 @@ pub enum Token<'a> { Comma, } -static SAMPLE: &'static [u8] = include_bytes!("tokens.txt"); +static SAMPLE: &[u8] = include_bytes!("tokens.txt"); fn bench_lex(c: &mut Criterion) { c.bench_function("lex_chumsky_zero_copy", { @@ -126,7 +126,7 @@ mod chumsky_zero_copy { use std::str; pub fn parser<'a>() -> impl Parser<'a, &'a [u8], Vec>> { - let digits = one_of(b'0'..=b'9').repeated().slice(); + let digits = one_of(b'0'..=b'9').repeated().to_slice(); let int = one_of(b'1'..=b'9') .repeated() @@ -148,7 +148,8 @@ mod chumsky_zero_copy { .then(int) .then(frac.or_not()) .then(exp.or_not()) - .map_slice(|bytes| str::from_utf8(bytes).unwrap().parse().unwrap()) + .to_slice() + .map(|bytes| str::from_utf8(bytes).unwrap().parse().unwrap()) .boxed(); let escape = just(b'\\') @@ -169,11 +170,11 @@ mod chumsky_zero_copy { .ignored() .or(escape) .repeated() - .slice() + .to_slice() .delimited_by(just(b'"'), just(b'"')) .boxed(); - let ident = text::ascii::ident().map_slice(Token::Ident); + let ident = text::ascii::ident().to_slice().map(Token::Ident); choice(( just(b"null").to(Token::Null), diff --git a/examples/foo.rs b/examples/foo.rs index a2ca4b4b..cb628d1b 100644 --- a/examples/foo.rs +++ b/examples/foo.rs @@ -27,6 +27,7 @@ enum Expr<'a> { }, } +#[allow(clippy::let_and_return)] fn parser<'a>() -> impl Parser<'a, &'a str, Expr<'a>> { let ident = text::ascii::ident().padded(); diff --git a/examples/json.rs b/examples/json.rs index 7f5dc68e..3b74a32b 100644 --- a/examples/json.rs +++ b/examples/json.rs @@ -21,12 +21,12 @@ fn parser<'a>() -> impl Parser<'a, &'a 
str, Json, extra::Err>> { recursive(|value| { let digits = text::digits(10).to_slice(); - let frac = just('.').then(digits.clone()); + let frac = just('.').then(digits); let exp = just('e') .or(just('E')) .then(one_of("+-").or_not()) - .then(digits.clone()); + .then(digits); let number = just('-') .or_not() diff --git a/examples/nano_rust.rs b/examples/nano_rust.rs index 9dd4d8e3..232df921 100644 --- a/examples/nano_rust.rs +++ b/examples/nano_rust.rs @@ -454,7 +454,7 @@ fn funcs_parser<'tokens, 'src: 'tokens>() -> impl Parser< for ((name, name_span), f) in fs { if funcs.insert(name, f).is_some() { emitter.emit(Rich::custom( - name_span.clone(), + name_span, format!("Function '{}' already exists", name), )); } @@ -489,7 +489,7 @@ fn eval_expr<'src>( .map(|(_, v)| v.clone()) .or_else(|| Some(Value::Func(name)).filter(|_| funcs.contains_key(name))) .ok_or_else(|| Error { - span: expr.1.clone(), + span: expr.1, msg: format!("No such variable '{}' in scope", name), })?, Expr::Let(local, val, body) => { @@ -504,20 +504,16 @@ fn eval_expr<'src>( eval_expr(b, funcs, stack)? } Expr::Binary(a, BinaryOp::Add, b) => Value::Num( - eval_expr(a, funcs, stack)?.num(a.1.clone())? - + eval_expr(b, funcs, stack)?.num(b.1.clone())?, + eval_expr(a, funcs, stack)?.num(a.1)? + eval_expr(b, funcs, stack)?.num(b.1)?, ), Expr::Binary(a, BinaryOp::Sub, b) => Value::Num( - eval_expr(a, funcs, stack)?.num(a.1.clone())? - - eval_expr(b, funcs, stack)?.num(b.1.clone())?, + eval_expr(a, funcs, stack)?.num(a.1)? - eval_expr(b, funcs, stack)?.num(b.1)?, ), Expr::Binary(a, BinaryOp::Mul, b) => Value::Num( - eval_expr(a, funcs, stack)?.num(a.1.clone())? - * eval_expr(b, funcs, stack)?.num(b.1.clone())?, + eval_expr(a, funcs, stack)?.num(a.1)? * eval_expr(b, funcs, stack)?.num(b.1)?, ), Expr::Binary(a, BinaryOp::Div, b) => Value::Num( - eval_expr(a, funcs, stack)?.num(a.1.clone())? - / eval_expr(b, funcs, stack)?.num(b.1.clone())?, + eval_expr(a, funcs, stack)?.num(a.1)? 
/ eval_expr(b, funcs, stack)?.num(b.1)?, ), Expr::Binary(a, BinaryOp::Eq, b) => { Value::Bool(eval_expr(a, funcs, stack)? == eval_expr(b, funcs, stack)?) @@ -532,7 +528,7 @@ fn eval_expr<'src>( let f = &funcs[&name]; let mut stack = if f.args.len() != args.0.len() { return Err(Error { - span: expr.1.clone(), + span: expr.1, msg: format!("'{}' called with wrong number of arguments (expected {}, found {})", name, f.args.len(), args.0.len()), }); } else { @@ -546,7 +542,7 @@ fn eval_expr<'src>( } f => { return Err(Error { - span: func.1.clone(), + span: func.1, msg: format!("'{:?}' is not callable", f), }) } @@ -559,7 +555,7 @@ fn eval_expr<'src>( Value::Bool(false) => eval_expr(b, funcs, stack)?, c => { return Err(Error { - span: cond.1.clone(), + span: cond.1, msg: format!("Conditions must be booleans, found '{:?}'", c), }) } @@ -588,10 +584,10 @@ fn main() { if let Some((funcs, file_span)) = ast.filter(|_| errs.len() + parse_errs.len() == 0) { if let Some(main) = funcs.get("main") { - if main.args.len() != 0 { + if !main.args.is_empty() { errs.push(Rich::custom( main.span, - format!("The main function cannot have arguments"), + "The main function cannot have arguments".to_string(), )) } else { match eval_expr(&main.body, &funcs, &mut Vec::new()) { @@ -602,7 +598,7 @@ fn main() { } else { errs.push(Rich::custom( file_span, - format!("Programs need a main function but none was found"), + "Programs need a main function but none was found".to_string(), )); } } diff --git a/examples/nested.rs b/examples/nested.rs index 4fecbf1a..b202c198 100644 --- a/examples/nested.rs +++ b/examples/nested.rs @@ -9,6 +9,7 @@ enum Token { Parens(Vec), } +#[allow(clippy::let_and_return)] fn parser<'a>() -> impl Parser<'a, &'a [Token], i64> { recursive(|expr| { let num = select_ref! 
{ Token::Num(x) => *x }; diff --git a/examples/nested_spans.rs b/examples/nested_spans.rs index fec39292..cd208d38 100644 --- a/examples/nested_spans.rs +++ b/examples/nested_spans.rs @@ -11,6 +11,7 @@ enum Token { type TokenTreeInput<'a> = SpannedInput; +#[allow(clippy::let_and_return)] fn parser<'a>() -> impl Parser<'a, TokenTreeInput<'a>, i64> { recursive(|expr| { let num = select_ref! { Token::Num(x) => *x }; diff --git a/src/container.rs b/src/container.rs index 43ef7e17..c9afd65e 100644 --- a/src/container.rs +++ b/src/container.rs @@ -812,6 +812,7 @@ mod test { for idx in 0..C::LEN { C::write(&mut uninit, idx, idx); } + // SAFETY: All elements were initialized. unsafe { C::take(uninit) } } @@ -820,6 +821,7 @@ mod test { for idx in 0..(C::LEN / 2) { C::write(&mut uninit, idx, idx); } + // SAFETY: All elements up to this point were initialized. unsafe { C::drop_before(&mut uninit, C::LEN / 2) }; } diff --git a/src/input.rs b/src/input.rs index 9fc17bfc..1e902cba 100644 --- a/src/input.rs +++ b/src/input.rs @@ -6,13 +6,15 @@ //! ways: from strings, slices, arrays, etc. pub use crate::stream::{BoxedExactSizeStream, BoxedStream, Stream}; -use core::cell::RefCell; use super::*; #[cfg(feature = "memoization")] use hashbrown::HashMap; #[cfg(feature = "std")] -use std::io::{BufReader, Read, Seek}; +use std::{ + cell::RefCell, + io::{BufReader, Read, Seek}, +}; /// A trait for types that represents a stream of input tokens. Unlike [`Iterator`], this type /// supports backtracking and a few other features required by the crate. 
diff --git a/src/lib.rs b/src/lib.rs index 67190ad8..ebaed13a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3027,7 +3027,7 @@ mod tests { } let mut chars = String::new(); - for c in parser().parse_iter(&"abcdefg").into_result().unwrap() { + for c in parser().parse_iter("abcdefg").into_result().unwrap() { chars.push(c); } @@ -3409,7 +3409,7 @@ mod tests { many_as.into_iter().collect() } - assert_eq!(parser().parse("aaa").into_result().unwrap(), ()); + assert_eq!(parser().parse("aaa").into_result(), Ok(())); } #[test] @@ -3437,10 +3437,7 @@ mod tests { let s = "hello".to_string(); assert_eq!(parser.get().parse(&s).into_result(), Ok("hello")); - assert!(matches!( - parser.get().parse("goodbye").into_result(), - Err(_) - )); + assert!(parser.get().parse("goodbye").into_result().is_err()); } } @@ -3452,10 +3449,7 @@ mod tests { let parser = crate::cache::Cache::new(MyCache); assert_eq!(parser.get().parse(&s).into_result(), Ok("hello")); - assert!(matches!( - parser.get().parse("goodbye").into_result(), - Err(_) - )); + assert!(parser.get().parse("goodbye").into_result().is_err()); } } } diff --git a/src/number.rs b/src/number.rs index 426125d5..f13503ad 100644 --- a/src/number.rs +++ b/src/number.rs @@ -130,9 +130,9 @@ mod tests { #[test] fn subnorm() { for bits in 0u32..(1 << 21) { - let single: f32 = unsafe { core::mem::transmute(bits) }; + let single: f32 = f32::from_bits(bits); validate(&format!("{:e}", single)); - let double: f64 = unsafe { core::mem::transmute(bits as u64) }; + let double: f64 = f64::from_bits(bits as u64); validate(&format!("{:e}", double)); } } From dce5918bd2dad591ab399d2e191254640a9ed14f Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Fri, 1 Dec 2023 23:41:17 +0000 Subject: [PATCH 56/64] Remove unsound impls of ContainerExactly --- src/container.rs | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/src/container.rs b/src/container.rs index c9afd65e..82495373 100644 --- 
a/src/container.rs +++ b/src/container.rs @@ -215,6 +215,8 @@ where } } +/* +// TODO: Unsound! // Safety: `Rc>` is sound to reinterpret assuming the inner `C` implements // this trait soundly unsafe impl ContainerExactly for Rc @@ -238,7 +240,10 @@ where Rc::from_raw(Rc::into_raw(uninit) as *mut C) } } +*/ +/* +// TODO: Unsound! #[allow(clippy::arc_with_non_send_sync)] // SAFETY: `Arc>` is sound to reinterpret assuming the inner `C` implements // this trait soundly @@ -263,6 +268,7 @@ where Arc::from_raw(Arc::into_raw(uninit) as *mut C) } } +*/ /// A utility trait to abstract over container-like things. /// @@ -832,24 +838,24 @@ mod test { drop_container::<[usize; 4]>(); } - #[test] - fn exact_rc_array() { - let c = init_container::>(); - assert_eq!(&*c, &[0, 1, 2, 3]); - drop_container::>(); - } - - #[test] - fn exact_rc_box_array() { - let c = init_container::>>(); - assert_eq!(&**c, &[0, 1, 2, 3]); - drop_container::>>(); - } - - #[test] - fn exact_box_rc_array() { - let c = init_container::>>(); - assert_eq!(&**c, &[0, 1, 2, 3]); - drop_container::>>(); - } + // #[test] + // fn exact_rc_array() { + // let c = init_container::>(); + // assert_eq!(&*c, &[0, 1, 2, 3]); + // drop_container::>(); + // } + + // #[test] + // fn exact_rc_box_array() { + // let c = init_container::>>(); + // assert_eq!(&**c, &[0, 1, 2, 3]); + // drop_container::>>(); + // } + + // #[test] + // fn exact_box_rc_array() { + // let c = init_container::>>(); + // assert_eq!(&**c, &[0, 1, 2, 3]); + // drop_container::>>(); + // } } From c9b9c38272cf0753161968836e2372eacad57de4 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 1 Jan 2024 18:52:32 +0000 Subject: [PATCH 57/64] Added ability to get span and token from Cheap/Simple --- src/error.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/error.rs b/src/error.rs index c92cd01e..7db01c7f 100644 --- a/src/error.rs +++ b/src/error.rs @@ -139,6 +139,13 @@ pub struct Cheap> { span: S, } +impl Cheap { + /// Get 
the span that this error relates to. + pub fn span(&self) -> &S { + &self.span + } +} + impl<'a, I: Input<'a>> Error<'a, I> for Cheap { #[inline] fn expected_found>>>( @@ -178,6 +185,18 @@ pub struct Simple<'a, T, S = SimpleSpan> { found: Option>, } +impl<'a, T, S> Simple<'a, T, S> { + /// Get the span that this error relates to. + pub fn span(&self) -> &S { + &self.span + } + + /// Get the token, if any, that was found at the error location. + pub fn found(&self) -> Option<&T> { + self.found.as_deref() + } +} + impl<'a, T, S> Simple<'a, T, S> { /// Transform this error's tokens using the given function. /// From 735dd0b876dbf27a47bb75de684794fd849d8a2d Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 1 Jan 2024 19:16:33 +0000 Subject: [PATCH 58/64] Appease clippy --- src/lib.rs | 4 ++-- src/text.rs | 15 +++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ebaed13a..43f55210 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -135,12 +135,12 @@ pub mod prelude { } use crate::input::InputOwn; -use alloc::{boxed::Box, rc::Rc, string::String, sync::Arc, vec, vec::Vec}; +use alloc::{boxed::Box, string::String, vec, vec::Vec}; #[cfg(feature = "nightly")] use core::marker::Tuple; use core::{ borrow::Borrow, - cell::{Cell, RefCell, UnsafeCell}, + cell::{Cell, RefCell}, cmp::{Eq, Ordering}, fmt, hash::Hash, diff --git a/src/text.rs b/src/text.rs index c7aec836..c82167b5 100644 --- a/src/text.rs +++ b/src/text.rs @@ -166,7 +166,7 @@ where /// assert_eq!(whitespace.parse("").into_result(), Ok(())); /// ``` pub fn whitespace<'a, C: Char, I: ValueInput<'a> + StrInput<'a, C>, E: ParserExtra<'a, I>>( -) -> Repeated + Copy + Clone, (), I, E> +) -> Repeated + Copy, (), I, E> where I::Token: Char, { @@ -195,7 +195,7 @@ where /// assert!(inline_whitespace.at_least(1).parse("\n\r").has_errors()); /// ``` pub fn inline_whitespace<'a, C: Char, I: ValueInput<'a> + StrInput<'a, C>, E: ParserExtra<'a, I>>( -) -> Repeated + Copy + 
Clone, (), I, E> +) -> Repeated + Copy, (), I, E> where I::Token: Char, { @@ -235,8 +235,7 @@ where /// assert_eq!(newline.parse("\u{2029}").into_result(), Ok(())); /// ``` #[must_use] -pub fn newline<'a, I: ValueInput<'a>, E: ParserExtra<'a, I>>( -) -> impl Parser<'a, I, (), E> + Copy + Clone +pub fn newline<'a, I: ValueInput<'a>, E: ParserExtra<'a, I>>() -> impl Parser<'a, I, (), E> + Copy where I::Token: Char, { @@ -279,7 +278,7 @@ where /// assert!(digits.parse("").has_errors()); /// ``` #[must_use] -pub fn digits<'a, C, I, E>(radix: u32) -> Repeated + Copy + Clone, C, I, E> +pub fn digits<'a, C, I, E>(radix: u32) -> Repeated + Copy, C, I, E> where C: Char, I: ValueInput<'a> + Input<'a, Token = C>, @@ -331,7 +330,7 @@ where #[must_use] pub fn int<'a, I: ValueInput<'a> + StrInput<'a, C>, C: Char, E: ParserExtra<'a, I>>( radix: u32, -) -> impl Parser<'a, I, &'a C::Str, E> + Copy + Clone { +) -> impl Parser<'a, I, &'a C::Str, E> + Copy { any() // Use try_map over filter to get a better error on failure .try_map(move |c: C, span| { @@ -361,7 +360,7 @@ pub mod ascii { /// characters or underscores. The regex pattern for it is `[a-zA-Z_][a-zA-Z0-9_]*`. #[must_use] pub fn ident<'a, I: ValueInput<'a> + StrInput<'a, C>, C: Char, E: ParserExtra<'a, I>>( - ) -> impl Parser<'a, I, &'a C::Str, E> + Copy + Clone { + ) -> impl Parser<'a, I, &'a C::Str, E> + Copy { any() // Use try_map over filter to get a better error on failure .try_map(|c: C, span| { @@ -449,7 +448,7 @@ pub mod unicode { /// An identifier is defined as per "Default Identifiers" in [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/). 
#[must_use] pub fn ident<'a, I: ValueInput<'a> + StrInput<'a, C>, C: Char, E: ParserExtra<'a, I>>( - ) -> impl Parser<'a, I, &'a C::Str, E> + Copy + Clone { + ) -> impl Parser<'a, I, &'a C::Str, E> + Copy { any() // Use try_map over filter to get a better error on failure .try_map(|c: C, span| { From d3a2667a40a2a04caebf56ee408a1f33d2b69fa8 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 1 Jan 2024 20:59:12 +0000 Subject: [PATCH 59/64] Added IterParser::flatten --- src/combinator.rs | 107 ++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 25 +++++++++++ src/private.rs | 13 ++++++ 3 files changed, 145 insertions(+) diff --git a/src/combinator.rs b/src/combinator.rs index a8956cf4..a3db6ad3 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -2037,6 +2037,47 @@ where go_extra!(Option); } +impl<'a, A, O, I, E> IterParserSealed<'a, I, O, E> for OrNot +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: Parser<'a, I, O, E>, +{ + type IterState = bool; + + #[inline(always)] + fn make_iter( + &self, + _inp: &mut InputRef<'a, '_, I, E>, + ) -> PResult> { + Ok(false) + } + + #[inline(always)] + fn next( + &self, + inp: &mut InputRef<'a, '_, I, E>, + finished: &mut Self::IterState, + ) -> IPResult { + if *finished { + return Ok(None); + } + + let before = inp.save(); + match self.parser.go::(inp) { + Ok(item) => { + *finished = true; + Ok(Some(item)) + }, + Err(()) => { + inp.rewind(before); + *finished = true; + Ok(None) + } + } + } +} + /// See [`Parser::not`]. pub struct Not { pub(crate) parser: A, @@ -2085,6 +2126,72 @@ where go_extra!(()); } +/// See [`IterParser::flatten`]. 
+#[cfg(feature = "nightly")] +pub struct Flatten { + pub(crate) parser: A, + #[allow(dead_code)] + pub(crate) phantom: EmptyPhantom, +} + +#[cfg(feature = "nightly")] +impl Copy for Flatten {} +impl Clone for Flatten { + fn clone(&self) -> Self { + Self { + parser: self.parser.clone(), + phantom: EmptyPhantom::new(), + } + } +} + +#[cfg(feature = "nightly")] +impl<'a, A, O, I, E> IterParserSealed<'a, I, O::Item, E> for Flatten +where + I: Input<'a>, + E: ParserExtra<'a, I>, + A: IterParser<'a, I, O, E>, + O: IntoIterator, +{ + type IterState = (A::IterState, Option>); + + #[inline(always)] + fn make_iter( + &self, + inp: &mut InputRef<'a, '_, I, E>, + ) -> PResult> { + Ok((self.parser.make_iter(inp)?, None)) + } + + #[inline(always)] + fn next( + &self, + inp: &mut InputRef<'a, '_, I, E>, + (st, iter): &mut Self::IterState, + ) -> IPResult { + if let Some(item) = iter.as_mut().and_then(|i| M::get_or(M::map(M::from_mut(i), |i| i.next()), || None)) { + return Ok(Some(M::bind(move || item))); + } + + // TODO: Debug looping check + loop { + let before = inp.save(); + match self.parser.next::(inp, st) { + Ok(Some(item)) => match M::get_or(M::map(M::from_mut(iter.insert(M::map(item, |i| i.into_iter()))), |i| i.next().map(Some)), || Some(None)) { + Some(Some(item)) => break Ok(Some(M::bind(move || item))), + Some(None) => break Ok(Some(M::bind(|| unreachable!()))), + None => continue, + }, + Ok(None) => break Ok(None), + Err(()) => { + inp.rewind(before); + break Err(()) + } + } + } + } +} + /// See [`Parser::and_is`]. pub struct AndIs { pub(crate) parser_a: A, diff --git a/src/lib.rs b/src/lib.rs index 43f55210..37921664 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2498,6 +2498,16 @@ where } } + /// TODO + #[cfg(feature = "nightly")] + fn flatten(self) -> Flatten + where + O: IntoIterator, + Self: Sized, + { + Flatten { parser: self, phantom: EmptyPhantom::new() } + } + /// Create an iterator over the outputs generated by an iterable parser. 
/// /// Warning: Trailing errors will be ignored @@ -3412,6 +3422,21 @@ mod tests { assert_eq!(parser().parse("aaa").into_result(), Ok(())); } + #[test] + fn flatten() { + fn parser<'a>() -> impl Parser<'a, &'a str, Vec, extra::Err> { + let many_as = just('a').map(Some) + .or(any().to(None)) + .repeated() + .flatten() + .collect::>(); + + many_as.into_iter().collect() + } + + assert_eq!(parser().parse("abracadabra").into_result(), Ok(vec!['a', 'a', 'a', 'a', 'a'])); + } + #[test] #[cfg(feature = "unstable")] fn cached() { diff --git a/src/private.rs b/src/private.rs index 4c2182f5..5418c675 100644 --- a/src/private.rs +++ b/src/private.rs @@ -50,6 +50,10 @@ pub trait Mode { /// Given an array of outputs, bind them into an output of arrays fn array(x: [Self::Output; N]) -> Self::Output<[T; N]>; + fn from_mut(r: &mut Self::Output) -> Self::Output<&mut T>; + + fn get_or T>(r: Self::Output, f: F) -> T; + /// Invoke a parser user the current mode. This is normally equivalent to /// [`parser.go::(inp)`](Parser::go), but it can be called on unsized values such as /// `dyn Parser`. 
@@ -111,6 +115,11 @@ impl Mode for Emit { x } + #[inline(always)] + fn from_mut(r: &mut Self::Output) -> Self::Output<&mut T> { r } + #[inline(always)] + fn get_or T>(r: Self::Output, f: F) -> T { r } + #[inline(always)] fn invoke<'a, I, O, E, P>(parser: &P, inp: &mut InputRef<'a, '_, I, E>) -> PResult where @@ -164,6 +173,10 @@ impl Mode for Check { fn combine_mut(_: &mut Self::Output, _: Self::Output, _: F) {} #[inline(always)] fn array(_: [Self::Output; N]) -> Self::Output<[T; N]> {} + #[inline(always)] + fn from_mut(r: &mut Self::Output) -> Self::Output<&mut T> {} + #[inline(always)] + fn get_or T>(r: Self::Output, f: F) -> T { f() } #[inline(always)] fn invoke<'a, I, O, E, P>(parser: &P, inp: &mut InputRef<'a, '_, I, E>) -> PResult From 55b230de2eddf301e2d26ea74403f6107a4df31d Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 1 Jan 2024 21:13:30 +0000 Subject: [PATCH 60/64] Appease clippy --- src/combinator.rs | 18 ++++++++++++++---- src/lib.rs | 14 +++++++++++--- src/private.rs | 14 ++++++++++---- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/src/combinator.rs b/src/combinator.rs index a3db6ad3..b4c7928c 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -2068,7 +2068,7 @@ where Ok(item) => { *finished = true; Ok(Some(item)) - }, + } Err(()) => { inp.rewind(before); *finished = true; @@ -2136,6 +2136,7 @@ pub struct Flatten { #[cfg(feature = "nightly")] impl Copy for Flatten {} +#[cfg(feature = "nightly")] impl Clone for Flatten { fn clone(&self) -> Self { Self { @@ -2169,7 +2170,10 @@ where inp: &mut InputRef<'a, '_, I, E>, (st, iter): &mut Self::IterState, ) -> IPResult { - if let Some(item) = iter.as_mut().and_then(|i| M::get_or(M::map(M::from_mut(i), |i| i.next()), || None)) { + if let Some(item) = iter + .as_mut() + .and_then(|i| M::get_or(M::map(M::from_mut(i), |i| i.next()), || None)) + { return Ok(Some(M::bind(move || item))); } @@ -2177,7 +2181,13 @@ where loop { let before = inp.save(); match self.parser.next::(inp, 
st) { - Ok(Some(item)) => match M::get_or(M::map(M::from_mut(iter.insert(M::map(item, |i| i.into_iter()))), |i| i.next().map(Some)), || Some(None)) { + Ok(Some(item)) => match M::get_or( + M::map( + M::from_mut(iter.insert(M::map(item, |i| i.into_iter()))), + |i| i.next().map(Some), + ), + || Some(None), + ) { Some(Some(item)) => break Ok(Some(M::bind(move || item))), Some(None) => break Ok(Some(M::bind(|| unreachable!()))), None => continue, @@ -2185,7 +2195,7 @@ where Ok(None) => break Ok(None), Err(()) => { inp.rewind(before); - break Err(()) + break Err(()); } } } diff --git a/src/lib.rs b/src/lib.rs index 37921664..9631cc53 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2505,7 +2505,10 @@ where O: IntoIterator, Self: Sized, { - Flatten { parser: self, phantom: EmptyPhantom::new() } + Flatten { + parser: self, + phantom: EmptyPhantom::new(), + } } /// Create an iterator over the outputs generated by an iterable parser. @@ -3422,10 +3425,12 @@ mod tests { assert_eq!(parser().parse("aaa").into_result(), Ok(())); } + #[cfg(feature = "nightly")] #[test] fn flatten() { fn parser<'a>() -> impl Parser<'a, &'a str, Vec, extra::Err> { - let many_as = just('a').map(Some) + let many_as = just('a') + .map(Some) .or(any().to(None)) .repeated() .flatten() @@ -3434,7 +3439,10 @@ mod tests { many_as.into_iter().collect() } - assert_eq!(parser().parse("abracadabra").into_result(), Ok(vec!['a', 'a', 'a', 'a', 'a'])); + assert_eq!( + parser().parse("abracadabra").into_result(), + Ok(vec!['a', 'a', 'a', 'a', 'a']) + ); } #[test] diff --git a/src/private.rs b/src/private.rs index 5418c675..4afc5bcd 100644 --- a/src/private.rs +++ b/src/private.rs @@ -116,9 +116,13 @@ impl Mode for Emit { } #[inline(always)] - fn from_mut(r: &mut Self::Output) -> Self::Output<&mut T> { r } + fn from_mut(r: &mut Self::Output) -> Self::Output<&mut T> { + r + } #[inline(always)] - fn get_or T>(r: Self::Output, f: F) -> T { r } + fn get_or T>(r: Self::Output, _f: F) -> T { + r + } #[inline(always)] fn 
invoke<'a, I, O, E, P>(parser: &P, inp: &mut InputRef<'a, '_, I, E>) -> PResult @@ -174,9 +178,11 @@ impl Mode for Check { #[inline(always)] fn array(_: [Self::Output; N]) -> Self::Output<[T; N]> {} #[inline(always)] - fn from_mut(r: &mut Self::Output) -> Self::Output<&mut T> {} + fn from_mut(_r: &mut Self::Output) -> Self::Output<&mut T> {} #[inline(always)] - fn get_or T>(r: Self::Output, f: F) -> T { f() } + fn get_or T>(_r: Self::Output, f: F) -> T { + f() + } #[inline(always)] fn invoke<'a, I, O, E, P>(parser: &P, inp: &mut InputRef<'a, '_, I, E>) -> PResult From 8b8cf0a04b157df30799d4f385ddedc1dca85014 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Wed, 17 Jan 2024 19:10:03 +0000 Subject: [PATCH 61/64] Fixed SpannedInput --- src/input.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/input.rs b/src/input.rs index 1e902cba..00b22ad9 100644 --- a/src/input.rs +++ b/src/input.rs @@ -490,6 +490,8 @@ where .input .next_maybe(range.start) .1 + .or_else(|| self.input.next_maybe(self.input.start()).1) + // TODO: Should EOI actually be 'full input'? .map_or(self.eoi.start(), |tok| tok.borrow().1.start()); let end = self .input From 2ab11af304b0695572fd64e1a9cb0598814fecf0 Mon Sep 17 00:00:00 2001 From: Rune Tynan Date: Sat, 27 Jan 2024 11:58:25 -0800 Subject: [PATCH 62/64] Use diagnostic namespace, make message appear on ParserSealed mismatches --- src/lib.rs | 13 +++++++++---- src/private.rs | 8 ++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9631cc53..a4ecb079 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,7 +3,7 @@ #![cfg_attr(feature = "nightly", allow(internal_features))] #![cfg_attr( feature = "nightly", - feature(never_type, rustc_attrs, fn_traits, tuple_trait, unboxed_closures) + feature(never_type, fn_traits, tuple_trait, unboxed_closures, diagnostic_namespace) )] // // README.md links these files via the main branch. 
For docs.rs we however want to link them @@ -351,9 +351,9 @@ impl ParseResult { /// implement it in chumsky itself. #[cfg_attr( feature = "nightly", - rustc_on_unimplemented( - message = "`{Self}` is not a parser from `{I}` to `{O}`", - label = "This parser is not compatible because it does not implement `Parser<{I}, {O}>`", + diagnostic::on_unimplemented( + message = "The following is not a parser from `{I}` to `{O}`: `{Self}`", + label = "This parser is not compatible because it does not implement `Parser<{I}, {O}, E>`", note = "You should check that the output types of your parsers are consistent with the combinators you're using", ) )] @@ -3501,3 +3501,8 @@ mod tests { } } } + +fn foo() { + use prelude::*; + let p = any::<&str, extra::Default>().or(any().to(())); +} diff --git a/src/private.rs b/src/private.rs index 4afc5bcd..764bc1cf 100644 --- a/src/private.rs +++ b/src/private.rs @@ -212,6 +212,14 @@ impl Mode for Check { // TODO: Consider removing these sealed traits in favour of `Sealed`, with the given methods just being on `Parser` // with doc(hidden) +#[cfg_attr( + feature = "nightly", + diagnostic::on_unimplemented( + message = "The following is not a parser from `{I}` to `{O}`: `{Self}`", + label = "This parser is not compatible because it does not implement `Parser<{I}, {O}, E>`", + note = "You should check that the output types of your parsers are consistent with the combinators you're using", + ) +)] pub trait ParserSealed<'a, I: Input<'a>, O, E: ParserExtra<'a, I>> { fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult where From 97df74dc0f38f9fd75250a97a5bc9760b7c2f8b5 Mon Sep 17 00:00:00 2001 From: Rune Tynan Date: Sat, 27 Jan 2024 18:07:17 -0800 Subject: [PATCH 63/64] Remove test left in --- src/lib.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a4ecb079..3b61ff3a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3501,8 +3501,3 @@ mod tests { } } } - -fn foo() { - use prelude::*; - let p = any::<&str, 
extra::Default>().or(any().to(())); -} From cbdb4dbaa1a963a9bdc71d1b8704f5306d1f193e Mon Sep 17 00:00:00 2001 From: Rune Tynan Date: Sat, 27 Jan 2024 18:11:11 -0800 Subject: [PATCH 64/64] Fix warning --- src/stream.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stream.rs b/src/stream.rs index 6cf9510d..7b3e9b9f 100644 --- a/src/stream.rs +++ b/src/stream.rs @@ -122,7 +122,7 @@ where } // Get the token at the given offset - let tok = vec.get(offset).map(I::Item::clone); + let tok = vec.get(offset).cloned(); self.tokens.swap(&other);