From 60d96cd7339a8084ae081964604a17d303db1081 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Tue, 22 Aug 2023 17:53:17 +0100 Subject: [PATCH] Added cache docs --- src/cache.rs | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/either.rs | 2 +- src/lib.rs | 73 +++---------------------------------------- src/text.rs | 3 ++ 4 files changed, 95 insertions(+), 69 deletions(-) create mode 100644 src/cache.rs diff --git a/src/cache.rs b/src/cache.rs new file mode 100644 index 00000000..a1e89e81 --- /dev/null +++ b/src/cache.rs @@ -0,0 +1,86 @@ +//! Traits and types that allow parsers to be cached between invocations. +//! +//! # Example +//! +//! ``` +//! #![feature(lazy_cell)] +//! use std::sync::{LazyLock, Arc}; +//! use chumsky::{prelude::*, cache::{Cache, Cached}}; +//! +//! #[derive(Debug, PartialEq)] +//! enum Token<'a> { Ident(&'a str), Int(u64) } +//! +//! #[derive(Default)] +//! struct TokenParser; +//! impl Cached for TokenParser { +//! type Parser<'a> = Arc<dyn Parser<'a, &'a str, Token<'a>, extra::Default> + Send + Sync + 'a>; +//! +//! fn make_parser<'a>(self) -> Self::Parser<'a> { +//! let ident = text::ident().map(Token::Ident); +//! let num = text::int(10).from_str().unwrapped().map(Token::Int); +//! Arc::new(ident.or(num)) +//! } +//! } +//! +//! // The parser cache doesn't have a lifetime and so can be stored pretty much anywhere: +//! static PARSER: LazyLock<Cache<TokenParser>> = LazyLock::new(Cache::default); +//! +//! // The parser can be used from any context simply by calling `.get()` on the cache +//! assert_eq!(PARSER.get().parse("42").into_result(), Ok(Token::Int(42))); +//! assert_eq!(PARSER.get().parse("hello").into_result(), Ok(Token::Ident("hello"))); +//! ``` + +use super::*; + +/// Implementing this trait allows you to cache parsers for use with inputs of different lifetimes, avoiding the +/// need to recreate the parser for each input lifetime. +pub trait Cached { + /// The type of the parser to be cached. 
+ /// + /// Because parsers tend to have unwieldy types, it is recommended to perform type erasure here. For example, + /// a parser with input type `&'src str` and output type `Token<'src>` might have one of the following types. + /// + /// ```ignore + /// Boxed<'src, 'src, &'src str, Token<'src>, extra::Default> + /// Arc<dyn Parser<'src, &'src str, Token<'src>, extra::Default> + Send + Sync + 'src> + /// ``` + type Parser<'src>; + + /// Create an instance of the parser + fn make_parser<'src>(self) -> Self::Parser<'src>; +} + +/// Allows a parser to be cached for use with inputs and outputs of different lifetimes. +pub struct Cache<C: Cached> { + parser: C::Parser<'static>, + #[allow(dead_code)] + phantom: EmptyPhantom<C>, +} + +impl<C: Cached + Default> Default for Cache<C> { + fn default() -> Self { + Self::new(C::default()) + } +} + +impl<C: Cached> Cache<C> { + /// Create a new cached parser. + pub fn new(cacher: C) -> Self { + Self { + parser: cacher.make_parser(), + phantom: EmptyPhantom::new(), + } + } + + /// Get a reference to the cached parser. + /// + /// Because this function is generic over an input lifetime, the returned parser can be used in many + /// different contexts. + pub fn get<'src>(&self) -> &C::Parser<'src> { + // SAFETY: This is safe because the API of `Cache` requires that the parser we store is bound by an arbitrary + // lifetime variable (see `Cached::make_parser`). Therefore, the implementor of `Cached` has no way to + // 'discover' the lifetime and so, because lifetimes are entirely removed during monomorphisation, the parser + // must be valid for arbitrary lifetimes. + unsafe { &*(&self.parser as *const C::Parser<'_>).cast() } + } +} diff --git a/src/either.rs b/src/either.rs index b6c89b53..20f59e15 100644 --- a/src/either.rs +++ b/src/either.rs @@ -1,4 +1,4 @@ -//! A small module that implements the [`Parser`](Parser) trait for the +//! A small module that implements the [`Parser`] trait for the //! [`either::Either`](https://docs.rs/either/latest/either/enum.Either.html) type. 
use either::Either; diff --git a/src/lib.rs b/src/lib.rs index fd4c4b17..dc106bb6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -69,6 +69,8 @@ macro_rules! go_cfg_extra { } mod blanket; +#[cfg(feature = "unstable")] +pub mod cache; pub mod combinator; pub mod container; #[cfg(feature = "either")] @@ -347,7 +349,6 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it. fn parse(&self, input: I) -> ParseResult<O, E::Error> where - Self: Sized, I: Input<'a>, E::State: Default, E::Context: Default, @@ -365,7 +366,6 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it. fn parse_with_state(&self, input: I, state: &mut E::State) -> ParseResult<O, E::Error> where - Self: Sized, I: Input<'a>, E::Context: Default, { @@ -3461,14 +3461,9 @@ mod tests { struct MyCache; impl crate::cache::Cached for MyCache { - type Input<'src> = &'src str; - type Output<'src> = &'src str; - type Extra<'src> = extra::Default; - - fn make_parser<'src>( - self, - ) -> Boxed<'src, 'src, Self::Input<'src>, Self::Output<'src>, Self::Extra<'src>> - { + type Parser<'src> = Boxed<'src, 'src, &'src str, &'src str, extra::Default>; + + fn make_parser<'src>(self) -> Self::Parser<'src> { Parser::boxed(my_parser()) } } @@ -3504,61 +3499,3 @@ mod tests { } } } - -/// Traits and types that allow parsers to be cached between invocations. -#[cfg(feature = "unstable")] -pub mod cache { - use super::*; - - /// Implementing this trait allows you to cache parser for use with multiple inputs. - pub trait Cached { - /// The input type of the parser (for example, `&'a str`). - type Input<'src>: Input<'src>; - - /// The output type of the parser. - type Output<'src>; - - /// The extra type of the parser (see [`extra`]). - type Extra<'src>: ParserExtra<'src, Self::Input<'src>>; - - // /// The type of the parser to be cached. 
- // type Parser<'a>: Parser<'a, Self::Input<'a>, Self::Output<'a>, Self::Extra<'a>>; - - /// Create an instance of the parser - fn make_parser<'src>( - self, - ) -> Boxed<'src, 'src, Self::Input<'src>, Self::Output<'src>, Self::Extra<'src>>; - } - - /// Allows a parser to be cached for use with inputs and outputs of difference lifetimes. - pub struct Cache<C: Cached> { - parser: Boxed<'static, 'static, C::Input<'static>, C::Output<'static>, C::Extra<'static>>, - #[allow(dead_code)] - phantom: EmptyPhantom<C>, - } - - impl<C: Cached> Cache<C> { - /// Create a new cached parser. - pub fn new(cacher: C) -> Self { - Self { - parser: cacher.make_parser(), - phantom: EmptyPhantom::new(), - } - } - - /// Get a reference to the cached parser. - /// - /// Because this function is generic over an input lifetime, the - pub fn get<'src>( - &self, - ) -> &Boxed<'src, 'src, C::Input<'src>, C::Output<'src>, C::Extra<'src>> { - // SAFETY: This is safe because the API of `Cache` requires that the parser we store is bound by an arbitrary - // lifetime variable (see `Cached::make_parser`). Therefore, the implementor of `Cached` has no way to - // 'discover' the lifetime and so, because lifetimes are entirely removed during monomorphisation, the parser - // must be valid for arbitrary lifetimes. - unsafe { - &*(&self.parser as *const Boxed<C::Input<'_>, C::Output<'_>, C::Extra<'_>>).cast() - } - } - } -} diff --git a/src/text.rs b/src/text.rs index 688609ae..11817f3b 100644 --- a/src/text.rs +++ b/src/text.rs @@ -438,6 +438,9 @@ pub mod ascii { } } +// Unicode is the default +pub use unicode::*; + /// Parsers and utilities for working with unicode inputs. pub mod unicode { use super::*;