Skip to content

Commit

Permalink
Merge pull request #506 from zesterer/cache
Browse files Browse the repository at this point in the history
Added cache docs
  • Loading branch information
zesterer authored Aug 22, 2023
2 parents 01b96cd + 60d96cd commit f10e56b
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 69 deletions.
86 changes: 86 additions & 0 deletions src/cache.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
//! Traits and types that allow parsers to be cached between invocations.
//!
//! # Example
//!
//! ```
//! #![feature(lazy_cell)]
//! use std::sync::{LazyLock, Arc};
//! use chumsky::{prelude::*, cache::{Cache, Cached}};
//!
//! #[derive(Debug, PartialEq)]
//! enum Token<'a> { Ident(&'a str), Int(u64) }
//!
//! #[derive(Default)]
//! struct TokenParser;
//! impl Cached for TokenParser {
//! type Parser<'a> = Arc<dyn Parser<'a, &'a str, Token<'a>, extra::Default> + Send + Sync + 'a>;
//!
//! fn make_parser<'a>(self) -> Self::Parser<'a> {
//! let ident = text::ident().map(Token::Ident);
//! let num = text::int(10).from_str().unwrapped().map(Token::Int);
//! Arc::new(ident.or(num))
//! }
//! }
//!
//! // The parser cache doesn't have a lifetime and so can be stored pretty much anywhere:
//! static PARSER: LazyLock<Cache<TokenParser>> = LazyLock::new(Cache::default);
//!
//! // The parser can be used from any context simply by calling `.get()` on the cache
//! assert_eq!(PARSER.get().parse("42").into_result(), Ok(Token::Int(42)));
//! assert_eq!(PARSER.get().parse("hello").into_result(), Ok(Token::Ident("hello")));
//! ```

use super::*;

/// Implementing this trait allows you to cache parsers for use with inputs of different lifetimes, avoiding the
/// need to recreate the parser for each input lifetime.
///
/// An implementor is passed to [`Cache::new`], which calls [`Cached::make_parser`] and stores the result.
pub trait Cached {
/// The type of the parser to be cached, parameterised by the input lifetime `'src`.
///
/// Because parsers tend to have unwieldy types, it is recommended to perform type erasure here. For example,
/// a parser with input type `&'src str` and output type `Token<'src>` might have one of the following types.
///
/// ```ignore
/// Boxed<'src, 'src, &'src str, Token<'src>, extra::Default>
/// Arc<dyn Parser<'src, &'src str, Token<'src>, extra::Default> + Send + Sync + 'src>
/// ```
type Parser<'src>;

/// Create an instance of the parser.
///
/// This consumes `self`. The lifetime `'src` is chosen by the caller, so the implementation has to be able to
/// produce the parser for every possible `'src`.
fn make_parser<'src>(self) -> Self::Parser<'src>;
}

/// Allows a parser to be cached for use with inputs and outputs of different lifetimes.
///
/// Build one with [`Cache::new`] (or `Cache::default()` when `C: Default`) and borrow the parser with
/// [`Cache::get`].
pub struct Cache<C: Cached> {
// The parser is stored at the `'static` instantiation; `Cache::get` reinterprets it for the caller's lifetime.
parser: C::Parser<'static>,
// This field is written in `Cache::new` but never read, hence the lint suppression.
// NOTE(review): presumably a zero-sized marker tying `C` to this struct - confirm against `EmptyPhantom`.
#[allow(dead_code)]
phantom: EmptyPhantom<C>,
}

impl<C> Default for Cache<C>
where
    C: Cached + Default,
{
    /// Build the cache from the default value of `C`.
    fn default() -> Self {
        let cacher = C::default();
        Self::new(cacher)
    }
}

impl<C> Cache<C>
where
    C: Cached,
{
    /// Create a new cached parser by consuming `cacher` and storing the parser it produces.
    pub fn new(cacher: C) -> Self {
        let parser = cacher.make_parser();
        Self {
            parser,
            phantom: EmptyPhantom::new(),
        }
    }

    /// Get a reference to the cached parser.
    ///
    /// Because this function is generic over the input lifetime `'src`, the returned parser can be used in many
    /// different contexts.
    pub fn get<'src>(&self) -> &C::Parser<'src> {
        // The parser is stored at the `'static` instantiation of the associated type.
        let stored: *const C::Parser<'static> = &self.parser;
        // SAFETY: The API of `Cache` requires the stored parser to have been built for an arbitrary lifetime
        // variable (see `Cached::make_parser`). The implementor of `Cached` therefore has no way to 'discover'
        // the lifetime and, because lifetimes are entirely removed during monomorphisation, the parser must be
        // valid for arbitrary lifetimes.
        // NOTE(review): this argument seems to assume `C::Parser` cannot carry data tied to one `'src` over to a
        // borrow taken with a longer one (e.g. through interior mutability) - confirm for implementors.
        unsafe { &*stored.cast::<C::Parser<'src>>() }
    }
}
2 changes: 1 addition & 1 deletion src/either.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! A small module that implements the [`Parser`](Parser) trait for the
//! A small module that implements the [`Parser`] trait for the
//! [`either::Either`](https://docs.rs/either/latest/either/enum.Either.html) type.

use either::Either;
Expand Down
73 changes: 5 additions & 68 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ macro_rules! go_cfg_extra {
}

mod blanket;
#[cfg(feature = "unstable")]
pub mod cache;
pub mod combinator;
pub mod container;
#[cfg(feature = "either")]
Expand Down Expand Up @@ -347,7 +349,6 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>:
/// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it.
fn parse(&self, input: I) -> ParseResult<O, E::Error>
where
Self: Sized,
I: Input<'a>,
E::State: Default,
E::Context: Default,
Expand All @@ -365,7 +366,6 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>:
/// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it.
fn parse_with_state(&self, input: I, state: &mut E::State) -> ParseResult<O, E::Error>
where
Self: Sized,
I: Input<'a>,
E::Context: Default,
{
Expand Down Expand Up @@ -3461,14 +3461,9 @@ mod tests {
struct MyCache;

impl crate::cache::Cached for MyCache {
type Input<'src> = &'src str;
type Output<'src> = &'src str;
type Extra<'src> = extra::Default;

fn make_parser<'src>(
self,
) -> Boxed<'src, 'src, Self::Input<'src>, Self::Output<'src>, Self::Extra<'src>>
{
type Parser<'src> = Boxed<'src, 'src, &'src str, &'src str, extra::Default>;

fn make_parser<'src>(self) -> Self::Parser<'src> {
Parser::boxed(my_parser())
}
}
Expand Down Expand Up @@ -3504,61 +3499,3 @@ mod tests {
}
}
}

/// Traits and types that allow parsers to be cached between invocations.
#[cfg(feature = "unstable")]
pub mod cache {
use super::*;

/// Implementing this trait allows you to cache a parser for use with multiple inputs.
pub trait Cached {
/// The input type of the parser (for example, `&'src str`).
type Input<'src>: Input<'src>;

/// The output type of the parser.
type Output<'src>;

/// The extra type of the parser (see [`extra`]).
type Extra<'src>: ParserExtra<'src, Self::Input<'src>>;

// /// The type of the parser to be cached.
// type Parser<'a>: Parser<'a, Self::Input<'a>, Self::Output<'a>, Self::Extra<'a>>;

/// Create an instance of the parser.
///
/// This consumes `self`. The lifetime `'src` is chosen by the caller.
fn make_parser<'src>(
self,
) -> Boxed<'src, 'src, Self::Input<'src>, Self::Output<'src>, Self::Extra<'src>>;
}

/// Allows a parser to be cached for use with inputs and outputs of different lifetimes.
pub struct Cache<C: Cached> {
// The boxed parser is stored at the `'static` instantiation; `Cache::get` reinterprets it per call.
parser: Boxed<'static, 'static, C::Input<'static>, C::Output<'static>, C::Extra<'static>>,
// This field is written in `Cache::new` but never read, hence the lint suppression.
#[allow(dead_code)]
phantom: EmptyPhantom<C>,
}

impl<C: Cached> Cache<C> {
/// Create a new cached parser.
pub fn new(cacher: C) -> Self {
Self {
parser: cacher.make_parser(),
phantom: EmptyPhantom::new(),
}
}

/// Get a reference to the cached parser.
///
/// Because this function is generic over an input lifetime, the returned parser can be used in many
/// different contexts.
pub fn get<'src>(
&self,
) -> &Boxed<'src, 'src, C::Input<'src>, C::Output<'src>, C::Extra<'src>> {
// SAFETY: This is safe because the API of `Cache` requires that the parser we store is bound by an arbitrary
// lifetime variable (see `Cached::make_parser`). Therefore, the implementor of `Cached` has no way to
// 'discover' the lifetime and so, because lifetimes are entirely removed during monomorphisation, the parser
// must be valid for arbitrary lifetimes.
unsafe {
&*(&self.parser as *const Boxed<C::Input<'_>, C::Output<'_>, C::Extra<'_>>).cast()
}
}
}
}
3 changes: 3 additions & 0 deletions src/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,9 @@ pub mod ascii {
}
}

// Unicode is the default
pub use unicode::*;

/// Parsers and utilities for working with unicode inputs.
pub mod unicode {
use super::*;
Expand Down

0 comments on commit f10e56b

Please sign in to comment.