Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added cache docs #506

Merged
merged 1 commit into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions src/cache.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
//! Traits and types that allow parsers to be cached between invocations.
//!
//! # Example
//!
//! ```
//! #![feature(lazy_cell)]
//! use std::sync::{LazyLock, Arc};
//! use chumsky::{prelude::*, cache::{Cache, Cached}};
//!
//! #[derive(Debug, PartialEq)]
//! enum Token<'a> { Ident(&'a str), Int(u64) }
//!
//! #[derive(Default)]
//! struct TokenParser;
//! impl Cached for TokenParser {
//! type Parser<'a> = Arc<dyn Parser<'a, &'a str, Token<'a>, extra::Default> + Send + Sync + 'a>;
//!
//! fn make_parser<'a>(self) -> Self::Parser<'a> {
//! let ident = text::ident().map(Token::Ident);
//! let num = text::int(10).from_str().unwrapped().map(Token::Int);
//! Arc::new(ident.or(num))
//! }
//! }
//!
//! // The parser cache doesn't have a lifetime and so can be stored pretty much anywhere:
//! static PARSER: LazyLock<Cache<TokenParser>> = LazyLock::new(Cache::default);
//!
//! // The parser can be used from any context simply by calling `.get()` on the cache
//! assert_eq!(PARSER.get().parse("42").into_result(), Ok(Token::Int(42)));
//! assert_eq!(PARSER.get().parse("hello").into_result(), Ok(Token::Ident("hello")));
//! ```

use super::*;

/// Implementing this trait allows you to cache parsers for use with inputs of different lifetimes, avoiding the
/// need to recreate the parser for each input lifetime.
///
/// A [`Cache`] calls [`Cached::make_parser`] once when it is constructed, stores the result with its lifetime
/// parameter fixed to `'static`, and later hands out references to it at whatever lifetime the caller asks for.
/// For that to be valid, the parser returned by `make_parser` must not depend on which `'src` was chosen.
pub trait Cached {
/// The type of the parser to be cached.
///
/// Because parsers tend to have unwieldy types, it is recommended to perform type erasure here. For example,
/// a parser with input type `&'src str` and output type `Token<'src>` might have one of the following types.
///
/// ```ignore
/// Boxed<'src, 'src, &'src str, Token<'src>, extra::Default>
/// Arc<dyn Parser<'src, &'src str, Token<'src>, extra::Default> + Send + Sync + 'src>
/// ```
type Parser<'src>;

/// Create an instance of the parser.
///
/// This consumes `self` and is generic over `'src`: the implementation is not told which lifetime it is
/// building the parser for, so it has to produce a parser that works for any of them.
fn make_parser<'src>(self) -> Self::Parser<'src>;
}

/// Allows a parser to be cached for use with inputs and outputs of different lifetimes.
///
/// The parser is built once by [`Cache::new`] and kept with its lifetime parameter fixed to `'static`;
/// [`Cache::get`] then lends it out at the lifetime required by the caller.
pub struct Cache<C: Cached> {
// The parser produced by `C::make_parser`, stored at the `'static` lifetime.
parser: C::Parser<'static>,
// Marker carrying the type parameter `C`. It is only ever written (in `Cache::new`), never read, which is
// why the dead-code lint is silenced here.
// NOTE(review): `EmptyPhantom` is defined elsewhere in the crate — presumably a zero-sized marker; confirm.
#[allow(dead_code)]
phantom: EmptyPhantom<C>,
}

impl<C: Cached + Default> Default for Cache<C> {
    /// Build a cache from the default value of the caching type `C`.
    ///
    /// This is shorthand for `Cache::new(C::default())`.
    fn default() -> Self {
        let cacher = C::default();
        Self::new(cacher)
    }
}

impl<C: Cached> Cache<C> {
/// Create a new cached parser.
///
/// This consumes `cacher` and calls [`Cached::make_parser`] on it once; the resulting parser is stored in the
/// cache with its lifetime parameter fixed to `'static`.
pub fn new(cacher: C) -> Self {
Self {
parser: cacher.make_parser(),
phantom: EmptyPhantom::new(),
}
}

/// Get a reference to the cached parser.
///
/// Because this function is generic over an input lifetime, the returned parser can be used in many
/// different contexts.
///
/// The returned reference borrows from `self`, and no new parser is constructed by this call.
pub fn get<'src>(&self) -> &C::Parser<'src> {
// SAFETY: This is safe because the API of `Cache` requires that the parser we store is bound by an arbitrary
// lifetime variable (see `Cached::make_parser`). Therefore, the implementor of `Cached` has no way to
// 'discover' the lifetime and so, because lifetimes are entirely removed during monomorphisation, the parser
// must be valid for arbitrary lifetimes.
//
// Mechanically: `&self.parser` (a `&C::Parser<'static>`) is turned into a raw pointer, `.cast()` re-types the
// pointee as `C::Parser<'src>` (inferred from the return type), and the pointer is dereferenced and
// re-borrowed. Only the lifetime parameter of the pointee type changes.
//
// NOTE(review): this argument assumes `C::Parser<'a>` is the same type modulo lifetime for every `'a` and
// that an implementor cannot make the parser's validity depend on `'src` — confirm this holds for all
// possible `Cached` implementations.
unsafe { &*(&self.parser as *const C::Parser<'_>).cast() }
}
}
2 changes: 1 addition & 1 deletion src/either.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! A small module that implements the [`Parser`](Parser) trait for the
//! A small module that implements the [`Parser`] trait for the
//! [`either::Either`](https://docs.rs/either/latest/either/enum.Either.html) type.

use either::Either;
Expand Down
73 changes: 5 additions & 68 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ macro_rules! go_cfg_extra {
}

mod blanket;
#[cfg(feature = "unstable")]
pub mod cache;
pub mod combinator;
pub mod container;
#[cfg(feature = "either")]
Expand Down Expand Up @@ -347,7 +349,6 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>:
/// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it.
fn parse(&self, input: I) -> ParseResult<O, E::Error>
where
Self: Sized,
I: Input<'a>,
E::State: Default,
E::Context: Default,
Expand All @@ -365,7 +366,6 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>:
/// [`&[T]`], a [`&str`], [`Stream`], or anything implementing [`Input`] to it.
fn parse_with_state(&self, input: I, state: &mut E::State) -> ParseResult<O, E::Error>
where
Self: Sized,
I: Input<'a>,
E::Context: Default,
{
Expand Down Expand Up @@ -3461,14 +3461,9 @@ mod tests {
struct MyCache;

impl crate::cache::Cached for MyCache {
type Input<'src> = &'src str;
type Output<'src> = &'src str;
type Extra<'src> = extra::Default;

fn make_parser<'src>(
self,
) -> Boxed<'src, 'src, Self::Input<'src>, Self::Output<'src>, Self::Extra<'src>>
{
type Parser<'src> = Boxed<'src, 'src, &'src str, &'src str, extra::Default>;

fn make_parser<'src>(self) -> Self::Parser<'src> {
Parser::boxed(my_parser())
}
}
Expand Down Expand Up @@ -3504,61 +3499,3 @@ mod tests {
}
}
}

/// Traits and types that allow parsers to be cached between invocations.
#[cfg(feature = "unstable")]
pub mod cache {
use super::*;

/// Implementing this trait allows you to cache a parser for use with multiple inputs.
pub trait Cached {
/// The input type of the parser (for example, `&'src str`).
type Input<'src>: Input<'src>;

/// The output type of the parser.
type Output<'src>;

/// The extra type of the parser (see [`extra`]).
type Extra<'src>: ParserExtra<'src, Self::Input<'src>>;

// /// The type of the parser to be cached.
// type Parser<'a>: Parser<'a, Self::Input<'a>, Self::Output<'a>, Self::Extra<'a>>;

/// Create an instance of the parser.
///
/// This consumes `self` and is generic over `'src`, so the returned boxed parser must work for any input
/// lifetime.
fn make_parser<'src>(
self,
) -> Boxed<'src, 'src, Self::Input<'src>, Self::Output<'src>, Self::Extra<'src>>;
}

/// Allows a parser to be cached for use with inputs and outputs of different lifetimes.
pub struct Cache<C: Cached> {
// The boxed parser produced by `C::make_parser`, stored with every lifetime fixed to `'static`.
parser: Boxed<'static, 'static, C::Input<'static>, C::Output<'static>, C::Extra<'static>>,
// Marker carrying the type parameter `C`; it is only written in `new` and never read, hence the lint
// allowance.
#[allow(dead_code)]
phantom: EmptyPhantom<C>,
}

impl<C: Cached> Cache<C> {
/// Create a new cached parser.
///
/// This consumes `cacher` and calls [`Cached::make_parser`] on it once.
pub fn new(cacher: C) -> Self {
Self {
parser: cacher.make_parser(),
phantom: EmptyPhantom::new(),
}
}

/// Get a reference to the cached parser.
///
/// Because this function is generic over an input lifetime, the returned parser can be used with inputs of
/// any lifetime.
pub fn get<'src>(
&self,
) -> &Boxed<'src, 'src, C::Input<'src>, C::Output<'src>, C::Extra<'src>> {
// SAFETY: This is safe because the API of `Cache` requires that the parser we store is bound by an arbitrary
// lifetime variable (see `Cached::make_parser`). Therefore, the implementor of `Cached` has no way to
// 'discover' the lifetime and so, because lifetimes are entirely removed during monomorphisation, the parser
// must be valid for arbitrary lifetimes.
//
// The cast below re-types the stored `'static` boxed parser as one parameterised by `'src` (inferred from
// the return type); only lifetime parameters of the pointee type change.
unsafe {
&*(&self.parser as *const Boxed<C::Input<'_>, C::Output<'_>, C::Extra<'_>>).cast()
}
}
}
}
3 changes: 3 additions & 0 deletions src/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,9 @@ pub mod ascii {
}
}

// Unicode is the default
pub use unicode::*;

/// Parsers and utilities for working with unicode inputs.
pub mod unicode {
use super::*;
Expand Down