Merge branch 'main' into fixes/recursive-leak

zesterer · Jan 30, 2024 · 32c97f9 · 32c97f9
2 parents c35479f + 8658681
commit 32c97f9
Show file tree

Hide file tree

Showing 37 changed files with 1,751 additions and 1,621 deletions.
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
@@ -23,11 +23,11 @@ jobs:
             toolchain: nightly
             components: rustfmt, clippy
       - name: Run cargo check (all features)
-        run: cargo check --tests --verbose --all-features
+        run: cargo check --benches --examples --tests --verbose --all-features
       - name: Run cargo check (no features)
-        run: cargo check --tests --verbose --no-default-features
+        run: cargo check --benches --examples --tests --verbose --no-default-features
       - name: Run cargo clippy
-        run: cargo clippy --verbose --all-features -- -D warnings
+        run: cargo clippy --benches --examples --tests --verbose --all-features -- -D warnings
       - name: Run cargo fmt
         run: cargo fmt --verbose --check
       - name: Run cargo doc

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "chumsky"
-version = "1.0.0-alpha.4"
+version = "1.0.0-alpha.6"
 description = "A parser library for humans with powerful error recovery"
 authors = ["Joshua Barretto <joshua.s.barretto@gmail.com>", "Elijah Hartvigsen <elijah.reed@hartvigsen.xyz>", "Jakob Wiesmore <runetynan@gmail.com>"]
 repository = "https://github.com/zesterer/chumsky"
@@ -59,6 +59,9 @@ regex = ["dep:regex-automata"]
 # Enable serde serialization support
 serde = ["dep:serde"]
 
+# Enable dependencies only needed for generation of documentation on docs.rs
+docsrs = ["dep:vergen"]
+
 # An alias of all features that work with the stable compiler.
 # Do not use this feature, its removal is not considered a breaking change and its behaviour may change.
 # If you're working on chumsky and you're adding a feature that does not require nightly support, please add it to this list.
@@ -82,7 +85,7 @@ unicode-ident =  "1.0.10"
 ariadne = "0.2"
 pom = "3.2"
 nom = "7.1"
-winnow = "0.5.0"
+winnow = "0.5.19"
 serde_json = { version = "1.0", features = ["preserve_order"] }
 ciborium = { version = "0.2" }
 criterion = "0.4.0"
@@ -94,7 +97,7 @@ lasso = "0.7"
 slotmap = "1.0"
 
 [build-dependencies]
-vergen = { version = "=8.1.1", features = ["git", "gitoxide"] }
+vergen = { version = "=8.1.1", optional = true, features = ["git", "gitoxide"] }
 
 [target.'cfg(unix)'.dev-dependencies]
 pprof = { version = "0.11", features = ["flamegraph", "criterion"] }

diff --git a/README2.md b/README2.md
@@ -7,26 +7,28 @@
 
 Chumsky is a parser combinator library for Rust that makes writing expressive, high-performance parsers easy.
 
-Although chumsky is designed primarily for user-fancing parsers such as compilers, chumsky is just as much at home
-parsing binary protocols in a networking layer, configuration files, or any other form of complex input validation that
-you may need.
-
 <a href = "https://www.github.com/zesterer/tao">
     <img src="https://raw.githubusercontent.com/zesterer/chumsky/master/misc/example.png" alt="Example usage with my own language, Tao"/>
 </a>
 
+Although chumsky is designed primarily for user-facing parsers such as compilers, chumsky is just as much at home
+parsing binary protocols at the networking layer, configuration files, or any other form of complex input validation that
+you may need. It also has `no_std` support, making it suitable for embedded environments.
+
 ## Features
 
 - 🪄 **Expressive combinators** that make writing your parser a joy
 - 🎛️ **Fully generic** across input, token, output, span, and error types
-- 📑 **Zero-copy parsing** minimises your parser's need to allocate
+- 📑 **Zero-copy parsing** minimises allocation by having outputs hold references/slices of the input
 - 🚦 **Flexible error recovery** strategies out of the box
 - 🚀 **Internal optimiser** leverages the power of [GATs](https://smallcultfollowing.com/babysteps/blog/2022/06/27/many-modes-a-gats-pattern/) to optimise your parser for you
 - 📖 **Text-oriented parsers** for text inputs (i.e: `&[u8]` and `&str`)
 - 👁️‍🗨️ **Context-free grammars** are fully supported, with support for context-sensitivity
 - 🔄 **Left recursion and memoization** have opt-in support
-- 🪺 **Nested inputs** such as token trees are fully supported
+- 🪺 **Nested inputs** such as token trees are fully supported both as inputs and outputs
 - 🏷️ **Pattern labelling** for dynamic, user-friendly error messages
+- 🗃️ **Caching** allows parsers to be created once and reused many times
+- ↔️ **Pratt parsing** support for unary and binary operators
 
 *Note: Error diagnostic rendering is performed by [Ariadne](https://github.com/zesterer/ariadne)*
 
@@ -39,19 +41,18 @@ See [`examples/brainfuck.rs`](https://github.com/zesterer/chumsky/blob/master/ex
 ```rust
 use chumsky::prelude::*;
 
-/// Define out output AST (Abstract Syntax Tree)
+/// An AST (Abstract Syntax Tree) for Brainfuck instructions
 #[derive(Clone)]
 enum Instr {
     Left, Right,
     Incr, Decr,
     Read, Write,
-	// In Brainfuck, `[...]` blocks are loops
-    Loop(Vec<Self>),
+    Loop(Vec<Self>), // In Brainfuck, `[...]` loops contain sub-blocks of instructions
 }
 
 /// A function that returns an instance of our Brainfuck parser
 fn parser<'a>() -> impl Parser<'a, &'a str, Vec<Instr>> {
-	// Our parser is recursive: each instruction can contain many instructions (via `[...]` blocks)
+	// Brainfuck syntax is recursive: each block can contain many sub-blocks (via `[...]` loops)
     recursive(|bf| choice((
 		// All of the basic instructions are just single characters
         just('<').to(Instr::Left),
@@ -87,6 +88,32 @@ Chumsky has [a tutorial](https://github.com/zesterer/chumsky/blob/master/tutoria
 parser and interpreter for a simple dynamic language with unary and binary operators, operator precedence, functions,
 let declarations, and calls.
 
+## Cargo Features
+
+Chumsky contains several optional features that extend the crate's functionality.
+
+- `pratt`: enables the [pratt parsing](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html) combinator
+
+- `regex`: enables the regex combinator
+
+- `serde`: enables `serde` (de)serialization support for several types
+
+- `either`: implements `Parser` for `either::Either`, allowing dynamic configuration of parsers at runtime
+
+- `sync`: enables thread-safe features
+
+- `extension`: enables the extension API, allowing you to write your own first-class combinators that integrate with and extend chumsky
+
+- `memoization`: enables [memoization](https://en.wikipedia.org/wiki/Memoization#Parsers) features
+
+- `spill-stack` (enabled by default): avoids stack overflows by spilling stack data to the heap
+
+- `unstable`: enables experimental chumsky features
+
+- `std` (enabled by default): support for standard library features
+
+- `nightly`: enables support for features only supported by the nightly Rust compiler
+
 ## *What* is a parser combinator?
 
 Parser combinators are a technique for implementing parsers by defining them in terms of other parsers. The resulting

diff --git a/benches/cbor.rs b/benches/cbor.rs
@@ -3,7 +3,7 @@ use std::hint::black_box;
 
 mod utils;
 
-static CBOR: &'static [u8] = include_bytes!("samples/sample.cbor");
+static CBOR: &[u8] = include_bytes!("samples/sample.cbor");
 
 fn bench_cbor(c: &mut Criterion) {
     // c.bench_function("cbor_nom", {
@@ -125,7 +125,8 @@ mod chumsky_zero_copy {
                     };
                     cfg.exactly(num)
                 }))
-                .map_slice(int_out);
+                .to_slice()
+                .map(int_out);
 
             let uint = read_int.map(CborZero::Int);
             let nint = read_int.map(|i| CborZero::Int(-1 - i));
@@ -134,14 +135,16 @@ mod chumsky_zero_copy {
                 any()
                     .repeated()
                     .configure(|cfg, ctx| cfg.exactly(*ctx as usize))
-                    .map_slice(CborZero::Bytes),
+                    .to_slice()
+                    .map(CborZero::Bytes),
             );
 
             let str = read_int.ignore_with_ctx(
                 any()
                     .repeated()
                     .configure(|cfg, ctx| cfg.exactly(*ctx as usize))
-                    .map_slice(|slice| CborZero::String(std::str::from_utf8(slice).unwrap())),
+                    .to_slice()
+                    .map(|slice| CborZero::String(std::str::from_utf8(slice).unwrap())),
             );
 
             let array = read_int.ignore_with_ctx(

diff --git a/benches/json.rs b/benches/json.rs
@@ -1,3 +1,5 @@
+#![allow(clippy::result_large_err, clippy::type_complexity)]
+
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 
 mod utils;
@@ -22,7 +24,7 @@ pub enum JsonZero<'a> {
     Object(Vec<(&'a [u8], JsonZero<'a>)>),
 }
 
-static JSON: &'static [u8] = include_bytes!("samples/sample.json");
+static JSON: &[u8] = include_bytes!("samples/sample.json");
 
 fn bench_json(c: &mut Criterion) {
     c.bench_function("json_nom", {
@@ -142,15 +144,16 @@ mod chumsky_zero_copy {
                 .then(int)
                 .then(frac.or_not())
                 .then(exp.or_not())
-                .map_slice(|bytes| str::from_utf8(bytes).unwrap().parse().unwrap())
+                .to_slice()
+                .map(|bytes| str::from_utf8(bytes).unwrap().parse().unwrap())
                 .boxed();
 
             let escape = just(b'\\').then_ignore(one_of(b"\\/\"bfnrt"));
 
             let string = none_of(b"\\\"")
                 .or(escape)
                 .repeated()
-                .slice()
+                .to_slice()
                 .delimited_by(just(b'"'), just(b'"'))
                 .boxed();
 
@@ -197,14 +200,14 @@ mod pom {
     }
 
     fn number() -> Parser<u8, f64> {
-        let integer = one_of(b"123456789") - one_of(b"0123456789").repeat(0..) | sym(b'0');
+        let integer = (one_of(b"123456789") - one_of(b"0123456789").repeat(0..)) | sym(b'0');
         let frac = sym(b'.') + one_of(b"0123456789").repeat(1..);
         let exp = one_of(b"eE") + one_of(b"+-").opt() + one_of(b"0123456789").repeat(1..);
         let number = sym(b'-').opt() + integer + frac.opt() + exp.opt();
         number
             .collect()
             .convert(str::from_utf8)
-            .convert(|s| f64::from_str(&s))
+            .convert(f64::from_str)
     }
 
     fn string() -> Parser<u8, String> {
@@ -237,10 +240,10 @@ mod pom {
         (seq(b"null").map(|_| Json::Null)
             | seq(b"true").map(|_| Json::Bool(true))
             | seq(b"false").map(|_| Json::Bool(false))
-            | number().map(|num| Json::Num(num))
-            | string().map(|text| Json::Str(text))
-            | array().map(|arr| Json::Array(arr))
-            | object().map(|obj| Json::Object(obj)))
+            | number().map(Json::Num)
+            | string().map(Json::Str)
+            | array().map(Json::Array)
+            | object().map(Json::Object))
             - space()
     }
 
@@ -344,15 +347,15 @@ mod nom {
         terminated(value, space)(i)
     }
 
-    pub fn json<'a>(i: &'a [u8]) -> IResult<&'a [u8], JsonZero, (&'a [u8], nom::error::ErrorKind)> {
+    pub fn json(i: &[u8]) -> IResult<&[u8], JsonZero, (&[u8], nom::error::ErrorKind)> {
         root(i)
     }
 }
 
 mod winnow {
     use winnow::{
         ascii::{digit0, digit1, escaped},
-        combinator::separated0,
+        combinator::separated,
         combinator::{alt, dispatch},
         combinator::{cut_err, fail, opt, peek},
         combinator::{preceded, separated_pair, terminated},
@@ -403,7 +406,7 @@ mod winnow {
         preceded(
             '[',
             cut_err(terminated(
-                separated0(value, preceded(space, ',')),
+                separated(0.., value, preceded(space, ',')),
                 preceded(space, ']'),
             )),
         )
@@ -427,7 +430,7 @@ mod winnow {
         preceded(
             '{',
             cut_err(terminated(
-                separated0(member, preceded(space, ',')),
+                separated(0.., member, preceded(space, ',')),
                 preceded(space, '}'),
             )),
         )

diff --git a/benches/lex.rs b/benches/lex.rs
@@ -26,7 +26,7 @@ pub enum Token<'a> {
     Comma,
 }
 
-static SAMPLE: &'static [u8] = include_bytes!("tokens.txt");
+static SAMPLE: &[u8] = include_bytes!("tokens.txt");
 
 fn bench_lex(c: &mut Criterion) {
     c.bench_function("lex_chumsky_zero_copy", {
@@ -126,7 +126,7 @@ mod chumsky_zero_copy {
     use std::str;
 
     pub fn parser<'a>() -> impl Parser<'a, &'a [u8], Vec<Token<'a>>> {
-        let digits = one_of(b'0'..=b'9').repeated().slice();
+        let digits = one_of(b'0'..=b'9').repeated().to_slice();
 
         let int = one_of(b'1'..=b'9')
             .repeated()
@@ -148,7 +148,8 @@ mod chumsky_zero_copy {
             .then(int)
             .then(frac.or_not())
             .then(exp.or_not())
-            .map_slice(|bytes| str::from_utf8(bytes).unwrap().parse().unwrap())
+            .to_slice()
+            .map(|bytes| str::from_utf8(bytes).unwrap().parse().unwrap())
             .boxed();
 
         let escape = just(b'\\')
@@ -169,11 +170,11 @@ mod chumsky_zero_copy {
             .ignored()
             .or(escape)
             .repeated()
-            .slice()
+            .to_slice()
             .delimited_by(just(b'"'), just(b'"'))
             .boxed();
 
-        let ident = text::ascii::ident().map_slice(Token::Ident);
+        let ident = text::ascii::ident().to_slice().map(Token::Ident);
 
         choice((
             just(b"null").to(Token::Null),

diff --git a/build.rs b/build.rs
@@ -1,8 +1,20 @@
 use std::error::Error;
+#[cfg(feature = "docsrs")]
 use vergen::EmitBuilder;
 
 fn main() -> Result<(), Box<dyn Error>> {
+    emit_git_metadata()?; // Resolves to the vergen-backed variant under the `docsrs` feature, otherwise to a no-op
+    Ok(())
+}
+
+#[cfg(feature = "docsrs")]
+fn emit_git_metadata() -> Result<(), Box<dyn Error>> {
     // Have vergen emit all of its git-related build instructions; any failure is propagated to `main` via `?`
     EmitBuilder::builder().all_git().emit()?;
     Ok(())
 }
+
+#[cfg(not(feature = "docsrs"))]
+fn emit_git_metadata() -> Result<(), Box<dyn Error>> {
+    Ok(()) // No-op: without the `docsrs` feature the `EmitBuilder` import is compiled out, so nothing is emitted
+}
diff --git a/examples/foo.rs b/examples/foo.rs
@@ -27,6 +27,7 @@ enum Expr<'a> {
     },
 }
 
+#[allow(clippy::let_and_return)]
 fn parser<'a>() -> impl Parser<'a, &'a str, Expr<'a>> {
     let ident = text::ascii::ident().padded();