input-output-hk
diff --git a/‎hermes/crates/cbork/Cargo.toml‎
Lines changed: 8 additions & 0 deletions b/‎hermes/crates/cbork/Cargo.toml‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎hermes/crates/cbork/README.md‎
Lines changed: 20 additions & 0 deletions b/‎hermes/crates/cbork/README.md‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎hermes/crates/cbork/cddl-parser/Cargo.toml‎
Lines changed: 13 additions & 0 deletions b/‎hermes/crates/cbork/cddl-parser/Cargo.toml‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎hermes/crates/cbork/cddl-parser/src/grammar/cddl.pest‎
Lines changed: 187 additions & 0 deletions b/‎hermes/crates/cbork/cddl-parser/src/grammar/cddl.pest‎
Lines changed: 187 additions & 0 deletions
diff --git a/‎hermes/crates/cbork/cddl-parser/src/grammar/cddl_test.pest‎
Lines changed: 40 additions & 0 deletions b/‎hermes/crates/cbork/cddl-parser/src/grammar/cddl_test.pest‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎hermes/crates/cbork/cddl-parser/src/grammar/postlude.cddl‎
Lines changed: 48 additions & 0 deletions b/‎hermes/crates/cbork/cddl-parser/src/grammar/postlude.cddl‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎hermes/crates/cbork/cddl-parser/src/lib.rs‎
Lines changed: 52 additions & 0 deletions b/‎hermes/crates/cbork/cddl-parser/src/lib.rs‎
Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,8 @@
+[workspace]
+
+members = [
+    "cddl-parser",
+]
+
+[workspace.dependencies]
+derive_more = "0.99.17"
@@ -0,0 +1,20 @@
+# cbork
+
+CBOR Kit
+
+We need to support the parsing of CDDL in the following priority sequence.
+Each needs to be complete before extending with the subsequent specification extension.
+We do not need to handle choosing which extensions are enabled.
+
+1. CDDL Spec: <https://www.rfc-editor.org/rfc/rfc8610>
+2. Errata to include: <https://www.ietf.org/archive/id/draft-ietf-cbor-update-8610-grammar-01.html>
+3. Extensions: <https://www.rfc-editor.org/rfc/rfc9165>
+4. Modules: <https://cbor-wg.github.io/cddl-modules/draft-ietf-cbor-cddl-modules.html> and <https://github.com/cabo/cddlc>
+
+There are semantic rules about well formed CDDL files that are not enforced by the grammar.
+The full parser will also need to validate those rules.
+The primary rule is that the very first definition in the file is the base type.
+
+We should also be able to detect if there are orphaned definitions in the CDDL file.
+
+There may be other checks we need to perform on the parsed AST for validity.
@@ -0,0 +1,13 @@
+[package]
+name = "cddl-parser"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+derive_more =  { workspace = true }
+
+pest = { version = "2.7.2", features = ["std", "pretty-print", "memchr", "const_prec_climber"] }
+pest_derive = { version = "2.7.2", features = ["grammar-extras"] }
+
@@ -0,0 +1,187 @@
+//! CDDL Grammar adapted from RFC8610 Appendix B
+//! https://www.rfc-editor.org/rfc/rfc8610#appendix-B
+
+
+/// Top-level rule: a whole CDDL document.
+/// Anchored with SOI/EOI, so one or more `rule`s must account for the entire input.
+cddl = {
+    SOI
+    ~ S ~ rule+
+    ~ EOI
+}
+
+/// A single definition: either a type assignment or a group assignment.
+/// The type form is listed first, so the group form is only tried when the type form fails.
+rule = {
+    ( typename ~ assignt ~ type)
+    | ( groupname ~ assigng ~ grpent)
+}
+
+/// A type name: an identifier immediately followed by an optional generic parameter list.
+/// Compound-atomic (`$`), so no implicit whitespace is accepted between the two parts.
+typename = ${ id ~ genericparm? }
+/// A group name: same shape as `typename`.
+groupname = ${ id ~ genericparm? }
+
+/// Assignment operators accepted in a type rule.
+assignt = { "=" | "/=" }
+/// Assignment operators accepted in a group rule.
+assigng = { "=" | "//=" }
+
+/// Generic parameter list attached to a name, e.g. `<a, b>`.
+/// Whitespace is matched explicitly with `S`, following RFC8610
+/// (`"<" S id S *("," S id S ) ">"`), because this rule is reached from
+/// compound-atomic rules where pest inserts no implicit whitespace.
+genericparm = { "<" ~ S ~ id ~ S ~ ( "," ~ S ~ id ~ S )* ~ ">" }
+/// Generic argument list attached to a name reference, e.g. `<int, text>`.
+/// Explicit `S` for the same reason as `genericparm`
+/// (RFC8610: `"<" S type1 S *("," S type1 S ) ">"`).
+genericarg = { "<" ~ S ~ type1 ~ S ~ ( "," ~ S ~ type1 ~ S )* ~ ">" }
+
+/// A type: one or more `type1` choices separated by `/`.
+/// `S` is explicit on both sides of `/` (RFC8610: `type1 *(S "/" S type1)`) so that
+/// `a / b` also parses when this rule is reached from a compound-atomic rule
+/// (e.g. `grpent` or `tag6`), where pest inserts no implicit whitespace.
+type = { type1 ~ ( S ~ "/" ~ S ~ type1)* }
+
+/// A `type2`, optionally combined with a second `type2` by a range or control operator.
+/// `S` is explicit on both sides of the operator (RFC8610: `type2 [S (rangeop / ctlop) S type2]`).
+type1 = { type2 ~ ( S ~ ( rangeop | ctlop ) ~ S ~ type2)? }
+
+/// Reference to a type name, immediately followed by an optional generic argument list.
+typename_arg = ${ typename ~ genericarg? }
+/// Reference to a group name, immediately followed by an optional generic argument list.
+groupname_arg = ${ groupname ~ genericarg? }
+
+/// `#6`, an optional `.uint` suffix, then a parenthesised type, e.g. `#6.32(tstr)`.
+/// Compound-atomic: the only whitespace accepted is where `S` is written explicitly.
+tag6 = ${ "#" ~ "6" ~ ("." ~ uint)? ~ "(" ~ S ~ type ~ S ~ ")" }
+/// `#` followed by a single digit and an optional `.uint` suffix, e.g. `#7.25`.
+tag_generic = ${ "#" ~ ASCII_DIGIT ~ ("." ~ uint)? }
+
+/// The basic building block of a type: a literal value, a name reference, a
+/// parenthesised type, a `{}` / `[]` group, an unwrap (`~`), a choice-from-group (`&`),
+/// a tag form, or the bare `#`.
+///
+/// `S` is written explicitly inside the brackets and after `~` / `&`, as in RFC8610
+/// Appendix B, because this rule is reachable from compound-atomic rules (`grpent`,
+/// `tag6`) where pest inserts no implicit whitespace.
+/// Alternatives are ordered: `tag6` and `tag_generic` must stay ahead of the bare `#`.
+type2 = {
+    value
+    | typename_arg
+    | ( "(" ~ S ~ type ~ S ~ ")" )
+    | ( "{" ~ S ~ group ~ S ~ "}" )
+    | ( "[" ~ S ~ group ~ S ~ "]" )
+    | ( "~" ~ S ~ typename_arg )
+    | ( "&" ~ S ~ "(" ~ S ~ group ~ S ~ ")" )
+    | ( "&" ~ S ~ groupname_arg )
+    | tag6
+    | tag_generic
+    | "#"
+}
+
+/// Range operators. `...` is listed before `..` so the longer operator wins the ordered choice.
+rangeop = { "..." | ".." }
+/// Control operator: a `.` immediately followed by an identifier (no whitespace in between).
+ctlop = ${ "." ~ id }
+
+/// A group: one or more group choices separated by `//`.
+/// `S` is explicit on both sides of `//` (RFC8610: `grpchoice *(S "//" S grpchoice)`).
+group = { grpchoice ~ ( S ~ "//" ~ S ~ grpchoice)* }
+
+/// A group choice: zero or more entries, each optionally followed by a comma.
+/// The `S ~ ("," ~ S)?` tail is RFC8610's `optcom`. It is needed so that entries separated
+/// by ", " still parse when the group sits inside a compound-atomic rule, e.g.
+/// `#6.4([e10: int, m: integer])`, where pest inserts no implicit whitespace.
+grpchoice = { ( grpent ~ S ~ ( "," ~ S )? )* }
+
+/// A group entry, each form optionally prefixed by an occurrence indicator:
+///   1. an optional member key followed by a type,
+///   2. a group name with optional generic arguments,
+///   3. a parenthesised nested group.
+/// Compound-atomic: whitespace is only accepted where `S` appears explicitly, and the
+/// rules called from here are matched without implicit whitespace as well.
+grpent = ${
+    ( (occur ~ S)? ~ (memberkey ~ S)? ~ type )
+    | ( (occur ~ S)? ~ groupname ~ genericarg? )
+    | ( (occur ~ S)? ~ "(" ~ S ~ group ~ S ~ ")" )
+}
+
+/// Key of a group entry, in one of three forms:
+/// `type1 =>` (optionally with `^` before the arrow), `bareword :` or `value :`.
+///
+/// `S` is explicit before `=>` / `:` and after `^`, as in RFC8610
+/// (`type1 S ["^" S] "=>" / bareword S ":" / value S ":"`), because this rule is
+/// only reached from the compound-atomic `grpent`, where pest inserts no implicit whitespace.
+memberkey = {
+    ( type1 ~ S ~ ( "^" ~ S )? ~ "=>" )
+    | ( bareword ~ S ~ ":" )
+    | ( value ~ S ~ ":" )
+}
+
+/// An identifier used as an unquoted member key.
+bareword = { id }
+
+/// Occurrence indicator: `*` with an optional unsigned bound on either side, `+`, or `?`.
+occur = {
+    ( uint? ~ "*" ~ uint? )
+    | "+"
+    | "?"
+}
+
+// -----------------------------------------------------------------------------
+// Literal Values
+
+/// All Literal Values
+value = { number | text | bytes }
+
+/// Literal Numbers  - A float if it has fraction or exponent; int otherwise
+number = { hexfloat | intfloat }
+
+/// Hex floats of the form -0x123.abc0p+12
+hexfloat = ${ "-"? ~ "0x" ~ ASCII_HEX_DIGIT+ ~ ("." ~ ASCII_HEX_DIGIT+)? ~ "p" ~ exponent }
+
+/// Ints or Int floats
+intfloat = ${ int ~ ("." ~ fraction)? ~ ("e" ~ exponent)? }
+
+/// Fractional part of a number
+fraction = ${ ASCII_DIGIT+ }
+
+/// Exponent for a number.
+/// The sign is optional, as in RFC8610 (`exponent = ["+"/"-"] 1*DIGIT`), so both
+/// `1e10` and `1e+10` are accepted.
+exponent = ${ ("+" | "-")? ~ ASCII_DIGIT+ }
+
+/// All integers, signed and unsigned
+int = ${ "-"? ~ uint }
+
+
+/// Unsigned Integers
+uint = ${
+    ( ASCII_NONZERO_DIGIT ~ ASCII_DIGIT* )
+    | ( "0x" ~ ASCII_HEX_DIGIT+ )
+    | ( "0b" ~ ASCII_BIN_DIGIT+ )
+    | "0"
+}
+
+/// Literal Text
+text = ${ "\"" ~ SCHAR* ~ "\"" }
+
+/// Literal Bytes - Note CDDL Spec incorrectly defines b64''.
+bytes = ${ bytes_hex | bytes_b64 | bytes_text }
+bytes_hex = ${ "h" ~ "'" ~ HEX_PAIR* ~ "'" }
+bytes_b64 = ${ "b64" ~ "'" ~ URL_BASE64 ~ "'" }
+bytes_text = ${ "'" ~ BCHAR* ~ "'" }
+
+// -----------------------------------------------------------------------------
+// Simple multiple character sequences
+
+/// identifier,  called the `name` in the CDDL spec.
+id = ${
+    group_socket |
+    type_socket |
+    name
+}
+
+/// Special form of a name that represents a Group Socket.
+group_socket = ${ "$$" ~ ( ( "-" | "." )* ~ NAME_END )* }
+/// Special form of a name that represents a Type Socket.
+type_socket = ${ "$" ~ ( ( "-" | "." )* ~ NAME_END )* }
+/// General form of a name.
+name = ${ NAME_START ~ ( ( "-" | "." )* ~ NAME_END )* }
+
+/// A pair of hex digits. (Must always have even numbers of hex digits.)
+HEX_PAIR = _{ S ~ ASCII_HEX_DIGIT ~ S ~ ASCII_HEX_DIGIT ~ S }
+
+/// Whitespace is allowed and is ignored.
+/// This token will keep the whitespace, so it will need to be handled when converted to binary.
+URL_BASE64 = _{ S ~ ( URL_BASE64_ALPHA ~ S)* ~ URL_BASE64_PAD? }
+
+
+// -----------------------------------------------------------------------------
+// Characters, Whitespace and Comments
+
+/// Zero or more whitespace characters, for places where whitespace must be matched explicitly.
+S = _{ WHITESPACE* }
+/// A single whitespace character: space, tab or newline.
+/// `WHITESPACE` is also the rule name pest uses for implicit whitespace in non-atomic rules.
+WHITESPACE = _{ " " | "\t" | NEWLINE }
+/// A comment: `;` followed by visible characters or tabs, up to and including the newline.
+/// `COMMENT` is also the rule name pest uses for implicit comment skipping in non-atomic rules.
+// NOTE(review): the trailing NEWLINE is mandatory, so a comment on the final line of a
+// file that lacks a terminating newline will not match.
+COMMENT = _{ ";" ~ (PCHAR | "\t")* ~ NEWLINE }
+
+// URL Base64 Characterset.
+URL_BASE64_ALPHA = _{ ASCII_ALPHA | ASCII_DIGIT |  "-" | "_"  }
+// Optional Padding that goes at the end of Base64.
+URL_BASE64_PAD = _{ "~" }
+
+// Identifier Name Character sets.
+
+/// A name can start with an alphabetic character (including "@", "_", "$")
+/// The body of the name can consist of any of the characters from the set
+///   {"A" to "Z", "a" to "z", "0" to "9", "_", "-", "@", ".", "$"}
+// NAME_BODY = _{ NAME_END | "-" | "." }  -- Unused Rule
+NAME_START = _{ ASCII_ALPHA | "@" | "_" | "$" }
+/// A name can end with an alphabetic character (including "@", "_", "$") or a digit.
+NAME_END = _{ NAME_START | ASCII_DIGIT }
+
+/// All Visible characters.
+PCHAR = _{ ASCII_VISIBLE | UNICODE_CHAR }
+
+/// The set of characters valid for a text string.
+SCHAR = _{ SCHAR_ASCII_VISIBLE | UNICODE_CHAR | SESC }
+
+/// The set of characters valid for a byte string.
+BCHAR = _{ BCHAR_ASCII_VISIBLE | UNICODE_CHAR | SESC | NEWLINE }
+
+/// Escaping code to allow invalid characters to be used in text or byte strings.
+SESC = ${ "\\" ~ (ASCII_VISIBLE | UNICODE_CHAR) }
+
+/// All Visible Ascii characters.
+ASCII_VISIBLE = _{ ' '..'~' }
+
+/// Ascii subset valid for text strings.
+SCHAR_ASCII_VISIBLE = _{ ' '..'!' | '#'..'[' | ']'..'~' }
+
+/// Ascii subset valid for byte text strings.
+BCHAR_ASCII_VISIBLE = _{ ' '..'&' | '('..'[' | ']'..'~' }
+
+/// Valid non ascii unicode Characters
+UNICODE_CHAR = _{ '\u{80}'..'\u{10FFFD}' }
@@ -0,0 +1,40 @@
+
+// -----------------------------------------------------------------------------
+// Test Expressions ONLY TO Be USED by Unit Tests.
+// Extends `cddl.pest` with rules needed to properly check sub-rules.
+
+/// Test Expression for the S Rule.
+S_TEST = ${ SOI ~ S ~ EOI }
+
+/// Test Expression for the COMMENT Rule.
+COMMENT_TEST = { SOI ~ COMMENT* ~ EOI }
+
+/// Test expression for the URL_BASE64 Rule.
+URL_BASE64_TEST = { SOI ~ URL_BASE64 ~ EOI }
+
+/// Test expression to the id Rule.
+id_TEST = ${ SOI ~ id ~ EOI}
+
+/// Test expression to the bytes Rule.
+bytes_TEST = ${ SOI ~ bytes ~ EOI}
+
+/// Test expression to the text Rule.
+text_TEST = ${ SOI ~ text ~ EOI}
+
+/// Test expression to the uint Rule.
+uint_TEST = ${ SOI ~ uint ~ EOI}
+
+/// Test expression to the int Rule.
+int_TEST = ${ SOI ~ int ~ EOI}
+
+/// Test expression to the intfloat Rule.
+intfloat_TEST = ${ SOI ~ intfloat ~ EOI}
+
+/// Test expression to the hexfloat Rule.
+hexfloat_TEST = ${ SOI ~ hexfloat ~ EOI}
+
+/// Test expression to the number Rule.
+number_TEST = ${ SOI ~ number ~ EOI}
+
+/// Test expression to the value Rule.
+value_TEST = ${ SOI ~ value ~ EOI}
@@ -0,0 +1,48 @@
+; CDDL Standard Postlude as defined by Appendix D of RFC8610
+; https://www.rfc-editor.org/rfc/rfc8610#appendix-D
+
+any = #
+
+uint = #0
+nint = #1
+int = uint / nint
+
+bstr = #2
+bytes = bstr
+tstr = #3
+text = tstr
+
+tdate = #6.0(tstr)
+time = #6.1(number)
+number = int / float
+biguint = #6.2(bstr)
+bignint = #6.3(bstr)
+bigint = biguint / bignint
+integer = int / bigint
+unsigned = uint / biguint
+decfrac = #6.4([e10: int, m: integer])
+bigfloat = #6.5([e2: int, m: integer])
+eb64url = #6.21(any)
+eb64legacy = #6.22(any)
+eb16 = #6.23(any)
+encoded-cbor = #6.24(bstr)
+uri = #6.32(tstr)
+b64url = #6.33(tstr)
+b64legacy = #6.34(tstr)
+regexp = #6.35(tstr)
+mime-message = #6.36(tstr)
+cbor-any = #6.55799(any)
+
+float16 = #7.25
+float32 = #7.26
+float64 = #7.27
+float16-32 = float16 / float32
+float32-64 = float32 / float64
+float = float16-32 / float64
+
+false = #7.20
+true = #7.21
+bool = false / true
+nil = #7.22
+null = nil
+undefined = #7.23
@@ -0,0 +1,52 @@
+use std::fmt::Debug;
+
+pub use pest::Parser;
+use pest_derive::Parser;
+
+extern crate derive_more;
+use derive_more::{Display, From};
+
+/// CDDL parser generated by `pest_derive` from the grammar files named in the
+/// `#[grammar]` attributes below.
+///
+/// In addition to the main grammar it loads `cddl_test.pest`, whose `*_TEST` rules
+/// exist only so unit tests can exercise individual sub-rules.
+// Parser with DEBUG rules.  These rules are only used in tests.
+#[derive(Parser)]
+#[grammar = "grammar/cddl.pest"]
+#[grammar = "grammar/cddl_test.pest"]  // Ideally this would only be used in tests.
+pub struct CDDLParser;
+
+/// Error produced when CDDL input fails to parse.
+///
+/// A newtype around the `pest` parse error for this grammar's `Rule` type.
+/// `Display` and `From` come from `derive_more`, so a `pest` error converts into
+/// this type with `CDDLError::from`.
+#[derive(Display, Debug, From)]
+pub struct CDDLError(pest::error::Error<Rule>);
+
+// CDDL Standard Postlude - read from an external file
+// The file's text is embedded into the binary at compile time by `include_str!`.
+// NOTE(review): in the code shown here this constant is only referenced from the unit tests.
+const POSTLUDE: &str = include_str!("grammar/postlude.cddl");
+
+/// Parses `input` as a CDDL document, starting from the grammar's top-level `cddl` rule.
+///
+/// On success the parsed pairs are printed to stdout in `Debug` form and `Ok(())` is
+/// returned; nothing from the parse is handed back to the caller.
+///
+/// # Errors
+///
+/// Returns a boxed [`CDDLError`] wrapping the `pest` error when `input` does not match
+/// the grammar. The error is also printed to stdout, first in `Debug` and then in
+/// `Display` form.
+pub fn parse_cddl(input: &str) -> Result<(), Box<CDDLError>> {
+    match CDDLParser::parse(Rule::cddl, input) {
+        Ok(pairs) => {
+            println!("{pairs:?}");
+            Ok(())
+        }
+        Err(parse_error) => {
+            println!("{parse_error:?}");
+            println!("{parse_error}");
+            Err(Box::new(CDDLError::from(parse_error)))
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::{parse_cddl, POSTLUDE};
+
+    /// Smoke test: runs the parser over the embedded postlude and prints the outcome.
+    /// It makes no assertion, so a parse error is printed but does not fail the test.
+    #[test]
+    fn it_works() {
+        match parse_cddl(POSTLUDE) {
+            Err(failure) => {
+                println!("{failure:?}");
+                println!("{failure}");
+            }
+            Ok(unit) => println!("{unit:?}"),
+        }
+    }
+}