|
| 1 | +//! CDDL Grammar adapted from RFC8610 Appendix B |
| 2 | +//! https://www.rfc-editor.org/rfc/rfc8610#appendix-B |
| 3 | + |
| 4 | +/// Entry point: optional leading whitespace, then one or more rules, spanning the whole input (SOI to EOI). |
| 5 | +cddl = { |
| 6 | + SOI |
| 7 | + ~ S ~ rule+ |
| 8 | + ~ EOI |
| 9 | +} |
| 10 | +/// A single definition. The type form is tried first; the group form is only tried if the type form fails. |
| 11 | +rule = { |
| 12 | + ( typename ~ assignt ~ type) |
| 13 | + | ( groupname ~ assigng ~ grpent) |
| 14 | +} |
| 15 | +// Names are compound-atomic: an `id` immediately followed (no implicit whitespace) by optional generic parameters. |
| 16 | +typename = ${ id ~ genericparm? } |
| 17 | +groupname = ${ id ~ genericparm? } |
| 18 | +// Assignment operators: `=` for both kinds of rule, `/=` only for type rules and `//=` only for group rules. |
| 19 | +assignt = { "=" | "/=" } |
| 20 | +assigng = { "=" | "//=" } |
| 21 | +// Generic parameter and argument lists. Explicit `S` allows whitespace around each entry (RFC 8610), which is required because these rules are reached from atomic rules. |
| 22 | +genericparm = { "<" ~ S ~ id ~ S ~ ( "," ~ S ~ id ~ S )* ~ ">" } |
| 23 | +genericarg = { "<" ~ S ~ type1 ~ S ~ ( "," ~ S ~ type1 ~ S )* ~ ">" } |
| 24 | +/// A type choice: one or more `type1` alternatives separated by `/`, with optional whitespace on both sides of the slash. |
| 25 | +type = { type1 ~ ( S ~ "/" ~ S ~ type1)* } |
| 26 | +/// A `type2`, optionally followed by a range or control operator and a second `type2`; whitespace is allowed on both sides of the operator. |
| 27 | +type1 = { type2 ~ ( S ~ ( rangeop | ctlop ) ~ S ~ type2)? } |
| 28 | +// A name reference with optional generic arguments. NOTE(review): `typename`/`groupname` already accept `genericparm?`, so an argument list made only of ids is consumed as parameters before `genericarg` is tried - confirm intended. |
| 29 | +typename_arg = ${ typename ~ genericarg? } |
| 30 | +groupname_arg = ${ groupname ~ genericarg? } |
| 31 | +// Hash forms: `#6`, optional `.uint`, then a parenthesised type; or `#`, a single digit and optional `.uint`. `tag6` must be tried before `tag_generic`, which would also match the `#6` prefix. |
| 32 | +tag6 = ${ "#" ~ "6" ~ ("." ~ uint)? ~ "(" ~ S ~ type ~ S ~ ")" } |
| 33 | +tag_generic = ${ "#" ~ ASCII_DIGIT ~ ("." ~ uint)? } |
| 34 | +/// Base type forms, tried in order. Explicit `S` keeps whitespace legal inside delimiters and after `~` / `&` when this rule is reached from an atomic rule such as `grpent`. |
| 35 | +type2 = { |
| 36 | + value |
| 37 | + | typename_arg |
| 38 | + | ( "(" ~ S ~ type ~ S ~ ")" ) |
| 39 | + | ( "{" ~ S ~ group ~ S ~ "}" ) |
| 40 | + | ( "[" ~ S ~ group ~ S ~ "]" ) |
| 41 | + | ( "~" ~ S ~ typename_arg ) |
| 42 | + | ( "&" ~ S ~ "(" ~ S ~ group ~ S ~ ")" ) |
| 43 | + | ( "&" ~ S ~ groupname_arg ) |
| 44 | + | tag6 |
| 45 | + | tag_generic |
| 46 | + | "#" |
| 47 | +} |
| 48 | +// Range operators (`...` is listed before `..` so the longer token is matched first) and control operators (a `.` immediately followed by an `id`). |
| 49 | +rangeop = { "..." | ".." } |
| 50 | +ctlop = ${ "." ~ id } |
| 51 | +/// A group: one or more group choices separated by `//`, with optional whitespace on both sides of the separator. |
| 52 | +group = { grpchoice ~ ( S ~ "//" ~ S ~ grpchoice)* } |
| 53 | +/// Zero or more group entries, each followed by optional whitespace and an optional comma (RFC 8610 `optcom`). |
| 54 | +grpchoice = { ( grpent ~ S ~ ( "," ~ S )? )* } |
| 55 | +/// One group entry (compound-atomic, so whitespace must be matched explicitly with `S`): an optional occurrence, then a type with optional member key, a group name, or a parenthesised group. |
| 56 | +grpent = ${ |
| 57 | + ( (occur ~ S)? ~ (memberkey ~ S)? ~ type ) |
| 58 | + | ( (occur ~ S)? ~ groupname ~ genericarg? ) |
| 59 | + | ( (occur ~ S)? ~ "(" ~ S ~ group ~ S ~ ")" ) |
| 60 | +} |
| 61 | +/// Key of a group entry. Optional whitespace is allowed before `^`, `=>` and `:` as in RFC 8610. |
| 62 | +memberkey = { |
| 63 | + ( type1 ~ S ~ ( "^" ~ S )? ~ "=>" ) |
| 64 | + | ( bareword ~ S ~ ":" ) |
| 65 | + | ( value ~ S ~ ":" ) |
| 66 | +} |
| 67 | +/// An unquoted member key; syntactically just an `id`, kept as its own rule so it is distinguishable in the parse tree. |
| 68 | +bareword = { id } |
| 69 | +/// Occurrence indicator: `*` with an optional `uint` bound on either side, or `+`, or `?`. |
| 70 | +occur = { |
| 71 | + ( uint? ~ "*" ~ uint? ) |
| 72 | + | "+" |
| 73 | + | "?" |
| 74 | +} |
| 75 | + |
| 76 | +// ----------------------------------------------------------------------------- |
| 77 | +// Literal Values |
| 78 | + |
| 79 | +/// Any literal value: a number, a text string, or a byte string (tried in that order). |
| 80 | +value = { number | text | bytes } |
| 81 | + |
| 82 | +/// Literal numbers. A float if it has a fraction or an exponent; an int otherwise. Hex floats are tried first. |
| 83 | +number = { hexfloat | intfloat } |
| 84 | + |
| 85 | +/// Hex floats of the form -0x123.abc0p+12. The `p` exponent part is mandatory. |
| 86 | +hexfloat = ${ "-"? ~ "0x" ~ ASCII_HEX_DIGIT+ ~ ("." ~ ASCII_HEX_DIGIT+)? ~ "p" ~ exponent } |
| 87 | + |
| 88 | +/// Integers, or decimal floats: an `int` with an optional fraction and an optional `e` exponent. |
| 89 | +intfloat = ${ int ~ ("." ~ fraction)? ~ ("e" ~ exponent)? } |
| 90 | + |
| 91 | +/// Fractional part of a number: one or more decimal digits. |
| 92 | +fraction = ${ ASCII_DIGIT+ } |
| 93 | + |
| 94 | +/// Exponent for a number: an optional sign followed by one or more decimal digits (RFC 8610: `["+"/"-"] 1*DIGIT`). |
| 95 | +exponent = ${ ("+" | "-")? ~ ASCII_DIGIT+ } |
| 96 | + |
| 97 | +/// All integers, signed and unsigned: a `uint` with an optional leading `-`. |
| 98 | +int = ${ "-"? ~ uint } |
| 99 | + |
| 100 | + |
| 101 | +/// Unsigned integers: decimal without a leading zero, `0x` hexadecimal, `0b` binary, or a lone `0` (tried last so the prefixed forms win). |
| 102 | +uint = ${ |
| 103 | + ( ASCII_NONZERO_DIGIT ~ ASCII_DIGIT* ) |
| 104 | + | ( "0x" ~ ASCII_HEX_DIGIT+ ) |
| 105 | + | ( "0b" ~ ASCII_BIN_DIGIT+ ) |
| 106 | + | "0" |
| 107 | +} |
| 108 | + |
| 109 | +/// Literal text: zero or more `SCHAR` characters enclosed in double quotes. |
| 110 | +text = ${ "\"" ~ SCHAR* ~ "\"" } |
| 111 | + |
| 112 | +/// Literal bytes in hex (h'..'), base64 (b64'..') or plain text ('..') form. Note: the CDDL spec incorrectly defines b64''. |
| 113 | +bytes = ${ bytes_hex | bytes_b64 | bytes_text } |
| 114 | +bytes_hex = ${ "h" ~ "'" ~ HEX_PAIR* ~ "'" } |
| 115 | +bytes_b64 = ${ "b64" ~ "'" ~ URL_BASE64 ~ "'" } |
| 116 | +bytes_text = ${ "'" ~ BCHAR* ~ "'" } |
| 117 | + |
| 118 | +// ----------------------------------------------------------------------------- |
| 119 | +// Simple multiple character sequences |
| 120 | + |
| 121 | +/// Identifier, called the `name` in the CDDL spec. Sockets are tried first because a plain `name` may also start with `$`. |
| 122 | +id = ${ |
| 123 | + group_socket | |
| 124 | + type_socket | |
| 125 | + name |
| 126 | +} |
| 127 | + |
| 128 | +/// Special form of a name that represents a Group Socket: a `$$` prefix followed by an optional name body. |
| 129 | +group_socket = ${ "$$" ~ ( ( "-" | "." )* ~ NAME_END )* } |
| 130 | +/// Special form of a name that represents a Type Socket: a `$` prefix followed by an optional name body. |
| 131 | +type_socket = ${ "$" ~ ( ( "-" | "." )* ~ NAME_END )* } |
| 132 | +/// General form of a name. Runs of `-` and `.` are allowed inside the name but it can never end with one. |
| 133 | +name = ${ NAME_START ~ ( ( "-" | "." )* ~ NAME_END )* } |
| 134 | + |
| 135 | +/// A pair of hex digits, so a hex string always has an even number of digits. Whitespace is allowed before, between and after the two digits. |
| 136 | +HEX_PAIR = _{ S ~ ASCII_HEX_DIGIT ~ S ~ ASCII_HEX_DIGIT ~ S } |
| 137 | + |
| 138 | +/// URL-safe Base64 content. Whitespace is allowed between characters and is ignored by the grammar. |
| 139 | +/// The matched token keeps the whitespace, so it needs to be handled when the value is converted to binary. |
| 140 | +URL_BASE64 = _{ S ~ ( URL_BASE64_ALPHA ~ S)* ~ URL_BASE64_PAD? } |
| 141 | + |
| 142 | + |
| 143 | +// ----------------------------------------------------------------------------- |
| 144 | +// Characters, Whitespace and Comments |
| 145 | +// `S` matches optional whitespace explicitly; WHITESPACE and COMMENT are the rules pest skips implicitly in non-atomic rules. NOTE(review): unlike RFC 8610, `S` does not include COMMENT - confirm intended. |
| 146 | +S = _{ WHITESPACE* } |
| 147 | +WHITESPACE = _{ " " | "\t" | NEWLINE } |
| 148 | +COMMENT = _{ ";" ~ (PCHAR | "\t")* ~ NEWLINE } |
| 149 | + |
| 150 | +// URL-safe Base64 character set. |
| 151 | +URL_BASE64_ALPHA = _{ ASCII_ALPHA | ASCII_DIGIT | "-" | "_" } |
| 152 | +// Optional padding that goes at the end of Base64. NOTE(review): RFC 4648 pads with "=", and up to two pad characters may occur - confirm that a single "~" is intended. |
| 153 | +URL_BASE64_PAD = _{ "~" } |
| 154 | + |
| 155 | +// Identifier Name Character sets. |
| 156 | + |
| 157 | +/// A name can start with an alphabetic character or one of "@", "_", "$". |
| 158 | +/// The body of the name can consist of any of the characters from the set |
| 159 | +/// {"A" to "Z", "a" to "z", "0" to "9", "_", "-", "@", ".", "$"} |
| 160 | +// NAME_BODY = _{ NAME_END | "-" | "." } -- Unused Rule |
| 161 | +NAME_START = _{ ASCII_ALPHA | "@" | "_" | "$" } |
| 162 | +/// A name can end with any character that may start a name, or with a digit. |
| 163 | +NAME_END = _{ NAME_START | ASCII_DIGIT } |
| 164 | + |
| 165 | +/// All visible characters: visible ASCII plus non-ASCII Unicode. |
| 166 | +PCHAR = _{ ASCII_VISIBLE | UNICODE_CHAR } |
| 167 | + |
| 168 | +/// The set of characters valid inside a text string, including escape sequences. |
| 169 | +SCHAR = _{ SCHAR_ASCII_VISIBLE | UNICODE_CHAR | SESC } |
| 170 | + |
| 171 | +/// The set of characters valid inside a byte string, including escape sequences and newlines. |
| 172 | +BCHAR = _{ BCHAR_ASCII_VISIBLE | UNICODE_CHAR | SESC | NEWLINE } |
| 173 | + |
| 174 | +/// Escape sequence: a backslash followed by any visible character, allowing otherwise invalid characters in text or byte strings. |
| 175 | +SESC = ${ "\\" ~ (ASCII_VISIBLE | UNICODE_CHAR) } |
| 176 | + |
| 177 | +/// All visible ASCII characters, from space to tilde. |
| 178 | +ASCII_VISIBLE = _{ ' '..'~' } |
| 179 | + |
| 180 | +/// ASCII subset valid for text strings: visible ASCII without the double quote and the backslash. |
| 181 | +SCHAR_ASCII_VISIBLE = _{ ' '..'!' | '#'..'[' | ']'..'~' } |
| 182 | + |
| 183 | +/// ASCII subset valid for byte text strings: visible ASCII without the single quote and the backslash. |
| 184 | +BCHAR_ASCII_VISIBLE = _{ ' '..'&' | '('..'[' | ']'..'~' } |
| 185 | + |
| 186 | +/// Valid non-ASCII Unicode characters. |
| 187 | +UNICODE_CHAR = _{ '\u{80}'..'\u{10FFFD}' } |
0 commit comments