Merge pull request #340 from bvssvni/indent
Added indentation support
bvssvni authored Apr 10, 2021
2 parents 57b2ba3 + 15377eb commit df200ca
Showing 20 changed files with 592 additions and 88 deletions.
12 changes: 2 additions & 10 deletions README.md
@@ -1,15 +1,6 @@
# piston_meta
# Piston-Meta
A DSL parsing library for human readable text documents

[![Travis](https://img.shields.io/travis/PistonDevelopers/meta.svg?style=flat-square)](https://travis-ci.org/PistonDevelopers/meta)
[![Crates.io](https://img.shields.io/crates/v/piston_meta.svg?style=flat-square)](https://crates.io/crates/piston_meta)

[Documentation](https://docs.rs/piston_meta/)

[Why Piston-Meta?](https://github.com/PistonDevelopers/meta/issues/1)

[self-syntax](https://raw.githubusercontent.com/PistonDevelopers/meta/master/assets/self-syntax.txt)

### Introduction

Piston-Meta makes it easy to write parsers for human readable text documents.
@@ -63,6 +54,7 @@ If you get an error `#4003`, then it was caused by a rule in the node starting w
|Rule|Description|
|----|-----------|
|.l(rule)|Separates sub rule with lines.|
|.l+(rule)|Separates sub rule with lines, with indentation (whitespace sensitive).|
|.r?(rule)|Repeats sub rule until it fails, allows zero repetitions.|
|.r!(rule)|Repeats sub rule until it fails, requires at least one repetition.|
|...any_characters?:name|Reads a string until any characters, allows zero characters. Name is optional.|
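
The new `.l+` rule is exercised by the examples/indent.rs file added later in this commit. As a rough, hedged sketch (not itself part of the commit), the grammar from that example can be driven like this, with the nesting of the numbers following their indentation:

```rust
extern crate piston_meta;

use piston_meta::*;

fn main() {
    // Grammar borrowed from examples/indent.rs: a `node` is a number
    // followed by its child nodes, which are separated by lines and
    // nested by indentation via the new `.l+` rule.
    let rules = r#"
        2 node = [.$:"num" .l+(node:"node")]
        1 document = [.w? node:"node" .w?]
    "#;
    // Child lines are indented one level deeper than their parent.
    let text = "1\n    2\n    3\n        4\n";
    let rules = syntax_errstr(rules).expect("invalid meta grammar");
    let mut data = vec![];
    parse_errstr(&rules, text, &mut data).expect("parse error");
    json::print(&data);
}
```
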
164 changes: 164 additions & 0 deletions assets/self-syntax-1.0.txt
@@ -0,0 +1,164 @@
/* PISTON-META syntax

Piston-Meta is a high level meta language for transforming text into a tree.
It is designed for rapid prototyping and infrastructure in game engines.
Developed and maintained as part of the Piston project.

Strings:

You can reuse strings in the rules.
All strings start with `_`.

Built-in rules:

All built-in rules start with `.`.

.w Whitespace.
`.w?` is optional and `.w!` is required.

.t Text string.
`.t?` allows empty string and `.t!` disallows empty string.
`.t?:"message"` generates a meta string with name "message".

.$ Floating number of double precision (64 bit).
`.$_` allows underscore `_` as a visible separator, e.g. `1_000`.
`.$:"message"` generates a meta number with name "message".

.._any Reads until whitespace or any of the characters in the string.
`.._any?` allows empty

..._any Reads until any of the characters in the string.
`..._any?` allows empty and `..._any!` disallows empty.

Tokens:

A token is a sequence of characters defined by a string.
Generates boolean values `true` or `false`.

"hello" Expects "hello".
`"hello":"message"` generates `true` with name "message".
`"hello":!"message"` generates `false` with name "message".

!"hello" Expects anything but "hello".
`!"hello":"message"` generates `true` with name "message".
`!"hello":!"message"` generates `false` with name "message".

Composition rules:

Rules are separated using whitespace, e.g. `[a b c]` or `{a b c}`.

[...] Parses a sequence of rules.

{...} Tries the first rule, then the second if the first fails etc.

? Parses a rule optionally.
`?$:"value"` generates a number with name "value", if any.

! Fails if rule gets parsed.

.s Separates a rule by another rule.
`.s?.("," $)` Allows zero repetitions, allows trailing.
`.s?("," $)` Allows zero repetitions, no trailing.
`.s!.("," $)` At least one repetition, allows trailing.
`.s!("," $)` At least one repetition, no trailing.

.r Repeats a rule until it fails parsing.
`.r?("ha")` Allows zero repetitions.
`.r!("ha")` At least one repetition.

.l Separates a rule by one or more new lines.
Handles edge cases nicely for the intended use.
`.l($)` List of numbers, one per line.

Generating meta data:

The following parses the sentences "hi James!" and "hi Peter!".

1 say_hi = ["hi" .w! {"James" "Peter"} "!"]
2 document = say_hi

To generate data, we need to assign `say_hi` to a message.

1 say_hi = ["hi" .w! {"James":"james" "Peter":"peter"}]
2 document = say_hi:"say_hi"

The sentence "hi James!" then generates the equivalent JSON:

{
    "say_hi": {
        "james": true
    }
}

By removing `:"say_hi"`, you can "lift" the sub data up one level:

{
    "james": true
}

Numbers in front of the rules are used to improve error reporting.
For example, the following was reported by the 1st rule and 6th sub rule.

Error #1006, Expected: `!`
1,9: hi James
1,9:         ^

The last node is used to parse the entire document.

Each sub rule in the node is assigned a debug id used in error reporting.
The debug ids for a node's sub rules start at `1000n`, where `n` is the node's id.

*/

_opt: "optional"
_inv: "inverted"
_prop: "property"
_any: "any_characters"
_seps: "[]{}():.!?\""
0 multi_line_comment = ["/*" ..."*/"? .r?({
  [!"*/" "*" ..."*/"?]
  [multi_line_comment ..."*/"?]
  ["/" ..."*/"?]
  }) "*/"]
1 comment = {multi_line_comment ["//" ..."\n"?]}
2 string = ["_" .._seps!:"name" ":" .w? .t?:"text"]
3 node = [.$:"id" .w! !"_" !"." .._seps!:"name" .w? "=" .w? rule:"rule"]
4 set = {.t!:"value" ["_" .._seps!:"ref"]}
5 set_opt = {.t?:"value" ["_" .._seps!:"ref"]}
6 opt = {"?":_opt "!":!_opt}
7 number = [".$" ?"_":"underscore" ?[":" set:_prop]]
8 text = [".t" {"?":"allow_empty" "!":!"allow_empty"} ?[":" set:_prop]]
9 reference = [!"_" !"." .._seps!:"name" ?[":" set:_prop]]
10 sequence = ["[" .w? .s!.(.w! rule:"rule") "]"]
11 select = ["{" .w? .s!.(.w! rule:"rule") "}"]
12 separated_by = [".s" opt ?".":"allow_trail"
  "(" .w? rule:"by" .w! rule:"rule" .w? ")"]
13 tag = [?"!":"not" set:"text" ?[":" ?"!":_inv set:_prop]]
14 optional = ["?" rule:"rule"]
15 not = ["!" rule:"rule"]
16 whitespace = [".w" opt]
17 until_any_or_whitespace = [".." set_opt:_any opt ?[":" set:_prop]]
18 until_any = ["..." set_opt:_any opt ?[":" set:_prop]]
19 repeat = [".r" opt "(" rule:"rule" ")"]
20 lines = [".l(" .w? rule:"rule" .w? ")"]
21 rule = {
  whitespace:"whitespace"
  until_any_or_whitespace:"until_any_or_whitespace"
  until_any:"until_any"
  lines:"lines"
  repeat:"repeat"
  number:"number"
  text:"text"
  reference:"reference"
  sequence:"sequence"
  select:"select"
  separated_by:"separated_by"
  tag:"tag"
  optional:"optional"
  not:"not"
}
22 document = [
  .l([.w? {string:"string" comment}])
  .l([.w? {node:"node" comment}])
  .w?
]
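
The "Generating meta data" walkthrough in the comment above can be reproduced with the library's documented entry points, the same `syntax_errstr`, `parse_errstr` and `json::print` calls used in examples/indent.rs below. This is a hedged sketch rather than code from the commit: it merges the two `say_hi` variants from the walkthrough so that the trailing `!` is consumed, and the exact formatting printed by `json::print` may differ from the JSON shown above.

```rust
extern crate piston_meta;

use piston_meta::*;

fn main() {
    // The say_hi grammar from the walkthrough, with the named alternatives
    // and the trailing "!" combined into one rule.
    let rules = r#"
        1 say_hi = ["hi" .w! {"James":"james" "Peter":"peter"} "!"]
        2 document = say_hi:"say_hi"
    "#;
    let rules = match syntax_errstr(rules) {
        Err(err) => {
            println!("{}", err);
            return;
        }
        Ok(rules) => rules
    };
    let mut data = vec![];
    if let Err(err) = parse_errstr(&rules, "hi James!", &mut data) {
        println!("{}", err);
        return;
    }
    // Expected to print meta data roughly equivalent to
    // {"say_hi": {"james": true}}.
    json::print(&data);
}
```
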
2 changes: 1 addition & 1 deletion assets/self-syntax.txt
@@ -140,7 +140,7 @@ _seps: "[]{}():.!?\""
17 until_any_or_whitespace = [".." set_opt:_any opt ?[":" set:_prop]]
18 until_any = ["..." set_opt:_any opt ?[":" set:_prop]]
19 repeat = [".r" opt "(" rule:"rule" ")"]
20 lines = [".l(" .w? rule:"rule" .w? ")"]
20 lines = [".l" ?"+":"indent" "(" .w? rule:"rule" .w? ")"]
21 rule = {
whitespace:"whitespace"
until_any_or_whitespace:"until_any_or_whitespace"
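
With the one-line change above, the meta grammar itself accepts both the plain `.l(...)` form and the new indentation-sensitive `.l+(...)` form. A small hedged check; the two grammars here are made up for illustration and are not part of the commit:

```rust
extern crate piston_meta;

use piston_meta::*;

fn main() {
    // Both spellings of the lines rule should now be accepted when the
    // meta grammar is parsed; only `.l+` is whitespace sensitive.
    let grammars = [
        r#"1 document = .l([.w? .$:"num"])"#,
        r#"1 document = .l+([.w? .$:"num"])"#,
    ];
    for &grammar in &grammars {
        match syntax_errstr(grammar) {
            Ok(_) => println!("accepted: {}", grammar),
            Err(err) => println!("rejected: {}\n{}", grammar, err),
        }
    }
}
```
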
35 changes: 35 additions & 0 deletions examples/indent.rs
@@ -0,0 +1,35 @@
extern crate piston_meta;

use piston_meta::*;

fn main() {
let text = r#"
1
2
3
4
5
6
"#;
let rules = r#"
2 node = [.$:"num" .l+(node:"node")]
1 document = [.w? node:"node" .w?]
"#;
// Parse rules with meta language and convert to rules for parsing text.
let rules = match syntax_errstr(rules) {
Err(err) => {
println!("{}", err);
return;
}
Ok(rules) => rules
};
let mut data = vec![];
match parse_errstr(&rules, text, &mut data) {
Err(err) => {
println!("{}", err);
return;
}
Ok(()) => {}
};
json::print(&data);
}
7 changes: 6 additions & 1 deletion src/bootstrap/convert.rs
Expand Up @@ -680,6 +680,10 @@ pub fn convert(
        let start = convert.clone();
        let range = convert.start_node("lines")?;
        convert.update(range);
        let indent = if let Ok((range, val)) = convert.meta_bool("indent") {
            convert.update(range);
            val
        } else {false};
        let (range, rule) = read_rule(
            debug_id, "rule", convert, strings, ignored
        )?;
@@ -690,7 +694,8 @@
        Ok((convert.subtract(start),
            Rule::Lines(Box::new(Lines {
                debug_id: *debug_id,
                rule: rule,
                rule,
                indent,
            }))))
    }

31 changes: 25 additions & 6 deletions src/bootstrap/rules.rs
@@ -876,33 +876,50 @@ pub fn rules() -> Syntax {
        ]
    });

    // 20 lines = [".l(" .w? rule:"rule" .w? ")"]
    // 20 lines = [".l" ?"+":"indent" "(" .w? rule:"rule" .w? ")"]
    let lines_rule = Rule::Sequence(Sequence {
        debug_id: 20001,
        args: vec![
            Rule::Tag(Tag {
                debug_id: 20002,
                text: Arc::new(".l(".into()),
                text: Arc::new(".l".into()),
                not: false,
                inverted: false,
                property: None,
            }),
            Rule::Whitespace(Whitespace {
            Rule::Optional(Box::new(Optional {
                debug_id: 20003,
                rule: Rule::Tag(Tag {
                    debug_id: 20004,
                    text: Arc::new("+".into()),
                    not: false,
                    inverted: false,
                    property: Some(Arc::new("indent".into())),
                }),
            })),
            Rule::Tag(Tag {
                debug_id: 20005,
                text: Arc::new("(".into()),
                not: false,
                inverted: false,
                property: None
            }),
            Rule::Whitespace(Whitespace {
                debug_id: 20006,
                optional: true,
            }),
            Rule::Node(Node {
                debug_id: 20004,
                debug_id: 20007,
                name: Arc::new("rule".into()),
                index: None,
                property: Some(Arc::new("rule".into())),
            }),
            Rule::Whitespace(Whitespace {
                debug_id: 20005,
                debug_id: 20008,
                optional: true,
            }),
            Rule::Tag(Tag {
                debug_id: 20006,
                debug_id: 20009,
                text: Arc::new(")".into()),
                not: false,
                inverted: false,
@@ -1030,6 +1047,7 @@
        args: vec![
            Rule::Lines(Box::new(Lines {
                debug_id: 22002,
                indent: false,
                rule: Rule::Sequence(Sequence {
                    debug_id: 22003,
                    args: vec![
@@ -1059,6 +1077,7 @@
            })),
            Rule::Lines(Box::new(Lines {
                debug_id: 22008,
                indent: false,
                rule: Rule::Sequence(Sequence {
                    debug_id: 22009,
                    args: vec![
9 changes: 8 additions & 1 deletion src/lib.rs
@@ -56,6 +56,7 @@
//! |Rule|Description|
//! |----|-----------|
//! |.l(rule)|Separates sub rule with lines.|
//! |.l+(rule)|Separates sub rule with lines, with indentation (whitespace sensitive).|
//! |.r?(rule)|Repeats sub rule until it fails, allows zero repetitions.|
//! |.r!(rule)|Repeats sub rule until it fails, requires at least one repetition.|
//! |...any_characters?:name|Reads a string until any characters, allows zero characters. Name is optional.|
@@ -136,7 +137,13 @@ pub use parse_error_handler::{
    ParseErrorHandler
};
pub use parse_error::ParseError;
pub use meta_rules::{ parse, parse_errstr, Rule };
pub use meta_rules::{
    parse,
    parse_errstr,
    parse_errstr_with_indent,
    parse_with_indent,
    Rule
};
pub use bootstrap::Convert;

/// The type of debug id used to track down errors in rules.
10 changes: 6 additions & 4 deletions src/meta_rules/fast_select.rs
@@ -4,6 +4,7 @@ use read_token::ReadToken;
use super::{
    ret_err,
    err_update,
    IndentSettings,
    ParseResult,
};
use {
@@ -45,7 +46,8 @@ impl FastSelect {
        tokens: &mut Vec<Range<MetaData>>,
        state: &TokenizerState,
        read_token: &ReadToken,
        refs: &[Rule]
        refs: &[Rule],
        indent_settings: &mut IndentSettings,
    ) -> ParseResult<TokenizerState> {
        if self.tail {
            if let Some(ch) = read_token.src.chars().next() {
@@ -55,7 +57,7 @@
                let ind = self.table[buf[0] as usize];
                if ind != 255 {
                    let sub_rule = &self.args[ind as usize];
                    match sub_rule.parse(tokens, state, read_token, refs) {
                    match sub_rule.parse(tokens, state, read_token, refs, indent_settings) {
                        Ok((range, state, err)) => {
                            err_update(err, &mut opt_error);
                            return Ok((read_token.peek(range.length),
@@ -67,7 +69,7 @@
                    }
                }
                let sub_rule = &self.args[self.args.len()-1];
                match sub_rule.parse(tokens, state, read_token, refs) {
                match sub_rule.parse(tokens, state, read_token, refs, indent_settings) {
                    Ok((range, state, err)) => {
                        err_update(err, &mut opt_error);
                        Ok((read_token.peek(range.length),
@@ -88,7 +90,7 @@
            let ind = self.table[buf[0] as usize];
            if ind != 255 {
                let sub_rule = &self.args[ind as usize];
                match sub_rule.parse(tokens, state, read_token, refs) {
                match sub_rule.parse(tokens, state, read_token, refs, indent_settings) {
                    Ok((range, state, err)) => {
                        Ok((read_token.peek(range.length),
                            state, err))