diff --git a/.gitignore b/.gitignore index b9ca6af..72d0940 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ build temp*/ log.html examples/corpus_*.tar.gz +*.egg-info diff --git a/Makefile b/Makefile index 0ae076d..ba7c2f5 100644 --- a/Makefile +++ b/Makefile @@ -38,6 +38,8 @@ test_pkg: $(EXAMPLES_DIR)/temp_corpus_pkg_$(CORPUS_VERSION) compile test_all: test_quick $(EXAMPLES_DIR)/temp_corpus_gap_$(CORPUS_VERSION) $(EXAMPLES_DIR)/temp_corpus_pkg_$(CORPUS_VERSION) tree-sitter parse '$(EXAMPLES_DIR)/temp_corpus_*/*.g*' --quiet --stat +image-example-parse.svg: grammar.js src/scanner.c ./etc/visualize_parse_tree.py + echo 'G := Group((1, 2, 3), (1, 2)(3, 4)); IsNormal(SymmetricGroup(4), G);' | ./etc/visualize_parse_tree.py -o ./image-example-parse.svg clean: rm -rf $(EXAMPLES_DIR)/temp_* diff --git a/README.md b/README.md index 4a9223d..773afc8 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,21 @@ [tree-sitter](https://github.com/tree-sitter/tree-sitter) grammar for [GAP system](https://www.gap-system.org/) files. -## Want to help complete this? +## Example + +![Example of a parse tree generated with `tree-sitter-gap`](image-example-parse.svg) + +The above is a parse tree generated using the `tree-sitter-gap` grammar for the following code snippet: + +```gap +G := Group((1, 2, 3), (1, 2)(3, 4)); +IsNormal(SymmetricGroup(4), G); +``` + +## Want to help improve this? - Install `tree-sitter` (version >= 0.22.2), [official instructions](https://tree-sitter.github.io/tree-sitter/creating-parsers#installation); - Read ["how to create a parser"](https://tree-sitter.github.io/tree-sitter/creating-parsers); -- Make the existing tests pass; - Resolve the TODOs in source and test files; - Add more missing language features; - Validate by running on the whole `GAP` library and on packages, see [Tests](#tests) section below. 
diff --git a/etc/extract_g.py b/etc/extract_g.py index ee2a00e..f76b111 100755 --- a/etc/extract_g.py +++ b/etc/extract_g.py @@ -5,6 +5,8 @@ are actually .tst files and files that are passed as input to `ReadAsFunction`. """ +import argparse + def is_tst_file(lines: list[str]) -> bool: """Check if lines correspond to a `tst` file. @@ -94,8 +96,6 @@ def extract_g_lines_from_tst_lines(lines: list[str]) -> list[str]: return result_lines -import argparse - if __name__ == "__main__": parser = argparse.ArgumentParser( description="Extract or fixup GAP code from a .g or .tst file."
diff --git a/etc/visualize_parse_tree.py b/etc/visualize_parse_tree.py
new file mode 100755
index 0000000..ba16f15
--- /dev/null
+++ b/etc/visualize_parse_tree.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+"""Script to visualize parse trees generated by `tree-sitter-gap`
+
+Make sure to run `python3 -m pip install .` in the project root to install the
+python bindings for the development version of the `tree-sitter-gap` grammar.
+Drawing the graph also needs the `pydot` package and the Graphviz `dot` program.
+"""
+
+import argparse
+import sys
+
+import pydot
+import tree_sitter_gap as tsgap
+from tree_sitter import Language, Node, Parser, Tree
+
+
+def traverse_tree(tree: Tree) -> tuple[list[Node], dict[Node, int]]:
+    """Collect every node of `tree` in pre-order.
+
+    The tree is walked iteratively with a cursor instead of recursively.
+
+    Returns a pair `(nodes, idx_of)`: `nodes` lists all nodes, named and
+    anonymous, in pre-order, and `idx_of` maps each node to its index in
+    `nodes`.
+    """
+    cursor = tree.walk()
+
+    nodes: list[Node] = []
+    idx_of: dict[Node, int] = {}
+    # Set once the subtree under the cursor is done, so that the next move is
+    # to a sibling or back up to the parent rather than down again.
+    visited_children = False
+    while True:
+        if not visited_children:
+            node = cursor.node
+            assert node is not None
+            idx_of[node] = len(nodes)
+            nodes.append(node)
+            if not cursor.goto_first_child():
+                visited_children = True
+        elif cursor.goto_next_sibling():
+            visited_children = False
+        elif not cursor.goto_parent():
+            # Back at the root with nothing left to visit.
+            break
+
+    return nodes, idx_of
+
+
+def build_dot(nodes: list[Node], idx_of: dict[Node, int]) -> pydot.Dot:
+    """Build a digraph of the named nodes, with field names as edge labels."""
+    dot = pydot.Dot(graph_name="D", graph_type="digraph")
+    for node_idx, node in enumerate(nodes):
+        # Anonymous nodes are left out to keep the picture readable.
+        if node.is_named:
+            dot.add_node(pydot.Node(f"node_{node_idx}", label=node.type))
+
+    for node_idx, node in enumerate(nodes):
+        if not node.is_named:
+            continue
+        for relative_idx, child in enumerate(node.children):
+            if not child.is_named:
+                continue
+            field = node.field_name_for_child(relative_idx)
+            attrs = {} if field is None else {"label": field}
+            dot.add_edge(
+                pydot.Edge(f"node_{node_idx}", f"node_{idx_of[child]}", **attrs)
+            )
+
+    return dot
+
+
+def main() -> None:
+    """Read GAP code, parse it and write its parse tree as an svg file."""
+    arg_parser = argparse.ArgumentParser(
+        description="Visualize the parse tree of a file, outputs svg"
+    )
+    arg_parser.add_argument(
+        "-i",
+        "--in_file",
+        type=str,
+        default=None,
+        help="Name of the file to process. If omitted, input is taken from stdin instead.",
+        required=False,
+    )
+    arg_parser.add_argument(
+        "-o",
+        "--out_file",
+        type=str,
+        help="Name of the output file.",
+        required=True,
+    )
+    args = arg_parser.parse_args()
+
+    if args.in_file is None:
+        # Read all of stdin; `input()` would stop at the first line break.
+        text = sys.stdin.buffer.read()
+    else:
+        with open(args.in_file, "rb") as in_file:
+            text = in_file.read()
+
+    ts_parser = Parser(Language(tsgap.language()))
+    nodes, idx_of = traverse_tree(ts_parser.parse(text))
+    build_dot(nodes, idx_of).write_svg(args.out_file, prog="dot")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/image-example-parse.svg b/image-example-parse.svg new file mode 100644 index 0000000..f84154b --- /dev/null +++ b/image-example-parse.svg @@ -0,0 +1,327 @@ + + + + + + +D + + + +node_0 + +source_file + + + +node_1 + +assignment_statement + + + +node_0->node_1 + + + + + +node_33 + +call + + + +node_0->node_33 + + + + + +node_2 + +identifier + + + +node_1->node_2 + + +left + + + +node_4 + +call + + + +node_1->node_4 + + +right + + + +node_5 + +identifier + + + +node_4->node_5 + + +function + + + +node_6 + +argument_list + + + +node_4->node_6 + + +arguments + + + +node_8 + +permutation_expression + + + +node_6->node_8 + + + + + +node_18 + +permutation_expression + + + +node_6->node_18 + + + + + +node_9 + +permutation_cycle_expression + + + +node_8->node_9 + + + + + +node_11 + +integer + + + 
+node_9->node_11 + + + + + +node_13 + +integer + + + +node_9->node_13 + + + + + +node_15 + +integer + + + +node_9->node_15 + + + + + +node_19 + +permutation_cycle_expression + + + +node_18->node_19 + + + + + +node_25 + +permutation_cycle_expression + + + +node_18->node_25 + + + + + +node_21 + +integer + + + +node_19->node_21 + + + + + +node_23 + +integer + + + +node_19->node_23 + + + + + +node_27 + +integer + + + +node_25->node_27 + + + + + +node_29 + +integer + + + +node_25->node_29 + + + + + +node_34 + +identifier + + + +node_33->node_34 + + +function + + + +node_35 + +argument_list + + + +node_33->node_35 + + +arguments + + + +node_37 + +call + + + +node_35->node_37 + + + + + +node_44 + +identifier + + + +node_35->node_44 + + + + + +node_38 + +identifier + + + +node_37->node_38 + + +function + + + +node_39 + +argument_list + + + +node_37->node_39 + + +arguments + + + +node_41 + +integer + + + +node_39->node_41 + + + + +