trailofbits · ESultanik · Jun 9, 2020 · Jun 9, 2020 · Jun 10, 2020 · Jun 10, 2020
@@ -5,7 +5,7 @@
 [![Slack Status](https://slack.empirehacking.nyc/badge.svg)](https://slack.empirehacking.nyc)
 
 Graphtage is a command-line utility and [underlying library](https://trailofbits.github.io/graphtage/latest/library.html)
-for semantically comparing and merging tree-like structures, such as JSON, XML, HTML, YAML, plist, and CSS files. Its name is a
+for semantically comparing and merging tree-like structures, such as JSON, XML, HTML, YAML, TOML, plist, and CSS files. Its name is a
 portmanteau of “graph” and “graftage”—the latter being the horticultural practice of joining two trees together such
 that they grow as one.
 

@@ -9,7 +9,7 @@
     ast, bounds, builder, constraints, dataclasses, edits, expressions, fibonacci, formatter, levenshtein, matching,
     object_set, pickle, printer, pydiff, search, sequences, tree, utils
 )
-from . import csv, json, xml, yaml, plist
+from . import csv, json, plist, toml, xml, yaml
 
 import inspect
 

@@ -225,6 +225,8 @@ def printer_type(*pos_args, **kwargs):
         mimetypes.suffix_map['.yaml'] = '.yml'
     if '.json5' not in mimetypes.types_map:
         mimetypes.add_type('application/json5', '.json5')
+    if '.toml' not in mimetypes.types_map:
+        mimetypes.add_type('application/toml', '.toml')
     if '.plist' not in mimetypes.types_map:
         mimetypes.add_type('application/x-plist', '.plist')
     if '.pkl' not in mimetypes.types_map and '.pickle' not in mimetypes.types_map:

@@ -0,0 +1,199 @@
+import itertools
+import os
+from typing import Iterator, Optional, Tuple, Union
+
+import toml
+
+from . import json
+from .graphtage import BuildOptions, Filetype, KeyValuePairNode, LeafNode, MappingNode, StringFormatter, StringNode
+from .printer import Printer
+from .sequences import SequenceFormatter
+from .tree import GraphtageFormatter, TreeNode
+
+
+def build_tree(path: str, options: Optional[BuildOptions]) -> TreeNode:
+    with open(path, 'r') as f:
+        return json.build_tree(toml.load(f), options)
+
+
+class TOMLListFormatter(SequenceFormatter):
+    """A sub-formatter for TOML lists.
+
+    Lists are written between ``[`` and ``]`` with ``,`` as the item delimiter (see :meth:`__init__`).
+
+    """
+    is_partial = True
+
+    def __init__(self):
+        """Initializes the TOML list formatter.
+
+        Equivalent to::
+
+            super().__init__('[', ']', ',')
+
+        """
+        super().__init__('[', ']', ',')
+
+    def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
+        """Intentionally does nothing.
+
+        NOTE(review): presumably this overrides a per-item newline hook of :class:`SequenceFormatter` so that list
+        items are not placed on separate lines; confirm against the base class.
+
+        """
+        pass
+
+    def print_ListNode(self, *args, **kwargs):
+        """Prints a :class:`graphtage.ListNode`.
+
+        Equivalent to::
+
+            super().print_SequenceNode(*args, **kwargs)
+
+        """
+        super().print_SequenceNode(*args, **kwargs)
+
+    def print_SequenceNode(self, *args, **kwargs):
+        """Prints a non-List sequence.
+
+        This delegates to the parent formatter's implementation::
+
+            self.parent.print(*args, **kwargs)
+
+        which should invoke :meth:`TOMLFormatter.print`, so that a dict appearing inside a list is handled by the
+        parent formatter rather than by this list formatter.
+
+        NOTE(review): no ``TOMLDictFormatter`` is defined in this module; mappings appear to be handled by
+        :meth:`TOMLFormatter.print_MappingNode`; confirm how the parent dispatches. Also note that TOML itself does
+        permit tables inside arrays (inline tables and arrays of tables).
+
+        """
+        self.parent.print(*args, **kwargs)
+
+
+def toml_dumps(obj) -> str:
+    s = toml.dumps({'result': obj})
+    expected_prefix = 'result = '
+    expected_suffix = '\n'
+    assert s.startswith(expected_prefix)
+    assert s.endswith(expected_suffix)
+    return s[len(expected_prefix):-len(expected_suffix)]
+
+
+class TOMLStringFormatter(StringFormatter):
+    """A TOML formatter for strings."""
+    is_partial = True
+
+    def escape(self, c: str) -> str:
+        s = toml_dumps(c)
+        if s.startswith('"') and s.endswith('"'):
+            return s[1:-1]
+        else:
+            return s
+
+
+class TOMLMapping:
+    def __init__(
+            self,
+            mapping: MappingNode,
+            parent: Optional['TOMLMapping'] = None,
+            parent_name: Optional[TreeNode] = None
+    ):
+        self.mapping: MappingNode = mapping
+        self.parent: Optional[TOMLMapping] = parent
+        self.parent_name: Optional[TreeNode] = parent_name
+
+    @property
+    def name_segments(self) -> Tuple[TreeNode, ...]:
+        if self.parent is None:
+            return ()
+        else:
+            return self.parent.name_segments + (self.parent_name,)
+
+    def items(self) -> Iterator[KeyValuePairNode]:
+        inserted = ()
+        if self.mapping.edited and self.mapping.inserted:
+            inserted = self.mapping.inserted
+        for kvp in itertools.chain(self.mapping, inserted):
+            if not isinstance(kvp.value, MappingNode):
+                yield kvp
+
+    def __bool__(self):
+        try:
+            next(self.items())
+            return True
+        except StopIteration:
+            try:
+                next(self.children())
+                return False
+            except StopIteration:
+                return True
+
+    def children(self) -> Iterator['TOMLMapping']:
+        inserted = ()
+        if self.mapping.edited and self.mapping.inserted:
+            inserted = self.mapping.inserted
+        for kvp in itertools.chain(self.mapping, inserted):
+            if isinstance(kvp.value, MappingNode):
+                yield TOMLMapping(mapping=kvp.value, parent=self, parent_name=kvp.key)
+
+
+class TOMLFormatter(GraphtageFormatter):
+    sub_format_types = [TOMLListFormatter, TOMLStringFormatter]
+
+    def print(self, printer: Printer, *args, **kwargs):
+        # TOML has optional indentation; make it only two spaces, if we use it:
+        printer.indent_str = '  '
+        super().print(printer, *args, **kwargs)
+
+    def print_LeafNode(self, printer: Printer, node: LeafNode):
+        printer.write(toml_dumps(node.object))
+
+    def print_KeyValuePairNode(self, printer: Printer, node: KeyValuePairNode):
+        if isinstance(node.key, StringNode):
+            node.key.quoted = False
+        self.print(printer, node.key)
+        printer.write(' = ')
+        if isinstance(node.value, StringNode):
+            node.value.quoted = True
+        self.print(printer, node.value)
+        printer.newline()
+
+    def print_MappingNode(self, printer: Printer, node: MappingNode):
+        mappings = [TOMLMapping(node)]
+        while mappings:
+            m: TOMLMapping = mappings.pop()
+            if m:
+                name = m.name_segments
+                if name:
+                    printer.write('[')
+                    first = True
+                    for s in name:
+                        if first:
+                            first = False
+                        else:
+                            printer.write('.')
+                        if isinstance(s, StringNode):
+                            s.quoted = False
+                        self.print(printer, s)
+                    printer.write(']')
+                    printer.newline()
+                for kvp in m.items():
+                    self.print(printer, kvp)
+                printer.newline()
+            mappings.extend(m.children())
+
+
+class TOML(Filetype):
+    """The TOML filetype."""
+    def __init__(self):
+        """Initializes the TOML filetype.
+
+        TOML identifies itself with the MIME types `application/toml` and `text/toml`.
+
+        """
+        super().__init__(
+            'toml',
+            'application/toml',
+            'text/toml'
+        )
+
+    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
+        """Equivalent to :func:`build_tree`"""
+        return build_tree(path, options=options)
+
+    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
+        try:
+            return self.build_tree(path=path, options=options)
+        except (IndexError, TypeError, ValueError) as e:
+            return f'Error parsing {os.path.basename(path)}: {e})'
+
+    def get_default_formatter(self) -> json.JSONFormatter:
+        return TOMLFormatter.DEFAULT_INSTANCE
@@ -33,6 +33,7 @@ dependencies = [
     "numpy>=1.19.4",
     "PyYAML",
     "scipy>=1.4.0",
+    "toml>=0.10.2",
     "tqdm",
     "typing_extensions>=3.7.4.3",
 ]

@@ -7,6 +7,7 @@
 from typing import FrozenSet, Optional, Tuple, Type, Union
 from unittest import TestCase
 
+import toml
 import yaml
 from tqdm import trange
 
@@ -103,11 +104,17 @@ def make_random_non_container(exclude_bytes: FrozenSet[str] = frozenset(), allow
         ])()
 
     @staticmethod
-    def _make_random_obj(obj_stack, force_container_type: Optional[Type[Union[dict, list]]] = None, *args, **kwargs):
+    def _make_random_obj(
+            obj_stack,
+            force_container_type: Optional[Type[Union[dict, list]]] = None,
+            allow_non_container: bool = True,
+            *args,
+            **kwargs
+    ):
         r = random.random()
-        NON_CONTAINER_PROB = 0.1
+        NON_CONTAINER_PROB = [0.0, 0.1][allow_non_container]
         CONTAINER_PROB = (1.0 - NON_CONTAINER_PROB) / 2.0
-        if r <= NON_CONTAINER_PROB:
+        if r <= NON_CONTAINER_PROB and allow_non_container:
             ret = TestFormatting.make_random_non_container(*args, **kwargs)
         elif r <= NON_CONTAINER_PROB + CONTAINER_PROB:
             if force_container_type is not None:
@@ -128,9 +135,18 @@ def make_random_obj(
             force_string_keys: bool = False,
             allow_empty_containers: bool = True,
             alternate_containers: bool = False,
+            lists_can_contain_dicts: bool = True,
+            force_outer_container_type: Optional[Type[Union[dict, list]]] = None,
+            allow_lists: bool = True,
             *args, **kwargs):
         obj_stack = []
-        ret = TestFormatting._make_random_obj(obj_stack, *args, **kwargs)
+        ret = TestFormatting._make_random_obj(
+            obj_stack,
+            force_container_type=force_outer_container_type,
+            allow_non_container=force_outer_container_type is None,
+            *args,
+            **kwargs
+        )
 
         while obj_stack:
             expanding = obj_stack.pop()
@@ -144,7 +160,9 @@ def make_random_obj(
                         expanding[TestFormatting.make_random_non_container(*args, **kwargs)] = \
                             TestFormatting.make_random_non_container(*args, **kwargs)
                 else:
-                    if alternate_containers:
+                    if not allow_lists:
+                        force_container_type = dict
+                    elif alternate_containers:
                         force_container_type = list
                     else:
                         force_container_type = None
@@ -163,7 +181,9 @@ def make_random_obj(
                 if size == 0 and not allow_empty_containers:
                     expanding.append(TestFormatting.make_random_non_container(*args, **kwargs))
                 else:
-                    if alternate_containers:
+                    if not lists_can_contain_dicts and allow_lists:
+                        force_container_type = list
+                    elif alternate_containers:
                         force_container_type = dict
                     else:
                         force_container_type = None
@@ -197,6 +217,22 @@ def test_csv_formatting(self):
             writer.writerow(row)
         return orig_obj, s.getvalue()
 
+    @filetype_test(iterations=200)
+    def test_toml_formatting(self):
+        orig_obj = TestFormatting.make_random_obj(
+            force_string_keys=True,
+            exclude_bytes=frozenset('\t \\\'"\r:[]{}&\n()`|+%<>#*^%$@!~_+-=.,;?/'),
+            allow_empty_containers=False,
+            force_outer_container_type=dict,
+            lists_can_contain_dicts=False,
+            allow_lists=False,
+            allow_empty_strings=False
+        )
+        try:
+            return orig_obj, toml.dumps(orig_obj)
+        except (TypeError, ValueError, IndexError) as e:
+            self.fail(f"""Invalid random TOML object {orig_obj!r}: {e}""")
+
     @staticmethod
     def make_random_xml() -> xml.XMLElementObj:
         ret = xml.XMLElementObj('', {})