Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
[![Slack Status](https://slack.empirehacking.nyc/badge.svg)](https://slack.empirehacking.nyc)

Graphtage is a command-line utility and [underlying library](https://trailofbits.github.io/graphtage/latest/library.html)
for semantically comparing and merging tree-like structures, such as JSON, XML, HTML, YAML, plist, and CSS files. Its name is a
for semantically comparing and merging tree-like structures, such as JSON, XML, HTML, YAML, TOML, plist, and CSS files. Its name is a
portmanteau of “graph” and “graftage”—the latter being the horticultural practice of joining two trees together such
that they grow as one.

Expand Down
2 changes: 1 addition & 1 deletion graphtage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
ast, bounds, builder, constraints, dataclasses, edits, expressions, fibonacci, formatter, levenshtein, matching,
object_set, pickle, printer, pydiff, search, sequences, tree, utils
)
from . import csv, json, xml, yaml, plist
from . import csv, json, plist, toml, xml, yaml

import inspect

Expand Down
2 changes: 2 additions & 0 deletions graphtage/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,8 @@ def printer_type(*pos_args, **kwargs):
mimetypes.suffix_map['.yaml'] = '.yml'
if '.json5' not in mimetypes.types_map:
mimetypes.add_type('application/json5', '.json5')
if '.toml' not in mimetypes.types_map:
mimetypes.add_type('application/toml', '.toml')
if '.plist' not in mimetypes.types_map:
mimetypes.add_type('application/x-plist', '.plist')
if '.pkl' not in mimetypes.types_map and '.pickle' not in mimetypes.types_map:
Expand Down
199 changes: 199 additions & 0 deletions graphtage/toml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
import itertools
import os
from typing import Iterator, Optional, Tuple, Union

import toml

from . import json
from .graphtage import BuildOptions, Filetype, KeyValuePairNode, LeafNode, MappingNode, StringFormatter, StringNode
from .printer import Printer
from .sequences import SequenceFormatter
from .tree import GraphtageFormatter, TreeNode


def build_tree(path: str, options: Optional[BuildOptions]) -> TreeNode:
    """Parse the TOML file at ``path`` and build a Graphtage tree from its contents.

    The file is parsed with :func:`toml.load`, and the resulting Python object is handed to
    :func:`graphtage.json.build_tree` together with ``options``.

    Args:
        path: Path of the TOML file to read.
        options: Build options forwarded unchanged to the JSON tree builder; may be :const:`None`.

    Returns:
        The tree returned by :func:`graphtage.json.build_tree`.

    """
    # TOML documents are UTF-8 by specification, so do not depend on the platform's default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return json.build_tree(toml.load(f), options)


class TOMLListFormatter(SequenceFormatter):
    """Sub-formatter that renders lists when producing TOML output."""
    is_partial = True

    def __init__(self):
        """Create the formatter with ``[`` / ``]`` as delimiters and ``,`` as the separator.

        This is the same as calling::

            super().__init__('[', ']', ',')

        """
        super().__init__('[', ']', ',')

    def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
        """Deliberate no-op: this override writes nothing to ``printer``."""
        return None

    def print_ListNode(self, *args, **kwargs):
        """Print a :class:`graphtage.ListNode` using the inherited sequence printing.

        All arguments are passed through untouched::

            super().print_SequenceNode(*args, **kwargs)

        """
        super().print_SequenceNode(*args, **kwargs)

    def print_SequenceNode(self, *args, **kwargs):
        """Print a sequence that is not a list by handing it back to the parent formatter.

        The call made is::

            self.parent.print(*args, **kwargs)

        The parent is expected to be a :class:`TOMLFormatter`, so a mapping that appears inside a list (which can
        happen when content from another format is rendered as TOML) is handled by that formatter's mapping
        printing rather than by this list formatter.

        """
        parent_formatter = self.parent
        parent_formatter.print(*args, **kwargs)


def toml_dumps(obj) -> str:
    """Serialize a single value to its TOML textual representation.

    ``obj`` is wrapped in a one-entry table under the key ``result`` and passed to :func:`toml.dumps`. The leading
    ``result = `` and the trailing newline are then removed so that only the value's own text remains.

    Args:
        obj: The value to serialize.

    Returns:
        The text that :func:`toml.dumps` emitted for ``obj``, without the key prefix or the trailing newline.

    Raises:
        ValueError: If the serialized text is not a single ``result = <value>`` line terminated by a newline.

    """
    expected_prefix = 'result = '
    expected_suffix = '\n'
    s = toml.dumps({'result': obj})
    # Validate explicitly instead of with ``assert``: assertions are stripped under ``python -O``, in which case
    # an unexpected layout would be sliced silently and yield a corrupted value.
    if not (s.startswith(expected_prefix) and s.endswith(expected_suffix)):
        raise ValueError(f'Unexpected TOML serialization of {obj!r}: {s!r}')
    return s[len(expected_prefix):-len(expected_suffix)]


class TOMLStringFormatter(StringFormatter):
    """Sub-formatter that escapes strings for TOML output."""
    is_partial = True

    def escape(self, c: str) -> str:
        """Return ``c`` as serialized by :func:`toml_dumps`, without surrounding double quotes.

        If the serialized text both starts and ends with ``"``, those two characters are dropped; otherwise the
        serialized text is returned unchanged.

        """
        dumped = toml_dumps(c)
        is_double_quoted = dumped.startswith('"') and dumped.endswith('"')
        return dumped[1:-1] if is_double_quoted else dumped


class TOMLMapping:
    """A view of a :class:`MappingNode` as a TOML table.

    Each instance remembers the table that contains it and the key under which it is stored, so that the table's
    full name can be rebuilt. Entries are split into those whose value is itself a mapping (:meth:`children`) and
    all remaining entries (:meth:`items`).

    """

    def __init__(
            self,
            mapping: MappingNode,
            parent: Optional['TOMLMapping'] = None,
            parent_name: Optional[TreeNode] = None
    ):
        """Wrap ``mapping``, optionally recording the enclosing table and this table's key within it."""
        self.mapping: MappingNode = mapping
        self.parent: Optional[TOMLMapping] = parent
        self.parent_name: Optional[TreeNode] = parent_name

    @property
    def name_segments(self) -> Tuple[TreeNode, ...]:
        """The key nodes leading from the root table to this one; empty for a table without a parent."""
        if self.parent is not None:
            return self.parent.name_segments + (self.parent_name,)
        return ()

    def _all_pairs(self):
        """Iterate over the mapping's own pairs followed by its inserted pairs, if it has been edited."""
        node = self.mapping
        extra = ()
        if node.edited and node.inserted:
            extra = node.inserted
        return itertools.chain(node, extra)

    def items(self) -> Iterator[KeyValuePairNode]:
        """Yield every key/value pair whose value is not a :class:`MappingNode`."""
        for pair in self._all_pairs():
            if isinstance(pair.value, MappingNode):
                continue
            yield pair

    def __bool__(self):
        """Truthy when the table has at least one item, or when it has neither items nor child tables.

        A table holding nothing but child tables is falsy.

        """
        nothing = object()
        if next(self.items(), nothing) is not nothing:
            return True
        return next(self.children(), nothing) is nothing

    def children(self) -> Iterator['TOMLMapping']:
        """Yield a :class:`TOMLMapping` for every key/value pair whose value is a :class:`MappingNode`."""
        for pair in self._all_pairs():
            if isinstance(pair.value, MappingNode):
                yield TOMLMapping(mapping=pair.value, parent=self, parent_name=pair.key)


class TOMLFormatter(GraphtageFormatter):
    """Formatter that prints a Graphtage tree as TOML.

    Lists and strings are handled by the :class:`TOMLListFormatter` and :class:`TOMLStringFormatter`
    sub-formatters.

    """
    sub_format_types = [TOMLListFormatter, TOMLStringFormatter]

    def print(self, printer: Printer, *args, **kwargs):
        """Print through the parent implementation after setting the printer's indentation string."""
        # TOML has optional indentation; make it only two spaces, if we use it:
        printer.indent_str = '  '
        super().print(printer, *args, **kwargs)

    def print_LeafNode(self, printer: Printer, node: LeafNode):
        """Write the TOML serialization of the leaf's underlying Python object."""
        printer.write(toml_dumps(node.object))

    def print_KeyValuePairNode(self, printer: Printer, node: KeyValuePairNode):
        """Print ``key = value`` followed by a newline.

        String keys are printed unquoted and string values are printed quoted.

        """
        if isinstance(node.key, StringNode):
            node.key.quoted = False
        self.print(printer, node.key)
        printer.write(' = ')
        if isinstance(node.value, StringNode):
            node.value.quoted = True
        self.print(printer, node.value)
        printer.newline()

    def print_MappingNode(self, printer: Printer, node: MappingNode):
        """Print a mapping and all of its nested mappings as TOML tables.

        Every table that is truthy (see :meth:`TOMLMapping.__bool__`) is printed as an optional ``[dotted.name]``
        header, which is omitted for the root table, followed by its non-mapping entries and a newline. Nested
        mappings are queued and printed as tables of their own.

        """
        # Pending tables are kept on a stack, so the most recently queued child is printed next.
        mappings = [TOMLMapping(node)]
        while mappings:
            m: TOMLMapping = mappings.pop()
            if m:
                name = m.name_segments
                if name:
                    printer.write('[')
                    for i, s in enumerate(name):
                        if i > 0:
                            printer.write('.')
                        if isinstance(s, StringNode):
                            s.quoted = False
                        self.print(printer, s)
                    printer.write(']')
                    printer.newline()
                for kvp in m.items():
                    self.print(printer, kvp)
                printer.newline()
            # Children are queued even when the table itself printed nothing, so nested tables are still reached.
            mappings.extend(m.children())


class TOML(Filetype):
    """The TOML filetype."""
    def __init__(self):
        """Initializes the TOML filetype.

        TOML identifies itself with the MIME types `application/toml` and `text/toml`.

        """
        super().__init__(
            'toml',
            'application/toml',
            'text/toml'
        )

    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
        """Equivalent to :func:`build_tree`"""
        return build_tree(path, options=options)

    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
        """Build the tree for ``path``, returning an error message instead of raising on a parse failure.

        Args:
            path: Path of the TOML file to read.
            options: Build options forwarded to :meth:`build_tree`.

        Returns:
            The tree on success. If :meth:`build_tree` raises :exc:`IndexError`, :exc:`TypeError` or
            :exc:`ValueError`, a string naming the file and describing the error is returned instead.

        """
        try:
            return self.build_tree(path=path, options=options)
        except (IndexError, TypeError, ValueError) as e:
            return f'Error parsing {os.path.basename(path)}: {e}'

    def get_default_formatter(self) -> TOMLFormatter:
        """Return the default :class:`TOMLFormatter` instance."""
        return TOMLFormatter.DEFAULT_INSTANCE
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ dependencies = [
"numpy>=1.19.4",
"PyYAML",
"scipy>=1.4.0",
"toml>=0.10.2",
"tqdm",
"typing_extensions>=3.7.4.3",
]
Expand Down
48 changes: 42 additions & 6 deletions test/test_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import FrozenSet, Optional, Tuple, Type, Union
from unittest import TestCase

import toml
import yaml
from tqdm import trange

Expand Down Expand Up @@ -103,11 +104,17 @@ def make_random_non_container(exclude_bytes: FrozenSet[str] = frozenset(), allow
])()

@staticmethod
def _make_random_obj(obj_stack, force_container_type: Optional[Type[Union[dict, list]]] = None, *args, **kwargs):
def _make_random_obj(
obj_stack,
force_container_type: Optional[Type[Union[dict, list]]] = None,
allow_non_container: bool = True,
*args,
**kwargs
):
r = random.random()
NON_CONTAINER_PROB = 0.1
NON_CONTAINER_PROB = [0.0, 0.1][allow_non_container]
CONTAINER_PROB = (1.0 - NON_CONTAINER_PROB) / 2.0
if r <= NON_CONTAINER_PROB:
if r <= NON_CONTAINER_PROB and allow_non_container:
ret = TestFormatting.make_random_non_container(*args, **kwargs)
elif r <= NON_CONTAINER_PROB + CONTAINER_PROB:
if force_container_type is not None:
Expand All @@ -128,9 +135,18 @@ def make_random_obj(
force_string_keys: bool = False,
allow_empty_containers: bool = True,
alternate_containers: bool = False,
lists_can_contain_dicts: bool = True,
force_outer_container_type: Optional[Type[Union[dict, list]]] = None,
allow_lists: bool = True,
*args, **kwargs):
obj_stack = []
ret = TestFormatting._make_random_obj(obj_stack, *args, **kwargs)
ret = TestFormatting._make_random_obj(
obj_stack,
force_container_type=force_outer_container_type,
allow_non_container=force_outer_container_type is None,
*args,
**kwargs
)

while obj_stack:
expanding = obj_stack.pop()
Expand All @@ -144,7 +160,9 @@ def make_random_obj(
expanding[TestFormatting.make_random_non_container(*args, **kwargs)] = \
TestFormatting.make_random_non_container(*args, **kwargs)
else:
if alternate_containers:
if not allow_lists:
force_container_type = dict
elif alternate_containers:
force_container_type = list
else:
force_container_type = None
Expand All @@ -163,7 +181,9 @@ def make_random_obj(
if size == 0 and not allow_empty_containers:
expanding.append(TestFormatting.make_random_non_container(*args, **kwargs))
else:
if alternate_containers:
if not lists_can_contain_dicts and allow_lists:
force_container_type = list
elif alternate_containers:
force_container_type = dict
else:
force_container_type = None
Expand Down Expand Up @@ -197,6 +217,22 @@ def test_csv_formatting(self):
writer.writerow(row)
return orig_obj, s.getvalue()

@filetype_test(iterations=200)
def test_toml_formatting(self):
    """Generate a random dict-rooted object and return it together with its TOML serialization.

    The object is built with string keys only, no lists, no empty containers or empty strings, and with a set of
    whitespace and punctuation characters excluded. The test fails if :func:`toml.dumps` rejects the object.

    """
    generation_options = dict(
        force_string_keys=True,
        exclude_bytes=frozenset('\t \\\'"\r:[]{}&\n()`|+%<>#*^%$@!~_+-=.,;?/'),
        allow_empty_containers=False,
        force_outer_container_type=dict,
        lists_can_contain_dicts=False,
        allow_lists=False,
        allow_empty_strings=False,
    )
    orig_obj = TestFormatting.make_random_obj(**generation_options)
    try:
        serialized = toml.dumps(orig_obj)
    except (TypeError, ValueError, IndexError) as e:
        # self.fail() raises, so control never reaches the return below without a serialization.
        self.fail(f"""Invalid random TOML object {orig_obj!r}: {e}""")
    return orig_obj, serialized

@staticmethod
def make_random_xml() -> xml.XMLElementObj:
ret = xml.XMLElementObj('', {})
Expand Down