Skip to content

Commit

Permalink
Add edn_format.loads_all to parse all expressions in a string
Browse files: browse the repository at this point in the history
  • Loading branch information
bfontaine committed Sep 27, 2019
1 parent a885ee5 commit c465aa1
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 60 deletions.
3 changes: 2 additions & 1 deletion edn_format/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals

from .edn_lex import Keyword, Symbol
from .edn_parse import parse as loads
from .edn_parse import parse as loads, parse_all as loads_all
from .edn_parse import add_tag, remove_tag, tag, TaggedElement
from .edn_dump import dump as dumps
from .exceptions import EDNDecodeError
Expand All @@ -19,6 +19,7 @@
'add_tag',
'dumps',
'loads',
'loads_all',
'remove_tag',
'tag',
)
18 changes: 15 additions & 3 deletions edn_format/edn_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
if tokens:
pass

start = 'expression'
start = 'expressions'

_serializers = {}

Expand Down Expand Up @@ -180,12 +180,24 @@ def p_error(p):
raise EDNDecodeError(p)


def parse_all(text, input_encoding='utf-8'):
    """
    Parse all objects from the text and return a (possibly empty) list.

    ``text`` that is not already ``unicode`` is decoded with
    ``input_encoding`` before parsing. A parser is built with
    ``ply.yacc.yacc`` on each call and run over the whole text; the parsed
    expressions are copied into a new list before being returned.
    """
    if not isinstance(text, unicode):
        text = text.decode(input_encoding)

    # Only ask PLY for debug output when Python itself runs without -O.
    kwargs = ImmutableDict({})
    if __debug__:
        kwargs = dict(debug=True)
    p = ply.yacc.yacc(**kwargs)

    # There must be exactly one return path here: returning the raw parser
    # result before this point would skip the conversion to a list.
    expressions = p.parse(text, lexer=lex())
    return list(expressions)


def parse(text, input_encoding='utf-8'):
    """
    Parse the text and return its first object, or None when there is none.

    This delegates to ``parse_all`` and keeps only the first parsed
    expression; any further expressions in the text are ignored.
    """
    parsed = parse_all(text, input_encoding=input_encoding)
    if not parsed:
        return None
    return parsed[0]
119 changes: 63 additions & 56 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,64 +84,67 @@ def test_lexer(self):
def check_parse(self, expected_output, actual_input):
    """Assert that ``edn_parse.parse(actual_input)`` equals ``expected_output``."""
    parsed = edn_parse.parse(actual_input)
    self.assertEqual(expected_output, parsed)

def check_parse_all(self, expected_output, actual_input):
    """Assert that ``edn_parse.parse_all(actual_input)`` equals ``expected_output``."""
    parsed = edn_parse.parse_all(actual_input)
    self.assertEqual(expected_output, parsed)

def check_dumps(self, expected_output, actual_input, **kw):
    """Assert that ``dumps(actual_input, **kw)`` equals ``expected_output``."""
    dumped = dumps(actual_input, **kw)
    self.assertEqual(expected_output, dumped)

def test_parser(self):
    """Check that each EDN string parses to the expected Python value.

    Each sample pairs the expected value with the EDN text handed to
    ``check_parse``; samples are checked in the order listed.
    """
    samples = [
        (1, "1"),
        (Symbol("a*b"), 'a*b'),
        ("ab", '"ab"'),
        ('a"b', r'"a\"b"'),
        ("blah\n", '"blah\n"'),
        ([1, 2, 3], "[1 2 3]"),
        ({1, 2, 3}, "#{1 2 3}"),
        ([1, True, None], "[1 true nil]"),
        ("c", r"\c"),
        ("\n", r"\newline"),
        (u"Σ", u"\\Σ"),
        (u"λ", r"\u03bB"),
        (Keyword("abc"), ":abc"),
        ([Keyword("abc"), 1, True, None], "[:abc 1 true nil]"),
        ((Keyword("abc"), 1, True, None), "(:abc 1 true nil)"),
        (tuple(), "()"),
        (set(), "#{}"),
        ({}, "{}"),
        ([], "[]"),
        ({"a": [1, 2, 3]}, '{"a" [1 2 3]}'),
        (datetime.datetime(2012, 12, 22, 19, 40, 18, 0, tzinfo=pytz.utc),
         '#inst "2012-12-22T19:40:18Z"'),
        (datetime.date(2011, 10, 9),
         '#inst "2011-10-09"'),
        ("|", "\"|\""),
        ("%", "\"%\""),
        (['bl"ah'], r"""["bl\"ah"]"""),
        ("blah\n", '"blah\n"'),
        ('"', r'"\""'),
        ('\\', r'"\\"'),
        (["abc", "123"], '["abc", "123"]'),
        ({"key": "value"}, '{"key" "value"}'),
        (frozenset({ImmutableList([u"ab", u"cd"]),
                    ImmutableList([u"ef"])}),
         '#{["ab", "cd"], ["ef"]}'),
        (fractions.Fraction(2, 3), "2/3"),
        ((2, Symbol('/'), 3), "(2 / 3)"),
    ]
    for wanted, edn_text in samples:
        self.check_parse(wanted, edn_text)
def test_parser_single_expressions(self):
    """Check inputs holding one expression against both parser entry points.

    Every EDN string must parse to the expected value through
    ``check_parse`` and to the one-element list wrapping that value through
    ``check_parse_all``.
    """
    cases = [
        (1, "1"),
        (Symbol("a*b"), 'a*b'),
        ("ab", '"ab"'),
        ('a"b', r'"a\"b"'),
        ("blah\n", '"blah\n"'),
        ([1, 2, 3], "[1 2 3]"),
        ({1, 2, 3}, "#{1 2 3}"),
        ([1, True, None], "[1 true nil]"),
        ("c", r"\c"),
        ("\n", r"\newline"),
        (u"Σ", u"\\Σ"),
        (u"λ", r"\u03bB"),
        (Keyword("abc"), ":abc"),
        ([Keyword("abc"), 1, True, None], "[:abc 1 true nil]"),
        ((Keyword("abc"), 1, True, None), "(:abc 1 true nil)"),
        (tuple(), "()"),
        (set(), "#{}"),
        ({}, "{}"),
        ([], "[]"),
        ({"a": [1, 2, 3]}, '{"a" [1 2 3]}'),
        (datetime.datetime(2012, 12, 22, 19, 40, 18, 0, tzinfo=pytz.utc),
         '#inst "2012-12-22T19:40:18Z"'),
        (datetime.date(2011, 10, 9),
         '#inst "2011-10-09"'),
        ("|", "\"|\""),
        ("%", "\"%\""),
        (['bl"ah'], r"""["bl\"ah"]"""),
        ("blah\n", '"blah\n"'),
        ('"', r'"\""'),
        ('\\', r'"\\"'),
        (["abc", "123"], '["abc", "123"]'),
        ({"key": "value"}, '{"key" "value"}'),
        (frozenset({ImmutableList([u"ab", u"cd"]), ImmutableList([u"ef"])}),
         '#{["ab", "cd"], ["ef"]}'),
        (fractions.Fraction(2, 3), "2/3"),
        ((2, Symbol('/'), 3), "(2 / 3)"),
    ]
    for value, edn_text in cases:
        self.check_parse(value, edn_text)
        # parse_all wraps a lone expression in a one-element list.
        self.check_parse_all([value], edn_text)

def test_parser_multiple_expressions(self):
    """Check inputs holding zero or more expressions.

    ``check_parse_all`` must yield the full expected list; when that list is
    non-empty, ``check_parse`` must yield its first element for the same
    text.
    """
    cases = [
        ([], ""),
        ([], " ,,,, ,, , "),
        ([1], ",,,,,,,1,,,,,,,,,"),
        ([1, 2], "1 2"),
        # NOTE(review): identical to the previous case as it appears here;
        # confirm whether a different whitespace variant was intended.
        ([1, 2], "1 2"),
        ([True, 42, False, Symbol('end')], "true 42 false end"),
        ([Symbol("a*b"), 42], 'a*b 42'),
    ]
    for values, edn_text in cases:
        self.check_parse_all(values, edn_text)
        if not values:
            # Nothing to compare against the single-expression parser.
            continue
        self.check_parse(values[0], edn_text)

def check_roundtrip(self, data_input, **kw):
self.assertEqual(data_input, loads(dumps(data_input, **kw)))
Expand All @@ -152,6 +155,10 @@ def check_eof(self, data_input, **kw):

self.assertEqual('EOF Reached', str(ctx.exception))

def check_mismatched_delimiters(self):
    """Run ``check_eof`` on inputs with unclosed delimiters or strings.

    NOTE(review): the name starts with ``check`` rather than ``test``, so
    unittest's default discovery will not run it, and no caller is visible
    in this excerpt. Confirm whether it should be a ``test_...`` method.
    """
    unterminated = ("[", "(", "{", "(((((())", '"', '"\\"')
    for bad_string in unterminated:
        self.check_eof(bad_string)

def test_dump(self):
self.check_roundtrip({1, 2, 3})
self.check_roundtrip({1, 2, 3}, sort_sets=True)
Expand Down Expand Up @@ -408,7 +415,7 @@ def test_discard_all(self):
self.assertEqual([1], loads('[1 #_ {}]'.format(edn_data)), edn_data)
self.assertEqual([1], loads('[#_ {} 1]'.format(edn_data)), edn_data)

self.check_eof('#_ {}'.format(edn_data))
self.assertEqual(None, loads('#_ {}'.format(edn_data)))

for coll in ('[%s]', '(%s)', '{%s}', '#{%s}'):
expected = coll % ""
Expand Down

0 comments on commit c465aa1

Please sign in to comment.