Skip to content

Commit

Permalink
A bit of cleanup, improve test coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
erezsh committed Oct 19, 2021
1 parent d0d67b8 commit 4c1cfb2
Show file tree
Hide file tree
Showing 7 changed files with 94 additions and 91 deletions.
3 changes: 1 addition & 2 deletions lark/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from .utils import logger, NO_VALUE
from typing import Mapping, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, Collection, \
TYPE_CHECKING
from typing import Mapping, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, Collection, TYPE_CHECKING

if TYPE_CHECKING:
from .lexer import Token
Expand Down
106 changes: 53 additions & 53 deletions lark/parsers/earley.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from ..utils import logger
from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal
from .earley_common import Item, TransitiveItem
from .earley_common import Item
from .earley_forest import ForestSumVisitor, SymbolNode, TokenNode, ForestToParseTree

class Parser:
Expand Down Expand Up @@ -169,58 +169,58 @@ def is_quasi_complete(item):
quasi = quasi.advance()
return True

def create_leo_transitives(origin, start):
    """Build the chain of transitive items for ``origin`` at Earley set ``start``.

    This appears to implement Leo's right-recursion optimization: walk
    backwards through the Earley sets collecting "quasi-complete"
    originator items, then create linked ``TransitiveItem`` shortcuts
    forwards so completing a right-recursive rule doesn't have to
    re-traverse every intermediate item.

    NOTE(review): relies on ``transitives``, ``columns``, ``self`` and
    ``is_quasi_complete`` from the enclosing scope — confirm against the
    surrounding method, which is not visible here.
    """
    visited = set()
    to_create = []
    trule = None
    previous = None

    ### Recursively walk backwards through the Earley sets until we find the
    # first transitive candidate. If this is done continuously, we shouldn't
    # have to walk more than 1 hop.
    while True:
        # Already have a transitive item for this (start, origin)? Reuse it
        # as the chain head and stop walking.
        if origin in transitives[start]:
            previous = trule = transitives[start][origin]
            break

        # Nullable origins can't anchor a transitive chain — bail out.
        is_empty_rule = not self.FIRST[origin]
        if is_empty_rule:
            break

        # The optimization only applies when exactly one item in this set
        # is waiting on `origin` (a deterministic reduction path).
        candidates = [ candidate for candidate in columns[start] if candidate.expect is not None and origin == candidate.expect ]
        if len(candidates) != 1:
            break
        originator = next(iter(candidates))

        # Cycle guard: revisiting an originator means we've looped.
        if originator is None or originator in visited:
            break

        visited.add(originator)
        if not is_quasi_complete(originator):
            break

        trule = originator.advance()
        # Crossing into an earlier Earley set resets the cycle guard.
        if originator.start != start:
            visited.clear()

        # Remember this hop so the forward pass can materialize it.
        to_create.append((origin, start, originator))
        origin = originator.rule.origin
        start = originator.start

    # If a suitable Transitive candidate is not found, bail.
    if trule is None:
        return

    #### Now walk forwards and create Transitive Items in each set we walked through; and link
    # each transitive item to the next set forwards.
    while to_create:
        origin, start, originator = to_create.pop()
        titem = None
        if previous is not None:
            # Extend the existing chain: link from the previous item.
            titem = previous.next_titem = TransitiveItem(origin, trule, originator, previous.column)
        else:
            # First item in the chain — anchored at `start` directly.
            titem = TransitiveItem(origin, trule, originator, start)
        previous = transitives[start][origin] = titem
# def create_leo_transitives(origin, start):
# visited = set()
# to_create = []
# trule = None
# previous = None

# ### Recursively walk backwards through the Earley sets until we find the
# # first transitive candidate. If this is done continuously, we shouldn't
# # have to walk more than 1 hop.
# while True:
# if origin in transitives[start]:
# previous = trule = transitives[start][origin]
# break

# is_empty_rule = not self.FIRST[origin]
# if is_empty_rule:
# break

# candidates = [ candidate for candidate in columns[start] if candidate.expect is not None and origin == candidate.expect ]
# if len(candidates) != 1:
# break
# originator = next(iter(candidates))

# if originator is None or originator in visited:
# break

# visited.add(originator)
# if not is_quasi_complete(originator):
# break

# trule = originator.advance()
# if originator.start != start:
# visited.clear()

# to_create.append((origin, start, originator))
# origin = originator.rule.origin
# start = originator.start

# # If a suitable Transitive candidate is not found, bail.
# if trule is None:
# return

# #### Now walk forwards and create Transitive Items in each set we walked through; and link
# # each transitive item to the next set forwards.
# while to_create:
# origin, start, originator = to_create.pop()
# titem = None
# if previous is not None:
# titem = previous.next_titem = TransitiveItem(origin, trule, originator, previous.column)
# else:
# titem = TransitiveItem(origin, trule, originator, start)
# previous = transitives[start][origin] = titem



Expand Down
44 changes: 22 additions & 22 deletions lark/parsers/earley_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,25 +38,25 @@ def __repr__(self):
return '%s (%d)' % (symbol, self.start)


class TransitiveItem(Item):
    """An Earley item recording a "transitive" completion shortcut.

    Created by the Leo right-recursion optimization (see
    ``create_leo_transitives``): it remembers which symbol was recognized
    and which originating item its completion should jump to, so
    right-recursive completions can skip intermediate items.
    """
    # `next_titem` links transitive items forwards across Earley sets.
    __slots__ = ('recognized', 'reduction', 'column', 'next_titem')
    def __init__(self, recognized, trule, originator, start):
        # The underlying item state (rule/ptr/start) is taken from `trule`.
        super(TransitiveItem, self).__init__(trule.rule, trule.ptr, trule.start)
        self.recognized = recognized  # symbol this shortcut stands for
        self.reduction = originator   # item whose completion we jump to
        self.column = start           # Earley set index this item lives in
        self.next_titem = None        # forward link, filled in later
        # Hash is cached because items live in sets/dicts and are compared
        # constantly during parsing; it must agree with __eq__ below.
        self._hash = hash((self.s, self.start, self.recognized))

    def __eq__(self, other):
        # Only comparable to other TransitiveItems; identity short-circuits.
        if not isinstance(other, TransitiveItem):
            return False
        return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.recognized == other.recognized)

    def __hash__(self):
        return self._hash

    def __repr__(self):
        # Dotted-rule rendering: symbols before/after the parse pointer.
        before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
        after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
        return '{} : {} -> {}* {} ({}, {})'.format(self.recognized.name, self.rule.origin.name, ' '.join(before), ' '.join(after), self.column, self.start)
# class TransitiveItem(Item):
# __slots__ = ('recognized', 'reduction', 'column', 'next_titem')
# def __init__(self, recognized, trule, originator, start):
# super(TransitiveItem, self).__init__(trule.rule, trule.ptr, trule.start)
# self.recognized = recognized
# self.reduction = originator
# self.column = start
# self.next_titem = None
# self._hash = hash((self.s, self.start, self.recognized))

# def __eq__(self, other):
# if not isinstance(other, TransitiveItem):
# return False
# return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.recognized == other.recognized)

# def __hash__(self):
# return self._hash

# def __repr__(self):
# before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
# after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
# return '{} : {} -> {}* {} ({}, {})'.format(self.recognized.name, self.rule.origin.name, ' '.join(before), ' '.join(after), self.column, self.start)
4 changes: 2 additions & 2 deletions lark/parsers/lalr_interactive_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

from copy import copy

from .. import Token
from ..exceptions import UnexpectedToken
from lark.exceptions import UnexpectedToken
from lark.lexer import Token


class InteractiveParser:
Expand Down
6 changes: 0 additions & 6 deletions lark/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,6 @@ def find_data(self, data: str) -> 'Iterator[Tree]':

###}

def expand_kids_by_index(self, *indices: int) -> None:
    """Expand (inline) children at the given indices"""
    # Splice back-to-front: handling the largest index first means the
    # list edits never shift a position we still have to visit.
    for index in sorted(indices, reverse=True):
        self.children[index:index + 1] = self.children[index].children

def expand_kids_by_data(self, *data_values):
"""Expand (inline) children with any of the given data values. Returns True if anything changed"""
changed = False
Expand Down
15 changes: 9 additions & 6 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,15 @@ def lex(self, text):
self.assertEqual(r, Tree('start', [Token('A', 'A')]))


def test_lexer_token_limit(self):
    "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
    # 300 distinct terminals force the lexer to split its combined regex
    # into several patterns, since `re` caps the group count per pattern.
    tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
    g = """start: %s
        %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items()))

    # Constructing the parser is the whole test: it must not raise.
    p = Lark(g, parser='lalr')


def _make_full_earley_test(LEXER):
def _Lark(grammar, **kwargs):
Expand Down Expand Up @@ -1590,12 +1599,6 @@ def test_join_regex_flags(self):
self.assertRaises(UnexpectedCharacters, g.parse, "C")


def test_lexer_token_limit(self):
    "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
    # 300 distinct terminals force the lexer to split its combined regex
    # into several patterns, since `re` caps the group count per pattern.
    tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
    # Building the parser is the whole test: it must not raise.
    # NOTE(review): `_Lark` is a closure-provided factory — not visible here.
    g = _Lark("""start: %s
        %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))

def test_float_without_lexer(self):
expected_error = UnexpectedCharacters if 'dynamic' in LEXER else UnexpectedToken
if PARSER == 'cyk':
Expand Down
7 changes: 7 additions & 0 deletions tests/test_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ class TestTrees(TestCase):
def setUp(self):
    # Shared fixture: root 'a' with children Tree('b','x'), Tree('c','y'),
    # Tree('d','z') — note the second arg is a string standing in for the
    # children list here.
    self.tree1 = Tree('a', [Tree(x, y) for x, y in zip('bcd', 'xyz')])

def test_eq(self):
    # Equality is reflexive, and comparison against a non-Tree (here an
    # int) must report inequality rather than raise.
    assert self.tree1 == self.tree1
    assert self.tree1 != 0

def test_copy(self):
    # A shallow copy must compare equal to the original tree.
    assert self.tree1 == copy.copy(self.tree1)

def test_deepcopy(self):
assert self.tree1 == copy.deepcopy(self.tree1)

Expand Down

0 comments on commit 4c1cfb2

Please sign in to comment.