Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A bit of cleanup, improve test coverage #1020

Merged
merged 2 commits into from
Oct 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions lark/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from .utils import logger, NO_VALUE
from typing import Mapping, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, Collection, \
TYPE_CHECKING
from typing import Mapping, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, Collection, TYPE_CHECKING

if TYPE_CHECKING:
from .lexer import Token
Expand Down
58 changes: 3 additions & 55 deletions lark/parsers/earley.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from ..utils import logger
from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal
from .earley_common import Item, TransitiveItem
from .earley_common import Item
from .earley_forest import ForestSumVisitor, SymbolNode, TokenNode, ForestToParseTree

class Parser:
Expand Down Expand Up @@ -169,60 +169,8 @@ def is_quasi_complete(item):
quasi = quasi.advance()
return True

def create_leo_transitives(origin, start):
    """Create chained ``TransitiveItem`` entries for ``origin`` at column ``start``.

    First walks backwards through the Earley sets, following a chain of
    unique, quasi-complete candidates, until an existing transitive item
    is found or the chain ends. Then walks forwards again, storing one
    ``TransitiveItem`` in ``transitives`` per hop and linking each new
    item to the previous one through ``next_titem``.

    Returns ``None``. Nothing is created when no transitive rule was found.

    Relies on ``self``, ``columns``, ``transitives`` and
    ``is_quasi_complete`` from the enclosing scope.
    """
    visited = set()
    to_create = []
    trule = None
    previous = None

    ### Recursively walk backwards through the Earley sets until we find the
    #   first transitive candidate. If this is done continuously, we shouldn't
    #   have to walk more than 1 hop.
    while True:
        if origin in transitives[start]:
            previous = trule = transitives[start][origin]
            break

        is_empty_rule = not self.FIRST[origin]
        if is_empty_rule:
            break

        candidates = [ candidate for candidate in columns[start] if candidate.expect is not None and origin == candidate.expect ]
        # Only an unambiguous (single) candidate can be followed.
        if len(candidates) != 1:
            break
        originator = next(iter(candidates))

        if originator is None or originator in visited:
            break

        visited.add(originator)
        if not is_quasi_complete(originator):
            break

        trule = originator.advance()
        # The cycle guard is per column: reset it once we hop to another set.
        if originator.start != start:
            visited.clear()

        to_create.append((origin, start, originator))
        origin = originator.rule.origin
        start = originator.start

    # If a suitable Transitive candidate is not found, bail.
    if trule is None:
        return

    #### Now walk forwards and create Transitive Items in each set we walked through; and link
    #    each transitive item to the next set forwards.
    while to_create:
        origin, start, originator = to_create.pop()
        if previous is not None:
            titem = previous.next_titem = TransitiveItem(origin, trule, originator, previous.column)
        else:
            titem = TransitiveItem(origin, trule, originator, start)
        previous = transitives[start][origin] = titem


# def create_leo_transitives(origin, start):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why leave it as a comment? If we need it later, we still have the Git history.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll probably forget it even exists.

Now it's almost like a TODO. But perhaps I can replace it with a TODO and a git ref.

# ... # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420

def scan(i, token, to_scan):
"""The core Earley Scanner.
Expand Down
24 changes: 2 additions & 22 deletions lark/parsers/earley_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,25 +38,5 @@ def __repr__(self):
return '%s (%d)' % (symbol, self.start)


class TransitiveItem(Item):
    """An ``Item`` built from a transitive rule, tagged with the symbol it recognizes.

    Attributes (see ``__slots__``):
        recognized: the symbol this transitive item stands for.
        reduction: the originating item passed to the constructor.
        column: the column value passed as ``start`` to the constructor.
        next_titem: the next transitive item in the chain, or ``None``.
    """
    __slots__ = ('recognized', 'reduction', 'column', 'next_titem')

    def __init__(self, recognized, trule, originator, start):
        # The underlying Item is a copy of ``trule`` (same rule, ptr and start).
        super(TransitiveItem, self).__init__(trule.rule, trule.ptr, trule.start)
        self.recognized = recognized
        self.reduction = originator
        self.column = start
        self.next_titem = None
        # Hash is precomputed once from the fields that __eq__ compares.
        self._hash = hash((self.s, self.start, self.recognized))

    def __eq__(self, other):
        """Equal only to another ``TransitiveItem`` with the same ``s``, ``start`` and ``recognized``."""
        if not isinstance(other, TransitiveItem):
            return False
        return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.recognized == other.recognized)

    def __hash__(self):
        return self._hash

    def __repr__(self):
        before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
        after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
        return '{} : {} -> {}* {} ({}, {})'.format(self.recognized.name, self.rule.origin.name, ' '.join(before), ' '.join(after), self.column, self.start)
# class TransitiveItem(Item):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here

# ... # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420
4 changes: 2 additions & 2 deletions lark/parsers/lalr_interactive_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

from copy import copy

from .. import Token
from ..exceptions import UnexpectedToken
from lark.exceptions import UnexpectedToken
from lark.lexer import Token


class InteractiveParser:
Expand Down
6 changes: 0 additions & 6 deletions lark/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,6 @@ def find_data(self, data: str) -> 'Iterator[Tree]':

###}

def expand_kids_by_index(self, *indices: int) -> None:
    """Expand (inline) children at the given indices.

    Each child at a listed index is replaced, in place, by its own
    ``children``. An index given more than once is expanded only once;
    otherwise the second pass would expand whatever the first pass had
    just spliced into that position.
    """
    # Highest index first, so that changing the tail won't affect the
    # indices still waiting to be processed.
    for i in sorted(set(indices), reverse=True):
        kid = self.children[i]
        self.children[i:i+1] = kid.children

def expand_kids_by_data(self, *data_values):
"""Expand (inline) children with any of the given data values. Returns True if anything changed"""
changed = False
Expand Down
15 changes: 9 additions & 6 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,15 @@ def lex(self, text):
self.assertEqual(r, Tree('start', [Token('A', 'A')]))


def test_lexer_token_limit(self):
    "Python has a limit of 100 groups in a regular expression. Test that we handle this limitation"
    # 300 terminals: A0: "0", A1: "1", ... - well beyond the 100-group limit.
    tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
    rule_body = ' '.join(tokens)
    token_defs = '\n'.join("%s: %s"%x for x in tokens.items())
    g = "start: %s\n%s" % (rule_body, token_defs)

    # Constructing the parser is the whole test: it must not raise.
    Lark(g, parser='lalr')



def _make_full_earley_test(LEXER):
def _Lark(grammar, **kwargs):
Expand Down Expand Up @@ -1590,12 +1599,6 @@ def test_join_regex_flags(self):
self.assertRaises(UnexpectedCharacters, g.parse, "C")


def test_lexer_token_limit(self):
    "Python has a limit of 100 groups in a regular expression. Test that we handle this limitation"
    # 300 terminals: A0: "0", A1: "1", ... - well beyond the 100-group limit.
    tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
    rule_body = ' '.join(tokens)
    token_defs = '\n'.join("%s: %s"%x for x in tokens.items())

    # Constructing the parser is the whole test: it must not raise.
    _Lark("start: %s\n%s" % (rule_body, token_defs))

def test_float_without_lexer(self):
expected_error = UnexpectedCharacters if 'dynamic' in LEXER else UnexpectedToken
if PARSER == 'cyk':
Expand Down
7 changes: 7 additions & 0 deletions tests/test_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ class TestTrees(TestCase):
def setUp(self):
    """Build the fixture tree: root 'a' with children 'b', 'c', 'd' wrapping 'x', 'y', 'z' respectively."""
    self.tree1 = Tree('a', [Tree(x, y) for x, y in zip('bcd', 'xyz')])

def test_eq(self):
    """The fixture tree equals itself and differs from a non-tree value."""
    assert self.tree1 == self.tree1
    assert self.tree1 != 0

def test_copy(self):
    """A shallow copy of the fixture tree compares equal to the original."""
    assert self.tree1 == copy.copy(self.tree1)

def test_deepcopy(self):
    """A deep copy of the fixture tree compares equal to the original."""
    assert self.tree1 == copy.deepcopy(self.tree1)

Expand Down