diff --git a/lark/common.py b/lark/common.py index d6be890a..71b6a4c1 100644 --- a/lark/common.py +++ b/lark/common.py @@ -8,10 +8,7 @@ from .lexer import Lexer from .grammar import Rule from typing import Union, Type - if sys.version_info >= (3, 8): - from typing import Literal - else: - from typing_extensions import Literal + from typing import Literal if sys.version_info >= (3, 10): from typing import TypeAlias else: diff --git a/lark/lark.py b/lark/lark.py index 6d34aa62..7ae1f240 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -12,14 +12,11 @@ from .parsers.lalr_interactive_parser import InteractiveParser from .tree import ParseTree from .visitors import Transformer - if sys.version_info >= (3, 8): - from typing import Literal - else: - from typing_extensions import Literal + from typing import Literal from .parser_frontends import ParsingFrontend from .exceptions import ConfigurationError, assert_config, UnexpectedInput -from .utils import Serialize, SerializeMemoizer, FS, isascii, logger +from .utils import Serialize, SerializeMemoizer, FS, logger from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource, sha256_digest from .tree import Tree from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType @@ -303,7 +300,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None: if isinstance(grammar, str): self.source_grammar = grammar if self.options.use_bytes: - if not isascii(grammar): + if not grammar.isascii(): raise ConfigurationError("Grammar must be ascii only, when use_bytes=True") if self.options.cache: diff --git a/lark/tools/__init__.py b/lark/tools/__init__.py index c6995c69..eeb40e10 100644 --- a/lark/tools/__init__.py +++ b/lark/tools/__init__.py @@ -28,9 +28,8 @@ lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase Logger output level, up to three times") lalr_argparser.add_argument('-s', '--start', action='append', default=[]) lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('basic', 'contextual')) -encoding: Optional[str] = 'utf-8' if sys.version_info > (3, 4) else None -lalr_argparser.add_argument('-o', '--out', type=FileType('w', encoding=encoding), default=sys.stdout, help='the output file (default=stdout)') -lalr_argparser.add_argument('grammar_file', type=FileType('r', encoding=encoding), help='A valid .lark file') +lalr_argparser.add_argument('-o', '--out', type=FileType('w', encoding='utf-8'), default=sys.stdout, help='the output file (default=stdout)') +lalr_argparser.add_argument('grammar_file', type=FileType('r', encoding='utf-8'), help='A valid .lark file') for flag in flags: if isinstance(flag, tuple): diff --git a/lark/tree.py b/lark/tree.py index 438837eb..76f8738e 100644 --- a/lark/tree.py +++ b/lark/tree.py @@ -9,13 +9,9 @@ import rich except ImportError: pass - if sys.version_info >= (3, 8): - from typing import Literal - else: - from typing_extensions import Literal + from typing import Literal ###{standalone -from collections import OrderedDict class Meta: @@ -140,11 +136,10 @@ def iter_subtrees(self) -> 'Iterator[Tree[_Leaf_T]]': Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG). """ queue = [self] - subtrees = OrderedDict() + subtrees = dict() for subtree in queue: subtrees[id(subtree)] = subtree - # Reason for type ignore https://github.com/python/mypy/issues/10999 - queue += [c for c in reversed(subtree.children) # type: ignore[misc] + queue += [c for c in reversed(subtree.children) if isinstance(c, Tree) and id(c) not in subtrees] del queue @@ -242,7 +237,7 @@ def pydot__tree_to_graph(tree: Tree, rankdir="LR", **kwargs): possible attributes, see https://www.graphviz.org/doc/info/attrs.html. """ - import pydot # type: ignore[import] + import pydot # type: ignore[import-not-found] graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs) i = [0] diff --git a/lark/utils.py b/lark/utils.py index 70ac27e2..04d6eae2 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -68,7 +68,7 @@ def serialize(self, memo = None) -> Dict[str, Any]: res = {f: _serialize(getattr(self, f), memo) for f in fields} res['__type__'] = type(self).__name__ if hasattr(self, '_serialize'): - self._serialize(res, memo) # type: ignore[attr-defined] + self._serialize(res, memo) return res @classmethod @@ -89,7 +89,7 @@ def deserialize(cls: Type[_T], data: Dict[str, Any], memo: Dict[int, Any]) -> _T raise KeyError("Cannot find key for class", cls, e) if hasattr(inst, '_deserialize'): - inst._deserialize() # type: ignore[attr-defined] + inst._deserialize() return inst @@ -141,7 +141,7 @@ def get_regexp_width(expr: str) -> Union[Tuple[int, int], List[int]]: regexp_final = expr try: # Fixed in next version (past 0.960) of typeshed - return [int(x) for x in sre_parse.parse(regexp_final).getwidth()] # type: ignore[attr-defined] + return [int(x) for x in sre_parse.parse(regexp_final).getwidth()] except sre_constants.error: if not _has_regex: raise ValueError(expr) @@ -188,11 +188,7 @@ def dedup_list(l: Sequence[T]) -> List[T]: """Given a list (l) will removing duplicates from the list, preserving the original order of the list. Assumes that the list entries are hashable.""" - dedup = set() - # This returns None, but that's expected - return [x for x in l if not (x in dedup or dedup.add(x))] # type: ignore[func-returns-value] - # 2x faster (ordered in PyPy and CPython 3.6+, guaranteed to be ordered in Python 3.7+) - # return list(dict.fromkeys(l)) + return list(dict.fromkeys(l)) class Enumerator(Serialize): @@ -234,8 +230,7 @@ def combine_alternatives(lists): return list(product(*lists)) try: - # atomicwrites doesn't have type bindings - import atomicwrites # type: ignore[import] + import atomicwrites _has_atomicwrites = True except ImportError: _has_atomicwrites = False @@ -251,19 +246,6 @@ def open(name, mode="r", **kwargs): return open(name, mode, **kwargs) - -def isascii(s: str) -> bool: - """ str.isascii only exists in python3.7+ """ - if sys.version_info >= (3, 7): - return s.isascii() - else: - try: - s.encode('ascii') - return True - except (UnicodeDecodeError, UnicodeEncodeError): - return False - - class fzset(frozenset): def __repr__(self): return '{%s}' % ', '.join(map(repr, self)) diff --git a/lark/visitors.py b/lark/visitors.py index 2d30c53e..18455d9e 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -474,8 +474,7 @@ class _VArgsWrapper: def __init__(self, func: Callable, visit_wrapper: Callable[[Callable, str, list, Any], Any]): if isinstance(func, _VArgsWrapper): func = func.base_func - # https://github.com/python/mypy/issues/708 - self.base_func = func # type: ignore[assignment] + self.base_func = func self.visit_wrapper = visit_wrapper update_wrapper(self, func) diff --git a/tests/test_cache.py b/tests/test_cache.py index 24e05bb4..e10a17b6 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -7,17 +7,14 @@ from lark.lexer import Lexer, Token import lark.lark as lark_module -try: - from StringIO import StringIO -except ImportError: - from io import BytesIO as StringIO +from io import BytesIO try: import regex except ImportError: regex = None -class MockFile(StringIO): +class MockFile(BytesIO): def close(self): pass def __enter__(self): diff --git a/tests/test_logger.py b/tests/test_logger.py index e52a2c83..023ad6e9 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -3,10 +3,7 @@ from lark import Lark, logger from unittest import TestCase, main, skipIf -try: - from StringIO import StringIO -except ImportError: - from io import StringIO +from io import StringIO try: import interegular diff --git a/tests/test_parser.py b/tests/test_parser.py index 1ab705b0..f10b2a12 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -7,8 +7,6 @@ import sys from copy import copy, deepcopy -from lark.utils import isascii - from lark import Token, Transformer_NonRecursive, LexError from io import ( @@ -968,7 +966,7 @@ class DualBytesLark: def __init__(self, g, *args, **kwargs): self.text_lexer = Lark(g, *args, use_bytes=False, **kwargs) g = self.text_lexer.grammar_source.lower() - if '\\u' in g or not isascii(g): + if '\\u' in g or not g.isascii(): # Bytes re can't deal with uniode escapes self.bytes_lark = None else: @@ -977,7 +975,7 @@ def __init__(self, g, *args, **kwargs): def parse(self, text, start=None): # TODO: Easy workaround, more complex checks would be beneficial - if not isascii(text) or self.bytes_lark is None: + if not text.isascii() or self.bytes_lark is None: return self.text_lexer.parse(text, start) try: rv = self.text_lexer.parse(text, start) diff --git a/tests/test_reconstructor.py b/tests/test_reconstructor.py index 0ffca23a..a3cdcbd8 100644 --- a/tests/test_reconstructor.py +++ b/tests/test_reconstructor.py @@ -154,7 +154,6 @@ def test_keep_all_tokens(self): for code in examples: self.assert_reconstruct(g, code, keep_all_tokens=True) - @unittest.skipIf(sys.version_info < (3, 0), "Python 2 does not play well with Unicode.") def test_switch_grammar_unicode_terminal(self): """ This test checks that a parse tree built with a grammar containing only ascii characters can be reconstructed