Drop python3.6 & 3.7 from code

lark-parser · Jun 20, 2024 · 236db15 · 236db15
1 parent bb9b01e
commit 236db15
Show file tree

Hide file tree

Showing 10 changed files with 21 additions and 61 deletions.
diff --git a/lark/common.py b/lark/common.py
@@ -8,10 +8,7 @@
     from .lexer import Lexer
     from .grammar import Rule
     from typing import Union, Type
-    if sys.version_info >= (3, 8):
-        from typing import Literal
-    else:
-        from typing_extensions import Literal
+    from typing import Literal
     if sys.version_info >= (3, 10):
         from typing import TypeAlias
     else:

diff --git a/lark/lark.py b/lark/lark.py
@@ -12,14 +12,11 @@
     from .parsers.lalr_interactive_parser import InteractiveParser
     from .tree import ParseTree
     from .visitors import Transformer
-    if sys.version_info >= (3, 8):
-        from typing import Literal
-    else:
-        from typing_extensions import Literal
+    from typing import Literal
     from .parser_frontends import ParsingFrontend
 
 from .exceptions import ConfigurationError, assert_config, UnexpectedInput
-from .utils import Serialize, SerializeMemoizer, FS, isascii, logger
+from .utils import Serialize, SerializeMemoizer, FS, logger
 from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource, sha256_digest
 from .tree import Tree
 from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType
@@ -303,7 +300,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
         if isinstance(grammar, str):
             self.source_grammar = grammar
             if self.options.use_bytes:
-                if not isascii(grammar):
+                if not grammar.isascii():
                     raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
 
             if self.options.cache:

diff --git a/lark/tools/__init__.py b/lark/tools/__init__.py
@@ -28,9 +28,8 @@
 lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase Logger output level, up to three times")
 lalr_argparser.add_argument('-s', '--start', action='append', default=[])
 lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('basic', 'contextual'))
-encoding: Optional[str] = 'utf-8' if sys.version_info > (3, 4) else None
-lalr_argparser.add_argument('-o', '--out', type=FileType('w', encoding=encoding), default=sys.stdout, help='the output file (default=stdout)')
-lalr_argparser.add_argument('grammar_file', type=FileType('r', encoding=encoding), help='A valid .lark file')
+lalr_argparser.add_argument('-o', '--out', type=FileType('w', encoding='utf-8'), default=sys.stdout, help='the output file (default=stdout)')
+lalr_argparser.add_argument('grammar_file', type=FileType('r', encoding='utf-8'), help='A valid .lark file')
 
 for flag in flags:
     if isinstance(flag, tuple):

diff --git a/lark/tree.py b/lark/tree.py
@@ -9,13 +9,9 @@
         import rich
     except ImportError:
         pass
-    if sys.version_info >= (3, 8):
-        from typing import Literal
-    else:
-        from typing_extensions import Literal
+    from typing import Literal
 
 ###{standalone
-from collections import OrderedDict
 
 class Meta:
 
@@ -140,11 +136,10 @@ def iter_subtrees(self) -> 'Iterator[Tree[_Leaf_T]]':
         Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG).
         """
         queue = [self]
-        subtrees = OrderedDict()
+        subtrees = dict()
         for subtree in queue:
             subtrees[id(subtree)] = subtree
-            # Reason for type ignore https://github.com/python/mypy/issues/10999
-            queue += [c for c in reversed(subtree.children)  # type: ignore[misc]
+            queue += [c for c in reversed(subtree.children)
                       if isinstance(c, Tree) and id(c) not in subtrees]
 
         del queue
@@ -242,7 +237,7 @@ def pydot__tree_to_graph(tree: Tree, rankdir="LR", **kwargs):
     possible attributes, see https://www.graphviz.org/doc/info/attrs.html.
     """
 
-    import pydot  # type: ignore[import]
+    import pydot  # type: ignore[import-not-found]
     graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs)
 
     i = [0]

diff --git a/lark/utils.py b/lark/utils.py
@@ -68,7 +68,7 @@ def serialize(self, memo = None) -> Dict[str, Any]:
         res = {f: _serialize(getattr(self, f), memo) for f in fields}
         res['__type__'] = type(self).__name__
         if hasattr(self, '_serialize'):
-            self._serialize(res, memo)  # type: ignore[attr-defined]
+            self._serialize(res, memo)
         return res
 
     @classmethod
@@ -89,7 +89,7 @@ def deserialize(cls: Type[_T], data: Dict[str, Any], memo: Dict[int, Any]) -> _T
                 raise KeyError("Cannot find key for class", cls, e)
 
         if hasattr(inst, '_deserialize'):
-            inst._deserialize()  # type: ignore[attr-defined]
+            inst._deserialize()
 
         return inst
 
@@ -141,7 +141,7 @@ def get_regexp_width(expr: str) -> Union[Tuple[int, int], List[int]]:
         regexp_final = expr
     try:
         # Fixed in next version (past 0.960) of typeshed
-        return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]   # type: ignore[attr-defined]
+        return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
     except sre_constants.error:
         if not _has_regex:
             raise ValueError(expr)
@@ -188,11 +188,7 @@ def dedup_list(l: Sequence[T]) -> List[T]:
     """Given a list (l) will removing duplicates from the list,
        preserving the original order of the list. Assumes that
        the list entries are hashable."""
-    dedup = set()
-    # This returns None, but that's expected
-    return [x for x in l if not (x in dedup or dedup.add(x))]  # type: ignore[func-returns-value]
-    # 2x faster (ordered in PyPy and CPython 3.6+, guaranteed to be ordered in Python 3.7+)
-    # return list(dict.fromkeys(l))
+    return list(dict.fromkeys(l))
 
 
 class Enumerator(Serialize):
@@ -234,8 +230,7 @@ def combine_alternatives(lists):
     return list(product(*lists))
 
 try:
-    # atomicwrites doesn't have type bindings
-    import atomicwrites     # type: ignore[import]
+    import atomicwrites
     _has_atomicwrites = True
 except ImportError:
     _has_atomicwrites = False
@@ -251,19 +246,6 @@ def open(name, mode="r", **kwargs):
             return open(name, mode, **kwargs)
 
 
-
-def isascii(s: str) -> bool:
-    """ str.isascii only exists in python3.7+ """
-    if sys.version_info >= (3, 7):
-        return s.isascii()
-    else:
-        try:
-            s.encode('ascii')
-            return True
-        except (UnicodeDecodeError, UnicodeEncodeError):
-            return False
-
-
 class fzset(frozenset):
     def __repr__(self):
         return '{%s}' % ', '.join(map(repr, self))

diff --git a/lark/visitors.py b/lark/visitors.py
@@ -474,8 +474,7 @@ class _VArgsWrapper:
     def __init__(self, func: Callable, visit_wrapper: Callable[[Callable, str, list, Any], Any]):
         if isinstance(func, _VArgsWrapper):
             func = func.base_func
-        # https://github.com/python/mypy/issues/708
-        self.base_func = func  # type: ignore[assignment]
+        self.base_func = func
         self.visit_wrapper = visit_wrapper
         update_wrapper(self, func)
 

diff --git a/tests/test_cache.py b/tests/test_cache.py
@@ -7,17 +7,14 @@
 from lark.lexer import Lexer, Token
 import lark.lark as lark_module
 
-try:
-    from StringIO import StringIO
-except ImportError:
-    from io import BytesIO as StringIO
+from io import BytesIO
 
 try:
     import regex
 except ImportError:
     regex = None
 
-class MockFile(StringIO):
+class MockFile(BytesIO):
     def close(self):
         pass
     def __enter__(self):

diff --git a/tests/test_logger.py b/tests/test_logger.py
@@ -3,10 +3,7 @@
 from lark import Lark, logger
 from unittest import TestCase, main, skipIf
 
-try:
-    from StringIO import StringIO
-except ImportError:
-    from io import StringIO
+from io import StringIO
 
 try:
     import interegular

diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -7,8 +7,6 @@
 import sys
 from copy import copy, deepcopy
 
-from lark.utils import isascii
-
 from lark import Token, Transformer_NonRecursive, LexError
 
 from io import (
@@ -968,7 +966,7 @@ class DualBytesLark:
     def __init__(self, g, *args, **kwargs):
         self.text_lexer = Lark(g, *args, use_bytes=False, **kwargs)
         g = self.text_lexer.grammar_source.lower()
-        if '\\u' in g or not isascii(g):
+        if '\\u' in g or not g.isascii():
             # Bytes re can't deal with uniode escapes
             self.bytes_lark = None
         else:
@@ -977,7 +975,7 @@ def __init__(self, g, *args, **kwargs):
 
     def parse(self, text, start=None):
         # TODO: Easy workaround, more complex checks would be beneficial
-        if not isascii(text) or self.bytes_lark is None:
+        if not text.isascii() or self.bytes_lark is None:
             return self.text_lexer.parse(text, start)
         try:
             rv = self.text_lexer.parse(text, start)

diff --git a/tests/test_reconstructor.py b/tests/test_reconstructor.py
@@ -154,7 +154,6 @@ def test_keep_all_tokens(self):
         for code in examples:
             self.assert_reconstruct(g, code, keep_all_tokens=True)
 
-    @unittest.skipIf(sys.version_info < (3, 0), "Python 2 does not play well with Unicode.")
     def test_switch_grammar_unicode_terminal(self):
         """
         This test checks that a parse tree built with a grammar containing only ascii characters can be reconstructed