Merge pull request #1430 from MegaIng/update-config

Update config and drop python < 3.8
lark-parser · Jun 22, 2024 · 13a97aa · 13a97aa
2 parents 8611d69 + 964ce00
commit 13a97aa
Show file tree

Hide file tree

Showing 16 changed files with 41 additions and 80 deletions.
diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml
@@ -35,6 +35,6 @@ jobs:
         flags: unittests
         env_vars: OS,PYTHON
         name: codecov-umbrella
-        fail_ci_if_error: true
+        fail_ci_if_error: false
         path_to_write_report: ./coverage/codecov_report.txt
         verbose: true
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -3,11 +3,10 @@ on: [push, pull_request]
 
 jobs:
   build:
-    # runs-on: ubuntu-latest
-    runs-on: ubuntu-20.04   # See https://github.com/actions/setup-python/issues/544
+    runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "pypy-3.7"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13-dev", "pypy-3.10"]
 
     steps:
       - uses: actions/checkout@v3

diff --git a/lark/__init__.py b/lark/__init__.py
@@ -14,7 +14,7 @@
 from .utils import logger
 from .visitors import Discard, Transformer, Transformer_NonRecursive, Visitor, v_args
 
-__version__: str = "1.1.9"
+__version__: str = "1.2.0"
 
 __all__ = (
     "GrammarError",

diff --git a/lark/common.py b/lark/common.py
@@ -8,10 +8,7 @@
     from .lexer import Lexer
     from .grammar import Rule
     from typing import Union, Type
-    if sys.version_info >= (3, 8):
-        from typing import Literal
-    else:
-        from typing_extensions import Literal
+    from typing import Literal
     if sys.version_info >= (3, 10):
         from typing import TypeAlias
     else:

diff --git a/lark/lark.py b/lark/lark.py
@@ -12,14 +12,11 @@
     from .parsers.lalr_interactive_parser import InteractiveParser
     from .tree import ParseTree
     from .visitors import Transformer
-    if sys.version_info >= (3, 8):
-        from typing import Literal
-    else:
-        from typing_extensions import Literal
+    from typing import Literal
     from .parser_frontends import ParsingFrontend
 
 from .exceptions import ConfigurationError, assert_config, UnexpectedInput
-from .utils import Serialize, SerializeMemoizer, FS, isascii, logger
+from .utils import Serialize, SerializeMemoizer, FS, logger
 from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource, sha256_digest
 from .tree import Tree
 from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType
@@ -303,7 +300,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
         if isinstance(grammar, str):
             self.source_grammar = grammar
             if self.options.use_bytes:
-                if not isascii(grammar):
+                if not grammar.isascii():
                     raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
 
             if self.options.cache:

diff --git a/lark/tools/__init__.py b/lark/tools/__init__.py
@@ -28,9 +28,8 @@
 lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase Logger output level, up to three times")
 lalr_argparser.add_argument('-s', '--start', action='append', default=[])
 lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('basic', 'contextual'))
-encoding: Optional[str] = 'utf-8' if sys.version_info > (3, 4) else None
-lalr_argparser.add_argument('-o', '--out', type=FileType('w', encoding=encoding), default=sys.stdout, help='the output file (default=stdout)')
-lalr_argparser.add_argument('grammar_file', type=FileType('r', encoding=encoding), help='A valid .lark file')
+lalr_argparser.add_argument('-o', '--out', type=FileType('w', encoding='utf-8'), default=sys.stdout, help='the output file (default=stdout)')
+lalr_argparser.add_argument('grammar_file', type=FileType('r', encoding='utf-8'), help='A valid .lark file')
 
 for flag in flags:
     if isinstance(flag, tuple):

diff --git a/lark/tree.py b/lark/tree.py
@@ -9,13 +9,9 @@
         import rich
     except ImportError:
         pass
-    if sys.version_info >= (3, 8):
-        from typing import Literal
-    else:
-        from typing_extensions import Literal
+    from typing import Literal
 
 ###{standalone
-from collections import OrderedDict
 
 class Meta:
 
@@ -140,11 +136,10 @@ def iter_subtrees(self) -> 'Iterator[Tree[_Leaf_T]]':
         Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG).
         """
         queue = [self]
-        subtrees = OrderedDict()
+        subtrees = dict()
         for subtree in queue:
             subtrees[id(subtree)] = subtree
-            # Reason for type ignore https://github.com/python/mypy/issues/10999
-            queue += [c for c in reversed(subtree.children)  # type: ignore[misc]
+            queue += [c for c in reversed(subtree.children)
                       if isinstance(c, Tree) and id(c) not in subtrees]
 
         del queue
@@ -242,7 +237,7 @@ def pydot__tree_to_graph(tree: Tree, rankdir="LR", **kwargs):
     possible attributes, see https://www.graphviz.org/doc/info/attrs.html.
     """
 
-    import pydot  # type: ignore[import]
+    import pydot  # type: ignore[import-not-found]
     graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs)
 
     i = [0]

diff --git a/lark/utils.py b/lark/utils.py
@@ -68,7 +68,7 @@ def serialize(self, memo = None) -> Dict[str, Any]:
         res = {f: _serialize(getattr(self, f), memo) for f in fields}
         res['__type__'] = type(self).__name__
         if hasattr(self, '_serialize'):
-            self._serialize(res, memo)  # type: ignore[attr-defined]
+            self._serialize(res, memo)
         return res
 
     @classmethod
@@ -89,7 +89,7 @@ def deserialize(cls: Type[_T], data: Dict[str, Any], memo: Dict[int, Any]) -> _T
                 raise KeyError("Cannot find key for class", cls, e)
 
         if hasattr(inst, '_deserialize'):
-            inst._deserialize()  # type: ignore[attr-defined]
+            inst._deserialize()
 
         return inst
 
@@ -141,7 +141,7 @@ def get_regexp_width(expr: str) -> Union[Tuple[int, int], List[int]]:
         regexp_final = expr
     try:
         # Fixed in next version (past 0.960) of typeshed
-        return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]   # type: ignore[attr-defined]
+        return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
     except sre_constants.error:
         if not _has_regex:
             raise ValueError(expr)
@@ -188,11 +188,7 @@ def dedup_list(l: Sequence[T]) -> List[T]:
     """Given a list (l), removes duplicates from the list,
        preserving the original order of the list. Assumes that
        the list entries are hashable."""
-    dedup = set()
-    # This returns None, but that's expected
-    return [x for x in l if not (x in dedup or dedup.add(x))]  # type: ignore[func-returns-value]
-    # 2x faster (ordered in PyPy and CPython 3.6+, guaranteed to be ordered in Python 3.7+)
-    # return list(dict.fromkeys(l))
+    return list(dict.fromkeys(l))
 
 
 class Enumerator(Serialize):
@@ -234,8 +230,7 @@ def combine_alternatives(lists):
     return list(product(*lists))
 
 try:
-    # atomicwrites doesn't have type bindings
-    import atomicwrites     # type: ignore[import]
+    import atomicwrites
     _has_atomicwrites = True
 except ImportError:
     _has_atomicwrites = False
@@ -251,19 +246,6 @@ def open(name, mode="r", **kwargs):
             return open(name, mode, **kwargs)
 
 
-
-def isascii(s: str) -> bool:
-    """ str.isascii only exists in python3.7+ """
-    if sys.version_info >= (3, 7):
-        return s.isascii()
-    else:
-        try:
-            s.encode('ascii')
-            return True
-        except (UnicodeDecodeError, UnicodeEncodeError):
-            return False
-
-
 class fzset(frozenset):
     def __repr__(self):
         return '{%s}' % ', '.join(map(repr, self))

diff --git a/lark/visitors.py b/lark/visitors.py
@@ -474,8 +474,7 @@ class _VArgsWrapper:
     def __init__(self, func: Callable, visit_wrapper: Callable[[Callable, str, list, Any], Any]):
         if isinstance(func, _VArgsWrapper):
             func = func.base_func
-        # https://github.com/python/mypy/issues/708
-        self.base_func = func  # type: ignore[assignment]
+        self.base_func = func
         self.visit_wrapper = visit_wrapper
         update_wrapper(self, func)
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -17,7 +17,7 @@ classifiers = [
     "Topic :: Text Processing :: Linguistic",
     "License :: OSI Approved :: MIT License",
 ]
-requires-python = ">=3.6"
+requires-python = ">=3.8"
 dependencies = []
 dynamic = ["version"]
 
@@ -41,7 +41,7 @@ Main Features:
 - Import grammars from Nearley.js
 - Extensive test suite
 - And much more!
-Since version 1.0, only Python versions 3.6 and up are supported."""
+Since version 1.2, only Python versions 3.8 and up are supported."""
 content-type = "text/markdown"
 
 [project.urls]
@@ -76,9 +76,9 @@ version = {attr = "lark.__version__"}
 
 [tool.mypy]
 files = "lark"
-python_version = "3.6"
+python_version = "3.8"
 show_error_codes = true
-enable_error_code = ["ignore-without-code"]
+enable_error_code = ["ignore-without-code", "unused-ignore"]
 exclude = [
   "^lark/__pyinstaller",
 ]
@@ -95,3 +95,11 @@ exclude_lines = [
 ]
 [tool.pyright]
 include = ["lark"]
+
+[tool.pytest.ini_options]
+minversion = 6.0
+addopts = "-ra -q"
+testpaths =[
+    "tests"
+]
+python_files = "__main__.py"
diff --git a/pytest.ini b/pytest.ini
diff --git a/tests/test_cache.py b/tests/test_cache.py
@@ -7,17 +7,14 @@
 from lark.lexer import Lexer, Token
 import lark.lark as lark_module
 
-try:
-    from StringIO import StringIO
-except ImportError:
-    from io import BytesIO as StringIO
+from io import BytesIO
 
 try:
     import regex
 except ImportError:
     regex = None
 
-class MockFile(StringIO):
+class MockFile(BytesIO):
     def close(self):
         pass
     def __enter__(self):

diff --git a/tests/test_logger.py b/tests/test_logger.py
@@ -3,10 +3,7 @@
 from lark import Lark, logger
 from unittest import TestCase, main, skipIf
 
-try:
-    from StringIO import StringIO
-except ImportError:
-    from io import StringIO
+from io import StringIO
 
 try:
     import interegular

diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -7,8 +7,6 @@
 import sys
 from copy import copy, deepcopy
 
-from lark.utils import isascii
-
 from lark import Token, Transformer_NonRecursive, LexError
 
 from io import (
@@ -968,7 +966,7 @@ class DualBytesLark:
     def __init__(self, g, *args, **kwargs):
         self.text_lexer = Lark(g, *args, use_bytes=False, **kwargs)
         g = self.text_lexer.grammar_source.lower()
-        if '\\u' in g or not isascii(g):
+        if '\\u' in g or not g.isascii():
             # Bytes re can't deal with unicode escapes
             self.bytes_lark = None
         else:
@@ -977,7 +975,7 @@ def __init__(self, g, *args, **kwargs):
 
     def parse(self, text, start=None):
         # TODO: Easy workaround, more complex checks would be beneficial
-        if not isascii(text) or self.bytes_lark is None:
+        if not text.isascii() or self.bytes_lark is None:
             return self.text_lexer.parse(text, start)
         try:
             rv = self.text_lexer.parse(text, start)

diff --git a/tests/test_reconstructor.py b/tests/test_reconstructor.py
@@ -154,7 +154,6 @@ def test_keep_all_tokens(self):
         for code in examples:
             self.assert_reconstruct(g, code, keep_all_tokens=True)
 
-    @unittest.skipIf(sys.version_info < (3, 0), "Python 2 does not play well with Unicode.")
     def test_switch_grammar_unicode_terminal(self):
         """
         This test checks that a parse tree built with a grammar containing only ascii characters can be reconstructed

diff --git a/tox.ini b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = lint, type, py36, py37, py38, py39, py310, py311, py312, pypy3
+envlist = lint, type, py38, py39, py310, py311, py312, py313, pypy3
 skip_missing_interpreters = true
 
 [testenv]
@@ -25,10 +25,10 @@ description = run type check on code base
 skip_install = true
 recreate = false
 deps =
-    mypy==0.950
-    interegular>=0.2.4
+    mypy==1.10
+    interegular>=0.3.1,<0.4.0
     types-atomicwrites
-    types-regex==2023.12.25.20240106
+    types-regex
     rich<=13.4.1
 commands =
     mypy