Python-Markdown · oprypin · Nov 2, 2023 · Nov 2, 2023 · Nov 2, 2023 · Nov 2, 2023
diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml
@@ -71,7 +71,7 @@ jobs:
       fail-fast: false
       max-parallel: 4
       matrix:
-        tox-env: [flake8, pep517check, checkspelling]
+        tox-env: [mypy, flake8, pep517check, checkspelling]
 
     env:
       TOXENV: ${{ matrix.tox-env }}

diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
@@ -175,7 +175,7 @@ def test(self, parent: etree.Element, block: str) -> bool:
         return block.startswith(' '*self.tab_length) and \
             not self.parser.state.isstate('detabbed') and \
             (parent.tag in self.ITEM_TYPES or
-                (len(parent) and parent[-1] is not None and
+                (len(parent) > 0 and parent[-1] is not None and
                     (parent[-1].tag in self.LIST_TYPES)))
 
     def run(self, parent: etree.Element, blocks: list[str]) -> None:

diff --git a/markdown/core.py b/markdown/core.py
@@ -85,6 +85,14 @@ class Markdown:
     callable which accepts an [`Element`][xml.etree.ElementTree.Element] and returns a `str`.
     """
 
+    tab_length: int
+    ESCAPED_CHARS: list[str]
+    block_level_elements: list[str]
+    registeredExtensions: list[Extension]
+    stripTopLevelTags: bool
+    references: dict[str, tuple[str, str]]
+    htmlStash: util.HtmlStash
+
     def __init__(self, **kwargs):
         """
         Creates a new Markdown instance.
@@ -106,23 +114,23 @@ def __init__(self, **kwargs):
 
         """
 
-        self.tab_length: int = kwargs.get('tab_length', 4)
+        self.tab_length = kwargs.get('tab_length', 4)
 
-        self.ESCAPED_CHARS: list[str] = [
+        self.ESCAPED_CHARS = [
             '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!'
         ]
         """ List of characters which get the backslash escape treatment. """
 
-        self.block_level_elements: list[str] = BLOCK_LEVEL_ELEMENTS.copy()
+        self.block_level_elements = BLOCK_LEVEL_ELEMENTS.copy()
 
-        self.registeredExtensions: list[Extension] = []
+        self.registeredExtensions = []
         self.docType = ""  # TODO: Maybe delete this. It does not appear to be used anymore.
-        self.stripTopLevelTags: bool = True
+        self.stripTopLevelTags = True
 
         self.build_parser()
 
-        self.references: dict[str, tuple[str, str]] = {}
-        self.htmlStash: util.HtmlStash = util.HtmlStash()
+        self.references = {}
+        self.htmlStash = util.HtmlStash()
         self.registerExtensions(extensions=kwargs.get('extensions', []),
                                 configs=kwargs.get('extension_configs', {}))
         self.set_output_format(kwargs.get('output_format', 'xhtml'))
@@ -446,7 +454,7 @@ def convertFile(
         else:
             # Encode manually and write bytes to stdout.
             html = html.encode(encoding, "xmlcharrefreplace")
-            sys.stdout.buffer.write(html)
+            sys.stdout.buffer.write(html)  # type: ignore
 
         return self
 

diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py
@@ -94,7 +94,7 @@ def __init__(self, pattern: str, title: str):
         super().__init__(pattern)
         self.title = title
 
-    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:
+    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:  # type: ignore[override]
         abbr = etree.Element('abbr')
         abbr.text = AtomicString(m.group('abbr'))
         abbr.set('title', self.title)

diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py
@@ -56,7 +56,7 @@ def _handle_word(s, t):
     return t, t
 
 
-_scanner = re.Scanner([
+_scanner = re.Scanner([  # type: ignore[attr-defined]
     (r'[^ =]+=".*?"', _handle_double_quote),
     (r"[^ =]+='.*?'", _handle_single_quote),
     (r'[^ =]+=[^ =]+', _handle_key_value),

diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py
@@ -161,7 +161,7 @@ def hilite(self, shebang: bool = True) -> str:
                     lexer = get_lexer_by_name('text', **self.options)
             if not self.lang:
                 # Use the guessed lexer's language instead
-                self.lang = lexer.aliases[0]
+                self.lang = lexer.aliases[0]  # type: ignore[attr-defined]
             lang_str = f'{self.lang_prefix}{self.lang}'
             if isinstance(self.pygments_formatter, str):
                 try:

diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py
@@ -159,7 +159,7 @@ def handle_attrs(self, attrs: Iterable[tuple[str, str]]) -> tuple[str, list[str]
         """ Return tuple: `(id, [list, of, classes], {configs})` """
         id = ''
         classes = []
-        configs = {}
+        configs: dict[str, Any] = {}
         for k, v in attrs:
             if k == 'id':
                 id = v

diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py
@@ -38,6 +38,9 @@
 class FootnoteExtension(Extension):
     """ Footnote Extension. """
 
+    found_refs: dict[str, int]
+    used_refs: set[str]
+
     def __init__(self, **kwargs):
         """ Setup configs. """
 
@@ -68,8 +71,8 @@ def __init__(self, **kwargs):
 
         # In multiple invocations, emit links that don't get tangled.
         self.unique_prefix = 0
-        self.found_refs: dict[str, int] = {}
-        self.used_refs: set[str] = set()
+        self.found_refs = {}
+        self.used_refs = set()
 
         self.reset()
 
@@ -290,7 +293,7 @@ def detectTabbed(self, blocks: list[str]) -> list[str]:
                 break
         return fn_blocks
 
-    def detab(self, block: str) -> str:
+    def detab(self, block: str) -> str:  # type: ignore[override]
         """ Remove one level of indent from a block.
 
         Preserve lazily indented blocks by only removing indent from indented lines.

diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py
@@ -40,6 +40,10 @@ class HTMLExtractorExtra(HTMLExtractor):
     Markdown.
     """
 
+    mdstack: list[str]  # When markdown=1, stack contains a list of tags; assigned per instance in reset()
+    treebuilder: etree.TreeBuilder  # replaced with a new TreeBuilder in reset()
+    mdstate: list[Literal['block', 'span', 'off', None]]  # emptied in reset()
+
     def __init__(self, md: Markdown, *args, **kwargs):
         # All block-level tags.
         self.block_level_tags = set(md.block_level_elements.copy())
@@ -58,9 +62,9 @@ def __init__(self, md: Markdown, *args, **kwargs):
 
     def reset(self):
         """Reset this instance.  Loses all unprocessed data."""
-        self.mdstack: list[str] = []  # When markdown=1, stack contains a list of tags
+        self.mdstack = []  # When markdown=1, stack contains a list of tags
         self.treebuilder = etree.TreeBuilder()
-        self.mdstate: list[Literal['block', 'span', 'off', None]] = []
+        self.mdstate = []
         super().reset()
 
     def close(self):

diff --git a/markdown/extensions/meta.py b/markdown/extensions/meta.py
@@ -78,7 +78,7 @@ def run(self, lines: list[str]) -> list[str]:
                 else:
                     lines.insert(0, line)
                     break  # no meta data - done
-        self.md.Meta = meta
+        self.md.Meta = meta  # type: ignore[attr-defined]
         return lines
 
 

diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py
@@ -179,6 +179,9 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]:
 
 class SmartyExtension(Extension):
     """ Add Smarty to Markdown. """
+
+    substitutions: dict[str, str]
+
     def __init__(self, **kwargs):
         self.config = {
             'smart_quotes': [True, 'Educate quotes'],
@@ -189,7 +192,7 @@ def __init__(self, **kwargs):
         }
         """ Default configuration options. """
         super().__init__(**kwargs)
-        self.substitutions: dict[str, str] = dict(substitutions)
+        self.substitutions = dict(substitutions)
         self.substitutions.update(self.getConfig('substitutions', default={}))
 
     def _addPatterns(
@@ -199,9 +202,8 @@ def _addPatterns(
         serie: str,
         priority: int,
     ):
-        for ind, pattern in enumerate(patterns):
-            pattern += (md,)
-            pattern = SubstituteTextPattern(*pattern)
+        for ind, pattern_args in enumerate(patterns):
+            pattern = SubstituteTextPattern(*pattern_args, md)
             name = 'smarty-%s-%d' % (serie, ind)
             self.inlinePatterns.register(pattern, name, priority-ind)
 
@@ -253,7 +255,7 @@ def educateQuotes(self, md: Markdown) -> None:
         )
         self._addPatterns(md, patterns, 'quotes', 30)
 
-    def extendMarkdown(self, md):
+    def extendMarkdown(self, md: Markdown):
         configs = self.getConfigs()
         self.inlinePatterns: Registry[inlinepatterns.InlineProcessor] = Registry()
         if configs['smart_ellipses']:

diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py
@@ -80,7 +80,7 @@ def stashedHTML2text(text: str, md: Markdown, strip_entities: bool = True) -> st
     def _html_sub(m: re.Match[str]) -> str:
         """ Substitute raw html with plain text. """
         try:
-            raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
+            raw: str = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
         except (IndexError, TypeError):  # pragma: no cover
             return m.group(0)
         # Strip out tags and/or entities - leaving text
@@ -335,8 +335,8 @@ def run(self, doc: etree.Element) -> None:
         toc = self.md.serializer(div)
         for pp in self.md.postprocessors:
             toc = pp.run(toc)
-        self.md.toc_tokens = toc_tokens
-        self.md.toc = toc
+        self.md.toc_tokens = toc_tokens  # type: ignore[attr-defined]
+        self.md.toc = toc  # type: ignore[attr-defined]
 
 
 class TocExtension(Extension):

diff --git a/markdown/extensions/wikilinks.py b/markdown/extensions/wikilinks.py
@@ -65,6 +65,7 @@ def __init__(self, pattern: str, config: dict[str, Any]):
         self.config = config
 
     def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str, int, int]:
+        a: etree.Element | str
         if m.group(1).strip():
             base_url, end_url, html_class = self._getMeta()
             label = m.group(1).strip()

diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py
@@ -28,7 +28,7 @@
 import re
 import importlib.util
 import sys
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING, Any, Sequence
 
 if TYPE_CHECKING:  # pragma: no cover
     from markdown import Markdown
@@ -37,7 +37,7 @@
 # Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it.
 # Users can still do `from html import parser` and get the default behavior.
 spec = importlib.util.find_spec('html.parser')
-htmlparser = importlib.util.module_from_spec(spec)
+htmlparser: Any = importlib.util.module_from_spec(spec)
 spec.loader.exec_module(htmlparser)
 sys.modules['htmlparser'] = htmlparser
 
@@ -80,6 +80,9 @@ class HTMLExtractor(htmlparser.HTMLParser):
     is stored in `cleandoc` as a list of strings.
     """
 
+    stack: list[str]
+    cleandoc: list[str]
+
     def __init__(self, md: Markdown, *args, **kwargs):
         if 'convert_charrefs' not in kwargs:
             kwargs['convert_charrefs'] = False
@@ -97,9 +100,9 @@ def reset(self):
         """Reset this instance.  Loses all unprocessed data."""
         self.inraw = False
         self.intail = False
-        self.stack: list[str] = []  # When `inraw==True`, stack contains a list of tags
-        self._cache: list[str] = []
-        self.cleandoc: list[str] = []
+        self.stack = []  # When `inraw==True`, stack contains a list of tags
+        self._cache = []
+        self.cleandoc = []
         self.lineno_start_cache = [0]
 
         super().reset()

diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
@@ -188,7 +188,7 @@ class EmStrongItem(NamedTuple):
 # -----------------------------------------------------------------------------
 
 
-class Pattern:  # pragma: no cover
+class _BasePattern:
     """
     Base class that inline patterns subclass.
 
@@ -238,31 +238,18 @@ def getCompiledRegExp(self) -> re.Pattern:
         """ Return a compiled regular expression. """
         return self.compiled_re
 
-    def handleMatch(self, m: re.Match[str]) -> etree.Element | str:
-        """Return a ElementTree element from the given match.
-
-        Subclasses should override this method.
-
-        Arguments:
-            m: A match object containing a match of the pattern.
-
-        Returns: An ElementTree Element object.
-
-        """
-        pass  # pragma: no cover
-
     def type(self) -> str:
         """ Return class name, to define pattern type """
         return self.__class__.__name__
 
     def unescape(self, text: str) -> str:
         """ Return unescaped text given text with an inline placeholder. """
         try:
-            stash = self.md.treeprocessors['inline'].stashed_nodes
+            stash = self.md.treeprocessors['inline'].stashed_nodes  # type: ignore[attr-defined]
         except KeyError:  # pragma: no cover
             return text
 
-        def get_stash(m):
+        def get_stash(m: re.Match[str]) -> str:
             id = m.group(1)
             if id in stash:
                 value = stash.get(id)
@@ -274,6 +261,27 @@ def get_stash(m):
         return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
 
 
+class LegacyPattern(_BasePattern):
+    def handleMatch(self, m: re.Match[str]) -> etree.Element | str:
+        """Return an ElementTree element or a string for the given match.
+
+        Subclasses should override this method; this base version returns `None`.
+
+        Arguments:
+            m: A match object containing a match of the pattern.
+
+        Returns: An ElementTree Element object or a string.
+
+        """
+        pass  # pragma: no cover
+
+
+if TYPE_CHECKING:  # pragma: no cover
+    Pattern = _BasePattern  # what type checkers see: the base class, without the legacy `handleMatch(m)`
+else:
+    Pattern = LegacyPattern  # what runs: the subclass that still provides `handleMatch(m)`
+
+
 class InlineProcessor(Pattern):
     """
     Base class that inline processors subclass.
@@ -505,13 +513,13 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]:
     def unescape(self, text: str) -> str:
         """ Return unescaped text given text with an inline placeholder. """
         try:
-            stash = self.md.treeprocessors['inline'].stashed_nodes
+            stash = self.md.treeprocessors['inline'].stashed_nodes  # type: ignore[attr-defined]
         except KeyError:  # pragma: no cover
             return text
 
         def get_stash(m: re.Match[str]) -> str:
             id = m.group(1)
-            value = stash.get(id)
+            value: etree.Element | None = stash.get(id)
             if value is not None:
                 try:
                     return self.md.serializer(value)
@@ -523,7 +531,7 @@ def get_stash(m: re.Match[str]) -> str:
     def backslash_unescape(self, text: str) -> str:
         """ Return text with backslash escapes undone (backslashes are restored). """
         try:
-            RE = self.md.treeprocessors['unescape'].RE
+            RE = self.md.treeprocessors['unescape'].RE  # type: ignore[attr-defined]
         except KeyError:  # pragma: no cover
             return text
 

diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py
@@ -75,7 +75,7 @@ def run(self, text: str) -> str:
         """ Iterate over html stash and restore html. """
         replacements = OrderedDict()
         for i in range(self.md.htmlStash.html_counter):
-            html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i])
+            html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i])  # type: ignore[arg-type]
             if self.isblocklevel(html):
                 replacements["<p>{}</p>".format(
                     self.md.htmlStash.get_placeholder(i))] = html

diff --git a/markdown/serializers.py b/markdown/serializers.py
@@ -45,8 +45,8 @@
 
 from __future__ import annotations
 
-from xml.etree.ElementTree import ProcessingInstruction
-from xml.etree.ElementTree import Comment, ElementTree, Element, QName, HTML_EMPTY
+from xml.etree.ElementTree import ProcessingInstruction, Comment, ElementTree, Element, QName
+from xml.etree.ElementTree import HTML_EMPTY  # type: ignore[attr-defined]
 import re
 from typing import Callable, Literal, NoReturn