fix: avoid infinite loop when parsing specific Footnotes (#124)

* also cover more scenarios by unit tests
* also add some inline documentation
miyuchina · Nov 13, 2021 · e9cfc39 · e9cfc39
1 parent 2324700
commit e9cfc39
Show file tree

Hide file tree

Showing 2 changed files with 36 additions and 3 deletions.
diff --git a/mistletoe/block_token.py b/mistletoe/block_token.py
@@ -706,11 +706,13 @@ def __init__(self, content, align=None):
 
 class Footnote(BlockToken):
     """
-    Footnote token.
+    Footnote token. A "link reference definition" according to the spec.
 
     The constructor returns None, because the footnote information
     is stored in Footnote.read.
     """
+    # Not used, matched manually instead.
+    # We also rely on code block and similar being parsed beforehand here.
     label_pattern = re.compile(r'[ \n]{0,3}\[(.+?)\]', re.DOTALL)
 
     def __new__(cls, _):
@@ -868,7 +870,11 @@ def append_footnotes(matches, root):
 
     @staticmethod
     def backtrack(lines, string, offset):
-        lines._index -= string[offset+1:].count('\n')
+        """
+        Called when we peeked some lines and found nothing
+        relevant on them. This returns those lines back to the parsing process.
+        """
+        lines._index -= string[offset+1:].lstrip().count('\n')
 
 
 class ThematicBreak(BlockToken):

diff --git a/test/test_block_token.py b/test/test_block_token.py
@@ -322,12 +322,39 @@ def test_match(self):
 
 
 class TestFootnote(unittest.TestCase):
-    def test_store(self):
+    def test_parse_simple(self):
         lines = ['[key 1]: value1\n',
                  '[key 2]: value2\n']
         token = block_token.Document(lines)
         self.assertEqual(token.footnotes, {"key 1": ("value1", ""),
                                            "key 2": ("value2", "")})
+    def test_parse_with_title(self):
+        lines = ['[key 1]: value1 "title1"\n',
+                 '[key 2]: value2\n',
+                 '"title2"\n']
+        token = block_token.Document(lines)
+        self.assertEqual(token.footnotes, {"key 1": ("value1", "title1"),
+                                           "key 2": ("value2", "title2")})
+
+    # this tests an edge case, it shouldn't occur in normal documents
+    def test_parse_with_para_right_after(self):
+        lines = ['[key 1]: value1\n',
+                 # 'something1\n', # if uncommented,
+                 #     this and the next line should be treated as a paragraph
+                 #     - this line gets skipped instead now
+                 '[key 2]: value2\n',
+                 'something2\n',
+                 '\n',
+                 '[key 3]: value3\r\n', # '\r', or any other whitespace
+                 'something3\n']
+        token = block_token.Document(lines)
+        self.assertEqual(token.footnotes, {"key 1": ("value1", ""),
+                                           "key 2": ("value2", ""),
+                                           "key 3": ("value3", "")})
+        self.assertEqual(len(token.children), 2)
+        self.assertIsInstance(token.children[0], block_token.Paragraph)
+        self.assertEqual(token.children[0].children[0].content, "something2")
+        self.assertEqual(token.children[1].children[0].content, "something3")
 
 
 class TestDocument(unittest.TestCase):