Skip to content

Commit

Permalink
fix: avoid infinite loop when parsing specific Footnotes (#124)
Browse files Browse the repository at this point in the history
* also cover more scenarios by unit tests
* also add some inline documentation
  • Loading branch information
pbodnar committed Nov 13, 2021
1 parent 2324700 commit e9cfc39
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 3 deletions.
10 changes: 8 additions & 2 deletions mistletoe/block_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,11 +706,13 @@ def __init__(self, content, align=None):

class Footnote(BlockToken):
"""
Footnote token.
Footnote token. A "link reference definition" according to the spec.
The constructor returns None, because the footnote information
is stored in Footnote.read.
"""
# Not used, matched manually instead.
# We also rely on code blocks and similar tokens having been parsed beforehand.
label_pattern = re.compile(r'[ \n]{0,3}\[(.+?)\]', re.DOTALL)

def __new__(cls, _):
Expand Down Expand Up @@ -868,7 +870,11 @@ def append_footnotes(matches, root):

@staticmethod
def backtrack(lines, string, offset):
    """
    Return peeked lines back to the parsing process.

    Called when we peeked some lines and found nothing relevant on them.

    Args:
        lines: the line source being parsed; its ``_index`` attribute
            is rewound in place.
        string: the text that was peeked.
        offset: position in ``string``; only the text after this
            position is considered unconsumed.

    The index is moved back by one for every newline found in the
    unconsumed remainder of ``string``.
    """
    # Leading whitespace (e.g. the line break right after ``offset``, or a
    # stray '\r') is stripped first so that it does not count as a peeked
    # line; presumably this over-rewinding is what caused the infinite loop
    # named in the commit title.
    unconsumed = string[offset+1:].lstrip()
    lines._index -= unconsumed.count('\n')


class ThematicBreak(BlockToken):
Expand Down
29 changes: 28 additions & 1 deletion test/test_block_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,12 +322,39 @@ def test_match(self):


class TestFootnote(unittest.TestCase):
    """Tests for parsing footnotes (link reference definitions)."""

    def test_parse_simple(self):
        """Two consecutive definitions without titles are both stored."""
        lines = ['[key 1]: value1\n',
                 '[key 2]: value2\n']
        token = block_token.Document(lines)
        self.assertEqual(token.footnotes, {"key 1": ("value1", ""),
                                           "key 2": ("value2", "")})

    def test_parse_with_title(self):
        """A title may follow on the same line or on the next line."""
        lines = ['[key 1]: value1 "title1"\n',
                 '[key 2]: value2\n',
                 '"title2"\n']
        token = block_token.Document(lines)
        self.assertEqual(token.footnotes, {"key 1": ("value1", "title1"),
                                           "key 2": ("value2", "title2")})

    # This tests an edge case that shouldn't occur in normal documents.
    def test_parse_with_para_right_after(self):
        """Text directly following a definition becomes a paragraph."""
        lines = ['[key 1]: value1\n',
                 # NOTE: if a line 'something1\n' were inserted here, it and
                 # the next line should be treated as a paragraph; currently
                 # that line gets skipped instead.
                 '[key 2]: value2\n',
                 'something2\n',
                 '\n',
                 '[key 3]: value3\r\n',  # '\r', or any other whitespace
                 'something3\n']
        token = block_token.Document(lines)
        self.assertEqual(token.footnotes, {"key 1": ("value1", ""),
                                           "key 2": ("value2", ""),
                                           "key 3": ("value3", "")})
        self.assertEqual(len(token.children), 2)
        self.assertIsInstance(token.children[0], block_token.Paragraph)
        self.assertEqual(token.children[0].children[0].content, "something2")
        self.assertEqual(token.children[1].children[0].content, "something3")


class TestDocument(unittest.TestCase):
Expand Down

0 comments on commit e9cfc39

Please sign in to comment.