Skip to content

Commit

Permalink
feat: make output markdown keep original indentation of the 1st line in a list item (miyuchina#196)
Browse files Browse the repository at this point in the history

Record the indentation of each list item so that the Markdown renderer can render list markers followed by 1 to 4 spaces.
  • Loading branch information
allets committed Nov 6, 2023
1 parent 70e8e8d commit 6f70602
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 18 deletions.
29 changes: 17 additions & 12 deletions mistletoe/block_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ def check_interrupts_paragraph(cls, lines):
# and the list must either be unordered or start from 1.
marker_tuple = ListItem.parse_marker(lines.peek())
if (marker_tuple is not None):
_, leader, content = marker_tuple
_, _, leader, content = marker_tuple
if not content.strip() == '':
return not leader[0].isdigit() or leader in ['1.', '1)']
return False
Expand All @@ -499,7 +499,7 @@ def read(cls, lines):
while True:
anchor = lines.get_pos()
output, next_marker = ListItem.read(lines, next_marker)
item_leader = output[2]
item_leader = output[3]
if leader is None:
leader = item_leader
elif not cls.same_marker_type(leader, item_leader):
Expand Down Expand Up @@ -532,16 +532,18 @@ class ListItem(BlockToken):
Attributes:
leader (string): a bullet list marker or an ordered list marker.
indentation (int): spaces before the leader.
prepend (int): the start position of the content, i.e., the indentation required
for continuation lines.
loose (bool): whether the list is loose.
"""
repr_attributes = ("leader", "prepend", "loose")
pattern = re.compile(r' {0,3}(\d{0,9}[.)]|[+\-*])($|\s+)')
repr_attributes = ("leader", "indentation", "prepend", "loose")
pattern = re.compile(r'( {0,3})(\d{0,9}[.)]|[+\-*])($|\s+)')
continuation_pattern = re.compile(r'([ \t]*)(\S.*\n|\n)')

def __init__(self, parse_buffer, prepend, leader):
def __init__(self, parse_buffer, indentation, prepend, leader):
self.leader = leader
self.indentation = indentation
self.prepend = prepend
self.children = tokenizer.make_tokens(parse_buffer)
self.loose = parse_buffer.loose
Expand Down Expand Up @@ -574,22 +576,25 @@ def parse_marker(cls, line):
The leader is a bullet list marker, or an ordered list marker.
The indentation is spaces before the leader.
The prepend is the start position of the content, i.e., the indentation required
for continuation lines.
"""
match_obj = cls.pattern.match(line)
if match_obj is None:
return None
indentation = len(match_obj.group(1))
prepend = len(match_obj.group(0).expandtabs(4))
leader = match_obj.group(1)
leader = match_obj.group(2)
content = line[match_obj.end(0):]
n_spaces = prepend - match_obj.end(1)
n_spaces = prepend - match_obj.end(2)
if n_spaces > 4:
# if there are more than 4 spaces after the leader, we treat them as part of the content
# with the exception of the first (marker separator) space.
prepend -= n_spaces - 1
content = ' ' * (n_spaces - 1) + content
return prepend, leader, content
return indentation, prepend, leader, content

@classmethod
def read(cls, lines, prev_marker=None):
Expand All @@ -599,10 +604,10 @@ def read(cls, lines, prev_marker=None):
# first line
line = next(lines)
next_line = lines.peek()
prepend, leader, content = prev_marker if prev_marker else cls.parse_marker(line)
indentation, prepend, leader, content = prev_marker if prev_marker else cls.parse_marker(line)
if content.strip() == '':
# item starting with a blank line: look for the next non-blank line
prepend = len(leader) + 1
prepend = indentation + len(leader) + 1
blanks = 1
while next_line is not None and next_line.strip() == '':
blanks += 1
Expand All @@ -614,7 +619,7 @@ def read(cls, lines, prev_marker=None):
parse_buffer = tokenizer.ParseBuffer()
parse_buffer.loose = True
next_marker = cls.parse_marker(next_line) if next_line is not None else None
return (parse_buffer, prepend, leader), next_marker
return (parse_buffer, indentation, prepend, leader), next_marker
else:
line_buffer.append(content)

Expand Down Expand Up @@ -659,7 +664,7 @@ def read(cls, lines, prev_marker=None):
# block-level tokens are parsed here, so that footnotes can be
# recognized before span-level parsing.
parse_buffer = tokenizer.tokenize_block(line_buffer, _token_types)
return (parse_buffer, prepend, leader), next_marker
return (parse_buffer, indentation, prepend, leader), next_marker


class Table(BlockToken):
Expand Down
6 changes: 4 additions & 2 deletions mistletoe/markdown_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,15 +302,17 @@ def render_list(
def render_list_item(
self, token: block_token.ListItem, max_line_length: int
) -> Iterable[str]:
indentation = len(token.leader) + 1
indentation = token.prepend - token.indentation
max_child_line_length = (
max_line_length - indentation if max_line_length else None
)
lines = self.blocks_to_lines(
token.children, max_line_length=max_child_line_length
)
return self.prefix_lines(
list(lines) or [""], token.leader + " ", " " * indentation
list(lines) or [""],
token.leader + " " * (indentation - len(token.leader)),
" " * indentation
)

def render_table(
Expand Down
50 changes: 48 additions & 2 deletions test/test_markdown_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,11 @@ def test_numbered_list(self):
]
output = self.roundtrip(input)
expected = [
"22) *emphasized list item*\n",
"22) *emphasized list item*\n",
"96) \n",
"128) here begins a nested list.\n",
" + apples\n",
" + bananas\n",
" + bananas\n",
]
self.assertEqual(output, "".join(expected))

Expand All @@ -146,6 +146,52 @@ def test_bulleted_list(self):
output = self.roundtrip(input)
self.assertEqual(output, "".join(input))

def test_list_item_indented_from_the_margin(self):
# 0 to 4 spaces of indentation from the margin
input = [
"- 0 space: ok.\n",
" subsequent line.\n",
" - 1 space: ok.\n",
" subsequent line.\n",
" - 2 spaces: ok.\n",
" subsequent line.\n",
" - 3 spaces: ok.\n",
" subsequent line.\n",
" - 4 spaces: in the paragraph of the above list item.\n",
" subsequent line.\n",
]
output = self.roundtrip(input)
expected = [
"- 0 space: ok.\n",
" subsequent line.\n",
"- 1 space: ok.\n",
" subsequent line.\n",
"- 2 spaces: ok.\n",
" subsequent line.\n",
"- 3 spaces: ok.\n",
" subsequent line.\n",
" - 4 spaces: in the paragraph of the above list item.\n",
" subsequent line.\n",
]
self.assertEqual(output, "".join(expected))

def test_list_item_indented_from_the_leader(self):
# leaders followed by 1 to 5 spaces
input = [
"- 1 space: ok.\n",
" subsequent line.\n",
"- 2 spaces: ok.\n",
" subsequent line.\n",
"- 3 spaces: ok.\n",
" subsequent line.\n",
"- 4 spaces: ok.\n",
" subsequent line.\n",
"- 5 spaces: list item starting with indented code.\n",
" subsequent line.\n",
]
output = self.roundtrip(input)
self.assertEqual(output, "".join(input))

def test_code_blocks(self):
input = [
" this is an indented code block\n",
Expand Down
4 changes: 2 additions & 2 deletions test/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ def test_codefence(self):
def test_unordered_list(self):
doc = Document("* Foo\n* Bar\n* Baz")
self._check_repr_matches(doc.children[0], "block_token.List with 3 children loose=False start=None")
self._check_repr_matches(doc.children[0].children[0], "block_token.ListItem with 1 child leader='*' prepend=2 loose=False")
self._check_repr_matches(doc.children[0].children[0], "block_token.ListItem with 1 child leader='*' indentation=0 prepend=2 loose=False")

def test_ordered_list(self):
doc = Document("1. Foo\n2. Bar\n3. Baz")
self._check_repr_matches(doc.children[0], "block_token.List with 3 children loose=False start=1")
self._check_repr_matches(doc.children[0].children[0], "block_token.ListItem with 1 child leader='1.' prepend=3 loose=False")
self._check_repr_matches(doc.children[0].children[0], "block_token.ListItem with 1 child leader='1.' indentation=0 prepend=3 loose=False")

def test_table(self):
doc = Document("| Foo | Bar | Baz |\n|:--- |:---:| ---:|\n| Foo | Bar | Baz |\n")
Expand Down

0 comments on commit 6f70602

Please sign in to comment.