Skip to content

Commit 4fd03de

Browse files
committed
optimize empty-line handling for li and blockquote content
Signed-off-by: chrispy <chrispy@synopsys.com>
1 parent 6258f5c commit 4fd03de

File tree

3 files changed: +34 -14 lines changed

markdownify/__init__.py

Lines changed: 31 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66

77
convert_heading_re = re.compile(r'convert_h(\d+)')
8-
line_beginning_re = re.compile(r'^', re.MULTILINE)
8+
line_with_content_re = re.compile(r'^(.*)', flags=re.MULTILINE)
99
whitespace_re = re.compile(r'[\t ]+')
1010
all_whitespace_re = re.compile(r'[\t \r\n]+')
1111
newline_whitespace_re = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
@@ -256,9 +256,6 @@ def escape(self, text):
256256
text = text.replace('_', r'\_')
257257
return text
258258

259-
def indent(self, text, columns):
260-
return line_beginning_re.sub(' ' * columns, text) if text else ''
261-
262259
def underline(self, text, pad_char):
263260
text = (text or '').rstrip()
264261
return '\n\n%s\n%s\n\n' % (text, pad_char * len(text)) if text else ''
@@ -284,11 +281,20 @@ def convert_a(self, el, text, convert_as_inline):
284281
convert_b = abstract_inline_conversion(lambda self: 2 * self.options['strong_em_symbol'])
285282

286283
def convert_blockquote(self, el, text, convert_as_inline):
287-
284+
# handle some early-exit scenarios
285+
text = (text or '').strip()
288286
if convert_as_inline:
289-
return ' ' + text.strip() + ' '
287+
return ' ' + text + ' '
288+
if not text:
289+
return "\n"
290+
291+
# indent lines with blockquote marker
292+
def _indent_for_blockquote(match):
293+
line_content = match.group(1)
294+
return '> ' + line_content if line_content else '>'
295+
text = line_with_content_re.sub(_indent_for_blockquote, text)
290296

291-
return '\n' + (line_beginning_re.sub('> ', text.strip()) + '\n\n') if text else ''
297+
return '\n' + text + '\n\n'
292298

293299
def convert_br(self, el, text, convert_as_inline):
294300
if convert_as_inline:
@@ -369,6 +375,12 @@ def convert_list(self, el, text, convert_as_inline):
369375
convert_ol = convert_list
370376

371377
def convert_li(self, el, text, convert_as_inline):
378+
# handle some early-exit scenarios
379+
text = (text or '').strip()
380+
if not text:
381+
return "\n"
382+
383+
# determine list item bullet character to use
372384
parent = el.parent
373385
if parent is not None and parent.name == 'ol':
374386
if parent.get("start") and str(parent.get("start")).isnumeric():
@@ -385,10 +397,18 @@ def convert_li(self, el, text, convert_as_inline):
385397
bullets = self.options['bullets']
386398
bullet = bullets[depth % len(bullets)]
387399
bullet = bullet + ' '
388-
text = (text or '').strip()
389-
text = self.indent(text, len(bullet))
390-
if text:
391-
text = bullet + text[len(bullet):]
400+
bullet_width = len(bullet)
401+
bullet_indent = ' ' * bullet_width
402+
403+
# indent content lines by bullet width
404+
def _indent_for_li(match):
405+
line_content = match.group(1)
406+
return bullet_indent + line_content if line_content else ''
407+
text = line_with_content_re.sub(_indent_for_li, text)
408+
409+
# insert bullet into first-line indent whitespace
410+
text = bullet + text[bullet_width:]
411+
392412
return '%s\n' % text
393413

394414
def convert_p(self, el, text, convert_as_inline):

tests/test_conversions.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ def test_blockquote():
5757

5858
def test_blockquote_with_nested_paragraph():
5959
assert md('<blockquote><p>Hello</p></blockquote>') == '\n> Hello\n\n'
60-
assert md('<blockquote><p>Hello</p><p>Hello again</p></blockquote>') == '\n> Hello\n> \n> Hello again\n\n'
60+
assert md('<blockquote><p>Hello</p><p>Hello again</p></blockquote>') == '\n> Hello\n>\n> Hello again\n\n'
6161

6262

6363
def test_blockquote_with_paragraph():

tests/test_lists.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def test_ol():
4747
assert md('<ol start="-1"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
4848
assert md('<ol start="foo"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
4949
assert md('<ol start="1.5"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
50-
assert md('<ol start="1234"><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ol>') == '\n\n1234. first para\n \n second para\n1235. third para\n \n fourth para\n'
50+
assert md('<ol start="1234"><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ol>') == '\n\n1234. first para\n\n second para\n1235. third para\n\n fourth para\n'
5151

5252

5353
def test_nested_ols():
@@ -64,7 +64,7 @@ def test_ul():
6464
<li> c
6565
</li>
6666
</ul>""") == '\n\n* a\n* b\n* c\n'
67-
assert md('<ul><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ul>') == '\n\n* first para\n \n second para\n* third para\n \n fourth para\n'
67+
assert md('<ul><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ul>') == '\n\n* first para\n\n second para\n* third para\n\n fourth para\n'
6868

6969

7070
def test_inline_ul():

0 commit comments

Comments
 (0)