Skip to content

Commit d5b3874

Browse files
authored
♻️ Parse entities to text_special token (#280)
Entities are now parsed into a `text_special` token rather than being added directly to the pending text. The `text_join` core rule then joins the token into the surrounding text later, but only after the typographic rules have been applied. Implements upstream: https://github.com/markdown-it/markdown-it/commit/3fc0deb38b5a8b2eb8f46c727cc4e299e5ae5f9c
1 parent ea27cc8 commit d5b3874

File tree

3 files changed

+55
-32
lines changed

3 files changed

+55
-32
lines changed

markdown_it/rules_inline/entity.py

+35-32
Original file line numberDiff line numberDiff line change
@@ -16,35 +16,38 @@ def entity(state: StateInline, silent: bool) -> bool:
1616
if state.src[pos] != "&":
1717
return False
1818

19-
if (pos + 1) < maximum:
20-
if state.src[pos + 1] == "#":
21-
match = DIGITAL_RE.search(state.src[pos:])
22-
if match:
23-
if not silent:
24-
match1 = match.group(1)
25-
code = (
26-
int(match1[1:], 16)
27-
if match1[0].lower() == "x"
28-
else int(match1, 10)
29-
)
30-
state.pending += (
31-
fromCodePoint(code)
32-
if isValidEntityCode(code)
33-
else fromCodePoint(0xFFFD)
34-
)
35-
36-
state.pos += len(match.group(0))
37-
return True
38-
39-
else:
40-
match = NAMED_RE.search(state.src[pos:])
41-
if match and match.group(1) in entities:
42-
if not silent:
43-
state.pending += entities[match.group(1)]
44-
state.pos += len(match.group(0))
45-
return True
46-
47-
if not silent:
48-
state.pending += "&"
49-
state.pos += 1
50-
return True
19+
if pos + 1 >= maximum:
20+
return False
21+
22+
if state.src[pos + 1] == "#":
23+
if match := DIGITAL_RE.search(state.src[pos:]):
24+
if not silent:
25+
match1 = match.group(1)
26+
code = (
27+
int(match1[1:], 16) if match1[0].lower() == "x" else int(match1, 10)
28+
)
29+
30+
token = state.push("text_special", "", 0)
31+
token.content = (
32+
fromCodePoint(code)
33+
if isValidEntityCode(code)
34+
else fromCodePoint(0xFFFD)
35+
)
36+
token.markup = match.group(0)
37+
token.info = "entity"
38+
39+
state.pos += len(match.group(0))
40+
return True
41+
42+
else:
43+
if (match := NAMED_RE.search(state.src[pos:])) and match.group(1) in entities:
44+
if not silent:
45+
token = state.push("text_special", "", 0)
46+
token.content = entities[match.group(1)]
47+
token.markup = match.group(0)
48+
token.info = "entity"
49+
50+
state.pos += len(match.group(0))
51+
return True
52+
53+
return False

tests/test_port/fixtures/smartquotes.md

+13
Original file line numberDiff line numberDiff line change
@@ -177,3 +177,16 @@ Should be escapable:
177177
<p>&quot;foo&quot;</p>
178178
<p>&quot;foo&quot;</p>
179179
.
180+
181+
Should not replace entities:
182+
.
183+
&quot;foo&quot;
184+
185+
&quot;foo"
186+
187+
"foo&quot;
188+
.
189+
<p>&quot;foo&quot;</p>
190+
<p>&quot;foo&quot;</p>
191+
<p>&quot;foo&quot;</p>
192+
.

tests/test_port/fixtures/typographer.md

+7
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,10 @@ regression tests for #624
130130
<p>1–2–3</p>
131131
<p>1 – – 3</p>
132132
.
133+
134+
shouldn't replace entities
135+
.
136+
&#40;c) (c&#41; (c)
137+
.
138+
<p>(c) (c) ©</p>
139+
.

0 commit comments

Comments
 (0)