Skip to content

Commit 10d5b4d

Browse files
joker21663maximkurbatovchrisjsewell
authored
✨ Add allowed option for inline/block attributes (#115)
The `allowed` option accepts a list of allowed attribute names. If it is not `None`, any attribute whose name is not in this list is removed and placed in the token's meta under the key `"insecure_attrs"`. Co-authored-by: maximkurbatov <maximkurbatov@yandex-team.ru> Co-authored-by: Chris Sewell <chrisj_sewell@hotmail.com>
1 parent 3f7fcc6 commit 10d5b4d

File tree

3 files changed

+213
-29
lines changed

3 files changed

+213
-29
lines changed

mdit_py_plugins/attrs/index.py

Lines changed: 75 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from typing import List, Optional, Sequence
1+
from __future__ import annotations
2+
3+
from functools import partial
4+
from typing import Any, Sequence
25

36
from markdown_it import MarkdownIt
47
from markdown_it.rules_block import StateBlock
@@ -17,6 +20,7 @@ def attrs_plugin(
1720
after: Sequence[str] = ("image", "code_inline", "link_close", "span_close"),
1821
spans: bool = False,
1922
span_after: str = "link",
23+
allowed: Sequence[str] | None = None,
2024
) -> None:
2125
"""Parse inline attributes that immediately follow certain inline elements::
2226
@@ -48,36 +52,25 @@ def attrs_plugin(
4852
:param spans: If True, also parse attributes after spans of text, encapsulated by `[]`.
4953
Note Markdown link references take precedence over this syntax.
5054
:param span_after: The name of an inline rule after which spans may be specified.
55+
:param allowed: A list of allowed attribute names.
56+
If not ``None``, any attributes not in this list will be removed
57+
and placed in the token's meta under the key "insecure_attrs".
5158
"""
5259

53-
def _attr_inline_rule(state: StateInline, silent: bool) -> bool:
54-
if state.pending or not state.tokens:
55-
return False
56-
token = state.tokens[-1]
57-
if token.type not in after:
58-
return False
59-
try:
60-
new_pos, attrs = parse(state.src[state.pos :])
61-
except ParseError:
62-
return False
63-
token_index = _find_opening(state.tokens, len(state.tokens) - 1)
64-
if token_index is None:
65-
return False
66-
state.pos += new_pos + 1
67-
if not silent:
68-
attr_token = state.tokens[token_index]
69-
if "class" in attrs and "class" in token.attrs:
70-
attrs["class"] = f"{attr_token.attrs['class']} {attrs['class']}"
71-
attr_token.attrs.update(attrs)
72-
return True
73-
7460
if spans:
7561
md.inline.ruler.after(span_after, "span", _span_rule)
7662
if after:
77-
md.inline.ruler.push("attr", _attr_inline_rule)
63+
md.inline.ruler.push(
64+
"attr",
65+
partial(
66+
_attr_inline_rule,
67+
after=after,
68+
allowed=None if allowed is None else set(allowed),
69+
),
70+
)
7871

7972

80-
def attrs_block_plugin(md: MarkdownIt) -> None:
73+
def attrs_block_plugin(md: MarkdownIt, *, allowed: Sequence[str] | None = None) -> None:
8174
"""Parse block attributes.
8275
8376
Block attributes are attributes on a single line, with no other content.
@@ -93,12 +86,22 @@ def attrs_block_plugin(md: MarkdownIt) -> None:
9386
A paragraph, that will be assigned the class ``a b c``, and the identifier ``b``.
9487
9588
This syntax is inspired by Djot block attributes.
89+
90+
:param allowed: A list of allowed attribute names.
91+
If not ``None``, any attributes not in this list will be removed
92+
and placed in the token's meta under the key "insecure_attrs".
9693
"""
9794
md.block.ruler.before("fence", "attr", _attr_block_rule)
98-
md.core.ruler.after("block", "attr", _attr_resolve_block_rule)
95+
md.core.ruler.after(
96+
"block",
97+
"attr",
98+
partial(
99+
_attr_resolve_block_rule, allowed=None if allowed is None else set(allowed)
100+
),
101+
)
99102

100103

101-
def _find_opening(tokens: List[Token], index: int) -> Optional[int]:
104+
def _find_opening(tokens: Sequence[Token], index: int) -> int | None:
102105
"""Find the opening token index, if the token is closing."""
103106
if tokens[index].nesting != -1:
104107
return index
@@ -149,6 +152,34 @@ def _span_rule(state: StateInline, silent: bool) -> bool:
149152
return True
150153

151154

155+
def _attr_inline_rule(
    state: StateInline,
    silent: bool,
    after: Sequence[str],
    *,
    allowed: set[str] | None = None,
) -> bool:
    """Parse an attribute group that follows the last inline token.

    :param state: The inline parsing state; ``state.pos`` is advanced past
        the attribute group when one is consumed.
    :param silent: If True, only advance the position; no token is modified.
    :param after: Token types after which an attribute group is accepted.
    :param allowed: Attribute names that may be set on the token.
        ``None`` disables filtering; the value is handed on to ``_add_attrs``.
    :return: True if an attribute group was consumed, otherwise False.
    """
    # Nothing to attach to if there is pending text or no token yet.
    if state.pending or not state.tokens:
        return False
    token = state.tokens[-1]
    if token.type not in after:
        return False
    try:
        new_pos, attrs = parse(state.src[state.pos :])
    except ParseError:
        return False
    # If the last token is a closing token, attributes go on its opening token.
    token_index = _find_opening(state.tokens, len(state.tokens) - 1)
    if token_index is None:
        return False
    # NOTE(review): presumably ``new_pos`` indexes the closing brace, so +1
    # steps past it; confirm against ``parse``.
    state.pos += new_pos + 1
    if not silent:
        attr_token = state.tokens[token_index]
        # Merge classes with those already on the token that receives the
        # attributes (the opening token), not the last token, which may be
        # a closing token that carries no attributes.
        if "class" in attrs and "class" in attr_token.attrs:
            attrs["class"] = f"{attr_token.attrs['class']} {attrs['class']}"
        _add_attrs(attr_token, attrs, allowed)
    return True
181+
182+
152183
def _attr_block_rule(
153184
state: StateBlock, startLine: int, endLine: int, silent: bool
154185
) -> bool:
@@ -197,7 +228,7 @@ def _attr_block_rule(
197228
return True
198229

199230

200-
def _attr_resolve_block_rule(state: StateCore) -> None:
231+
def _attr_resolve_block_rule(state: StateCore, *, allowed: set[str] | None) -> None:
201232
"""Find attribute block then move its attributes to the next block."""
202233
i = 0
203234
len_tokens = len(state.tokens)
@@ -221,8 +252,23 @@ def _attr_resolve_block_rule(state: StateCore) -> None:
221252
if key == "class" or key not in next_token.attrs:
222253
next_token.attrs[key] = value
223254
else:
224-
# attribute block takes precedence over attributes in other blocks
225-
next_token.attrs.update(state.tokens[i].attrs)
255+
_add_attrs(next_token, state.tokens[i].attrs, allowed)
226256

227257
state.tokens.pop(i)
228258
len_tokens -= 1
259+
260+
261+
def _add_attrs(
    token: Token,
    attrs: dict[str, Any],
    allowed: set[str] | None,
) -> None:
    """Add attributes to a token, skipping any disallowed attributes.

    Disallowed attributes are recorded in ``token.meta["insecure_attrs"]``.
    Repeated calls for the same token accumulate there instead of replacing
    the earlier record.

    :param token: The token to update; its ``attrs`` and ``meta`` are
        modified in place.
    :param attrs: The attributes to add. The mapping passed in is not mutated.
    :param allowed: The permitted attribute names, or ``None`` to add every
        attribute without filtering.
    """
    if allowed is not None and (
        disallowed := {k: v for k, v in attrs.items() if k not in allowed}
    ):
        # Merge, do not overwrite: several attribute groups can target the
        # same token, and each may contribute rejected attributes.
        token.meta.setdefault("insecure_attrs", {}).update(disallowed)
        attrs = {k: v for k, v in attrs.items() if k in allowed}

    # new attributes take precedence over existing attributes
    token.attrs.update(attrs)

tests/test_attrs.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,20 @@ def test_attrs(line, title, input, expected):
2020
text = md.render(input)
2121
print(text)
2222
assert text.rstrip() == expected.rstrip()
23+
24+
25+
def test_attrs_allowed(data_regression):
26+
allowed = ["safe"]
27+
md = (
28+
MarkdownIt("commonmark")
29+
.use(attrs_plugin, allowed=allowed)
30+
.use(attrs_block_plugin, allowed=allowed)
31+
)
32+
tokens = md.parse("""
33+
{danger1=a safe=b}
34+
{danger2=c safe=d}
35+
# header
36+
37+
`inline`{safe=a danger=b}
38+
""")
39+
data_regression.check([t.as_dict() for t in tokens])
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
- attrs:
2+
- - safe
3+
- d
4+
block: true
5+
children: null
6+
content: ''
7+
hidden: false
8+
info: ''
9+
level: 0
10+
map:
11+
- 3
12+
- 4
13+
markup: '#'
14+
meta:
15+
insecure_attrs:
16+
danger1: a
17+
danger2: c
18+
nesting: 1
19+
tag: h1
20+
type: heading_open
21+
- attrs: null
22+
block: true
23+
children:
24+
- attrs: null
25+
block: false
26+
children: null
27+
content: header
28+
hidden: false
29+
info: ''
30+
level: 0
31+
map: null
32+
markup: ''
33+
meta: {}
34+
nesting: 0
35+
tag: ''
36+
type: text
37+
content: header
38+
hidden: false
39+
info: ''
40+
level: 1
41+
map:
42+
- 3
43+
- 4
44+
markup: ''
45+
meta: {}
46+
nesting: 0
47+
tag: ''
48+
type: inline
49+
- attrs: null
50+
block: true
51+
children: null
52+
content: ''
53+
hidden: false
54+
info: ''
55+
level: 0
56+
map: null
57+
markup: '#'
58+
meta: {}
59+
nesting: -1
60+
tag: h1
61+
type: heading_close
62+
- attrs: null
63+
block: true
64+
children: null
65+
content: ''
66+
hidden: false
67+
info: ''
68+
level: 0
69+
map:
70+
- 5
71+
- 6
72+
markup: ''
73+
meta: {}
74+
nesting: 1
75+
tag: p
76+
type: paragraph_open
77+
- attrs: null
78+
block: true
79+
children:
80+
- attrs:
81+
- - safe
82+
- a
83+
block: false
84+
children: null
85+
content: inline
86+
hidden: false
87+
info: ''
88+
level: 0
89+
map: null
90+
markup: '`'
91+
meta:
92+
insecure_attrs:
93+
danger: b
94+
nesting: 0
95+
tag: code
96+
type: code_inline
97+
content: '`inline`{safe=a danger=b}'
98+
hidden: false
99+
info: ''
100+
level: 1
101+
map:
102+
- 5
103+
- 6
104+
markup: ''
105+
meta: {}
106+
nesting: 0
107+
tag: ''
108+
type: inline
109+
- attrs: null
110+
block: true
111+
children: null
112+
content: ''
113+
hidden: false
114+
info: ''
115+
level: 0
116+
map: null
117+
markup: ''
118+
meta: {}
119+
nesting: -1
120+
tag: p
121+
type: paragraph_close

0 commit comments

Comments
 (0)