Skip to content

Commit b7e8da3

Browse files
authored
Merge pull request #694 from willkg/minor-fixes
Convert tags, skip_tags, recognized_tags to sets; fix doctests; f-strings
2 parents b2a0d57 + aec2c0e commit b7e8da3

File tree

8 files changed

+229
-237
lines changed

8 files changed

+229
-237
lines changed

bleach/html5lib_shim.py

Lines changed: 123 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -81,127 +81,129 @@
8181

8282
#: Set of valid HTML tags, from WHATWG HTML Living Standard as of 2018-10-17
8383
#: https://html.spec.whatwg.org/multipage/indices.html#elements-3
84-
HTML_TAGS = [
85-
"a",
86-
"abbr",
87-
"address",
88-
"area",
89-
"article",
90-
"aside",
91-
"audio",
92-
"b",
93-
"base",
94-
"bdi",
95-
"bdo",
96-
"blockquote",
97-
"body",
98-
"br",
99-
"button",
100-
"canvas",
101-
"caption",
102-
"cite",
103-
"code",
104-
"col",
105-
"colgroup",
106-
"data",
107-
"datalist",
108-
"dd",
109-
"del",
110-
"details",
111-
"dfn",
112-
"dialog",
113-
"div",
114-
"dl",
115-
"dt",
116-
"em",
117-
"embed",
118-
"fieldset",
119-
"figcaption",
120-
"figure",
121-
"footer",
122-
"form",
123-
"h1",
124-
"h2",
125-
"h3",
126-
"h4",
127-
"h5",
128-
"h6",
129-
"head",
130-
"header",
131-
"hgroup",
132-
"hr",
133-
"html",
134-
"i",
135-
"iframe",
136-
"img",
137-
"input",
138-
"ins",
139-
"kbd",
140-
"keygen",
141-
"label",
142-
"legend",
143-
"li",
144-
"link",
145-
"map",
146-
"mark",
147-
"menu",
148-
"meta",
149-
"meter",
150-
"nav",
151-
"noscript",
152-
"object",
153-
"ol",
154-
"optgroup",
155-
"option",
156-
"output",
157-
"p",
158-
"param",
159-
"picture",
160-
"pre",
161-
"progress",
162-
"q",
163-
"rp",
164-
"rt",
165-
"ruby",
166-
"s",
167-
"samp",
168-
"script",
169-
"section",
170-
"select",
171-
"slot",
172-
"small",
173-
"source",
174-
"span",
175-
"strong",
176-
"style",
177-
"sub",
178-
"summary",
179-
"sup",
180-
"table",
181-
"tbody",
182-
"td",
183-
"template",
184-
"textarea",
185-
"tfoot",
186-
"th",
187-
"thead",
188-
"time",
189-
"title",
190-
"tr",
191-
"track",
192-
"u",
193-
"ul",
194-
"var",
195-
"video",
196-
"wbr",
197-
]
84+
HTML_TAGS = frozenset(
85+
(
86+
"a",
87+
"abbr",
88+
"address",
89+
"area",
90+
"article",
91+
"aside",
92+
"audio",
93+
"b",
94+
"base",
95+
"bdi",
96+
"bdo",
97+
"blockquote",
98+
"body",
99+
"br",
100+
"button",
101+
"canvas",
102+
"caption",
103+
"cite",
104+
"code",
105+
"col",
106+
"colgroup",
107+
"data",
108+
"datalist",
109+
"dd",
110+
"del",
111+
"details",
112+
"dfn",
113+
"dialog",
114+
"div",
115+
"dl",
116+
"dt",
117+
"em",
118+
"embed",
119+
"fieldset",
120+
"figcaption",
121+
"figure",
122+
"footer",
123+
"form",
124+
"h1",
125+
"h2",
126+
"h3",
127+
"h4",
128+
"h5",
129+
"h6",
130+
"head",
131+
"header",
132+
"hgroup",
133+
"hr",
134+
"html",
135+
"i",
136+
"iframe",
137+
"img",
138+
"input",
139+
"ins",
140+
"kbd",
141+
"keygen",
142+
"label",
143+
"legend",
144+
"li",
145+
"link",
146+
"map",
147+
"mark",
148+
"menu",
149+
"meta",
150+
"meter",
151+
"nav",
152+
"noscript",
153+
"object",
154+
"ol",
155+
"optgroup",
156+
"option",
157+
"output",
158+
"p",
159+
"param",
160+
"picture",
161+
"pre",
162+
"progress",
163+
"q",
164+
"rp",
165+
"rt",
166+
"ruby",
167+
"s",
168+
"samp",
169+
"script",
170+
"section",
171+
"select",
172+
"slot",
173+
"small",
174+
"source",
175+
"span",
176+
"strong",
177+
"style",
178+
"sub",
179+
"summary",
180+
"sup",
181+
"table",
182+
"tbody",
183+
"td",
184+
"template",
185+
"textarea",
186+
"tfoot",
187+
"th",
188+
"thead",
189+
"time",
190+
"title",
191+
"tr",
192+
"track",
193+
"u",
194+
"ul",
195+
"var",
196+
"video",
197+
"wbr",
198+
)
199+
)
198200

199201

200202
#: Set of block-level HTML tags, as per https://github.com/mozilla/bleach/issues/369
201203
#: from mozilla on 2019.07.11
202204
#: https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements#Elements
203205
HTML_TAGS_BLOCK_LEVEL = frozenset(
204-
[
206+
(
205207
"address",
206208
"article",
207209
"aside",
@@ -235,7 +237,7 @@
235237
"section",
236238
"table",
237239
"ul",
238-
]
240+
)
239241
)
240242

241243

@@ -476,15 +478,17 @@ class BleachHTMLParser(HTMLParser):
476478

477479
def __init__(self, tags, strip, consume_entities, **kwargs):
478480
"""
479-
:arg tags: list of allowed tags--everything else is either stripped or
481+
:arg tags: set of allowed tags--everything else is either stripped or
480482
escaped; if None, then this doesn't look at tags at all
481483
:arg strip: whether to strip disallowed tags (True) or escape them (False);
482484
if tags=None, then this doesn't have any effect
483485
:arg consume_entities: whether to consume entities (default behavior) or
484486
leave them as is when tokenizing (BleachHTMLTokenizer-added behavior)
485487
486488
"""
487-
self.tags = [tag.lower() for tag in tags] if tags is not None else None
489+
self.tags = (
490+
frozenset((tag.lower() for tag in tags)) if tags is not None else None
491+
)
488492
self.strip = strip
489493
self.consume_entities = consume_entities
490494
super().__init__(**kwargs)
@@ -694,7 +698,7 @@ def escape_base_amp(self, stoken):
694698
# Only leave entities in that are not ambiguous. If they're
695699
# ambiguous, then we escape the ampersand.
696700
if entity is not None and convert_entity(entity) is not None:
697-
yield "&" + entity + ";"
701+
yield f"&{entity};"
698702

699703
# Length of the entity plus 2--one for & at the beginning
700704
# and one for ; at the end

bleach/linkifier.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -120,17 +120,18 @@ def __init__(
120120
:arg list callbacks: list of callbacks to run when adjusting tag attributes;
121121
defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
122122
123-
:arg list skip_tags: list of tags that you don't want to linkify the
124-
contents of; for example, you could set this to ``['pre']`` to skip
125-
linkifying contents of ``pre`` tags
123+
:arg set skip_tags: set of tags that you don't want to linkify the
124+
contents of; for example, you could set this to ``{'pre'}`` to skip
125+
linkifying contents of ``pre`` tags; ``None`` means you don't
126+
want linkify to skip any tags
126127
127128
:arg bool parse_email: whether or not to linkify email addresses
128129
129130
:arg url_re: url matching regex
130131
131132
:arg email_re: email matching regex
132133
133-
:arg list recognized_tags: the list of tags that linkify knows about;
134+
:arg set recognized_tags: the set of tags that linkify knows about;
134135
everything else gets escaped
135136
136137
:returns: linkified text as unicode
@@ -145,7 +146,7 @@ def __init__(
145146
# Create a parser/tokenizer that allows all HTML tags and escapes
146147
# anything not in that list.
147148
self.parser = html5lib_shim.BleachHTMLParser(
148-
tags=recognized_tags,
149+
tags=frozenset(recognized_tags),
149150
strip=False,
150151
consume_entities=False,
151152
namespaceHTMLElements=False,
@@ -221,8 +222,8 @@ def __init__(
221222
:arg list callbacks: list of callbacks to run when adjusting tag attributes;
222223
defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
223224
224-
:arg list skip_tags: list of tags that you don't want to linkify the
225-
contents of; for example, you could set this to ``['pre']`` to skip
225+
:arg set skip_tags: set of tags that you don't want to linkify the
226+
contents of; for example, you could set this to ``{'pre'}`` to skip
226227
linkifying contents of ``pre`` tags
227228
228229
:arg bool parse_email: whether or not to linkify email addresses
@@ -235,7 +236,7 @@ def __init__(
235236
super().__init__(source)
236237

237238
self.callbacks = callbacks or []
238-
self.skip_tags = skip_tags or []
239+
self.skip_tags = skip_tags or {}
239240
self.parse_email = parse_email
240241

241242
self.url_re = url_re

0 commit comments

Comments
 (0)