Skip to content

Commit 8f608b7

Browse files
committed
Merge pull request gjtorikian#98 from aroben/configurable-schemes
Allow customization of allowed <a href> URL schemes
2 parents 891463f + 99bc00e commit 8f608b7

File tree

2 files changed

+77
-5
lines changed

2 files changed

+77
-5
lines changed

lib/html/pipeline/sanitization_filter.rb

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,13 @@ class Pipeline
1515
# https://github.com/rgrove/sanitize/#readme
1616
#
1717
# Context options:
18-
# :whitelist - The sanitizer whitelist configuration to use. This can be one
19-
# of the options constants defined in this class or a custom
20-
# sanitize options hash.
18+
# :whitelist - The sanitizer whitelist configuration to use. This
19+
# can be one of the options constants defined in this
20+
# class or a custom sanitize options hash.
21+
# :anchor_schemes - The URL schemes to allow in <a href> attributes. The
22+
# default set is provided in the ANCHOR_SCHEMES
23+
# constant in this class. If passed, this overrides any
24+
# schemes specified in the whitelist configuration.
2125
#
2226
# This filter does not write additional information to the context.
2327
class SanitizationFilter < Filter
@@ -32,6 +36,9 @@ class SanitizationFilter < Filter
3236
TABLE = 'table'.freeze
3337
TABLE_SECTIONS = Set.new(%w(thead tbody tfoot).freeze)
3438

39+
# These schemes are the only ones allowed in <a href> attributes by default.
40+
ANCHOR_SCHEMES = ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac'].freeze
41+
3542
# The main sanitization whitelist. Only these elements and attributes are
3643
# allowed through by default.
3744
WHITELIST = {
@@ -64,7 +71,7 @@ class SanitizationFilter < Filter
6471
'vspace', 'width', 'itemprop']
6572
},
6673
:protocols => {
67-
'a' => {'href' => ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']},
74+
'a' => {'href' => ANCHOR_SCHEMES},
6875
'img' => {'src' => ['http', 'https', :relative]}
6976
},
7077
:transformers => [
@@ -104,7 +111,13 @@ def call
104111
# The whitelist to use when sanitizing. This can be passed in the context
105112
# hash to the filter but defaults to WHITELIST constant value above.
106113
def whitelist
  base = context[:whitelist] || WHITELIST
  schemes = context[:anchor_schemes]

  # Without an :anchor_schemes override the configured whitelist is used as-is.
  return base unless schemes

  # Work on copies so neither the WHITELIST constant nor a caller-supplied
  # whitelist hash is mutated.
  customized = base.dup
  protocols = (customized[:protocols] || {}).dup

  # Replace only the 'href' entry for <a>; protocol rules for other <a>
  # attributes are carried over by the merge.
  protocols['a'] = (protocols['a'] || {}).merge('href' => schemes)
  customized[:protocols] = protocols
  customized
end
109122
end
110123
end

test/html/pipeline/sanitization_filter_test.rb

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,65 @@ def test_github_specific_protocols_are_not_removed
4545
assert_equal stuff, SanitizationFilter.call(stuff).to_s
4646
end
4747

48+
# An href whose scheme is not in the default allowed set is stripped, while
# the anchor element and its text are kept.
def test_unknown_schemes_are_removed
  input = '<a href="something-weird://heyyy">Wat</a> is this'
  expected = '<a>Wat</a> is this'

  assert_equal expected, SanitizationFilter.call(input).to_s
end
53+
54+
# An empty :anchor_schemes list allows no href schemes at all, so even an
# http link loses its href.
def test_standard_schemes_are_removed_if_not_specified_in_anchor_schemes
  input = '<a href="http://www.example.com/">No href for you</a>'
  context = {:anchor_schemes => []}

  output = SanitizationFilter.new(input, context).call.to_s

  assert_equal '<a>No href for you</a>', output
end
60+
61+
# A scheme listed in :anchor_schemes is allowed through, so the markup comes
# back unchanged.
def test_custom_anchor_schemes_are_not_removed
  input = '<a href="something-weird://heyyy">Wat</a> is this'
  context = {:anchor_schemes => ['something-weird']}

  output = SanitizationFilter.new(input, context).call.to_s

  assert_equal input, output
end
67+
68+
# :anchor_schemes is merged into the custom whitelist's protocol rules for
# <a>: the href with the custom scheme is kept, while the ping attribute is
# still dropped because the whitelist only allows http for it.
def test_anchor_schemes_are_merged_with_other_anchor_restrictions
  input = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'
  custom_whitelist = {
    :elements   => ['a'],
    :attributes => {'a' => ['href', 'ping']},
    :protocols  => {'a' => {'ping' => ['http']}}
  }
  context = {:whitelist => custom_whitelist, :anchor_schemes => ['something-weird']}

  output = SanitizationFilter.new(input, context).call.to_s

  assert_equal '<a href="something-weird://heyyy">Wat</a> is this', output
end
79+
80+
# When no :anchor_schemes option is passed, the href schemes declared in the
# custom whitelist's :protocols entry are honoured, so the markup is unchanged.
def test_uses_anchor_schemes_from_whitelist_when_not_separately_specified
  input = '<a href="something-weird://heyyy">Wat</a> is this'
  custom_whitelist = {
    :elements   => ['a'],
    :attributes => {'a' => ['href']},
    :protocols  => {'a' => {'href' => ['something-weird']}}
  }

  output = SanitizationFilter.new(input, {:whitelist => custom_whitelist}).call.to_s

  assert_equal input, output
end
91+
92+
# The default WHITELIST must list the standard schemes for <a href>.
def test_whitelist_contains_default_anchor_schemes
  expected = ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']

  # assert_equal takes (expected, actual); this order keeps failure output
  # labelled correctly and matches the other tests in this file.
  assert_equal expected, SanitizationFilter::WHITELIST[:protocols]['a']['href']
end
95+
96+
# With the FULL whitelist constant, the sanitized output contains only the
# text: the anchor markup is gone.
def test_whitelist_from_full_constant
  input = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'

  output = SanitizationFilter.new(input, :whitelist => SanitizationFilter::FULL).call.to_s

  assert_equal 'Wat is this', output
end
102+
103+
# The ANCHOR_SCHEMES constant must expose the default <a href> schemes.
def test_exports_default_anchor_schemes
  expected = ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']

  # assert_equal takes (expected, actual); this order keeps failure output
  # labelled correctly and matches the other tests in this file.
  assert_equal expected, SanitizationFilter::ANCHOR_SCHEMES
end
106+
48107
def test_script_contents_are_removed
49108
orig = '<script>JavaScript!</script>'
50109
assert_equal "", SanitizationFilter.call(orig).to_s

0 commit comments

Comments
 (0)