Skip to content

Commit

Permalink
Remove unused attributes from markdown parser (forem#13484)
Browse files Browse the repository at this point in the history
* Remove definitely unused attributes

* Refactor and remove unused attributes

* Add parentheses

* Prevent alt tag from having Liquid tags inside

* Add test

* Remove more unused attributes

* Scrub any valid attributes

* Remove unused tags and add more tests

* Remove <i> tag; <em> is used by parser

* Remove <center> b/c it's deprecated though still valid

* <cite> is a safe tag and used by some

* Eh I changed my mind about center lol

* Add underline <u> back in

* Don't allow width changes

* Use frozen constant for regex

* Use #remove over #gsub
  • Loading branch information
Zhao-Andy authored Apr 27, 2021
1 parent 2478bf7 commit dc30ead
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 22 deletions.
16 changes: 12 additions & 4 deletions app/sanitizers/rendered_markdown_scrubber.rb
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
class RenderedMarkdownScrubber < Rails::Html::PermitScrubber
LIQUID_TAG_SYNTAX_REGEX = /\{%|%\}/.freeze
def initialize
super

self.tags = %w[
a abbr add aside b blockquote br button center cite code col colgroup dd del dl dt em em figcaption
h1 h2 h3 h4 h5 h6 hr i img kbd li mark ol p pre q rp rt ruby small source span strong sub sup table
a abbr add b blockquote br button center cite code col colgroup dd del dl dt em figcaption
h1 h2 h3 h4 h5 h6 hr img kbd li mark ol p pre q rp rt ruby small source span strong sub sup table
tbody td tfoot th thead time tr u ul video
]

self.attributes = %w[
alt colspan data-conversation data-lang data-no-instant data-url em height href id loop
name ref rel rowspan size span src start strong title type value width controls
alt colspan data-conversation data-lang data-no-instant data-url href id loop
name ref rel rowspan span src start title type value controls
]
end

Expand All @@ -30,12 +31,19 @@ def scrub_attributes(node)

scrub_css_attribute(node)
else
scrub_valid_attributes(node)
super
end
end

private

def scrub_valid_attributes(node)
node.attributes.each_value do |attribute|
attribute.value = attribute.value.remove(LIQUID_TAG_SYNTAX_REGEX)
end
end

def inside_codeblock?(node)
node.attributes["class"]&.value&.include?("highlight") ||
(node.name == "span" && node.ancestors.first.name == "code")
Expand Down
23 changes: 10 additions & 13 deletions app/services/markdown_processor/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class Parser
].freeze

WORDS_READ_PER_MINUTE = 275.0
ALLOWED_ATTRIBUTES = %w[href src alt].freeze

def initialize(content, source: nil, user: nil)
@content = content
Expand Down Expand Up @@ -52,10 +53,9 @@ def evaluate_markdown
markdown = Redcarpet::Markdown.new(renderer, Constants::Redcarpet::CONFIG)
allowed_tags = %w[strong abbr aside em p h1 h2 h3 h4 h5 h6 i u b code pre
br ul ol li small sup sub img a span hr blockquote kbd]
allowed_attributes = %w[href strong em ref rel src title alt]
ActionController::Base.helpers.sanitize markdown.render(@content),
ActionController::Base.helpers.sanitize(markdown.render(@content),
tags: allowed_tags,
attributes: allowed_attributes
attributes: ALLOWED_ATTRIBUTES)
end

def evaluate_limited_markdown
Expand All @@ -64,10 +64,9 @@ def evaluate_limited_markdown
renderer = Redcarpet::Render::HTMLRouge.new(hard_wrap: true, filter_html: false)
markdown = Redcarpet::Markdown.new(renderer, Constants::Redcarpet::CONFIG)
allowed_tags = %w[strong i u b em p br code]
allowed_attributes = %w[href strong em ref rel src title alt]
ActionController::Base.helpers.sanitize markdown.render(@content),
ActionController::Base.helpers.sanitize(markdown.render(@content),
tags: allowed_tags,
attributes: allowed_attributes
attributes: ALLOWED_ATTRIBUTES)
end

def evaluate_inline_limited_markdown
Expand All @@ -76,10 +75,9 @@ def evaluate_inline_limited_markdown
renderer = Redcarpet::Render::HTMLRouge.new(hard_wrap: true, filter_html: false)
markdown = Redcarpet::Markdown.new(renderer, Constants::Redcarpet::CONFIG)
allowed_tags = %w[strong i u b em code]
allowed_attributes = %w[href strong em ref rel src title alt]
ActionController::Base.helpers.sanitize markdown.render(@content),
ActionController::Base.helpers.sanitize(markdown.render(@content),
tags: allowed_tags,
attributes: allowed_attributes
attributes: ALLOWED_ATTRIBUTES)
end

def evaluate_listings_markdown
Expand All @@ -89,10 +87,9 @@ def evaluate_listings_markdown
markdown = Redcarpet::Markdown.new(renderer, Constants::Redcarpet::CONFIG)
allowed_tags = %w[strong abbr aside em p h4 h5 h6 i u b code pre
br ul ol li small sup sub a span hr blockquote kbd]
allowed_attributes = %w[href strong em ref rel src title alt]
ActionController::Base.helpers.sanitize markdown.render(@content),
ActionController::Base.helpers.sanitize(markdown.render(@content),
tags: allowed_tags,
attributes: allowed_attributes
attributes: ALLOWED_ATTRIBUTES)
end

def tags_used
Expand All @@ -112,7 +109,7 @@ def tags_used
def catch_xss_attempts(markdown)
return unless markdown.match?(Regexp.union(BAD_XSS_REGEX))

raise ArgumentError, "Invalid markdown detected"
raise ArgumentError, "Invalid markdown detected!"
end

def escape_liquid_tags_in_codeblock(content)
Expand Down
32 changes: 27 additions & 5 deletions spec/services/markdown_processor/parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ def generate_and_parse_markdown(raw_markdown)
expect(test).to be(content)
end

it "permits abbr tags" do
result = generate_and_parse_markdown("<abbr title=\"ol korrect\">OK</abbr>")
expect(result).to include("<abbr title=\"ol korrect\">OK</abbr>")
end

context "when rendering links markdown" do
# the following specs are testing HTMLRouge
it "renders properly if protocol http is included" do
Expand Down Expand Up @@ -361,11 +366,6 @@ def generate_and_parse_markdown(raw_markdown)
result = generate_and_parse_markdown("- [A](#a)\n - [B](#b)\n- [C](#c)")
expect(result).not_to include("<br>")
end

it "permits abbr and aside tags" do
result = generate_and_parse_markdown("<aside><abbr title=\"ol korrect\">OK</abbr><aside>")
expect(result).to include("<aside><abbr title=\"ol korrect\">OK</abbr><aside>")
end
end

context "when word as snake case" do
Expand Down Expand Up @@ -398,4 +398,26 @@ def generate_and_parse_markdown(raw_markdown)
expect(generate_and_parse_markdown(code_block)).to include("highlight ada")
end
end

context "when using a valid attribute" do
let(:example_text) { "{% liquid example %}" }

it "prevents the attribute from having Liquid tags inside" do
text = '<img src="x" alt="{% 404/404#">%}'
expect(generate_and_parse_markdown(text)).to exclude("{%")
end

it "does not scrub attributes in inline code" do
inline_code = "`#{example_text}`"
expect(generate_and_parse_markdown(inline_code)).to include(example_text)
double_fenced_code = "``#{example_text}``"
expect(generate_and_parse_markdown(double_fenced_code)).to include(example_text)
end

it "does not scrub attributes in codeblocks" do
codeblock = "```\n#{example_text}\n```"
expect(generate_and_parse_markdown(codeblock)).to include("{%")
expect(generate_and_parse_markdown(codeblock)).to include("%}")
end
end
end

0 comments on commit dc30ead

Please sign in to comment.