Skip to content

Commit

Permalink
Revert of Improve canonicalization of mailto url path components (pat…
Browse files Browse the repository at this point in the history
…chset chromium#2 id:20001 of https://codereview.chromium.org/2817213002/ )

Reason for revert:
appears to be breaking fast/url/mailto.html on at least the Mac bots:
https://build.chromium.org/p/chromium.webkit/builders/WebKit%20Mac10.10/builds/32726
https://build.chromium.org/p/chromium.webkit/builders/WebKit%20Mac10.12/builds/1614
https://build.chromium.org/p/chromium.webkit/builders/WebKit%20Mac10.9/builds/45169

diffs:
-PASS canonicalize('mailto:addr1, addr2') is 'mailto:addr1, addr2'
+FAIL canonicalize('mailto:addr1, addr2') should be mailto:addr1, addr2. Was mailto:addr1,%20addr2.

Original issue's description:
> Improve canonicalization of mailto url path components
>
> The canonicalization of the path component of mailto urls is too lax, leading to
> information disclosure and possible command injection attacks against mail
> clients. To fix this, we will percent-encode more characters in the path
> component of mailto urls, matching other browsers.
>
> BUG=711020
> TEST=url_unittests
>
> Review-Url: https://codereview.chromium.org/2817213002
> Cr-Commit-Position: refs/heads/master@{#465046}
> Committed: https://chromium.googlesource.com/chromium/src/+/484ff36cdcb8dcf5efa999a471d1d509c0a8a5f2

TBR=brettw@chromium.org,elawrence@chromium.org
# Skipping CQ checks because original CL landed less than 1 days ago.
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=711020

Review-Url: https://codereview.chromium.org/2823883005
Cr-Commit-Position: refs/heads/master@{#465063}
  • Loading branch information
alexmos authored and Commit bot committed Apr 17, 2017
1 parent c236847 commit 468109c
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 64 deletions.
21 changes: 2 additions & 19 deletions url/url_canon_mailtourl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,6 @@ namespace url {

namespace {

// Certain characters should be percent-encoded when they appear in the path
// component of a mailto URL, to improve compatibility and mitigate against
// command-injection attacks on mailto handlers. See https://crbug.com/711020.
template <typename UCHAR>
bool ShouldEncodeMailboxCharacter(UCHAR uch) {
if (uch < 0x21 || // space & control characters.
uch > 0x7e || // high-ascii characters.
uch == 0x22 || // quote.
uch == 0x3c || uch == 0x3e || // angle brackets.
uch == 0x60 || // backtick.
uch == 0x7b || uch == 0x7c || uch == 0x7d // braces and pipe.
) {
return true;
}
return false;
}

template <typename CHAR, typename UCHAR>
bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
const Parsed& parsed,
Expand All @@ -55,12 +38,12 @@ bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
new_parsed->path.begin = output->length();

// Copy the path using path URL's more lax escaping rules.
// We convert to UTF-8 and escape non-ASCII, but leave most
// We convert to UTF-8 and escape non-ASCII, but leave all
// ASCII characters alone.
int end = parsed.path.end();
for (int i = parsed.path.begin; i < end; ++i) {
UCHAR uch = static_cast<UCHAR>(source.path[i]);
if (ShouldEncodeMailboxCharacter<UCHAR>(uch))
if (uch < 0x20 || uch >= 0x80)
success &= AppendUTF8EscapedChar(source.path, &i, end, output);
else
output->push_back(static_cast<char>(uch));
Expand Down
59 changes: 14 additions & 45 deletions url/url_canon_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1847,51 +1847,20 @@ TEST(URLCanonTest, CanonicalizeMailtoURL) {
Component expected_path;
Component expected_query;
} cases[] = {
// Null character should be escaped to %00.
// Keep this test first in the list as it is handled specially below.
{"mailto:addr1\0addr2?foo",
"mailto:addr1%00addr2?foo",
true, Component(7, 13), Component(21, 3)},
{"mailto:addr1",
"mailto:addr1",
true, Component(7, 5), Component()},
{"mailto:addr1@foo.com",
"mailto:addr1@foo.com",
true, Component(7, 13), Component()},
{"mailto:addr1", "mailto:addr1", true, Component(7, 5), Component()},
{"mailto:addr1@foo.com", "mailto:addr1@foo.com", true, Component(7, 13), Component()},
// Trailing whitespace is stripped.
{"MaIlTo:addr1 \t ",
"mailto:addr1",
true, Component(7, 5), Component()},
{"MaIlTo:addr1?to=jon",
"mailto:addr1?to=jon",
true, Component(7, 5), Component(13,6)},
{"mailto:addr1,addr2",
"mailto:addr1,addr2",
true, Component(7, 11), Component()},
// Embedded spaces must be encoded.
{"mailto:addr1, addr2",
"mailto:addr1,%20addr2",
true, Component(7, 14), Component()},
{"mailto:addr1, addr2?subject=one two ",
"mailto:addr1,%20addr2?subject=one%20two",
true, Component(7, 14), Component(22, 17)},
{"mailto:addr1%2caddr2",
"mailto:addr1%2caddr2",
true, Component(7, 13), Component()},
{"mailto:\xF0\x90\x8C\x80",
"mailto:%F0%90%8C%80",
true, Component(7, 12), Component()},
{"MaIlTo:addr1 \t ", "mailto:addr1", true, Component(7, 5), Component()},
{"MaIlTo:addr1?to=jon", "mailto:addr1?to=jon", true, Component(7, 5), Component(13,6)},
{"mailto:addr1,addr2", "mailto:addr1,addr2", true, Component(7, 11), Component()},
{"mailto:addr1, addr2", "mailto:addr1, addr2", true, Component(7, 12), Component()},
{"mailto:addr1%2caddr2", "mailto:addr1%2caddr2", true, Component(7, 13), Component()},
{"mailto:\xF0\x90\x8C\x80", "mailto:%F0%90%8C%80", true, Component(7, 12), Component()},
// Null character should be escaped to %00
{"mailto:addr1\0addr2?foo", "mailto:addr1%00addr2?foo", true, Component(7, 13), Component(21, 3)},
// Invalid -- UTF-8 encoded surrogate value.
{"mailto:\xed\xa0\x80",
"mailto:%EF%BF%BD",
false, Component(7, 9), Component()},
{"mailto:addr1?",
"mailto:addr1?",
true, Component(7, 5), Component(13, 0)},
// Certain characters have special meanings and must be encoded.
{"mailto:! \x22$&()+,-./09:;<=>@AZ[\\]&_`az{|}~\x7f?Query! \x22$&()+,-./09:;<=>@AZ[\\]&_`az{|}~",
"mailto:!%20%22$&()+,-./09:;%3C=%3E@AZ[\\]&_%60az%7B%7C%7D~%7F?Query!%20%22$&()+,-./09:;%3C=%3E@AZ[\\]&_`az{|}~",
true, Component(7, 53), Component(61, 47)},
{"mailto:\xed\xa0\x80", "mailto:%EF%BF%BD", false, Component(7, 9), Component()},
{"mailto:addr1?", "mailto:addr1?", true, Component(7, 5), Component(13, 0)},
};

// Define outside of loop to catch bugs where components aren't reset
Expand All @@ -1900,8 +1869,8 @@ TEST(URLCanonTest, CanonicalizeMailtoURL) {

for (size_t i = 0; i < arraysize(cases); i++) {
int url_len = static_cast<int>(strlen(cases[i].input));
if (i == 0) {
// The first test case purposely has a '\0' in it -- don't count it
if (i == 8) {
// The 9th test case purposely has a '\0' in it -- don't count it
// as the string terminator.
url_len = 22;
}
Expand Down

0 comments on commit 468109c

Please sign in to comment.