Skip to content

Commit

Permalink
FormatURL used to escape characters in the query string that
Browse files Browse the repository at this point in the history
GURL doesn't escape when cannonicalizing, and are illegal
in the query portion of the URL according to spec.  Fix that
behavior, so if gurl is a valid url, so is GURL(FormatURL(gurl)).

BUG=90387
TEST=NetUtilTest.FormatUrlRoundTripPathASCII, NetUtilTest.FormatUrlRoundTripPathEscape, NetUtilTest.FormatUrlRoundTripQueryASCII, NetUtilTest.FormatUrlRoundTripQueryEscaped

Review URL: http://codereview.chromium.org/7649024

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@97093 0039d316-1c4b-4281-b951-d872f2087c98
  • Loading branch information
mmenke@chromium.org committed Aug 17, 2011
1 parent 740fb12 commit 86e4ff1
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 4 deletions.
12 changes: 8 additions & 4 deletions net/base/escape.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,11 @@ std::string Escape(const std::string& text, const Charmap& charmap,
// you could get different behavior if you copy and paste the URL, or press
// enter in the URL bar. The list of characters that fall into this category
// are the ones labeled PASS (allow either escaped or unescaped) in the big
// lookup table at the top of googleurl/src/url_canon_path.cc
// lookup table at the top of googleurl/src/url_canon_path.cc. Also, characters
// that have CHAR_QUERY set in googleurl/src/url_canon_internal.cc but are not
// allowed in query strings according to http://www.ietf.org/rfc/rfc3261.txt are
// not unescaped, to avoid turning a valid url according to spec into an
// invalid one.
const char kUrlUnescape[128] = {
// NULL, control chars...
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Expand All @@ -92,11 +96,11 @@ const char kUrlUnescape[128] = {
// @ A B C D E F G H I J K L M N O
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// P Q R S T U V W X Y Z [ \ ] ^ _
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
// ` a b c d e f g h i j k l m n o
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// p q r s t u v w x y z { | } ~ <NBSP>
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0
};

template<typename STR>
Expand Down
74 changes: 74 additions & 0 deletions net/base/net_util_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#include "net/base/net_util.h"

#include <string.h>

#include <algorithm>

#include "base/file_path.h"
Expand Down Expand Up @@ -2830,6 +2832,78 @@ TEST(NetUtilTest, FormatUrlParsed) {
formatted.substr(parsed.path.begin, parsed.path.len));
}

// Make sure that calling FormatUrl on a GURL and then converting back to a GURL
// results in the original GURL, for each ASCII character in the path.
TEST(NetUtilTest, FormatUrlRoundTripPathASCII) {
for (unsigned char test_char = 32; test_char < 128; ++test_char) {
GURL url(std::string("http://www.google.com/") +
static_cast<char>(test_char));
size_t prefix_len;
string16 formatted = FormatUrl(
url, "", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL,
&prefix_len, NULL);
EXPECT_EQ(url.spec(), GURL(formatted).spec());
}
}

// Make sure that calling FormatUrl on a GURL and then converting back to a GURL
// results in the original GURL, for each escaped ASCII character in the path.
TEST(NetUtilTest, FormatUrlRoundTripPathEscaped) {
for (unsigned char test_char = 32; test_char < 128; ++test_char) {
std::string original_url("http://www.google.com/");
original_url.push_back('%');
original_url.append(base::HexEncode(&test_char, 1));

GURL url(original_url);
size_t prefix_len;
string16 formatted = FormatUrl(
url, "", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL,
&prefix_len, NULL);
EXPECT_EQ(url.spec(), GURL(formatted).spec());
}
}

// Make sure that calling FormatUrl on a GURL and then converting back to a GURL
// results in the original GURL, for each ASCII character in the query.
TEST(NetUtilTest, FormatUrlRoundTripQueryASCII) {
for (unsigned char test_char = 32; test_char < 128; ++test_char) {
GURL url(std::string("http://www.google.com/?") +
static_cast<char>(test_char));
size_t prefix_len;
string16 formatted = FormatUrl(
url, "", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL,
&prefix_len, NULL);
EXPECT_EQ(url.spec(), GURL(formatted).spec());
}
}

// Make sure that calling FormatUrl on a GURL and then converting back to a GURL
// only results in a different GURL for certain characters.
TEST(NetUtilTest, FormatUrlRoundTripQueryEscaped) {
// A full list of characters which FormatURL should unescape and GURL should
// not escape again, when they appear in a query string.
const char* kUnescapedCharacters =
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~";
for (unsigned char test_char = 0; test_char < 128; ++test_char) {
std::string original_url("http://www.google.com/?");
original_url.push_back('%');
original_url.append(base::HexEncode(&test_char, 1));

GURL url(original_url);
size_t prefix_len;
string16 formatted = FormatUrl(
url, "", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL,
&prefix_len, NULL);

if (test_char &&
strchr(kUnescapedCharacters, static_cast<char>(test_char))) {
EXPECT_NE(url.spec(), GURL(formatted).spec());
} else {
EXPECT_EQ(url.spec(), GURL(formatted).spec());
}
}
}

TEST(NetUtilTest, FormatUrlWithOffsets) {
const AdjustOffsetCase null_cases[] = {
{0, string16::npos},
Expand Down

0 comments on commit 86e4ff1

Please sign in to comment.