diff --git a/net/base/escape.cc b/net/base/escape.cc index 7dd0ba222e8a97..75eba946cf2565 100644 --- a/net/base/escape.cc +++ b/net/base/escape.cc @@ -80,7 +80,11 @@ std::string Escape(const std::string& text, const Charmap& charmap, // you could get different behavior if you copy and paste the URL, or press // enter in the URL bar. The list of characters that fall into this category // are the ones labeled PASS (allow either escaped or unescaped) in the big -// lookup table at the top of googleurl/src/url_canon_path.cc +// lookup table at the top of googleurl/src/url_canon_path.cc. Also, characters +// that have CHAR_QUERY set in googleurl/src/url_canon_internal.cc but are not +// allowed in query strings according to http://www.ietf.org/rfc/rfc3261.txt are +// not unescaped, to avoid turning a valid url according to spec into an +// invalid one. const char kUrlUnescape[128] = { // NULL, control chars... 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -92,11 +96,11 @@ const char kUrlUnescape[128] = { // @ A B C D E F G H I J K L M N O 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // P Q R S T U V W X Y Z [ \ ] ^ _ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // ` a b c d e f g h i j k l m n o - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // p q r s t u v w x y z { | } ~ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 }; template diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index fe180590362683..4add9832601e83 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -4,6 +4,8 @@ #include "net/base/net_util.h" +#include + #include #include "base/file_path.h" @@ -2830,6 +2832,78 @@ TEST(NetUtilTest, FormatUrlParsed) { formatted.substr(parsed.path.begin, parsed.path.len)); } +// Make sure that calling FormatUrl on a GURL and then converting back to a 
GURL
+// results in the original GURL, for each ASCII character 32-127 in the path.
+TEST(NetUtilTest, FormatUrlRoundTripPathASCII) {
+  for (unsigned char test_char = 32; test_char < 128; ++test_char) {
+    GURL url(std::string("http://www.google.com/") +
+             static_cast<char>(test_char));
+    size_t prefix_len;  // Out-param of FormatUrl; not checked here.
+    string16 formatted = FormatUrl(
+        url, "", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL,
+        &prefix_len, NULL);
+    EXPECT_EQ(url.spec(), GURL(formatted).spec());
+  }
+}
+
+// Make sure that calling FormatUrl on a GURL and then converting back to a GURL
+// results in the original GURL, for each %-escaped ASCII char in the path.
+TEST(NetUtilTest, FormatUrlRoundTripPathEscaped) {
+  for (unsigned char test_char = 32; test_char < 128; ++test_char) {
+    std::string original_url("http://www.google.com/");
+    original_url.push_back('%');  // Start of the percent-escape sequence.
+    original_url.append(base::HexEncode(&test_char, 1));
+
+    GURL url(original_url);
+    size_t prefix_len;  // Out-param of FormatUrl; not checked here.
+    string16 formatted = FormatUrl(
+        url, "", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL,
+        &prefix_len, NULL);
+    EXPECT_EQ(url.spec(), GURL(formatted).spec());
+  }
+}
+
+// Make sure that calling FormatUrl on a GURL and then converting back to a GURL
+// results in the original GURL, for each ASCII character 32-127 in the query.
+TEST(NetUtilTest, FormatUrlRoundTripQueryASCII) {
+  for (unsigned char test_char = 32; test_char < 128; ++test_char) {
+    GURL url(std::string("http://www.google.com/?") +
+             static_cast<char>(test_char));
+    size_t prefix_len;  // Out-param of FormatUrl; not checked here.
+    string16 formatted = FormatUrl(
+        url, "", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL,
+        &prefix_len, NULL);
+    EXPECT_EQ(url.spec(), GURL(formatted).spec());
+  }
+}
+
+// Make sure that calling FormatUrl on a GURL and then converting back to a GURL
+// only results in a different GURL for certain escaped query characters.
+TEST(NetUtilTest, FormatUrlRoundTripQueryEscaped) {
+  // The full list of characters which FormatUrl should unescape and GURL should
+  // not re-escape in a query string; only these alter the round-tripped spec.
+  const char* kUnescapedCharacters =
+      "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~";
+  for (unsigned char test_char = 0; test_char < 128; ++test_char) {
+    std::string original_url("http://www.google.com/?");
+    original_url.push_back('%');  // Start of the percent-escape sequence.
+    original_url.append(base::HexEncode(&test_char, 1));
+
+    GURL url(original_url);
+    size_t prefix_len;  // Out-param of FormatUrl; not checked here.
+    string16 formatted = FormatUrl(
+        url, "", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL,
+        &prefix_len, NULL);
+
+    if (test_char &&  // strchr(s, 0) matches the terminator; exclude NUL.
+        strchr(kUnescapedCharacters, static_cast<char>(test_char))) {
+      EXPECT_NE(url.spec(), GURL(formatted).spec());  // Stays unescaped.
+    } else {
+      EXPECT_EQ(url.spec(), GURL(formatted).spec());  // Round-trips.
+    }
+  }
+}
+
 TEST(NetUtilTest, FormatUrlWithOffsets) {
   const AdjustOffsetCase null_cases[] = {
     {0, string16::npos},