Skip to content

Commit

Permalink
Support unescape plus in query parameters (#60)
Browse files Browse the repository at this point in the history
* Support unescape plus in query parameters

Signed-off-by: Wayne Zhang <qiwzhang@google.com>

* update per comment

Signed-off-by: Wayne Zhang <qiwzhang@google.com>
  • Loading branch information
qiwzhang authored Sep 22, 2021
1 parent f1591a4 commit 3127eea
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 27 deletions.
65 changes: 43 additions & 22 deletions src/include/grpc_transcoding/path_matcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ class PathMatcher {
// The info associated with each method. The path matcher nodes
// will hold pointers to MethodData objects in this vector.
std::vector<std::unique_ptr<MethodData>> methods_;
UrlUnescapeSpec unescape_spec_;
UrlUnescapeSpec path_unescape_spec_;
bool query_param_unescape_plus_;

private:
friend class PathMatcherBuilder<Method>;
Expand Down Expand Up @@ -127,8 +128,14 @@ class PathMatcherBuilder {
const std::string& body_field_path, Method method);

// Change unescaping behavior, see UrlUnescapeSpec for available options.
void SetUrlUnescapeSpec(UrlUnescapeSpec unescape_spec) {
unescape_spec_ = unescape_spec;
// This only applies to path, not query parameters.
void SetUrlUnescapeSpec(UrlUnescapeSpec path_unescape_spec) {
path_unescape_spec_ = path_unescape_spec;
}
// If true, unescape '+' in query parameters to space. Default is false.
// To support [HTML 2.0 or RFC1866](https://tools.ietf.org/html/rfc1866#section-8.2.1).
void SetQueryParamUnescapePlus(bool query_param_unescape_plus) {
query_param_unescape_plus_ = query_param_unescape_plus;
}

// Returns a unique_ptr to a thread safe PathMatcher that contains all
Expand All @@ -151,8 +158,9 @@ class PathMatcherBuilder {
std::unordered_set<std::string> custom_verbs_;
typedef typename PathMatcher<Method>::MethodData MethodData;
std::vector<std::unique_ptr<MethodData>> methods_;
UrlUnescapeSpec unescape_spec_ =
UrlUnescapeSpec path_unescape_spec_ =
UrlUnescapeSpec::kAllCharactersExceptReserved;
bool query_param_unescape_plus_ = false;

friend class PathMatcher<Method>;
};
Expand Down Expand Up @@ -222,44 +230,53 @@ inline int hex_digit_to_int(char c) {
//
// If the next three characters are an escaped character then this function will
// also return what character is escaped.
bool GetEscapedChar(const std::string& src, size_t i,
UrlUnescapeSpec unescape_spec, char* out) {
//
// If unescape_plus is true, unescape '+' to space.
//
// return value: 0: not unescaped, >0: unescaped, number of used original characters.
//
int GetEscapedChar(const std::string& src, size_t i,
UrlUnescapeSpec unescape_spec, bool unescape_plus, char* out) {
if (unescape_plus && src[i] == '+') {
*out = ' ';
return 1;
}
if (i + 2 < src.size() && src[i] == '%') {
if (ascii_isxdigit(src[i + 1]) && ascii_isxdigit(src[i + 2])) {
char c =
(hex_digit_to_int(src[i + 1]) << 4) | hex_digit_to_int(src[i + 2]);
switch (unescape_spec) {
case UrlUnescapeSpec::kAllCharactersExceptReserved:
if (IsReservedChar(c)) {
return false;
return 0;
}
break;
case UrlUnescapeSpec::kAllCharactersExceptSlash:
if (c == '/') {
return false;
return 0;
}
break;
case UrlUnescapeSpec::kAllCharacters:
break;
}
*out = c;
return true;
return 3;
}
}
return false;
return 0;
}

// Unescapes string 'part' and returns the unescaped string. Reserved characters
// (as specified in RFC 6570) are not escaped if unescape_reserved_chars is
// false.
std::string UrlUnescapeString(const std::string& part,
UrlUnescapeSpec unescape_spec) {
UrlUnescapeSpec unescape_spec, bool unescape_plus) {
std::string unescaped;
// Check whether we need to escape at all.
bool needs_unescaping = false;
char ch = '\0';
for (size_t i = 0; i < part.size(); ++i) {
if (GetEscapedChar(part, i, unescape_spec, &ch)) {
if (GetEscapedChar(part, i, unescape_spec, unescape_plus, &ch) > 0) {
needs_unescaping = true;
break;
}
Expand All @@ -275,9 +292,10 @@ std::string UrlUnescapeString(const std::string& part,
char* p = begin;

for (size_t i = 0; i < part.size();) {
if (GetEscapedChar(part, i, unescape_spec, &ch)) {
int skip = GetEscapedChar(part, i, unescape_spec, unescape_plus, &ch);
if (skip > 0) {
*p++ = ch;
i += 3;
i += skip;
} else {
*p++ = part[i];
i += 1;
Expand All @@ -291,8 +309,8 @@ std::string UrlUnescapeString(const std::string& part,
template <class VariableBinding>
void ExtractBindingsFromPath(const std::vector<HttpTemplate::Variable>& vars,
const std::vector<std::string>& parts,
std::vector<VariableBinding>* bindings,
UrlUnescapeSpec unescape_spec) {
UrlUnescapeSpec unescape_spec,
std::vector<VariableBinding>* bindings) {
for (const auto& var : vars) {
// Determine the subpath bound to the variable based on the
// [start_segment, end_segment) segment range of the variable.
Expand All @@ -316,7 +334,7 @@ void ExtractBindingsFromPath(const std::vector<HttpTemplate::Variable>& vars,
// Joins parts with "/" to form a path string.
for (size_t i = var.start_segment; i < end_segment; ++i) {
// For multipart matches only unescape non-reserved characters.
binding.value += UrlUnescapeString(parts[i], var_unescape_spec);
binding.value += UrlUnescapeString(parts[i], var_unescape_spec, false);
if (i < end_segment - 1) {
binding.value += "/";
}
Expand All @@ -329,6 +347,7 @@ template <class VariableBinding>
void ExtractBindingsFromQueryParameters(
const std::string& query_params,
const std::unordered_set<std::string>& system_params,
bool query_param_unescape_plus,
std::vector<VariableBinding>* bindings) {
// The bindings in URL the query parameters have the following form:
// <field_path1>=value1&<field_path2>=value2&...&<field_pathN>=valueN
Expand All @@ -350,7 +369,8 @@ void ExtractBindingsFromQueryParameters(
VariableBinding binding;
split(name, '.', binding.field_path);
binding.value = UrlUnescapeString(param.substr(pos + 1),
UrlUnescapeSpec::kAllCharacters);
UrlUnescapeSpec::kAllCharacters,
query_param_unescape_plus);
bindings->emplace_back(std::move(binding));
}
}
Expand Down Expand Up @@ -424,7 +444,8 @@ PathMatcher<Method>::PathMatcher(PathMatcherBuilder<Method>&& builder)
: root_ptr_(std::move(builder.root_ptr_)),
custom_verbs_(std::move(builder.custom_verbs_)),
methods_(std::move(builder.methods_)),
unescape_spec_(builder.unescape_spec_) {}
path_unescape_spec_(builder.path_unescape_spec_),
query_param_unescape_plus_(builder.query_param_unescape_plus_) {}

// Lookup is a wrapper method for the recursive node Lookup. First, the wrapper
// splits the request path into slash-separated path parts. Next, the method
Expand Down Expand Up @@ -460,11 +481,11 @@ Method PathMatcher<Method>::Lookup(
MethodData* method_data = reinterpret_cast<MethodData*>(lookup_result.data);
if (variable_bindings != nullptr) {
variable_bindings->clear();
ExtractBindingsFromPath(method_data->variables, parts, variable_bindings,
unescape_spec_);
ExtractBindingsFromPath(method_data->variables, parts,
path_unescape_spec_, variable_bindings);
ExtractBindingsFromQueryParameters(
query_params, method_data->system_query_parameter_names,
variable_bindings);
query_param_unescape_plus_, variable_bindings);
}
if (body_field_path != nullptr) {
*body_field_path = method_data->body_field_path;
Expand Down
52 changes: 47 additions & 5 deletions test/path_matcher_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ class PathMatcherTest : public ::testing::Test {
builder_.SetUrlUnescapeSpec(unescape_spec);
}

void SetQueryParamUnescapePlus(bool query_param_unescape_plus) {
builder_.SetQueryParamUnescapePlus(query_param_unescape_plus);
}

void Build() { matcher_ = builder_.Build(); }

MethodInfo* LookupWithBodyFieldPath(std::string method, std::string path,
Expand Down Expand Up @@ -320,9 +324,10 @@ TEST_F(PathMatcherTest, PercentEscapesUnescapedForSingleSegment) {
EXPECT_NE(nullptr, a_c);

Bindings bindings;
EXPECT_EQ(Lookup("GET", "/a/p%20q%2Fr/c", &bindings), a_c);
// Also test '+', make sure it is not unescaped
EXPECT_EQ(Lookup("GET", "/a/p%20q%2Fr+/c", &bindings), a_c);
EXPECT_EQ(Bindings({
Binding{FieldPath{"x"}, "p q/r"},
Binding{FieldPath{"x"}, "p q/r+"},
}),
bindings);
}
Expand Down Expand Up @@ -383,9 +388,10 @@ TEST_F(PathMatcherTest, PercentEscapesNotUnescapedForMultiSegment2) {
EXPECT_NE(nullptr, a__c);

Bindings bindings;
EXPECT_EQ(Lookup("GET", "/a/p/foo%20foo/q/bar%2Fbar/c", &bindings), a__c);
// space (%20) is escaped, but slash (%2F) isn't.
EXPECT_EQ(Bindings({Binding{FieldPath{"x"}, "p/foo foo/q/bar%2Fbar"}}),
// Also test '+', make sure it is not unescaped
EXPECT_EQ(Lookup("GET", "/a/p/foo%20foo/q/bar%2Fbar+/c", &bindings), a__c);
// space (%20) is unescaped, but slash (%2F) isn't. nor +
EXPECT_EQ(Bindings({Binding{FieldPath{"x"}, "p/foo foo/q/bar%2Fbar+"}}),
bindings);
}

Expand Down Expand Up @@ -819,6 +825,42 @@ TEST_F(PathMatcherTest, VariableBindingsWithQueryParamsEncoding) {
bindings);
}

TEST_F(PathMatcherTest, QueryParameterNotUnescapePlus) {
MethodInfo* a = AddGetPath("/a");
Build();

EXPECT_NE(nullptr, a);

Bindings bindings;
// The bindings from the query parameters "x=Hello+world&y=%2B+%20"
// By default, only unescape percent-encoded %HH, but not '+'
EXPECT_EQ(LookupWithParams("GET", "/a", "x=Hello+world&y=%2B+%20", &bindings), a);
EXPECT_EQ(Bindings({
Binding{FieldPath{"x"}, "Hello+world"},
Binding{FieldPath{"y"}, "++ "},
}),
bindings);
}

TEST_F(PathMatcherTest, QueryParameterUnescapePlus) {
MethodInfo* a = AddGetPath("/a");
// Enable query_param_unescape_plus to unescape '+'
SetQueryParamUnescapePlus(true);
Build();

EXPECT_NE(nullptr, a);

Bindings bindings;
// The bindings from the query parameters "x=Hello+world&y=%2B+%20"
// Unescape percent-encoded %HH, and convert '+' to space
EXPECT_EQ(LookupWithParams("GET", "/a", "x=Hello+world&y=%2B+%20", &bindings), a);
EXPECT_EQ(Bindings({
Binding{FieldPath{"x"}, "Hello world"},
Binding{FieldPath{"y"}, "+ "},
}),
bindings);
}

TEST_F(PathMatcherTest, VariableBindingsWithQueryParamsAndSystemParams) {
std::unordered_set<std::string> system_params{"key", "api_key"};
MethodInfo* a_b = AddPathWithSystemParams("GET", "/a/{x}/b", &system_params);
Expand Down

0 comments on commit 3127eea

Please sign in to comment.