Skip to content

Commit

Permalink
http: Use GURL as HTTP URL parser utility (envoyproxy#11670)
Browse files Browse the repository at this point in the history
This replaces http_parser's URL parser with GURL.

Risk Level: Medium
Testing: Unit
Docs Changes: N/A
Release Notes: N/A

Signed-off-by: Dhi Aurrahman <dio@tetrate.io>
Signed-off-by: scheler <santosh.cheler@appdynamics.com>
  • Loading branch information
dio authored and scheler committed Aug 4, 2020
1 parent 480c451 commit 67d58ae
Show file tree
Hide file tree
Showing 21 changed files with 454 additions and 111 deletions.
119 changes: 119 additions & 0 deletions bazel/external/googleurl.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# TODO(dio): Consider to remove this patch when we have the ability to compile the project using
# clang-cl. Tracked in https://github.com/envoyproxy/envoy/issues/11974.

diff --git a/base/compiler_specific.h b/base/compiler_specific.h
index 2962537..6193b56 100644
--- a/base/compiler_specific.h
+++ b/base/compiler_specific.h
@@ -7,10 +7,6 @@

#include "build/build_config.h"

-#if defined(COMPILER_MSVC) && !defined(__clang__)
-#error "Only clang-cl is supported on Windows, see https://crbug.com/988071"
-#endif
-
// Annotate a variable indicating it's ok if the variable is not used.
// (Typically used to silence a compiler warning when the assignment
// is important for some other reason.)
@@ -212,7 +208,9 @@
#endif
#endif

-#if defined(__clang__) && __has_attribute(uninitialized)
+#if defined(__clang__)
+#if defined(__has_attribute)
+#if __has_attribute(uninitialized)
// Attribute "uninitialized" disables -ftrivial-auto-var-init=pattern for
// the specified variable.
// Library-wide alternative is
@@ -243,6 +241,8 @@
// E.g. platform, bot, benchmark or test name in patch description or next to
// the attribute.
#define STACK_UNINITIALIZED __attribute__((uninitialized))
+#endif
+#endif
#else
#define STACK_UNINITIALIZED
#endif
diff --git a/base/strings/BUILD b/base/strings/BUILD
index 7a06170..7c86a5f 100644
--- a/base/strings/BUILD
+++ b/base/strings/BUILD
@@ -6,23 +6,21 @@ load("//:build_config.bzl", "build_config")
cc_library(
name = "strings",
srcs = [
- "string16.cc",
"string_piece.cc",
"string_util.cc",
"string_util_constants.cc",
"utf_string_conversion_utils.cc",
"utf_string_conversions.cc",
- ],
+ ] + build_config.strings_srcs,
hdrs = [
"char_traits.h",
"string16.h",
"string_piece.h",
"string_piece_forward.h",
"string_util.h",
- "string_util_posix.h",
"utf_string_conversion_utils.h",
"utf_string_conversions.h",
- ],
+ ] + build_config.strings_hdrs,
copts = build_config.default_copts,
visibility = ["//visibility:public"],
deps = [
diff --git a/build_config.bzl b/build_config.bzl
index d5fca65..fc0d7e5 100644
--- a/build_config/build_config.bzl
+++ b/build_config/build_config.bzl
@@ -1,8 +1,25 @@
-_default_copts = [
- "-std=c++14",
- "-fno-strict-aliasing",
-]
+_default_copts = select({
+ "@envoy//bazel:windows_x86_64": [
+ "/std:c++14",
+ ],
+ "//conditions:default": [
+ "-std=c++14",
+ "-fno-strict-aliasing",
+ ],
+})
+
+_strings_srcs = select({
+ "@envoy//bazel:windows_x86_64": [],
+ "//conditions:default": ["string16.cc"],
+})
+
+_strings_hdrs = select({
+ "@envoy//bazel:windows_x86_64": ["string_util_win.h"],
+ "//conditions:default": ["string_util_posix.h"],
+})

build_config = struct(
default_copts = _default_copts,
+ strings_srcs = _strings_srcs,
+ strings_hdrs = _strings_hdrs,
)
diff --git a/url/BUILD b/url/BUILD
index 0126bdc..5d1a171 100644
--- a/url/BUILD
+++ b/url/BUILD
@@ -43,11 +43,11 @@ cc_library(
"url_util.h",
],
copts = build_config.default_copts,
- linkopts = ["-licuuc"],
visibility = ["//visibility:public"],
deps = [
"//base",
"//base/strings",
"//polyfills",
+ "@org_unicode_icuuc//:common",
],
)
59 changes: 59 additions & 0 deletions bazel/external/icuuc.BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
load("@rules_cc//cc:defs.bzl", "cc_library")

licenses(["notice"]) # Apache 2

exports_files([
"icu4c/LICENSE",
"icu4j/main/shared/licenses/LICENSE",
])

icuuc_copts = [
"-DU_STATIC_IMPLEMENTATION",
"-DU_COMMON_IMPLEMENTATION",
"-DU_HAVE_STD_ATOMICS",
] + select({
"@envoy//bazel:apple": [
"-Wno-shorten-64-to-32",
"-Wno-unused-variable",
],
"@envoy//bazel:windows_x86_64": [
"/utf-8",
"/DLOCALE_ALLOW_NEUTRAL_NAMES=0",
],
# TODO(dio): Add "@envoy//bazel:android" when we have it.
# "@envoy//bazel:android": [
# "-fdata-sections",
# "-DU_HAVE_NL_LANGINFO_CODESET=0",
# "-Wno-deprecated-declarations",
# ],
"//conditions:default": [],
})

cc_library(
name = "headers",
hdrs = glob(["icu4c/source/common/unicode/*.h"]),
includes = ["icu4c/source/common"],
visibility = ["//visibility:public"],
)

cc_library(
name = "common",
hdrs = glob(["icu4c/source/common/unicode/*.h"]),
includes = ["icu4c/source/common"],
visibility = ["//visibility:public"],
deps = [":icuuc"],
)

cc_library(
name = "icuuc",
srcs = glob([
"icu4c/source/common/*.c",
"icu4c/source/common/*.cpp",
"icu4c/source/stubdata/*.cpp",
]),
hdrs = glob(["icu4c/source/common/*.h"]),
copts = icuuc_copts,
tags = ["requires-rtti"],
visibility = ["//visibility:private"],
deps = [":headers"],
)
14 changes: 14 additions & 0 deletions bazel/repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ def envoy_dependencies(skip_targets = []):
_repository_impl("bazel_compdb")
_repository_impl("envoy_build_tools")
_repository_impl("rules_cc")
_org_unicode_icuuc()

# Unconditional, since we use this only for compiler-agnostic fuzzing utils.
_org_llvm_releases_compiler_rt()
Expand Down Expand Up @@ -699,6 +700,8 @@ def _com_googlesource_quiche():
def _com_googlesource_googleurl():
_repository_impl(
name = "com_googlesource_googleurl",
patches = ["@envoy//bazel/external:googleurl.patch"],
patch_args = ["-p1"],
)
native.bind(
name = "googleurl",
Expand Down Expand Up @@ -858,6 +861,17 @@ filegroup(
**_get_location("kafka_python_client")
)

def _org_unicode_icuuc():
_repository_impl(
name = "org_unicode_icuuc",
build_file = "@envoy//bazel/external:icuuc.BUILD",
# TODO(dio): Consider patching udata when we need to embed some data.
)
native.bind(
name = "icuuc",
actual = "@org_unicode_icuuc//:common",
)

def _foreign_cc_dependencies():
_repository_impl("rules_foreign_cc")

Expand Down
7 changes: 7 additions & 0 deletions bazel/repository_locations.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -477,4 +477,11 @@ DEPENDENCY_REPOSITORIES = dict(
urls = ["https://github.com/dpkp/kafka-python/archive/2.0.0.tar.gz"],
use_category = ["test"],
),
org_unicode_icuuc = dict(
strip_prefix = "icu-release-64-2",
sha256 = "524960ac99d086cdb6988d2a92fc163436fd3c6ec0a84c475c6382fbf989be05",
urls = ["https://github.com/unicode-org/icu/archive/release-64-2.tar.gz"],
use_category = ["dataplane"],
cpe = "cpe:2.3:a:icu-project:international_components_for_unicode",
),
)
15 changes: 14 additions & 1 deletion source/common/http/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,6 @@ envoy_cc_library(
hdrs = ["utility.h"],
external_deps = [
"abseil_optional",
"http_parser",
"nghttp2",
],
deps = [
Expand Down Expand Up @@ -428,3 +427,17 @@ envoy_cc_library(
"//source/common/common:assert_lib",
],
)

envoy_cc_library(
name = "url_utility_lib",
srcs = ["url_utility.cc"],
hdrs = ["url_utility.h"],
external_deps = [
"googleurl",
],
deps = [
"//source/common/common:assert_lib",
"//source/common/common:empty_string",
"//source/common/common:utility_lib",
],
)
1 change: 1 addition & 0 deletions source/common/http/http1/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ envoy_cc_library(
"//source/common/http:header_utility_lib",
"//source/common/http:headers_lib",
"//source/common/http:status_lib",
"//source/common/http:url_utility_lib",
"//source/common/http:utility_lib",
"//source/common/runtime:runtime_features_lib",
"@envoy_api//envoy/config/core/v3:pkg_cc_proto",
Expand Down
1 change: 1 addition & 0 deletions source/common/http/http1/codec_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "common/http/header_utility.h"
#include "common/http/headers.h"
#include "common/http/http1/header_formatter.h"
#include "common/http/url_utility.h"
#include "common/http/utility.h"
#include "common/runtime/runtime_features.h"

Expand Down
95 changes: 95 additions & 0 deletions source/common/http/url_utility.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#include "common/http/url_utility.h"

#include <sys/types.h>

#include <iostream>
#include <string>

#include "common/common/assert.h"
#include "common/common/empty_string.h"
#include "common/common/utility.h"

#include "absl/strings/numbers.h"
#include "absl/strings/str_cat.h"

namespace Envoy {
namespace Http {
namespace Utility {

bool Url::initialize(absl::string_view absolute_url, bool is_connect) {
// TODO(dio): When we have access to base::StringPiece, probably we can convert absolute_url to
// that instead.
GURL parsed(std::string{absolute_url});
if (is_connect) {
return initializeForConnect(std::move(parsed));
}

// TODO(dio): Check if we need to accommodate to strictly validate only http(s) AND ws(s) schemes.
// Currently, we only accept http(s).
if (!parsed.is_valid() || !parsed.SchemeIsHTTPOrHTTPS()) {
return false;
}

scheme_ = parsed.scheme();

// Only non-default ports will be rendered as part of host_and_port_. For example,
// http://www.host.com:80 has port component (i.e. 80). However, since 80 is a default port for
// http scheme, host_and_port_ will be rendered as www.host.com (without port). The same case with
// https scheme (with port 443) as well.
host_and_port_ =
absl::StrCat(parsed.host(), parsed.has_port() ? ":" : EMPTY_STRING, parsed.port());

const int port = parsed.EffectiveIntPort();
if (port <= 0 || port > std::numeric_limits<uint16_t>::max()) {
return false;
}
port_ = static_cast<uint16_t>(port);

// RFC allows the absolute URI to not end in "/", but the absolute path form must start with "/".
path_and_query_params_ = parsed.PathForRequest();
if (parsed.has_ref()) {
absl::StrAppend(&path_and_query_params_, "#", parsed.ref());
}

return true;
}

bool Url::initializeForConnect(GURL&& url) {
// CONNECT requests can only contain "hostname:port"
// https://github.com/nodejs/http-parser/blob/d9275da4650fd1133ddc96480df32a9efe4b059b/http_parser.c#L2503-L2506.
if (!url.is_valid() || url.IsStandard()) {
return false;
}

const auto& parsed = url.parsed_for_possibly_invalid_spec();
// The parsed.scheme contains the URL's hostname (stored by GURL). While host and port have -1
// as its length.
if (parsed.scheme.len <= 0 || parsed.host.len > 0 || parsed.port.len > 0) {
return false;
}

host_and_port_ = url.possibly_invalid_spec();
const auto& parts = StringUtil::splitToken(host_and_port_, ":", /*keep_empty_string=*/true,
/*trim_whitespace=*/false);
if (parts.size() != 2 || static_cast<size_t>(parsed.scheme.len) != parts.at(0).size() ||
!validPortForConnect(parts.at(1))) {
return false;
}

return true;
}

bool Url::validPortForConnect(absl::string_view port_string) {
int port;
const bool valid = absl::SimpleAtoi(port_string, &port);
// Only a port value in valid range (1-65535) is allowed.
if (!valid || port <= 0 || port > std::numeric_limits<uint16_t>::max()) {
return false;
}
port_ = static_cast<uint16_t>(port);
return true;
}

} // namespace Utility
} // namespace Http
} // namespace Envoy
Loading

0 comments on commit 67d58ae

Please sign in to comment.