From f436d5d8e07289304ebb27312a5930127e45f484 Mon Sep 17 00:00:00 2001
From: Feng Yu
Date: Fri, 3 Jun 2022 22:41:56 +0800
Subject: [PATCH] url: split forbidden host/domain code points, and add all C0 controls and add U+007F to the latter

---
 src/node_url.cc | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/node_url.cc b/src/node_url.cc
index 57c039b69e5176..ca4b77ce20cb25 100644
--- a/src/node_url.cc
+++ b/src/node_url.cc
@@ -165,6 +165,9 @@ enum url_cb_args {
 // https://infra.spec.whatwg.org/#ascii-tab-or-newline
 CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))
 
+// https://infra.spec.whatwg.org/#c0-control
+CHAR_TEST(8, IsC0Control, (ch >= '\0' && ch <= '\x1f'))
+
 // https://infra.spec.whatwg.org/#c0-control-or-space
 CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))
 
@@ -190,12 +193,18 @@ T ASCIILowercase(T ch) {
 }
 
 // https://url.spec.whatwg.org/#forbidden-host-code-point
-CHAR_TEST(8, IsForbiddenHostCodePoint,
-          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
-          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
-          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
-          ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
-          ch == '^' || ch == '|')
+CHAR_TEST(8,
+          IsForbiddenHostCodePoint,
+          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' || ch == ' ' ||
+              ch == '#' || ch == '/' || ch == ':' || ch == '?' || ch == '@' ||
+              ch == '[' || ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
+              ch == '^' || ch == '|')
+
+// https://url.spec.whatwg.org/#forbidden-domain-code-point
+CHAR_TEST(8,
+          IsForbiddenDomainCodePoint,
+          IsForbiddenHostCodePoint(ch) || IsC0Control(ch) || ch == '%' ||
+              ch == '\x7f')
 
 // https://url.spec.whatwg.org/#windows-drive-letter
 TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
@@ -483,7 +492,7 @@ void URLHost::ParseOpaqueHost(const char* input, size_t length) {
   output.reserve(length);
   for (size_t i = 0; i < length; i++) {
     const char ch = input[i];
-    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
+    if (IsForbiddenHostCodePoint(ch)) {
       return;
     } else {
       AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
@@ -522,7 +531,7 @@ void URLHost::ParseHost(const char* input,
   // If any of the following characters are still present, we have to fail
   for (size_t n = 0; n < decoded.size(); n++) {
     const char ch = decoded[n];
-    if (IsForbiddenHostCodePoint(ch)) {
+    if (IsForbiddenDomainCodePoint(ch)) {
       return;
     }
   }