From 382f5ce703ac3bb8466bd158c3e65a7bac7d22ee Mon Sep 17 00:00:00 2001
From: Feng Yu
Date: Fri, 3 Jun 2022 22:41:56 +0800
Subject: [PATCH] url: split forbidden host/domain code points, and add all C0 controls and add U+007F to the latter

---
 src/node_url.cc | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/node_url.cc b/src/node_url.cc
index a7a8aefeb3c71e..5519df92d80e0b 100644
--- a/src/node_url.cc
+++ b/src/node_url.cc
@@ -165,6 +165,9 @@ enum url_cb_args {
 // https://infra.spec.whatwg.org/#ascii-tab-or-newline
 CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))
 
+// https://infra.spec.whatwg.org/#c0-control
+CHAR_TEST(8, IsC0Control, (ch >= '\0' && ch <= '\x1f'))
+
 // https://infra.spec.whatwg.org/#c0-control-or-space
 CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))
 
@@ -190,12 +193,18 @@ T ASCIILowercase(T ch) {
 }
 
 // https://url.spec.whatwg.org/#forbidden-host-code-point
-CHAR_TEST(8, IsForbiddenHostCodePoint,
-          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
-          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
-          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
-          ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
-          ch == '^' || ch == '|')
+CHAR_TEST(8,
+          IsForbiddenHostCodePoint,
+          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' || ch == ' ' ||
+              ch == '#' || ch == '/' || ch == ':' || ch == '?' || ch == '@' ||
+              ch == '[' || ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
+              ch == '^' || ch == '|')
+
+// https://url.spec.whatwg.org/#forbidden-domain-code-point
+CHAR_TEST(8,
+          IsForbiddenDomainCodePoint,
+          IsForbiddenHostCodePoint(ch) || IsC0Control(ch) || ch == '%' ||
+              ch == '\x7f')
 
 // https://url.spec.whatwg.org/#windows-drive-letter
 TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
@@ -482,7 +491,7 @@ void URLHost::ParseOpaqueHost(const char* input, size_t length) {
   output.reserve(length);
   for (size_t i = 0; i < length; i++) {
     const char ch = input[i];
-    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
+    if (IsForbiddenHostCodePoint(ch)) {
       return;
     } else {
       AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
@@ -521,7 +530,7 @@ void URLHost::ParseHost(const char* input,
   // If any of the following characters are still present, we have to fail
   for (size_t n = 0; n < decoded.size(); n++) {
     const char ch = decoded[n];
-    if (IsForbiddenHostCodePoint(ch)) {
+    if (IsForbiddenDomainCodePoint(ch)) {
       return;
     }
   }