Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(v7.x backport) url: updates to the WHATWG URL parser #12507

Merged
merged 21 commits into from
Apr 25, 2017
Merged
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
71d3f94
url: extend URLSearchParams constructor
TimothyGu Jan 28, 2017
c40a45f
doc: document URLSearchParams constructor
TimothyGu Jan 28, 2017
b0fecbe
url: enforce valid UTF-8 in WHATWG parser
TimothyGu Feb 4, 2017
c3366a5
url: prioritize toString when stringifying
TimothyGu Mar 8, 2017
6b2cb6d
url: spec-compliant URLSearchParams serializer
TimothyGu Feb 4, 2017
7e7fd66
src: remove explicit UTF-8 validity check in url
TimothyGu Mar 15, 2017
4a94c2d
querystring: move isHexTable to internal
TimothyGu Mar 15, 2017
d86f0d7
url: spec-compliant URLSearchParams parser
TimothyGu Mar 15, 2017
a2a3d6c
url: use a class for WHATWG url[context]
TimothyGu Mar 22, 2017
75ef213
url: add ToObject method to native URL class
jasnell Mar 27, 2017
5b7b775
src: WHATWG URL C++ parser cleanup
TimothyGu Mar 16, 2017
d912e28
url: change path parsing for non-special URLs
watilde Apr 3, 2017
dceb12e
test: synchronize WPT url test data
watilde Apr 3, 2017
43faf56
url: error when domainTo*() is called w/o argument
TimothyGu Mar 20, 2017
dafa600
url: avoid instanceof for WHATWG URL
mscdex Mar 5, 2017
68cf850
url: trim leading slashes of file URL paths
watilde Apr 10, 2017
752097c
url: remove javascript URL special case
watilde Apr 12, 2017
f484cfd
url: disallow invalid IPv4 in IPv6 parser
watilde Apr 14, 2017
9288b73
url: clean up WHATWG URL origin generation
TimothyGu Apr 5, 2017
8f702ef
url: improve WHATWG URL inspection
TimothyGu Apr 5, 2017
473bd5e
src: clean up WHATWG URL parser
TimothyGu Apr 6, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
src: WHATWG URL C++ parser cleanup
- Clarify port state
- Remove scheme flag
- Clarify URL_FLAGS_TERMINATED

PR-URL: #12507
Reviewed-By: James M Snell <jasnell@gmail.com>
TimothyGu committed Apr 25, 2017
commit 5b7b775e54a541a03b93cbb14679f7f5ad2d5358
54 changes: 31 additions & 23 deletions src/node_url.cc
Original file line number Diff line number Diff line change
@@ -494,7 +494,9 @@ namespace url {
if (flags->IsInt32())
base->flags = flags->Int32Value(context).FromJust();

GET_AND_SET(env, base_obj, scheme, base, URL_FLAGS_HAS_SCHEME);
Local<Value> scheme = GET(env, base_obj, "scheme");
base->scheme = Utf8Value(env->isolate(), scheme).out();

GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME);
GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD);
GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST);
@@ -644,7 +646,7 @@ namespace url {
state = kNoScheme;
continue;
} else {
url->flags |= URL_FLAGS_TERMINATED;
url->flags |= URL_FLAGS_FAILED;
return;
}
break;
@@ -654,10 +656,12 @@ namespace url {
p++;
continue;
} else if (ch == ':' || (has_state_override && ch == kEOL)) {
buffer += ':';
if (buffer.size() > 0) {
url->flags |= URL_FLAGS_HAS_SCHEME;
buffer += ':';
url->scheme = buffer;
} else if (has_state_override) {
url->flags |= URL_FLAGS_TERMINATED;
return;
}
if (IsSpecial(url->scheme)) {
url->flags |= URL_FLAGS_SPECIAL;
@@ -672,7 +676,6 @@ namespace url {
state = kFile;
} else if (special &&
has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
url->scheme == base->scheme) {
state = kSpecialRelativeOrAuthority;
} else if (special) {
@@ -692,7 +695,7 @@ namespace url {
p = input;
continue;
} else {
url->flags |= URL_FLAGS_TERMINATED;
url->flags |= URL_FLAGS_FAILED;
return;
}
break;
@@ -702,7 +705,6 @@ namespace url {
url->flags |= URL_FLAGS_FAILED;
return;
} else if (cannot_be_base && ch == '#') {
url->flags |= URL_FLAGS_HAS_SCHEME;
url->scheme = base->scheme;
if (IsSpecial(url->scheme)) {
url->flags |= URL_FLAGS_SPECIAL;
@@ -725,12 +727,10 @@ namespace url {
url->flags |= URL_FLAGS_CANNOT_BE_BASE;
state = kFragment;
} else if (has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
base->scheme != "file:") {
state = kRelative;
continue;
} else {
url->flags |= URL_FLAGS_HAS_SCHEME;
url->scheme = "file:";
url->flags |= URL_FLAGS_SPECIAL;
special = true;
@@ -756,7 +756,6 @@ namespace url {
}
break;
case kRelative:
url->flags |= URL_FLAGS_HAS_SCHEME;
url->scheme = base->scheme;
if (IsSpecial(url->scheme)) {
url->flags |= URL_FLAGS_SPECIAL;
@@ -951,7 +950,6 @@ namespace url {
buffer.clear();
state = kPort;
if (state_override == kHostname) {
url->flags |= URL_FLAGS_TERMINATED;
return;
}
} else if (ch == kEOL ||
@@ -972,7 +970,6 @@ namespace url {
buffer.clear();
state = kPathStart;
if (has_state_override) {
url->flags |= URL_FLAGS_TERMINATED;
return;
}
} else {
@@ -996,13 +993,26 @@ namespace url {
int port = 0;
for (size_t i = 0; i < buffer.size(); i++)
port = port * 10 + buffer[i] - '0';
if (port >= 0 && port <= 0xffff) {
url->port = NormalizePort(url->scheme, port);
} else if (!has_state_override) {
url->flags |= URL_FLAGS_FAILED;
if (port < 0 || port > 0xffff) {
// TODO(TimothyGu): This hack is currently needed for the host
// setter since it needs access to hostname if it is valid, and
// if the FAILED flag is set the entire response to JS layer
// will be empty.
if (state_override == kHost)
url->port = -1;
else
url->flags |= URL_FLAGS_FAILED;
return;
}
url->port = NormalizePort(url->scheme, port);
buffer.clear();
} else if (has_state_override) {
// TODO(TimothyGu): Similar case as above.
if (state_override == kHost)
url->port = -1;
else
url->flags |= URL_FLAGS_TERMINATED;
return;
}
state = kPathStart;
continue;
@@ -1014,7 +1024,6 @@ namespace url {
case kFile:
base_is_file = (
has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
base->scheme == "file:");
switch (ch) {
case kEOL:
@@ -1097,7 +1106,6 @@ namespace url {
state = kFileHost;
} else {
if (has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
base->scheme == "file:" &&
base->flags & URL_FLAGS_HAS_PATH &&
base->path.size() > 0 &&
@@ -1158,8 +1166,7 @@ namespace url {
url->path.push_back("");
}
} else {
if (url->flags & URL_FLAGS_HAS_SCHEME &&
url->scheme == "file:" &&
if (url->scheme == "file:" &&
url->path.empty() &&
buffer.size() == 2 &&
WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) {
@@ -1233,8 +1240,7 @@ namespace url {
const struct url_data* url) {
Isolate* isolate = env->isolate();
argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags);
if (url->flags & URL_FLAGS_HAS_SCHEME)
argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str());
argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str());
if (url->flags & URL_FLAGS_HAS_USERNAME)
argv[ARG_USERNAME] = UTF8STRING(isolate, url->username);
if (url->flags & URL_FLAGS_HAS_PASSWORD)
@@ -1275,7 +1281,9 @@ namespace url {
HarvestBase(env, &base, base_obj.As<Object>());

URL::Parse(input, len, state_override, &url, &base, has_base);
if (url.flags & URL_FLAGS_INVALID_PARSE_STATE)
if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
((state_override != kUnknownState) &&
(url.flags & URL_FLAGS_TERMINATED)))
return;

// Define the return value placeholders
13 changes: 6 additions & 7 deletions src/node_url.h
Original file line number Diff line number Diff line change
@@ -451,13 +451,12 @@ static inline void PercentDecode(const char* input,
XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \
XX(URL_FLAGS_TERMINATED, 0x08) \
XX(URL_FLAGS_SPECIAL, 0x10) \
XX(URL_FLAGS_HAS_SCHEME, 0x20) \
XX(URL_FLAGS_HAS_USERNAME, 0x40) \
XX(URL_FLAGS_HAS_PASSWORD, 0x80) \
XX(URL_FLAGS_HAS_HOST, 0x100) \
XX(URL_FLAGS_HAS_PATH, 0x200) \
XX(URL_FLAGS_HAS_QUERY, 0x400) \
XX(URL_FLAGS_HAS_FRAGMENT, 0x800)
XX(URL_FLAGS_HAS_USERNAME, 0x20) \
XX(URL_FLAGS_HAS_PASSWORD, 0x40) \
XX(URL_FLAGS_HAS_HOST, 0x80) \
XX(URL_FLAGS_HAS_PATH, 0x100) \
XX(URL_FLAGS_HAS_QUERY, 0x200) \
XX(URL_FLAGS_HAS_FRAGMENT, 0x400)

#define ARGS(XX) \
XX(ARG_FLAGS) \