Skip to content

Commit 6317f77

Browse files
aduh95 authored and ruyadorno committed
url: refactor pathToFileURL to native
PR-URL: #55476 Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
1 parent 7acb963 commit 6317f77

File tree

3 files changed

+120
-77
lines changed

3 files changed

+120
-77
lines changed

lib/internal/url.js

+15-77
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ const {
1616
ObjectSetPrototypeOf,
1717
ReflectGetOwnPropertyDescriptor,
1818
ReflectOwnKeys,
19-
RegExpPrototypeSymbolReplace,
2019
SafeMap,
2120
SafeSet,
2221
StringPrototypeCharAt,
@@ -779,6 +778,8 @@ function isURL(self) {
779778
* for invalid URL inputs.
780779
*/
781780
const kParseURLSymbol = Symbol('kParseURL');
781+
const kCreateURLFromPosixPathSymbol = Symbol('kCreateURLFromPosixPath');
782+
const kCreateURLFromWindowsPathSymbol = Symbol('kCreateURLFromWindowsPath');
782783

783784
class URL {
784785
#context = new URLContext();
@@ -812,8 +813,17 @@ class URL {
812813
base = `${base}`;
813814
}
814815

815-
const raiseException = parseSymbol !== kParseURLSymbol;
816-
const href = bindingUrl.parse(input, base, raiseException);
816+
let href;
817+
if (arguments.length < 3) {
818+
href = bindingUrl.parse(input, base, true);
819+
} else {
820+
const raiseException = parseSymbol !== kParseURLSymbol;
821+
const interpretAsWindowsPath = parseSymbol === kCreateURLFromWindowsPathSymbol;
822+
const pathToFileURL = interpretAsWindowsPath || (parseSymbol === kCreateURLFromPosixPathSymbol);
823+
href = pathToFileURL ?
824+
bindingUrl.pathToFileURL(input, interpretAsWindowsPath, base) :
825+
bindingUrl.parse(input, base, raiseException);
826+
}
817827
if (href) {
818828
this.#updateContext(href);
819829
}
@@ -1500,76 +1510,9 @@ function fileURLToPath(path, options = kEmptyObject) {
15001510
return (windows ?? isWindows) ? getPathFromURLWin32(path) : getPathFromURLPosix(path);
15011511
}
15021512

1503-
// RFC1738 defines the following chars as "unsafe" for URLs
1504-
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues
1505-
const percentRegEx = /%/g;
1506-
const newlineRegEx = /\n/g;
1507-
const carriageReturnRegEx = /\r/g;
1508-
const tabRegEx = /\t/g;
1509-
const quoteRegEx = /"/g;
1510-
const hashRegex = /#/g;
1511-
const spaceRegEx = / /g;
1512-
const questionMarkRegex = /\?/g;
1513-
const openSquareBracketRegEx = /\[/g;
1514-
const backslashRegEx = /\\/g;
1515-
const closeSquareBracketRegEx = /]/g;
1516-
const caretRegEx = /\^/g;
1517-
const verticalBarRegEx = /\|/g;
1518-
const tildeRegEx = /~/g;
1519-
1520-
function encodePathChars(filepath, options = kEmptyObject) {
1521-
if (StringPrototypeIncludes(filepath, '%')) {
1522-
filepath = RegExpPrototypeSymbolReplace(percentRegEx, filepath, '%25');
1523-
}
1524-
1525-
if (StringPrototypeIncludes(filepath, '\t')) {
1526-
filepath = RegExpPrototypeSymbolReplace(tabRegEx, filepath, '%09');
1527-
}
1528-
if (StringPrototypeIncludes(filepath, '\n')) {
1529-
filepath = RegExpPrototypeSymbolReplace(newlineRegEx, filepath, '%0A');
1530-
}
1531-
if (StringPrototypeIncludes(filepath, '\r')) {
1532-
filepath = RegExpPrototypeSymbolReplace(carriageReturnRegEx, filepath, '%0D');
1533-
}
1534-
if (StringPrototypeIncludes(filepath, ' ')) {
1535-
filepath = RegExpPrototypeSymbolReplace(spaceRegEx, filepath, '%20');
1536-
}
1537-
if (StringPrototypeIncludes(filepath, '"')) {
1538-
filepath = RegExpPrototypeSymbolReplace(quoteRegEx, filepath, '%22');
1539-
}
1540-
if (StringPrototypeIncludes(filepath, '#')) {
1541-
filepath = RegExpPrototypeSymbolReplace(hashRegex, filepath, '%23');
1542-
}
1543-
if (StringPrototypeIncludes(filepath, '?')) {
1544-
filepath = RegExpPrototypeSymbolReplace(questionMarkRegex, filepath, '%3F');
1545-
}
1546-
if (StringPrototypeIncludes(filepath, '[')) {
1547-
filepath = RegExpPrototypeSymbolReplace(openSquareBracketRegEx, filepath, '%5B');
1548-
}
1549-
// Back-slashes must be special-cased on Windows, where they are treated as path separator.
1550-
if (!options.windows && StringPrototypeIncludes(filepath, '\\')) {
1551-
filepath = RegExpPrototypeSymbolReplace(backslashRegEx, filepath, '%5C');
1552-
}
1553-
if (StringPrototypeIncludes(filepath, ']')) {
1554-
filepath = RegExpPrototypeSymbolReplace(closeSquareBracketRegEx, filepath, '%5D');
1555-
}
1556-
if (StringPrototypeIncludes(filepath, '^')) {
1557-
filepath = RegExpPrototypeSymbolReplace(caretRegEx, filepath, '%5E');
1558-
}
1559-
if (StringPrototypeIncludes(filepath, '|')) {
1560-
filepath = RegExpPrototypeSymbolReplace(verticalBarRegEx, filepath, '%7C');
1561-
}
1562-
if (StringPrototypeIncludes(filepath, '~')) {
1563-
filepath = RegExpPrototypeSymbolReplace(tildeRegEx, filepath, '%7E');
1564-
}
1565-
1566-
return filepath;
1567-
}
1568-
15691513
function pathToFileURL(filepath, options = kEmptyObject) {
15701514
const windows = options?.windows ?? isWindows;
15711515
if (windows && StringPrototypeStartsWith(filepath, '\\\\')) {
1572-
const outURL = new URL('file://');
15731516
// UNC path format: \\server\share\resource
15741517
// Handle extended UNC path and standard UNC path
15751518
// "\\?\UNC\" path prefix should be ignored.
@@ -1592,12 +1535,7 @@ function pathToFileURL(filepath, options = kEmptyObject) {
15921535
);
15931536
}
15941537
const hostname = StringPrototypeSlice(filepath, prefixLength, hostnameEndIndex);
1595-
outURL.hostname = domainToASCII(hostname);
1596-
outURL.pathname = encodePathChars(
1597-
RegExpPrototypeSymbolReplace(backslashRegEx, StringPrototypeSlice(filepath, hostnameEndIndex), '/'),
1598-
{ windows },
1599-
);
1600-
return outURL;
1538+
return new URL(StringPrototypeSlice(filepath, hostnameEndIndex), hostname, kCreateURLFromWindowsPathSymbol);
16011539
}
16021540
let resolved = windows ? path.win32.resolve(filepath) : path.posix.resolve(filepath);
16031541
// path.resolve strips trailing slashes so we must add them back
@@ -1608,7 +1546,7 @@ function pathToFileURL(filepath, options = kEmptyObject) {
16081546
resolved[resolved.length - 1] !== path.sep)
16091547
resolved += '/';
16101548

1611-
return new URL(`file://${encodePathChars(resolved, { windows })}`);
1549+
return new URL(resolved, undefined, windows ? kCreateURLFromWindowsPathSymbol : kCreateURLFromPosixPathSymbol);
16121550
}
16131551

16141552
function toPathIfFileURL(fileURLOrPath) {

src/node_url.cc

+104
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,108 @@ void BindingData::Deserialize(v8::Local<v8::Context> context,
7575
CHECK_NOT_NULL(binding);
7676
}
7777

78+
#ifndef LARGEST_ASCII_CHAR_CODE_TO_ENCODE
#define LARGEST_ASCII_CHAR_CODE_TO_ENCODE '~'
#endif

// RFC1738 defines the following chars as "unsafe" for URLs
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues
//
// Substitution table indexed by ASCII code, covering 0 through
// LARGEST_ASCII_CHAR_CODE_TO_ENCODE inclusive. Every entry is a
// NUL-terminated C string: either the character itself, or its three-byte
// "%XX" escape (uppercase hex) when the character is one of the unsafe ones
// listed below. The variable is constexpr, so the table is always produced at
// compile time.
constexpr auto lookup_table = []() constexpr {
  constexpr char kHexDigits[] = "0123456789ABCDEF";
  constexpr char kUnsafeChars[] = {'\0', '\t', '\n', '\r', ' ',
                                   '"',  '#',  '%',  '?',  '[',
                                   '\\', ']',  '^',  '|',  '~'};

  // Each entry can hold up to 3 chars + null terminator.
  std::array<std::array<char, 4>, LARGEST_ASCII_CHAR_CODE_TO_ENCODE + 1>
      table{};

  // Start from the identity mapping: every character stands for itself.
  for (size_t code = 0; code < table.size(); ++code) {
    table[code] = {{static_cast<char>(code), '\0', '\0', '\0'}};
  }

  // Then overwrite the unsafe characters with their percent-escapes.
  for (const char unsafe : kUnsafeChars) {
    const auto code = static_cast<uint8_t>(unsafe);
    table[code] = {
        {'%', kHexDigits[code >> 4], kHexDigits[code & 0x0F], '\0'}};
  }

  return table;
}();
122+
123+
// Path flavor of the input handed to EncodePathChars(): with WINDOWS a
// backslash is emitted as '/', with POSIX it goes through the escape table
// like any other character.
enum class OS { WINDOWS, POSIX };
124+
125+
std::string EncodePathChars(std::string_view input_str, OS operating_system) {
126+
std::string encoded = "file://";
127+
encoded.reserve(input_str.size() +
128+
7); // Reserve space for "file://" and input_str
129+
for (size_t i : input_str) {
130+
if (i > LARGEST_ASCII_CHAR_CODE_TO_ENCODE) [[unlikely]] {
131+
encoded.push_back(i);
132+
continue;
133+
}
134+
if (operating_system == OS::WINDOWS) {
135+
if (i == '\\') {
136+
encoded.push_back('/');
137+
continue;
138+
}
139+
}
140+
encoded.append(lookup_table[i].data());
141+
}
142+
143+
return encoded;
144+
}
145+
146+
void BindingData::PathToFileURL(const FunctionCallbackInfo<Value>& args) {
147+
CHECK_GE(args.Length(), 2); // input
148+
CHECK(args[0]->IsString());
149+
CHECK(args[1]->IsBoolean());
150+
151+
Realm* realm = Realm::GetCurrent(args);
152+
BindingData* binding_data = realm->GetBindingData<BindingData>();
153+
Isolate* isolate = realm->isolate();
154+
OS os = args[1]->IsTrue() ? OS::WINDOWS : OS::POSIX;
155+
156+
Utf8Value input(isolate, args[0]);
157+
auto input_str = input.ToStringView();
158+
CHECK(!input_str.empty());
159+
160+
auto out =
161+
ada::parse<ada::url_aggregator>(EncodePathChars(input_str, os), nullptr);
162+
163+
if (!out) {
164+
return ThrowInvalidURL(realm->env(), input.ToStringView(), nullptr);
165+
}
166+
167+
if (os == OS::WINDOWS && args.Length() > 2 && !args[2]->IsUndefined())
168+
[[unlikely]] {
169+
CHECK(args[2]->IsString());
170+
Utf8Value hostname(isolate, args[2]);
171+
CHECK(out->set_hostname(hostname.ToStringView()));
172+
}
173+
174+
binding_data->UpdateComponents(out->get_components(), out->type);
175+
176+
args.GetReturnValue().Set(
177+
ToV8Value(realm->context(), out->get_href(), isolate).ToLocalChecked());
178+
}
179+
78180
void BindingData::DomainToASCII(const FunctionCallbackInfo<Value>& args) {
79181
Environment* env = Environment::GetCurrent(args);
80182
CHECK_GE(args.Length(), 1); // input
@@ -371,6 +473,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
371473
SetMethodNoSideEffect(isolate, target, "format", Format);
372474
SetMethodNoSideEffect(isolate, target, "getOrigin", GetOrigin);
373475
SetMethod(isolate, target, "parse", Parse);
476+
SetMethod(isolate, target, "pathToFileURL", PathToFileURL);
374477
SetMethod(isolate, target, "update", Update);
375478
SetFastMethodNoSideEffect(
376479
isolate, target, "canParse", CanParse, {fast_can_parse_methods_, 2});
@@ -391,6 +494,7 @@ void BindingData::RegisterExternalReferences(
391494
registry->Register(Format);
392495
registry->Register(GetOrigin);
393496
registry->Register(Parse);
497+
registry->Register(PathToFileURL);
394498
registry->Register(Update);
395499
registry->Register(CanParse);
396500
registry->Register(FastCanParse);

src/node_url.h

+1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ class BindingData : public SnapshotableObject {
5959
static void Format(const v8::FunctionCallbackInfo<v8::Value>& args);
6060
static void GetOrigin(const v8::FunctionCallbackInfo<v8::Value>& args);
6161
static void Parse(const v8::FunctionCallbackInfo<v8::Value>& args);
62+
static void PathToFileURL(const v8::FunctionCallbackInfo<v8::Value>& args);
6263
static void Update(const v8::FunctionCallbackInfo<v8::Value>& args);
6364

6465
static void CreatePerIsolateProperties(IsolateData* isolate_data,

0 commit comments

Comments
 (0)