Skip to content

Commit 1dcba82

Browse files
committed
perf: improve URL parsing performance
1 parent 8c36659 commit 1dcba82

File tree

1 file changed

+76
-11
lines changed

1 file changed

+76
-11
lines changed

packages/utils/src/url.ts

Lines changed: 76 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
1-
type PartialURL = {
2-
host?: string;
3-
path?: string;
4-
protocol?: string;
5-
relative?: string;
6-
search?: string;
7-
hash?: string;
8-
};
1+
type PartialURL = Partial<{
2+
host: string;
3+
path: string;
4+
protocol: string;
5+
relative: string;
6+
search: string;
7+
hash: string;
8+
urlInstance: URL;
9+
}>;
10+
11+
const urlRegex = /^(([^:/?#]+):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$/;
912

1013
/**
1114
* Parses string form of URL into an object
@@ -19,7 +22,31 @@ export function parseUrl(url: string): PartialURL {
1922
return {};
2023
}
2124

22-
const match = url.match(/^(([^:/?#]+):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$/);
25+
// Node.js v16 and above support the WHATWG URL API. We can use it when available.
26+
if (typeof URL !== undefined) {
27+
try {
28+
const parsed = new URL(url);
29+
const pathname = parsed.pathname;
30+
31+
return {
32+
host: parsed.host,
33+
// WHATWG URL API includes the leading slash in the pathname
34+
// Example: Returns `/` for `https://sentry.io`
35+
path: pathname.length === 1 ? '' : pathname,
36+
// WHATWG URL API includes the trailing colon in the protocol
37+
// Example: Returns `https:` for `https://sentry.io`
38+
protocol: parsed.protocol.slice(0, -1),
39+
search: parsed.search,
40+
hash: parsed.hash,
41+
relative: parsed.pathname + parsed.search + parsed.hash,
42+
urlInstance: parsed,
43+
};
44+
} catch {
45+
// If URL is invalid, fall back to regex parsing to support URLs without protocols.
46+
}
47+
}
48+
49+
const match = url.match(urlRegex);
2350

2451
if (!match) {
2552
return {};
@@ -62,15 +89,53 @@ export function getNumberOfUrlSegments(url: string): number {
6289
* see: https://develop.sentry.dev/sdk/data-handling/#structuring-data
6390
*/
6491
export function getSanitizedUrlString(url: PartialURL): string {
65-
const { protocol, host, path } = url;
92+
const { protocol, host, path, urlInstance } = url;
93+
94+
// This means that the environment supports WHATWG URL API.
95+
// This case will not be executed if URL does not have a protocol
96+
// since WHATWG URL specification requires protocol to be present.
97+
if (urlInstance !== undefined) {
98+
const { port, username, password, hostname, pathname, protocol } = urlInstance;
99+
const hasAuthority = username.length > 0 || password.length > 0;
100+
let output = `${protocol}//`;
101+
102+
if (hasAuthority) {
103+
if (username) {
104+
output += '[filtered]';
105+
106+
if (password) {
107+
output += ':';
108+
}
109+
}
110+
111+
if (password) {
112+
output += '[filtered]';
113+
}
114+
115+
output += '@';
116+
}
117+
118+
output += hostname;
119+
120+
if (port && port !== '80' && port !== '443') {
121+
output += `:${port}`;
122+
}
123+
124+
// Do not append pathname if it is empty.
125+
// For example: Pathname is `/` for `https://sentry.io`
126+
if (pathname.length > 1) {
127+
output += pathname;
128+
}
129+
130+
return output;
131+
}
66132

67133
const filteredHost =
68134
(host &&
69135
host
70136
// Always filter out authority
71137
.replace(/^.*@/, '[filtered]:[filtered]@')
72138
// Don't show standard :80 (http) and :443 (https) ports to reduce the noise
73-
// TODO: Use new URL global if it exists
74139
.replace(/(:80)$/, '')
75140
.replace(/(:443)$/, '')) ||
76141
'';

0 commit comments

Comments
 (0)