1
- type PartialURL = {
2
- host ?: string ;
3
- path ?: string ;
4
- protocol ?: string ;
5
- relative ?: string ;
6
- search ?: string ;
7
- hash ?: string ;
8
- } ;
1
+ type PartialURL = Partial < {
2
+ host : string ;
3
+ path : string ;
4
+ protocol : string ;
5
+ relative : string ;
6
+ search : string ;
7
+ hash : string ;
8
+ urlInstance : URL ;
9
+ } > ;
10
+
11
+ const urlRegex = / ^ ( ( [ ^ : / ? # ] + ) : ) ? ( \/ \/ ( [ ^ / ? # ] * ) ) ? ( [ ^ ? # ] * ) ( \? ( [ ^ # ] * ) ) ? ( # ( .* ) ) ? $ / ;
9
12
10
13
/**
11
14
* Parses string form of URL into an object
@@ -19,7 +22,31 @@ export function parseUrl(url: string): PartialURL {
19
22
return { } ;
20
23
}
21
24
22
- const match = url . match ( / ^ ( ( [ ^ : / ? # ] + ) : ) ? ( \/ \/ ( [ ^ / ? # ] * ) ) ? ( [ ^ ? # ] * ) ( \? ( [ ^ # ] * ) ) ? ( # ( .* ) ) ? $ / ) ;
25
+ // Node.js v16 and above supports WHATWG URL API. We can use it when available.
26
+ if ( typeof URL !== 'undefined' ) {
27
+ try {
28
+ const parsed = new URL ( url ) ;
29
+ const pathname = parsed . pathname ;
30
+
31
+ return {
32
+ host : parsed . host ,
33
+ // WHATWG URL API includes the leading slash in the pathname
34
+ // Example: Returns `/` for `https://sentry.io`
35
+ path : pathname . length === 1 ? '' : pathname ,
36
+ // WHATWG URL API includes the trailing colon in the protocol
37
+ // Example: Returns `https:` for `https://sentry.io`
38
+ protocol : parsed . protocol . slice ( 0 , - 1 ) ,
39
+ search : parsed . search ,
40
+ hash : parsed . hash ,
41
+ relative : parsed . pathname + parsed . search + parsed . hash ,
42
+ urlInstance : parsed ,
43
+ } ;
44
+ } catch {
45
+ // If URL is invalid, fallback to regex parsing to support URLs without protocols.
46
+ }
47
+ }
48
+
49
+ const match = url . match ( urlRegex ) ;
23
50
24
51
if ( ! match ) {
25
52
return { } ;
@@ -62,15 +89,53 @@ export function getNumberOfUrlSegments(url: string): number {
62
89
* see: https://develop.sentry.dev/sdk/data-handling/#structuring-data
63
90
*/
64
91
export function getSanitizedUrlString ( url : PartialURL ) : string {
65
- const { protocol, host, path } = url ;
92
+ const { protocol, host, path, urlInstance } = url ;
93
+
94
+ // This means that the environment supports WHATWG URL API.
95
+ // This case will not be executed if URL does not have a protocol
96
+ // since WHATWG URL specification requires protocol to be present.
97
+ if ( urlInstance !== undefined ) {
98
+ const { port, username, password, hostname, pathname, protocol } = urlInstance ;
99
+ const hasAuthority = username . length > 0 || password . length > 0 ;
100
+ let output = `${ protocol } //` ;
101
+
102
+ if ( hasAuthority ) {
103
+ if ( username ) {
104
+ output += '[filtered]' ;
105
+
106
+ if ( password ) {
107
+ output += ':' ;
108
+ }
109
+ }
110
+
111
+ if ( password ) {
112
+ output += '[filtered]' ;
113
+ }
114
+
115
+ output += '@' ;
116
+ }
117
+
118
+ output += hostname ;
119
+
120
+ if ( port && port !== '80' && port !== '443' ) {
121
+ output += `:${ port } ` ;
122
+ }
123
+
124
+ // Do not append pathname if it is empty.
125
+ // For example: Pathname is `/` for `https://sentry.io`
126
+ if ( pathname . length > 1 ) {
127
+ output += pathname ;
128
+ }
129
+
130
+ return output ;
131
+ }
66
132
67
133
const filteredHost =
68
134
( host &&
69
135
host
70
136
// Always filter out authority
71
137
. replace ( / ^ .* @ / , '[filtered]:[filtered]@' )
72
138
// Don't show standard :80 (http) and :443 (https) ports to reduce the noise
73
- // TODO: Use new URL global if it exists
74
139
. replace ( / ( : 8 0 ) $ / , '' )
75
140
. replace ( / ( : 4 4 3 ) $ / , '' ) ) ||
76
141
'' ;
0 commit comments