-
Notifications
You must be signed in to change notification settings - Fork 26
/
redirects.js
297 lines (261 loc) · 7.97 KB
/
redirects.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
'use strict';
const {
findQueryParam
} = require('./common');
// Placeholder tokens used inside the KNOWN_REDIRECTS pattern templates.
// Each token is swapped for a concrete value by replacePlaceholders() (URL
// match patterns), replacePlaceholdersRegex() (regex source) and
// replacePlaceholdersCreateExample() (sample URLs).
// URL scheme, e.g. '*://' in match patterns or 'https://' in examples.
const SCHEMA = '<SCHEMA>';
// Host prefix; templates write it as `${SUBDOMAIN}.example.com`.
const SUBDOMAIN = '<SUBDOMAIN>';
// Arbitrary path between the host and the '?' of the query string.
const PATH = '<PATH>';
// Value of the target query parameter.
const QS_VALUE = '<QSVALUE>';
// Any query string key/value pairs that precede the target parameter.
const QS_KVS = '<QSKVS>';
// Table of known redirect/link-wrapper services. Each entry has:
//   name        - human readable label for the service
//   targetParam - query string parameter that carries the destination URL
//   patterns    - URL templates built from the placeholder constants; every
//                 template stops right after the '?' because the code below
//                 appends `<targetParam>=<value>` (optionally preceded by
//                 other key/value pairs) to it
//   types       - request types collected per target param further down
//                 (presumably webRequest resource types - confirm against
//                 the code that registers the listeners)
const KNOWN_REDIRECTS = [
{
name: 'Google Search Results',
targetParam: 'url',
patterns: [
`${SCHEMA}www.google.com/url?`
],
// Google uses 'ping' method sometimes.
types: ['main_frame', 'ping']
},
{
// Gmail wraps links in e-mails to pass you through their servers
// NOTE: same URL template as 'Google Search Results'; only the target
// parameter ('q' instead of 'url') and the types differ.
name: 'Gmail Link Wrappers',
targetParam: 'q',
patterns: [
`${SCHEMA}www.google.com/url?`
],
// I think that for Gmail, 'main_frame' is enough.
types: ['main_frame']
},
{
name: 'YouTube Redirect',
targetParam: 'q',
patterns: [
`${SCHEMA}www.youtube.com/redirect?`
],
types: ['main_frame']
},
{
name: 'RedirectingAt',
targetParam: 'url',
patterns: [
`${SCHEMA}${SUBDOMAIN}.redirectingat.com/?`,
],
types: ['main_frame']
},
{
name: 'Facebook',
targetParam: 'u',
patterns: [
`${SCHEMA}l.facebook.com/l.php?`,
`${SCHEMA}l.messenger.com/l.php?`
],
types: ['main_frame']
},
{
name: 'Amazon Affiliate',
targetParam: 'location',
patterns: [
`${SCHEMA}${SUBDOMAIN}.amazon.ca/gp/redirect.html?`
],
types: ['main_frame']
},
{
name: 'Rakuten Marketing',
targetParam: 'murl',
patterns: [
`${SCHEMA}click.linksynergy.com/deeplink?`
],
types: ['main_frame']
},
{
name: 'ValueClick',
targetParam: 'url',
patterns: [
`${SCHEMA}www.dpbolvw.net${PATH}?`,
`${SCHEMA}www.tkqlhce.com${PATH}?`,
`${SCHEMA}www.anrdoezrs.net${PATH}?`
],
types: ['main_frame']
},
{
name: 'Reddit',
targetParam: 'url',
patterns: [
`${SCHEMA}out.reddit.com${PATH}?`,
],
types: ['main_frame']
},
{
name: 'Tradedoubler',
targetParam: 'url',
patterns: [
`${SCHEMA}${SUBDOMAIN}.tradedoubler.com/click?`
],
types: ['main_frame']
},
{
name: 'Impact Radius',
targetParam: 'return',
patterns: [
`${SCHEMA}www.ojrq.net/p/?`
],
types: ['main_frame']
},
{
name: 'Connexity',
targetParam: 't',
patterns: [
`${SCHEMA}rd.connexity.net/rd?`
],
types: ['main_frame']
},
{
// The parameter name is capitalised ('Url'), unlike the other entries.
name: 'Commission Factory',
targetParam: 'Url',
patterns: [
`${SCHEMA}t.cfjump.com${PATH}?`
],
types: ['main_frame']
},
{
name: 'Slack',
targetParam: 'url',
patterns: [
`${SCHEMA}slack-redir.net/link?`
],
types: ['main_frame']
}
];
// Regroup the known redirects by the query parameter that holds their
// target. All patterns that look for the same parameter end up in one
// bucket, so a match on any of them already tells us which parameter to
// read; there is no need to rescan the URL to work out which pattern hit.
// That should make things noticeably faster in the end.
//
// Bucket shape: { patterns: string[], regexes: RegExp[], types: string[] }
//
// Deliberately declared with 'var' so that it's not scoped incorrectly.
var REDIRECT_DATA_BY_TARGET_PARAM = {};
for (const knownRedirect of KNOWN_REDIRECTS) {
  const {
    targetParam,
    patterns: templates,
    types: requestTypes
  } = knownRedirect;

  // Ignore entries that lack a target param, patterns or types.
  const isComplete = targetParam
    && templates && templates.length
    && requestTypes && requestTypes.length;
  if (!isComplete) {
    continue;
  }

  // Create the bucket for this target param the first time we see it.
  if (!REDIRECT_DATA_BY_TARGET_PARAM[targetParam]) {
    REDIRECT_DATA_BY_TARGET_PARAM[targetParam] = {
      patterns: [],
      regexes: [],
      types: []
    };
  }
  const bucket = REDIRECT_DATA_BY_TARGET_PARAM[targetParam];

  // Record every request type once per target param.
  for (const requestType of requestTypes) {
    if (!bucket.types.includes(requestType)) {
      bucket.types.push(requestType);
    }
  }

  // Key/value placeholder for the target param, e.g. 'url=<QSVALUE>'.
  const targetParamKv = `${targetParam}=${QS_VALUE}`;
  const expandedPatterns = [];
  const clipboardRegexes = [];
  for (const template of templates) {
    const asFirstParam = `${template}${targetParamKv}`;
    const asLaterParam = `${template}${QS_KVS}${targetParamKv}`;

    // URL matching needs two variations: the target param in first
    // position, and the target param preceded by other key/values.
    expandedPatterns.push(replacePlaceholders(asFirstParam));
    expandedPatterns.push(replacePlaceholders(asLaterParam));

    // A single regex covers both because the leading key/values are optional.
    clipboardRegexes.push(new RegExp(replacePlaceholdersRegex(asLaterParam)));
  }

  // Append this redirect's patterns and regexes to the shared bucket.
  bucket.patterns.push(...expandedPatterns);
  bucket.regexes.push(...clipboardRegexes);
}
/**
 * Backslash-escape every regular expression metacharacter in a string so the
 * result can be embedded in a RegExp source and match the input literally.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The text with - [ ] / { } ( ) * + ? . \ ^ $ | escaped.
 */
function escapeRegExp(str) {
  const metaCharacters = /[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g;
  return str.replace(metaCharacters, (character) => `\\${character}`);
}
/**
 * Turn a pattern template into a wildcard URL match pattern by swapping each
 * placeholder token for its '*' based equivalent.
 *
 * Only the first occurrence of each placeholder is replaced.
 *
 * @param {string} pattern - Template containing placeholder tokens.
 * @returns {string} The URL match pattern.
 */
function replacePlaceholders(pattern) {
  const wildcardByPlaceholder = [
    [SCHEMA, '*://'],
    [SUBDOMAIN, '*'],
    [PATH, '/*'],
    [QS_KVS, '*&'],
    [QS_VALUE, '*']
  ];
  let matchPattern = pattern;
  for (const [placeholder, wildcard] of wildcardByPlaceholder) {
    matchPattern = matchPattern.replace(placeholder, wildcard);
  }
  return matchPattern;
}
/**
 * Turn a pattern template into the source text of a regular expression.
 *
 * The literal parts of the template are escaped first; the placeholder
 * tokens contain no regex metacharacters, so they survive escaping and are
 * then swapped for regex fragments. The result is not anchored.
 *
 * @param {string} pattern - Template containing placeholder tokens.
 * @returns {string} Regex source, ready to be passed to `new RegExp()`.
 */
function replacePlaceholdersRegex(pattern) {
  // Zero or more dot-terminated host labels ('', 'go.', 'a.b.'). The
  // backslashes are doubled so they reach the regex instead of being
  // swallowed by the string literal.
  const subdomainRegex = '([a-zA-Z0-9\\-]+\\.)*';

  return escapeRegExp(pattern)
    .replace(SCHEMA, 'http(s)?:\\/\\/')
    // Templates write the placeholder as '<SUBDOMAIN>.domain'. Fold that
    // (escaped) separator dot into the optional group so that the bare
    // domain and multi-level subdomains match too, mirroring what the
    // '*.domain' URL match pattern accepts.
    .replace(`${SUBDOMAIN}\\.`, subdomainRegex)
    // Fallback for a placeholder that is not followed by a dot.
    .replace(SUBDOMAIN, subdomainRegex)
    .replace(PATH, '(\\/[\\w\\-]+)+')
    // Any number of '&'-terminated chunks. Keys and values may be empty and
    // may contain anything except '&', '#' (start of the fragment) and
    // whitespace, so percent-encoded or punctuated values no longer prevent
    // a match.
    .replace(QS_KVS, '([^&#\\s]*&)*')
    // The target value only needs to start with a word character.
    .replace(QS_VALUE, '\\w');
}
/**
 * Build an example URL from a pattern template by filling every placeholder
 * token with a fixed sample value.
 *
 * Only the first occurrence of each placeholder is replaced.
 *
 * @param {string} pattern - Template containing placeholder tokens.
 * @returns {string} The example URL.
 */
function replacePlaceholdersCreateExample(pattern) {
  const sampleByPlaceholder = [
    { placeholder: SCHEMA, sample: 'https://' },
    { placeholder: SUBDOMAIN, sample: 'foo' },
    { placeholder: PATH, sample: '/path/to/whatever' },
    { placeholder: QS_KVS, sample: '&' },
    { placeholder: QS_VALUE, sample: 'foo' }
  ];
  return sampleByPlaceholder.reduce(
    (example, { placeholder, sample }) => example.replace(placeholder, sample),
    pattern
  );
}
/**
 * Extract the redirect destination from a URL.
 *
 * The raw value is looked up with findQueryParam() (imported from
 * './common') and is only accepted when it is a string starting with 'http'.
 *
 * @param {string} url - URL to inspect.
 * @param {string} [targetParam='url'] - Query parameter holding the target.
 * @returns {string|false} The URI-decoded target, or `false` when the
 *   parameter is missing, does not start with 'http', or cannot be decoded.
 */
function extractRedirectTarget(url, targetParam = 'url') {
  // See if we can find a target in the URL.
  const target = findQueryParam(targetParam, url);
  if (typeof target !== 'string' || !target.startsWith('http')) {
    return false;
  }
  try {
    return decodeURIComponent(target);
  } catch (err) {
    // decodeURIComponent throws URIError on malformed percent-escapes.
    // Treat that as "no usable target" so callers keep the original URL
    // instead of crashing on hostile or truncated input.
    if (err instanceof URIError) {
      return false;
    }
    throw err;
  }
}
/**
 * Resolve a known redirect URL to its destination.
 *
 * The buckets of REDIRECT_DATA_BY_TARGET_PARAM are checked in order; the
 * first bucket with a regex matching the URL decides which query parameter
 * is read. No further buckets are consulted after that first match.
 *
 * @param {string} url - URL to inspect.
 * @returns {string} The extracted target, or the input unchanged when it is
 *   falsy, matches no known redirect, or yields no usable target.
 */
function followRedirect(url) {
  if (!url) {
    return url;
  }

  for (const targetParam in REDIRECT_DATA_BY_TARGET_PARAM) {
    const { regexes = [] } = REDIRECT_DATA_BY_TARGET_PARAM[targetParam];
    const isKnownRedirect = regexes.some((candidate) => candidate.test(url));
    if (isKnownRedirect) {
      // First hit wins; fall back to the original URL if nothing usable
      // could be extracted.
      return extractRedirectTarget(url, targetParam) || url;
    }
  }

  return url;
}
// Public API of this module. replacePlaceholders() and
// replacePlaceholdersRegex() are intentionally not listed here; they are only
// used while building REDIRECT_DATA_BY_TARGET_PARAM above.
module.exports = {
KNOWN_REDIRECTS,
REDIRECT_DATA_BY_TARGET_PARAM,
escapeRegExp,
replacePlaceholdersCreateExample,
extractRedirectTarget,
followRedirect
};