Skip to content

Commit 357870e

Browse files
authored
Create parser.js
1 parent 6308e0c commit 357870e

File tree

1 file changed

+152
-0
lines changed

1 file changed

+152
-0
lines changed

ted_baker/parser.js

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
(function() {
2+
/* URL of the page whose <head> and <body> markup is copied into this document. */
var headerUrl = "https://www.tedbaker.com/uk/Brickwork_Sample_Page";

/*
 * Fetch the remote page, rewrite its URLs through replace_all_rel_by_abs(),
 * and append the contents of its <head> and <body> to the current document.
 *
 * SECURITY NOTE(review): the fetched markup is inserted without sanitising,
 * so this page trusts whatever headerUrl serves. Confirm that is acceptable.
 */
$.get(headerUrl, function(response) {
    var parsedResponse = replace_all_rel_by_abs(response);

    // [\s\S]* spans line breaks without the alternation-per-character cost
    // of (.|[\n\r])*. Capture group 1 is the markup between the tags.
    var head = parsedResponse.match(/<head[^>]*>([\s\S]*)<\/head>/i);
    var body = parsedResponse.match(/<body[^>]*>([\s\S]*)<\/body>/i);

    // insertAdjacentHTML appends without re-parsing the existing children,
    // so nodes (and their listeners/state) already in the page are kept.
    if (head) {
        document.head.insertAdjacentHTML("beforeend", head[1]);
    } else {
        console.warn("parser.js: no <head> element found in " + headerUrl);
    }

    if (body) {
        document.body.insertAdjacentHTML("beforeend", body[1]);
    } else {
        console.warn("parser.js: no <body> element found in " + headerUrl);
    }
});
13+
14+
/*
 * Rewrites URLs inside an HTML string by passing each one through rel_to_abs().
 *
 * Handled locations (in this order): the url= part of a meta refresh content
 * attribute, href and src on any tag, <object data>, <applet codebase>,
 * <param name=movie value>, url(...) inside <style> blocks, and url(...)
 * inside style attributes.
 *
 * html    - markup to process (string).
 * returns - the markup with the matched URLs replaced.
 */
function replace_all_rel_by_abs(html) {
    /* One character that cannot belong to an attribute name; used as a left boundary. */
    var att = "[^-a-z0-9:._]";

    /* A numeric character reference ends at ";" or where no digit follows. */
    var entityEnd = "(?:;|(?!\\d))";

    /* Pattern cache, pre-seeded for characters that need escaping in a regex. */
    var ents = {
        " ": "(?:\\s|&nbsp;?|&#0*32" + entityEnd + "|&#x0*20" + entityEnd + ")",
        "(": "(?:\\(|&#0*40" + entityEnd + "|&#x0*28" + entityEnd + ")",
        ")": "(?:\\)|&#0*41" + entityEnd + "|&#x0*29" + entityEnd + ")",
        ".": "(?:\\.|&#0*46" + entityEnd + "|&#x0*2e" + entityEnd + ")"
    };

    /* Per-character pattern cache filled lazily by ae(). */
    var charMap = {};

    /* Zero or more (possibly entity-encoded) spaces. */
    var s = ents[" "] + "*";

    /* Anything up to the end of a tag, skipping over quoted attribute values. */
    var any = "(?:[^>\"']*(?:\"[^\"]*\"|'[^']*'))*?[^>]*";

    /*
     * Returns a regex fragment matching `string` where every character may be
     * written literally (lowercase form) or as a decimal / hexadecimal numeric
     * character reference of its lower- or uppercase form. Results are cached
     * in `ents` (whole string) and `charMap` (single characters).
     */
    function ae(string) {
        var lowered = string.toLowerCase();
        var cached = ents[string];
        if (cached) return cached;

        var raised = string.toUpperCase();
        var pattern = "";
        for (var pos = 0; pos < string.length; pos++) {
            var lo = lowered.charAt(pos);
            var piece = charMap[lo];
            if (!piece) {
                var up = raised.charAt(pos);
                var options = [
                    lo,
                    "&#0*" + lo.charCodeAt(0) + entityEnd,
                    "&#x0*" + lo.charCodeAt(0).toString(16) + entityEnd
                ];
                if (lo != up) {
                    /* Literal uppercase is left to the regex ignore-case flag. */
                    options.push("&#0*" + up.charCodeAt(0) + entityEnd);
                    options.push("&#x0*" + up.charCodeAt(0).toString(16) + entityEnd);
                }
                piece = "(?:" + options.join("|") + ")";
                charMap[lo] = piece;
            }
            pattern += piece;
        }
        ents[string] = pattern;
        return pattern;
    }

    /*
     * Replacement callback: keeps the text before and after the URL and
     * converts the URL itself. (Returning before + "javascript://" + after
     * instead would strip the link.)
     */
    function by(match, before, target, after) {
        return before + rel_to_abs(target) + after;
    }

    /*
     * NOTE(review): these two are compiled without the "i" flag, unlike the
     * other patterns built from ae() - confirm that is intended.
     */
    var slashRE = new RegExp(ae("/"), 'g');
    var dotRE = new RegExp(ae("."), 'g');

    /* Like by(), but first turns entity-encoded "/" and "." back into plain characters. */
    function by2(match, before, target, after) {
        var decoded = target.replace(slashRE, "/").replace(dotRE, ".");
        return before + rel_to_abs(decoded) + after;
    }

    /*
     * Within every piece of `html` matched by `selector`, rewrites the value
     * following `attribute` + `marker` (default: "=" with optional spaces).
     * Double-quoted, single-quoted and unquoted values are each handled.
     * `delimiter` adds characters that terminate the value; `end` is a pattern
     * that must follow it (the value then matches lazily).
     */
    function cr(selector, attribute, marker, delimiter, end) {
        var scope = typeof selector == "string" ? new RegExp(selector, "gi") : selector;
        var name = att + attribute;
        var sep = typeof marker == "string" ? marker : "\\s*=\\s*";
        var stop = typeof delimiter == "string" ? delimiter : "";
        var tail = typeof end == "string" ? "?)(" + end : ")(";
        var lead = "(" + name + sep;

        var doubleQuoted = new RegExp(lead + '")([^"' + stop + ']+' + tail + ')', 'gi');
        var singleQuoted = new RegExp(lead + "')([^'" + stop + "]+" + tail + ")", 'gi');
        var unquoted = new RegExp(lead + ')([^"\'][^\\s>' + stop + ']*' + tail + ')', 'gi');

        html = html.replace(scope, function(fragment) {
            return fragment
                .replace(doubleQuoted, by)
                .replace(singleQuoted, by)
                .replace(unquoted, by);
        });
    }

    /*
     * Within every piece of `html` matched by `selector`, looks inside the
     * quoted value of `attribute` for `front` followed by a URL and rewrites
     * that URL via by2(). `flags` are the regex flags for the inner patterns
     * (default "gi"). When `delimiter` is a string a third inner pattern is
     * used as well, optionally requiring `end` after the URL.
     *
     * NOTE(review): `delimiter` is spliced inside a character class, so a
     * multi-character pattern is read as a set of single characters there -
     * confirm this is the intended matching behaviour.
     */
    function cri(selector, attribute, front, flags, delimiter, end) {
        var scope = typeof selector == "string" ? new RegExp(selector, "gi") : selector;
        var name = att + attribute;
        var innerFlags = typeof flags == "string" ? flags : "gi";

        var doubleQuotedAttr = new RegExp('(' + name + '\\s*=\\s*")([^"]*)', 'gi');
        var singleQuotedAttr = new RegExp("(" + name + "\\s*=\\s*')([^']+)", 'gi');
        var at1 = new RegExp('(' + front + ')([^"]+)(")', innerFlags);
        var at2 = new RegExp("(" + front + ")([^']+)(')", innerFlags);
        var at3 = null;
        if (typeof delimiter == "string") {
            var closing = typeof end == "string" ? end : "";
            at3 = new RegExp("(" + front + ")([^\"'][^" + delimiter + "]*" + (closing ? "?)(" + closing + ")" : ")()"), innerFlags);
        }

        function handleAttr(match, opening, value) {
            var rewritten = value.replace(at1, by2).replace(at2, by2);
            if (at3) rewritten = rewritten.replace(at3, by2);
            return opening + rewritten;
        }

        html = html.replace(scope, function(fragment) {
            return fragment.replace(doubleQuotedAttr, handleAttr).replace(singleQuotedAttr, handleAttr);
        });
    }

    /*
     * Pattern for "attribute value equals `word`" up to the closing ">" of the
     * tag, accepting the value double-quoted, single-quoted or unquoted.
     */
    function valueIs(word) {
        return "(?:\"" + ae(word) + "\"" + any + ">|'" + ae(word) + "'" + any + ">|" + ae(word) + "(?:" + ae(" ") + any + ">|>))";
    }

    /* <meta http-equiv=refresh content="...; url=..."> */
    cri("<meta" + any + att + "http-equiv\\s*=\\s*" + valueIs("refresh"), "content", ae("url") + s + ae("=") + s, "i");

    cr("<" + any + att + "href\\s*=" + any + ">", "href"); /* Linked elements */
    cr("<" + any + att + "src\\s*=" + any + ">", "src"); /* Embedded elements */

    cr("<object" + any + att + "data\\s*=" + any + ">", "data"); /* <object data= > */
    cr("<applet" + any + att + "codebase\\s*=" + any + ">", "codebase"); /* <applet codebase= > */

    /* <param name=movie value= > */
    cr("<param" + any + att + "name\\s*=\\s*" + valueIs("movie"), "value");

    /* url(...) inside <style> blocks */
    cr(/<style[^>]*>(?:[^"']*(?:"[^"]*"|'[^']*'))*?[^'"]*(?:<\/style|$)/gi, "url", "\\s*\\(\\s*", "", "\\s*\\)");

    /* url(...) inside style="..." attributes */
    cri("<" + any + att + "style\\s*=" + any + ">", "style", ae("url") + s + ae("(") + s, 0, s + ae(")"), ae(")"));

    return html;
}
126+
127+
/*
 * Converts a URL taken from the fetched markup into an absolute URL.
 *
 * - Absolute URLs (http, https, file, ftp, ftps, mailto, javascript) and
 *   data:image/ URIs are returned untouched.
 * - Protocol-relative URLs ("//host/path") get "https:" prepended.
 * - Root-relative URLs ("/path") are pointed at www.tedbaker.com using the
 *   current page's protocol.
 * - Blank input yields "".
 * - Anything else is resolved against location.href and then has
 *   quote / angle-bracket characters percent-encoded.
 *
 * NOTE(review): path-relative URLs resolve against the *current* page, not
 * against the page the markup was fetched from - confirm this is intended.
 *
 * url     - URL string as found in an attribute or url(...) value.
 * returns - the absolute URL string.
 */
function rel_to_abs(url) {
    // Scheme-prefixed URLs are already absolute. A data URI continues with
    // ";" or "," right after the media type, never with another ":".
    if (/^(?:(?:https?|file|ftps?|mailto|javascript):|data:image\/[^;,]{2,9}[;,])/i.test(url)) {
        return url;
    }

    // "//host/path" already names its host, so only the scheme is missing.
    if (url.substring(0, 2) === "//") {
        return "https:" + url;
    }

    if (url.charAt(0) === "/") {
        return location.protocol + "//" + "www.tedbaker.com" + url;
    }

    if (/^\s*$/.test(url)) {
        return ""; // Empty = return nothing
    }

    // The page URL is treated as a directory; the "../" prefixed below then
    // drops its last segment so the URL resolves next to the current page.
    var base_url = location.href + "/";
    if (url.substring(0, 2) === "./") {
        url = "." + url;
    } else {
        url = "../" + url;
    }
    url = base_url + url;

    // Collapse "segment/../" pairs until none are left. Stop as well when a
    // pass changes nothing, so an uncollapsible "/../" cannot loop forever.
    var previous;
    do {
        previous = url;
        url = url.replace(/[^\/]+\/+\.\.\//g, "");
    } while (url !== previous && /\/\.\.\//.test(url));

    /* Escape certain characters to prevent XSS */
    url = url.replace(/\.$/, "").replace(/\/\./g, "").replace(/"/g, "%22")
        .replace(/'/g, "%27").replace(/</g, "%3C").replace(/>/g, "%3E");
    return url;
}
151+
152+
}())

0 commit comments

Comments
 (0)