forked from agent0ai/agent-zero
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_dom.js
More file actions
160 lines (132 loc) · 4.07 KB
/
extract_dom.js
File metadata and controls
160 lines (132 loc) · 4.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
/**
 * Serialize the current document into an HTML-like string while tagging every
 * visited element in the live DOM so it can be addressed later.
 *
 * Side effects: each traversed element receives two attributes, `selectorName`
 * (value `<counter><selectorLabel>`) and `guidName` (value
 * `<HHMMSSmmm>-<selector>`). Only the selector attribute appears in the output;
 * all other `data-*` attributes, including the guid, are stripped.
 *
 * Output rules:
 * - `style`, `script`, `meta`, `link`, `svg`, `noscript` and `path` subtrees
 *   are skipped.
 * - Elements considered hidden get a bare ` invisible` marker in their tag.
 * - `srcset` is dropped and `data:` URLs in `src` are shortened to `data...`.
 * - Iframes get an `<!-- IFrame Content Placeholder <selector> -->` comment.
 * - Shadow roots are inlined between `Shadow DOM Start/End` comments.
 * - Attribute values and text are escaped so the result stays well formed.
 *
 * @param {Array<string>} [args] Positional options, passed as a single array:
 *   `[selectorLabel = "", selectorName = "data-a0sel3ct0r", guidName = "data-a0gu1d"]`.
 * @returns {string} Serialized markup starting at `document.documentElement`.
 */
function extractDOM([
  selectorLabel = "",
  selectorName = "data-a0sel3ct0r",
  guidName = "data-a0gu1d",
] = []) {
  const MAX_DEPTH = 1000;
  const IGNORED_TAGS = new Set([
    "style",
    "script",
    "meta",
    "link",
    "svg",
    "noscript",
    "path",
  ]);
  const EXCLUDED_ATTRIBUTES = new Set(["srcset"]);
  const ATTRIBUTE_ESCAPES = { "&": "&amp;", '"': "&quot;" };
  const TEXT_ESCAPES = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };

  let elementCounter = 0;
  // Time of day as HHMMSSmmm; prefixes every guid produced by this run.
  const time = new Date().toISOString().slice(11, -1).replace(/[:.]/g, "");

  // Escape a value for use inside a double-quoted attribute.
  function escapeAttribute(value) {
    return String(value).replace(/[&"]/g, (ch) => ATTRIBUTE_ESCAPES[ch]);
  }

  // Escape text so it cannot be read back as markup.
  function escapeText(value) {
    return String(value).replace(/[&<>]/g, (ch) => TEXT_ESCAPES[ch]);
  }

  // Decide whether an element should be reported as visible.
  function isElementVisible(element) {
    // Non-element nodes are never flagged as invisible.
    if (element.nodeType !== Node.ELEMENT_NODE) {
      return true;
    }

    const computedStyle = window.getComputedStyle(element);
    if (
      computedStyle.display === "none" ||
      computedStyle.visibility === "hidden" ||
      computedStyle.opacity === "0"
    ) {
      return false;
    }

    if (element.tagName === "INPUT" && element.type === "hidden") {
      return false;
    }

    if (
      element.hasAttribute("hidden") ||
      element.getAttribute("aria-hidden") === "true"
    ) {
      return false;
    }

    return true;
  }

  // Map a DOM attribute to `{ name, value }` (value unescaped), or null to drop it.
  function convertAttribute(attr) {
    const { name } = attr;
    if (EXCLUDED_ATTRIBUTES.has(name)) return null;
    // Keep the selector attribute; every other data-* attribute is omitted.
    if (name.startsWith("data-") && name !== selectorName) return null;

    let value =
      typeof attr.value === "string" ? attr.value : JSON.stringify(attr.value);
    // Inline data URLs can be huge; keep only a marker.
    if (name === "src" && value.startsWith("data:")) {
      value = "data...";
    }
    return { name, value };
  }

  // Recursively serialize `node`, tagging elements along the way.
  function traverseNodes(node, depth = 0, visited = new Set()) {
    if (!node) return "";
    if (depth > MAX_DEPTH) return "<!-- Max depth exceeded -->";

    // A guid recorded during this run means the node was already serialized.
    const previousGuid = node.getAttribute?.(guidName);
    if (previousGuid && visited.has(previousGuid)) {
      return `<!-- Circular reference detected at guid: ${previousGuid} -->`;
    }

    if (node.nodeType === Node.TEXT_NODE) {
      return escapeText(node.textContent);
    }
    if (node.nodeType === Node.COMMENT_NODE) {
      return `<!--${node.textContent}-->`;
    }
    if (node.nodeType !== Node.ELEMENT_NODE) {
      return "";
    }

    const tagName = node.tagName ? node.tagName.toLowerCase() : "";
    if (IGNORED_TAGS.has(tagName)) {
      return "";
    }

    // Tag the live element before reading its attributes so the selector is
    // part of the serialized opening tag.
    const selector = `${elementCounter++}${selectorLabel}`;
    const guid = `${time}-${selector}`;
    node.setAttribute(selectorName, selector);
    node.setAttribute(guidName, guid);
    visited.add(guid);

    let content = `<${tagName}`;
    if (!isElementVisible(node)) {
      content += " invisible";
    }
    for (const attr of node.attributes) {
      const out = convertAttribute(attr);
      if (out) content += ` ${out.name}="${escapeAttribute(out.value)}"`;
    }
    content += ">";

    if (tagName === "iframe") {
      // Reuse the selector already written to the element so the placeholder,
      // the serialized tag and the live DOM all refer to the same value.
      content += `<!-- IFrame Content Placeholder ${selector} -->`;
    }

    if (node.shadowRoot) {
      content += "<!-- Shadow DOM Start -->";
      for (const shadowChild of node.shadowRoot.childNodes) {
        content += traverseNodes(shadowChild, depth + 1, visited);
      }
      content += "<!-- Shadow DOM End -->";
    }

    for (const child of node.childNodes) {
      content += traverseNodes(child, depth + 1, visited);
    }

    content += `</${tagName}>`;
    return content;
  }

  return traverseNodes(document.documentElement);
}