-
Notifications
You must be signed in to change notification settings - Fork 41
/
Copy pathget-html.mts
61 lines (52 loc) · 1.74 KB
/
get-html.mts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import got from "got";
import * as cheerio from "cheerio";
import * as prettier from "prettier";
import { State, cache } from "./index.mjs";
/**
 * Returns the HTML of the webhooks documentation page, reduced to the
 * article content.
 *
 * When `state.cached` is truthy, the cached copy stored at
 * `<folderName>/webhook-events-and-payloads.html` is returned if it can be
 * read. Otherwise the page at `state.baseUrl` is fetched, stripped of
 * callouts and of all but a few class names, written to the cache
 * (formatted by prettier), and returned.
 *
 * @param state - Run options; `baseUrl` is the page to fetch and
 *   `folderName` is the cache sub-folder for the resulting file.
 * @returns The cached file contents, or the freshly extracted HTML. Note that
 *   the freshly extracted HTML is returned as-is, while the cached copy is the
 *   prettier-formatted version.
 */
export const getHtml = async (
  state: State & { baseUrl: string; folderName: string },
): Promise<string> => {
  const WEBHOOKS_DOCS_URL = state.baseUrl;
  const cacheFilePath = `${state.folderName}/webhook-events-and-payloads.html`;
  // Class names that must survive the clean-up below because they are used
  // elsewhere to locate payload examples.
  const preservedClasses = ["language-json", "warning"];

  try {
    if (state.cached) {
      // `await` is required here: without it, a rejected promise from
      // `cache.read` would bypass this try/catch and the fallback to fetching
      // from the source would never happen.
      return await cache.read(cacheFilePath);
    }
  } catch {
    // if we can't read from the cache, continue and fetch from the source
  }

  console.log(`⌛ fetching ${WEBHOOKS_DOCS_URL}`);

  // Retry is configured with a limit of 10 attempts for 503 responses.
  const { body } = await got(WEBHOOKS_DOCS_URL, {
    retry: {
      limit: 10,
      statusCodes: [503],
    },
  });
  const $ = cheerio.load(body);

  // get only the HTML we care about to avoid unnecessary cache updates
  $('[data-testid="callout"]').remove();
  // The kept fragment is the grandparent of the `#article-contents` element.
  const data = $("#article-contents").parent().parent();

  // Remove all classes from the HTML, except the ones that are actively used in the code to get payload examples.
  // This is done to avoid unnecessary cache updates in order to reduce noise from automated Pull Requests
  // https://github.com/octokit/webhooks/issues/642
  data.find("*").each((_index, el) => {
    const classes = $(el).attr("class");
    if (!classes) {
      return;
    }

    const filteredClasses = classes
      .split(" ")
      .filter((classSelector) => preservedClasses.includes(classSelector));

    if (filteredClasses.length) {
      $(el).attr("class", filteredClasses.join(" "));
    } else {
      $(el).removeAttr("class");
    }
  });

  const html = data.html() ?? "";

  await cache.write(
    cacheFilePath,
    await prettier.format(html, { parser: "html" }),
  );

  return html;
};