Commit b80fa77
add trampolines tutorial
1 parent 402e56d

File tree: 5 files changed, +148 -0


trampolines/README.md

Lines changed: 21 additions & 0 deletions
## Trampolines tutorial

What if you have a complex implementation of some logic that needs to be fed data from various sources, and that has to run synchronously or asynchronously depending on where you use it?
Should you duplicate the logic? Should you split it into many small functions? What if there's a complex set of conditions?

Implement `crawlAndProcessSync` and `crawlAndProcessAsync` in `crawler.js` without duplicating the main crawling implementation and without making significant changes to the body of the `crawlAndProcess` function.

![](https://i.giphy.com/media/v1.Y2lkPTc5MGI3NjExZmJ6NHAwMHVpdm5ycGVzNDhnemR1NWlnNzBqMGZnd2J3aXlqbjM0eiZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/n4FpwV0JxhYjtBjn4U/giphy.gif)

### Tips

Train of thought:
- The key idea: we need to outsource awaiting (or not awaiting) from `crawlAndProcess` to the Sync/Async functions.
- The logic still has to stop whenever something potentially asynchronous runs.
- What other primitive do we know that can pause execution mid-function?
- Yes, that's the starting point. Now implement wrappers on the outside of the function (see the sketch below).
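
If you landed on generators, here is a minimal sketch of the idea on a made-up `getUser` example (not the crawler itself): the generator yields whenever it reaches a potentially asynchronous step, and a pair of drivers decides whether to await what was yielded. The names `getUser`, `load`, `runSync`, and `runAsync` are illustrative only, not part of the exercise files.

```js
// Minimal trampoline sketch; all names here are made up for illustration.
function* getUser(id, { load }) {
  // Pause here so the driver can decide whether to await the result of `load`.
  const record = yield load(id);
  return { id, name: record.name };
}

// Async driver: awaits every yielded value before resuming the generator.
async function runAsync(iterator) {
  let result = iterator.next();
  while (!result.done) {
    result = iterator.next(await result.value);
  }
  return result.value;
}

// Sync driver: passes yielded values straight back in, never awaiting.
function runSync(iterator) {
  let result = iterator.next();
  while (!result.done) {
    result = iterator.next(result.value);
  }
  return result.value;
}

// Same generator body, two execution modes (the `load` implementations are hypothetical):
// const user1 = await runAsync(getUser(1, { load: (id) => fetchUserFromDb(id) }));
// const user2 = runSync(getUser(1, { load: (id) => usersInMemory.get(id) }));
```

The same pair of drivers can then be wrapped around a generator version of the function you actually care about.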

Let me guess, you broke recursion? You need to forward all values from it, not only the first one.
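
To illustrate that hint with a toy example unrelated to the crawler: inside a generator, a plain `yield` on a recursive call hands the driver the inner generator object itself, once; `yield*` delegates to it and forwards every value it yields.

```js
// Toy illustration of forwarding values from a recursive generator call.
function* countdown(n) {
  if (n < 0) return;
  yield n;
  // yield countdown(n - 1);  // broken: yields the inner generator object, once
  yield* countdown(n - 1); // delegation: forwards every value the inner call yields
}

console.log([...countdown(3)]); // [3, 2, 1, 0]
```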

trampolines/app.js

Lines changed: 40 additions & 0 deletions
1+
import dns from "dns/promises";
2+
import { crawlAndProcessAsync, crawlAndProcessSync } from "./crawler.js";
3+
import { inMemoryFetch } from "./fixture.js";
4+
5+
const deny = ["127.0.0.1", "192.168.0.1", "localhost"];
6+
7+
function basicDenyFilter(url) {
8+
const domain = new URL(url).hostname;
9+
return !deny.includes(domain);
10+
}
11+
12+
async function dnsDenyFilter(url) {
13+
const domain = new URL(url).hostname;
14+
try {
15+
const addresses = await dns.resolve(domain);
16+
const isDenied = addresses.some((address) => deny.includes(address));
17+
if (!isDenied) {
18+
return true;
19+
}
20+
} catch (error) {
21+
console.error(`DNS lookup failed for ${domain}:`, error);
22+
}
23+
return false;
24+
}
25+
26+
// Example usage with DNS lookup filter and actual fetch
27+
const sitemap1 = await crawlAndProcessAsync({
28+
url: "https://example.com",
29+
filterURL: dnsDenyFilter,
30+
download: (url) => fetch(url).then((response) => response.text()),
31+
});
32+
console.log(sitemap1);
33+
34+
// Synchronous usage in a faked environment
35+
const sitemap2 = crawlAndProcessSync({
36+
url: "https://example.com",
37+
filterURL: basicDenyFilter,
38+
download: inMemoryFetch,
39+
});
40+
console.log(sitemap2);

trampolines/crawler.js

Lines changed: 58 additions & 0 deletions
1+
export async function crawlAndProcessAsync(options) {
2+
return crawlAndProcess(options);
3+
}
4+
5+
export function crawlAndProcessSync(options) {
6+
return crawlAndProcess(options);
7+
}
8+
9+
async function crawlAndProcess({
10+
url,
11+
filterURL,
12+
download,
13+
14+
depth = 2,
15+
visited = new Set(),
16+
sitemap = {},
17+
memo = new Map(),
18+
}) {
19+
if (depth < 1) return;
20+
if (visited.has(url)) return;
21+
visited.add(url);
22+
23+
const html = await download(url);
24+
25+
// Extract URLs from the HTML content
26+
const urlRegex = /https:\/\/[^\s"'<]+/g;
27+
const urls = html.match(urlRegex) || [];
28+
29+
// Filter URLs using the filterURL function and memoize results
30+
const filteredUrls = [];
31+
for (const url of urls) {
32+
if (!memo.has(url)) {
33+
const isAllowed = await filterURL(url);
34+
memo.set(url, isAllowed);
35+
}
36+
if (memo.get(url)) {
37+
filteredUrls.push(url);
38+
}
39+
}
40+
41+
// Add the current URL and its filtered URLs to the sitemap
42+
sitemap[url] = filteredUrls;
43+
44+
// Recursively crawl filtered URLs
45+
for (const filteredUrl of filteredUrls) {
46+
await crawlAndProcess({
47+
url: filteredUrl,
48+
filterURL,
49+
download,
50+
depth: depth - 1,
51+
visited,
52+
sitemap,
53+
memo,
54+
});
55+
}
56+
57+
return sitemap;
58+
}

trampolines/fixture.js

Lines changed: 26 additions & 0 deletions
1+
export function inMemoryFetch(url) {
2+
const html = examplePages.get(url);
3+
if (!html) {
4+
throw new Error(`URL not found in examplePages: ${url}`);
5+
}
6+
return html;
7+
}
8+
9+
export const examplePages = new Map([
10+
[
11+
"https://example.com",
12+
`<a href="https://example.com/page1">Page 1</a>
13+
<a href="https://localhost:8080/">You</a>
14+
<a href="https://naugtur.pl/">naugtur</a>`,
15+
],
16+
[
17+
"https://example.com/page1",
18+
`<a href="https://example.com/page2">Page 2</a>`,
19+
],
20+
[
21+
"https://example.com/page2",
22+
`<a href="https://example.com/page1">Page 1</a>`,
23+
],
24+
["https://example.com/page3", "No more links here."],
25+
["https://naugtur.pl/", "Hello!"],
26+
]);

trampolines/package.json

Lines changed: 3 additions & 0 deletions
1+
{
2+
"type": "module"
3+
}
