import dns from "dns/promises";
import { crawlAndProcessAsync, crawlAndProcessSync } from "./crawler.js";
import { inMemoryFetch } from "./fixture.js";
| 4 | + |
// Hostnames and IP literals the crawler must never contact (basic SSRF guard).
// Frozen so no code path can mutate the shared denylist at runtime.
const deny = Object.freeze(["127.0.0.1", "192.168.0.1", "localhost"]);
| 6 | + |
/**
 * Synchronous URL filter: allow a URL unless its hostname appears verbatim
 * in the denylist. No DNS resolution is performed, so IP-based entries only
 * match URLs whose host is written as that literal IP.
 *
 * @param {string} url - Absolute URL to check (must parse with `new URL`).
 * @param {string[]} [denylist=deny] - Hostnames/IP literals to reject
 *   (parameterized so callers can supply their own list; defaults to the
 *   module-level `deny`).
 * @returns {boolean} true when the URL may be crawled.
 */
function basicDenyFilter(url, denylist = deny) {
  const { hostname } = new URL(url);
  return !denylist.includes(hostname);
}
| 11 | + |
/**
 * Async URL filter: reject a URL when its hostname — or any IPv4 address it
 * resolves to — appears in the denylist. Fail-closed: if DNS resolution
 * fails, the URL is rejected.
 *
 * @param {string} url - Absolute URL to check (must parse with `new URL`).
 * @param {string[]} [denylist=deny] - Hostnames/IP literals to reject.
 * @returns {Promise<boolean>} true when the URL may be crawled.
 */
async function dnsDenyFilter(url, denylist = deny) {
  const domain = new URL(url).hostname;
  // Fix: hostname entries such as "localhost" never matched before, because
  // only the *resolved addresses* were compared against the denylist.
  if (denylist.includes(domain)) {
    return false;
  }
  try {
    // NOTE(review): dns.resolve returns A records only — AAAA (e.g. "::1")
    // and /etc/hosts entries are not covered; consider dns.lookup with
    // { all: true } if those must be blocked too.
    const addresses = await dns.resolve(domain);
    return !addresses.some((address) => denylist.includes(address));
  } catch (error) {
    console.error(`DNS lookup failed for ${domain}:`, error);
    return false; // fail closed on resolution errors
  }
}
| 25 | + |
// Example usage: crawl over the real network, guarded by the DNS-based
// denylist filter.
const downloadPage = async (pageURL) => {
  const response = await fetch(pageURL);
  return response.text();
};

const sitemap1 = await crawlAndProcessAsync({
  url: "https://example.com",
  filterURL: dnsDenyFilter,
  download: downloadPage,
});
console.log(sitemap1);
| 33 | + |
// Example usage: synchronous crawl against the in-memory fixture backend.
const syncCrawlOptions = {
  url: "https://example.com",
  filterURL: basicDenyFilter,
  download: inMemoryFetch,
};
const sitemap2 = crawlAndProcessSync(syncCrawlOptions);
console.log(sitemap2);