-
Notifications
You must be signed in to change notification settings - Fork 406
/
Copy pathcustom-cache.js
84 lines (72 loc) · 2.26 KB
/
custom-cache.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
const fs = require('fs');
const HCCrawler = require('headless-chrome-crawler');
const BaseCache = require('headless-chrome-crawler/cache/base');
const FILE = './tmp/fs-cache.json';
/**
 * File-backed cache created by extending the BaseCache interface.
 *
 * The whole cache is kept in a single JSON file whose path is read from
 * `this._settings.file` (presumably populated by BaseCache from the
 * constructor argument; confirm against BaseCache). Every operation reads
 * and/or rewrites the entire file synchronously and then returns an
 * already-resolved promise.
 */
class FsCache extends BaseCache {
  /**
   * Parse and return the current contents of the backing file.
   * @returns {Object} the stored key/value map
   * @throws if the file is missing or does not contain valid JSON
   */
  _readStore() {
    return JSON.parse(fs.readFileSync(this._settings.file, 'utf8'));
  }

  /**
   * Serialize the given map and overwrite the backing file with it.
   * @param {Object} store - the complete key/value map to persist
   */
  _writeStore(store) {
    fs.writeFileSync(this._settings.file, JSON.stringify(store));
  }

  /**
   * Create (or reset) the backing file so it holds an empty object.
   * @returns {Promise<void>}
   */
  init() {
    this._writeStore({});
    return Promise.resolve();
  }

  /**
   * Delete the backing file. Reads fail afterwards until init() recreates it.
   * @returns {Promise<void>}
   */
  clear() {
    fs.unlinkSync(this._settings.file);
    return Promise.resolve();
  }

  /**
   * Nothing to release: no file handle is kept open between operations.
   * @returns {Promise<void>}
   */
  close() {
    return Promise.resolve();
  }

  /**
   * Look up a stored value.
   * @param {string} key
   * @returns {Promise<*>} the stored value, or null when the key is absent
   *   or its value is falsy
   */
  get(key) {
    const store = this._readStore();
    return Promise.resolve(store[key] || null);
  }

  /**
   * Store a value under the given key, replacing any previous value.
   * @param {string} key
   * @param {*} value - must be JSON-serializable
   * @returns {Promise<void>}
   */
  set(key, value) {
    const store = this._readStore();
    store[key] = value;
    this._writeStore(store);
    return Promise.resolve();
  }

  /**
   * Append an item to the queue stored under `key`, keeping the queue
   * ordered by descending priority.
   * @param {string} key
   * @param {*} value - must be JSON-serializable
   * @param {number} priority - larger numbers are dequeued first
   * @returns {Promise<void>}
   */
  enqueue(key, value, priority) {
    const store = this._readStore();
    const queue = store[key] || [];
    queue.push({ value, priority });
    queue.sort((a, b) => b.priority - a.priority);
    store[key] = queue;
    this._writeStore(store);
    return Promise.resolve();
  }

  /**
   * Remove and return the first item of the queue stored under `key`.
   * @param {string} key
   * @returns {Promise<*>} the dequeued value, or null when the queue is
   *   missing or empty
   */
  dequeue(key) {
    const store = this._readStore();
    const queue = store[key] || [];
    const item = queue.shift();
    // Nothing was removed, so the file content is unchanged: skip the write.
    if (!item) return Promise.resolve(null);
    // Persist to the instance's configured file, not the module-level FILE,
    // so reads and writes always target the same path.
    this._writeStore(store);
    return Promise.resolve(item.value);
  }

  /**
   * Report how many items the queue stored under `key` holds.
   * @param {string} key
   * @returns {Promise<number>} the queue length, or 0 when the key is absent
   */
  size(key) {
    const store = this._readStore();
    if (!store[key]) return Promise.resolve(0);
    return Promise.resolve(store[key].length);
  }

  /**
   * Delete the entry stored under `key`, if any.
   * @param {string} key
   * @returns {Promise<void>}
   */
  remove(key) {
    const store = this._readStore();
    delete store[key];
    // Write back to the instance's configured file rather than FILE.
    this._writeStore(store);
    return Promise.resolve();
  }
}
// Cache instance whose backing JSON file is FILE.
const cache = new FsCache({ file: FILE });

// Launch a crawler that uses the custom cache, queue a few URLs, wait for
// the crawl to finish and shut the crawler down.
(async () => {
  const crawler = await HCCrawler.launch({
    maxConcurrency: 1,
    onSuccess: result => {
      console.log(`Requested ${result.options.url}.`);
    },
    cache,
  });
  try {
    await crawler.queue('https://example.com/');
    await crawler.queue('https://example.net/');
    await crawler.queue('https://example.com/'); // This duplicate URL won't be requested
    await crawler.onIdle();
  } finally {
    // Close the crawler even when queueing or crawling fails, so the
    // launched browser is not left running.
    await crawler.close();
  }
})().catch(error => {
  // Report the failure instead of leaving the promise rejection unhandled.
  console.error(error);
  process.exitCode = 1;
});