-
Notifications
You must be signed in to change notification settings - Fork 0
/
server.js
66 lines (58 loc) · 2.01 KB
/
server.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/**
* Created by zcx on 2016/10/4.
*/
var http = require('http');
var url = require('url')
var cheerio = require('cheerio');
var superagent = require('superagent');
var async = require('async');
var eventproxy = require('eventproxy');
// Shared eventproxy instance; no function visible in this file references it.
var ep = new eventproxy();
// Douban endpoint that returns the list of movie tags; start() requests it on every incoming HTTP request.
var mainUrl = 'https://movie.douban.com/j/search_tags?type=movie';
// Module-level array; not referenced by any function visible in this file.
var movies = [];
// Module-level array meant to hold one search-URL prefix per tag.
// NOTE(review): being module-level, it is shared by all requests handled by this process.
var tagUrls = [];
/**
 * Starts the HTTP server on port 3000.
 *
 * For every incoming request the list of movie tags is fetched from `mainUrl`
 * and handed to `tagUrlStart`, which streams the crawl results to the client.
 * If the tag list cannot be fetched or is not usable (network error, invalid
 * JSON, no `tags` array) the request is answered with HTTP 502 instead of
 * crashing the process and leaving the client hanging.
 */
function start() {
    // Reports an upstream failure to the log and to the client.
    function rejectRequest(res, reason) {
        console.error('Failed to load movie tags: ' + reason);
        res.writeHead(502, {'Content-Type': 'text/plain;charset=utf-8'});
        res.end('Failed to load movie tags');
    }

    function onRequest(req, res) {
        superagent.get(mainUrl).end(function (err, pres) {
            var movieTags;
            if (err || !pres) {
                rejectRequest(res, err || 'empty response');
                return;
            }
            try {
                movieTags = JSON.parse(pres.text).tags;
            } catch (parseErr) {
                // Covers malformed JSON as well as a JSON `null` body.
                rejectRequest(res, parseErr);
                return;
            }
            if (!Array.isArray(movieTags)) {
                rejectRequest(res, 'response does not contain a "tags" array');
                return;
            }
            tagUrlStart(req, res, movieTags);
        });
    }

    http.createServer(onRequest).listen(3000, function () {
        console.log('listen at 3000');
    });
}
/**
 * Crawls Douban's movie search API for every tag and streams the results to
 * the HTTP response as HTML.
 *
 * Tags are processed one after another. For each tag the results are fetched
 * page by page (20 subjects per page, starting at offset 0) until an empty
 * page comes back; every subject is written to `res` as
 * "<tag><running count><title><rate></br>". Requests are strictly sequential:
 * the next one is scheduled only after the previous response has been handled,
 * so a late response can never advance the tag index more than once. A tag
 * whose request fails or returns unusable JSON is logged and skipped. When
 * the last tag is exhausted the response is ended.
 *
 * @param {http.IncomingMessage} req - Incoming request (not used).
 * @param {http.ServerResponse} res - Response the results are streamed to.
 * @param {string[]} movieTags - Tag names to crawl; a non-array is treated as an empty list.
 */
function tagUrlStart(req, res, movieTags) {
    var PAGE_SIZE = 20;
    var PAGE_DELAY_MS = 100;  // pause between two pages of the same tag
    var TAG_DELAY_MS = 1000;  // pause before moving on to the next tag

    var tags = Array.isArray(movieTags) ? movieTags : [];
    // Built per call: a list shared at module level would grow with every request.
    var searchUrls = tags.map(function (movietag) {
        // The tag is a query-string value, so it is percent-encoded as a component:
        // non-ASCII tags cannot be fetched otherwise, and "&" or "=" would corrupt the query.
        return 'https://movie.douban.com/j/search_subjects?type=movie&tag=' + encodeURIComponent(movietag) +
            '&sort=recommend&page_limit=' + PAGE_SIZE + '&page_start=';
    });

    var tagIndex = 0;
    var pageStart = 0;
    var requestCount = 0;
    var crawlCount = 0;
    var timer = null;
    var stopped = false;

    // Escapes remote data before it is embedded in the HTML response.
    function escapeHtml(value) {
        return String(value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    // Cancels any pending request and prevents new ones from being scheduled.
    function stop() {
        stopped = true;
        if (timer !== null) {
            clearTimeout(timer);
            timer = null;
        }
    }

    function schedule(delayMs) {
        if (!stopped) {
            timer = setTimeout(crawl, delayMs);
        }
    }

    function nextTag() {
        tagIndex++;
        pageStart = 0;
        schedule(TAG_DELAY_MS);
    }

    // Returns the subjects array of a response, or null when the request failed
    // or the body could not be interpreted.
    function readSubjects(err, pres, crawlUrl) {
        var subjects;
        if (err || !pres) {
            console.error('Request failed for ' + crawlUrl + ': ' + (err || 'empty response'));
            return null;
        }
        try {
            subjects = JSON.parse(pres.text).subjects;
        } catch (parseErr) {
            console.error('Unreadable response for ' + crawlUrl + ': ' + parseErr);
            return null;
        }
        return Array.isArray(subjects) ? subjects : null;
    }

    function crawl() {
        var crawlUrl;
        timer = null;
        if (stopped) {
            return;
        }
        if (tagIndex >= searchUrls.length) {
            // Every tag has been crawled: finish the response.
            stop();
            res.end();
            return;
        }
        crawlUrl = searchUrls[tagIndex] + pageStart;
        requestCount++;
        console.log('正在执行第' + requestCount + '次爬取，已抓取' + crawlCount + '条信息：' + crawlUrl);
        superagent.get(crawlUrl).end(function (err, pres) {
            var subjects;
            var tag;
            if (stopped) {
                return;
            }
            subjects = readSubjects(err, pres, crawlUrl);
            if (subjects === null || !subjects[0]) {
                // Failed request or empty page: this tag is done.
                nextTag();
                return;
            }
            tag = escapeHtml(tags[tagIndex]);
            subjects.forEach(function (subject) {
                if (!subject) {
                    return;
                }
                res.write(tag + crawlCount + escapeHtml(subject.title) + escapeHtml(subject.rate) + "</br>");
                crawlCount++;
            });
            pageStart += PAGE_SIZE;
            schedule(PAGE_DELAY_MS);
        });
    }

    res.writeHead(200, {'Content-Type':'text/html;charset="utf-8"'});
    // Stop crawling once the connection is closed (client gone or response finished).
    res.on('close', stop);
    crawl();
}
// Launch the HTTP server as soon as this script is loaded.
start();