@@ -55,4 +55,95 @@ GERAPY_PYPPETEER_DISABLE_GPU = True
55
55
56
56
## Example
57
57
58
- For more detail, please see [example](./example).
58
+ For more detail, please see [example](./example).
59
+
60
+ Also you can directly run with Docker:
61
+
62
+ ```
63
+ docker run germey/gerapy-pyppeteer-example
64
+ ```
65
+
66
+ Outputs:
67
+
68
+ ```shell script
69
+ 2020-07-13 01:49:13 [scrapy.utils.log] INFO: Scrapy 2.2.0 started (bot: example)
70
+ 2020-07-13 01:49:13 [scrapy.utils.log] INFO: Versions: lxml 4.3.3.0, libxml2 2.9.9, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 20.3.0, Python 3.7.7 (default, May 6 2020, 04:59:01) - [Clang 4.0.1 (tags/RELEASE_401/final)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1d 10 Sep 2019), cryptography 2.8, Platform Darwin-19.4.0-x86_64-i386-64bit
71
+ 2020-07-13 01:49:13 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.asyncioreactor.AsyncioSelectorReactor
72
+ 2020-07-13 01:49:13 [scrapy.crawler] INFO: Overridden settings:
73
+ {'BOT_NAME': 'example',
74
+ 'CONCURRENT_REQUESTS': 3,
75
+ 'NEWSPIDER_MODULE': 'example.spiders',
76
+ 'RETRY_HTTP_CODES': [403, 500, 502, 503, 504],
77
+ 'SPIDER_MODULES': ['example.spiders']}
78
+ 2020-07-13 01:49:13 [scrapy.extensions.telnet] INFO: Telnet Password: 83c276fb41754bd0
79
+ 2020-07-13 01:49:13 [scrapy.middleware] INFO: Enabled extensions:
80
+ ['scrapy.extensions.corestats.CoreStats',
81
+ 'scrapy.extensions.telnet.TelnetConsole',
82
+ 'scrapy.extensions.memusage.MemoryUsage',
83
+ 'scrapy.extensions.logstats.LogStats']
84
+ 2020-07-13 01:49:13 [scrapy.middleware] INFO: Enabled downloader middlewares:
85
+ ['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
86
+ 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
87
+ 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
88
+ 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
89
+ 'gerapy_pyppeteer.downloadermiddlewares.PyppeteerMiddleware',
90
+ 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
91
+ 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
92
+ 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
93
+ 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
94
+ 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
95
+ 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',
96
+ 'scrapy.downloadermiddlewares.stats.DownloaderStats']
97
+ 2020-07-13 01:49:13 [scrapy.middleware] INFO: Enabled spider middlewares:
98
+ ['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
99
+ 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
100
+ 'scrapy.spidermiddlewares.referer.RefererMiddleware',
101
+ 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
102
+ 'scrapy.spidermiddlewares.depth.DepthMiddleware']
103
+ 2020-07-13 01:49:13 [scrapy.middleware] INFO: Enabled item pipelines:
104
+ []
105
+ 2020-07-13 01:49:13 [scrapy.core.engine] INFO: Spider opened
106
+ 2020-07-13 01:49:13 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
107
+ 2020-07-13 01:49:13 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
108
+ 2020-07-13 01:49:13 [example.spiders.book] INFO: crawling https://dynamic5.scrape.center/page/1
109
+ 2020-07-13 01:49:13 [gerapy.pyppeteer] DEBUG: processing request <GET https://dynamic5.scrape.center/page/1>
110
+ 2020-07-13 01:49:13 [gerapy.pyppeteer] DEBUG: set options {'headless': True, 'dumpio': False, 'devtools': False, 'args': ['--window-size=1400,700', '--disable-extensions', '--hide-scrollbars', '--mute-audio', '--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu']}
111
+ 2020-07-13 01:49:14 [gerapy.pyppeteer] DEBUG: crawling https://dynamic5.scrape.center/page/1
112
+ 2020-07-13 01:49:19 [gerapy.pyppeteer] DEBUG: waiting for .item .name finished
113
+ 2020-07-13 01:49:20 [gerapy.pyppeteer] DEBUG: wait for .item .name finished
114
+ 2020-07-13 01:49:20 [gerapy.pyppeteer] DEBUG: close pyppeteer
115
+ 2020-07-13 01:49:20 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://dynamic5.scrape.center/page/1> (referer: None)
116
+ 2020-07-13 01:49:20 [gerapy.pyppeteer] DEBUG: processing request <GET https://dynamic5.scrape.center/detail/26898909>
117
+ 2020-07-13 01:49:20 [gerapy.pyppeteer] DEBUG: processing request <GET https://dynamic5.scrape.center/detail/26861389>
118
+ 2020-07-13 01:49:20 [gerapy.pyppeteer] DEBUG: processing request <GET https://dynamic5.scrape.center/detail/26855315>
119
+ 2020-07-13 01:49:20 [gerapy.pyppeteer] DEBUG: set options {'headless': True, 'dumpio': False, 'devtools': False, 'args': ['--window-size=1400,700', '--disable-extensions', '--hide-scrollbars', '--mute-audio', '--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu']}
120
+ 2020-07-13 01:49:20 [gerapy.pyppeteer] DEBUG: set options {'headless': True, 'dumpio': False, 'devtools': False, 'args': ['--window-size=1400,700', '--disable-extensions', '--hide-scrollbars', '--mute-audio', '--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu']}
121
+ 2020-07-13 01:49:21 [gerapy.pyppeteer] DEBUG: set options {'headless': True, 'dumpio': False, 'devtools': False, 'args': ['--window-size=1400,700', '--disable-extensions', '--hide-scrollbars', '--mute-audio', '--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu']}
122
+ 2020-07-13 01:49:21 [gerapy.pyppeteer] DEBUG: crawling https://dynamic5.scrape.center/detail/26855315
123
+ 2020-07-13 01:49:21 [gerapy.pyppeteer] DEBUG: crawling https://dynamic5.scrape.center/detail/26861389
124
+ 2020-07-13 01:49:21 [gerapy.pyppeteer] DEBUG: crawling https://dynamic5.scrape.center/detail/26898909
125
+ 2020-07-13 01:49:24 [gerapy.pyppeteer] DEBUG: waiting for .item .name finished
126
+ 2020-07-13 01:49:24 [gerapy.pyppeteer] DEBUG: wait for .item .name finished
127
+ 2020-07-13 01:49:24 [gerapy.pyppeteer] DEBUG: close pyppeteer
128
+ 2020-07-13 01:49:24 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://dynamic5.scrape.center/detail/26861389> (referer: https://dynamic5.scrape.center/page/1)
129
+ 2020-07-13 01:49:24 [gerapy.pyppeteer] DEBUG: processing request <GET https://dynamic5.scrape.center/page/2>
130
+ 2020-07-13 01:49:24 [gerapy.pyppeteer] DEBUG: set options {'headless': True, 'dumpio': False, 'devtools': False, 'args': ['--window-size=1400,700', '--disable-extensions', '--hide-scrollbars', '--mute-audio', '--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu']}
131
+ 2020-07-13 01:49:25 [scrapy.core.scraper] DEBUG: Scraped from <200 https://dynamic5.scrape.center/detail/26861389>
132
+ {'name': '壁穴ヘブンホール',
133
+ 'score': '5.6',
134
+ 'tags': ['BL漫画', '小基漫', 'BL', '『又腐又基』', 'BLコミック']}
135
+ 2020-07-13 01:49:25 [gerapy.pyppeteer] DEBUG: waiting for .item .name finished
136
+ 2020-07-13 01:49:25 [gerapy.pyppeteer] DEBUG: crawling https://dynamic5.scrape.center/page/2
137
+ 2020-07-13 01:49:26 [gerapy.pyppeteer] DEBUG: wait for .item .name finished
138
+ 2020-07-13 01:49:26 [gerapy.pyppeteer] DEBUG: close pyppeteer
139
+ 2020-07-13 01:49:26 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://dynamic5.scrape.center/detail/26855315> (referer: https://dynamic5.scrape.center/page/1)
140
+ 2020-07-13 01:49:26 [gerapy.pyppeteer] DEBUG: processing request <GET https://dynamic5.scrape.center/detail/27047626>
141
+ 2020-07-13 01:49:26 [gerapy.pyppeteer] DEBUG: set options {'headless': True, 'dumpio': False, 'devtools': False, 'args': ['--window-size=1400,700', '--disable-extensions', '--hide-scrollbars', '--mute-audio', '--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu']}
142
+ 2020-07-13 01:49:26 [scrapy.core.scraper] DEBUG: Scraped from <200 https://dynamic5.scrape.center/detail/26855315>
143
+ {'name': '冒险小虎队', 'score': '9.4', 'tags': ['冒险小虎队', '童年', '冒险', '推理', '小时候读的']}
144
+ 2020-07-13 01:49:26 [gerapy.pyppeteer] DEBUG: waiting for .item .name finished
145
+ 2020-07-13 01:49:26 [gerapy.pyppeteer] DEBUG: crawling https://dynamic5.scrape.center/detail/27047626
146
+ 2020-07-13 01:49:27 [gerapy.pyppeteer] DEBUG: wait for .item .name finished
147
+ 2020-07-13 01:49:27 [gerapy.pyppeteer] DEBUG: close pyppeteer
148
+ ...
149
+ ```
0 commit comments