|
14 | 14 | SPIDER_MODULES = ['example.spiders']
|
15 | 15 | NEWSPIDER_MODULE = 'example.spiders'
|
16 | 16 |
|
17 |
| - |
18 | 17 | # Crawl responsibly by identifying yourself (and your website) on the user-agent
|
19 |
| -#USER_AGENT = 'example (+http://www.yourdomain.com)' |
| 18 | +# USER_AGENT = 'example (+http://www.yourdomain.com)' |
20 | 19 |
|
21 | 20 | # Whether to obey robots.txt rules (disabled here: robots.txt is ignored)
|
22 | 21 | ROBOTSTXT_OBEY = False
|
23 | 22 |
|
24 | 23 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
|
25 | 24 | CONCURRENT_REQUESTS = 3
|
| 25 | +DOWNLOAD_TIMEOUT = 10 |
| 26 | +RETRY_TIMES = 10 |
26 | 27 |
|
27 | 28 | # Configure a delay for requests for the same website (default: 0)
|
28 | 29 | # See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
|
29 | 30 | # See also autothrottle settings and docs
|
30 |
| -#DOWNLOAD_DELAY = 3 |
| 31 | +# DOWNLOAD_DELAY = 3 |
31 | 32 | # The download delay setting will honor only one of:
|
32 |
| -#CONCURRENT_REQUESTS_PER_DOMAIN = 16 |
33 |
| -#CONCURRENT_REQUESTS_PER_IP = 16 |
| 33 | +# CONCURRENT_REQUESTS_PER_DOMAIN = 16 |
| 34 | +# CONCURRENT_REQUESTS_PER_IP = 16 |
34 | 35 |
|
35 | 36 | # Disable cookies (enabled by default)
|
36 |
| -#COOKIES_ENABLED = False |
| 37 | +# COOKIES_ENABLED = False |
37 | 38 |
|
38 | 39 | # Disable Telnet Console (enabled by default)
|
39 |
| -#TELNETCONSOLE_ENABLED = False |
| 40 | +# TELNETCONSOLE_ENABLED = False |
40 | 41 |
|
41 | 42 | # Override the default request headers:
|
42 |
| -#DEFAULT_REQUEST_HEADERS = { |
| 43 | +# DEFAULT_REQUEST_HEADERS = { |
43 | 44 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
44 | 45 | # 'Accept-Language': 'en',
|
45 |
| -#} |
| 46 | +# } |
46 | 47 |
|
47 | 48 | # Enable or disable spider middlewares
|
48 | 49 | # See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
49 |
| -#SPIDER_MIDDLEWARES = { |
| 50 | +# SPIDER_MIDDLEWARES = { |
50 | 51 | # 'example.middlewares.ExampleSpiderMiddleware': 543,
|
51 |
| -#} |
| 52 | +# } |
52 | 53 |
|
53 | 54 | # Enable or disable downloader middlewares
|
54 | 55 | # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
55 | 56 | DOWNLOADER_MIDDLEWARES = {
|
56 |
| - 'gerapy_proxy.middlewares.ProxyPoolMiddleware': 543, |
| 57 | + 'gerapy_proxy.middlewares.ProxyPoolMiddleware': 543, |
57 | 58 | }
|
58 | 59 |
|
59 | 60 | GERAPY_PROXY_POOL_URL = 'https://proxypool.scrape.center/random'
|
60 | 61 |
|
61 | 62 | # Enable or disable extensions
|
62 | 63 | # See https://docs.scrapy.org/en/latest/topics/extensions.html
|
63 |
| -#EXTENSIONS = { |
| 64 | +# EXTENSIONS = { |
64 | 65 | # 'scrapy.extensions.telnet.TelnetConsole': None,
|
65 |
| -#} |
| 66 | +# } |
66 | 67 |
|
67 | 68 | # Configure item pipelines
|
68 | 69 | # See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
69 |
| -#ITEM_PIPELINES = { |
| 70 | +# ITEM_PIPELINES = { |
70 | 71 | # 'example.pipelines.ExamplePipeline': 300,
|
71 |
| -#} |
| 72 | +# } |
72 | 73 |
|
73 | 74 | # Enable and configure the AutoThrottle extension (disabled by default)
|
74 | 75 | # See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
75 |
| -#AUTOTHROTTLE_ENABLED = True |
| 76 | +# AUTOTHROTTLE_ENABLED = True |
76 | 77 | # The initial download delay
|
77 |
| -#AUTOTHROTTLE_START_DELAY = 5 |
| 78 | +# AUTOTHROTTLE_START_DELAY = 5 |
78 | 79 | # The maximum download delay to be set in case of high latencies
|
79 |
| -#AUTOTHROTTLE_MAX_DELAY = 60 |
| 80 | +# AUTOTHROTTLE_MAX_DELAY = 60 |
80 | 81 | # The average number of requests Scrapy should be sending in parallel to
|
81 | 82 | # each remote server
|
82 |
| -#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 |
| 83 | +# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 |
83 | 84 | # Enable showing throttling stats for every response received:
|
84 |
| -#AUTOTHROTTLE_DEBUG = False |
| 85 | +# AUTOTHROTTLE_DEBUG = False |
85 | 86 |
|
86 | 87 | # Enable and configure HTTP caching (disabled by default)
|
87 | 88 | # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
|
88 |
| -#HTTPCACHE_ENABLED = True |
89 |
| -#HTTPCACHE_EXPIRATION_SECS = 0 |
90 |
| -#HTTPCACHE_DIR = 'httpcache' |
91 |
| -#HTTPCACHE_IGNORE_HTTP_CODES = [] |
92 |
| -#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' |
| 89 | +# HTTPCACHE_ENABLED = True |
| 90 | +# HTTPCACHE_EXPIRATION_SECS = 0 |
| 91 | +# HTTPCACHE_DIR = 'httpcache' |
| 92 | +# HTTPCACHE_IGNORE_HTTP_CODES = [] |
| 93 | +# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' |
0 commit comments