Commit 23d9c06

fix

1 parent 7b89a9b

2 files changed: +25 additions, -12 deletions

example/example/settings.py (1 addition, 1 deletion)

@@ -56,7 +56,7 @@
     'gerapy_proxy.middlewares.ProxyPoolMiddleware': 543,
 }
 
-GERAPY_PROXY_POOL_URL = 'https://proxypool.scrape.center'
+GERAPY_PROXY_POOL_URL = 'https://proxypool.scrape.center/random'
 
 # Enable or disable extensions
 # See https://docs.scrapy.org/en/latest/topics/extensions.html
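The pool URL now points at the /random endpoint, which hands back a single proxy per request instead of the pool's index page. A minimal sketch of what the middleware expects from this endpoint, assuming it returns a bare host:port string as plain text (the response format is an assumption, not shown in this commit):

    import requests

    # Hypothetical check of the new endpoint; assumes a plain-text
    # 'host:port' body such as '203.0.113.5:8080'.
    response = requests.get('https://proxypool.scrape.center/random', timeout=5)
    if response.status_code == 200:
        proxy = response.text.strip()
        print('http://' + proxy)  # the form Scrapy expects in request.meta['proxy']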

gerapy_proxy/middlewares.py (24 additions, 11 deletions)
@@ -1,7 +1,19 @@
 import requests
 import random
 import logging
+import aiohttp
 from gerapy_proxy.settings import *
+import time
+import asyncio
+import sys
+import twisted.internet
+from twisted.internet.asyncioreactor import AsyncioSelectorReactor
+
+reactor = AsyncioSelectorReactor(asyncio.get_event_loop())
+
+# install AsyncioSelectorReactor
+twisted.internet.reactor = reactor
+sys.modules['twisted.internet.reactor'] = reactor
 
 logger = logging.getLogger(__name__)
 
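The module-level block above installs Twisted's AsyncioSelectorReactor at import time so the aiohttp coroutines below can run under Scrapy. For reference, Scrapy 2.0+ can install the same reactor itself via a project setting, which avoids patching sys.modules by hand (a sketch of that alternative, not what this commit does):

    # settings.py -- supported alternative in Scrapy 2.0+:
    # Scrapy installs the asyncio reactor before the crawl starts.
    TWISTED_REACTOR = 'twisted.internet.asyncioreactor.AsyncioSelectorReactor'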

@@ -29,30 +41,31 @@ def from_crawler(cls, crawler):
         cls.proxy_pool_random_enable_rate = settings.get('GERAPY_PROXY_POOL_RANDOM_ENABLE_RATE',
                                                          GERAPY_PROXY_POOL_RANDOM_ENABLE_RATE)
         cls.proxy_pool_timeout = settings.get('GERAPY_PROXY_POOL_TIMEOUT', GERAPY_PROXY_POOL_TIMEOUT)
-        cls.proxy_pool_extract_func = settings.get('GERAPY_PROXY_EXTRACT_FUNC', GERAPY_PROXY_EXTRACT_FUNC)
+        cls.proxy_pool_extract_func = lambda _: settings.get('GERAPY_PROXY_EXTRACT_FUNC', GERAPY_PROXY_EXTRACT_FUNC)
         return cls()
 
-    def get_proxy(self):
+    async def get_proxy(self):
         """
         get proxy from proxy pool
         :return:
         """
         logger.debug('start to get proxy from proxy pool')
+        await asyncio.sleep(10)
         kwargs = {}
         if self.proxy_pool_auth:
-            kwargs['auth'] = (self.proxy_pool_username, self.proxy_pool_password)
+            kwargs['auth'] = aiohttp.BasicAuth(login=self.proxy_pool_username, password=self.proxy_pool_password)
         if self.proxy_pool_timeout:
             kwargs['timeout'] = self.proxy_pool_timeout
         logger.debug('get proxy using kwargs %s', kwargs)
 
-        # get proxy using requests
-        response = requests.get(self.proxy_pool_url, **kwargs)
-        if response.status_code == 200:
-            proxy = self.proxy_pool_extract_func(response.text)
-            logger.debug('get proxy %s', proxy)
-            return proxy
+        async with aiohttp.ClientSession() as client:
+            response = await client.get(self.proxy_pool_url, **kwargs)
+            if response.status == 200:
+                proxy = self.proxy_pool_extract_func()(response.text)
+                logger.debug('get proxy %s', proxy)
+                return proxy
 
-    def process_request(self, request, spider):
+    async def process_request(self, request, spider):
         """
         use proxy pool to process request
         :param request:
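get_proxy is now a coroutine that fetches the proxy with aiohttp instead of requests. A standalone sketch of the same fetch pattern, runnable outside Scrapy; note that aiohttp's response.text() is itself a coroutine, so the body has to be awaited (the URL and plain-text response format are assumptions carried over from the settings change above):

    import asyncio
    import aiohttp

    async def fetch_random_proxy(url='https://proxypool.scrape.center/random'):
        # Overall timeout for the whole request, mirroring the
        # middleware's proxy_pool_timeout kwarg.
        timeout = aiohttp.ClientTimeout(total=5)
        async with aiohttp.ClientSession(timeout=timeout) as client:
            response = await client.get(url)
            if response.status == 200:
                return (await response.text()).strip()  # e.g. '203.0.113.5:8080'

    print(asyncio.run(fetch_random_proxy()))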
@@ -75,7 +88,7 @@ def process_request(self, request, spider):
             logger.debug('random number lager than proxy_pool_random_enable_rate, skip')
             return None
 
-        proxy = self.get_proxy()
+        proxy = await self.get_proxy()
 
         # skip invalid
         if not proxy:
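With get_proxy awaited, process_request becomes a coroutine too, which recent Scrapy versions support in downloader middlewares once the asyncio reactor is installed. A minimal skeleton of the pattern; the class and helper names here are illustrative, not gerapy_proxy's:

    class AsyncProxyMiddleware:
        async def process_request(self, request, spider):
            proxy = await self.get_proxy()   # non-blocking, unlike requests.get
            if proxy:
                request.meta['proxy'] = 'http://' + proxy
            return None                      # let Scrapy continue the chain

        async def get_proxy(self):
            # placeholder: fetch 'host:port' from a proxy pool
            raise NotImplementedError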
