1
1
import requests
2
2
import random
3
3
import logging
4
+ import aiohttp
4
5
from gerapy_proxy .settings import *
6
+ import time
7
+ import asyncio
8
+ import sys
9
+ import twisted .internet
10
+ from twisted .internet .asyncioreactor import AsyncioSelectorReactor
11
+
12
# Build a Twisted reactor on top of the asyncio event loop returned by
# asyncio.get_event_loop(), then force it in as the global reactor by
# overwriting both the `twisted.internet.reactor` attribute and the
# `sys.modules['twisted.internet.reactor']` entry.
# NOTE(review): this bypasses `twisted.internet.asyncioreactor.install()`;
# presumably it exists so the coroutine-based middleware methods in this
# module can be awaited -- confirm, and confirm that nothing has imported
# `twisted.internet.reactor` before this module is loaded.
+ reactor = AsyncioSelectorReactor (asyncio .get_event_loop ())
13
+
14
+ # install AsyncioSelectorReactor
15
+ twisted .internet .reactor = reactor
16
+ sys .modules ['twisted.internet.reactor' ] = reactor
5
17
6
18
logger = logging .getLogger (__name__ )
7
19
@@ -29,30 +41,31 @@ def from_crawler(cls, crawler):
29
41
cls .proxy_pool_random_enable_rate = settings .get ('GERAPY_PROXY_POOL_RANDOM_ENABLE_RATE' ,
30
42
GERAPY_PROXY_POOL_RANDOM_ENABLE_RATE )
31
43
cls .proxy_pool_timeout = settings .get ('GERAPY_PROXY_POOL_TIMEOUT' , GERAPY_PROXY_POOL_TIMEOUT )
32
- cls .proxy_pool_extract_func = settings .get ('GERAPY_PROXY_EXTRACT_FUNC' , GERAPY_PROXY_EXTRACT_FUNC )
44
+ cls .proxy_pool_extract_func = lambda _ : settings .get ('GERAPY_PROXY_EXTRACT_FUNC' , GERAPY_PROXY_EXTRACT_FUNC )
33
45
return cls ()
34
46
35
async def get_proxy(self):
    """
    get proxy from proxy pool

    Sends a GET request to ``self.proxy_pool_url`` with aiohttp and feeds the
    response body to the configured extract function.

    :return: whatever the extract function returns for the response body
        when the pool answers with HTTP 200, otherwise ``None``
    """
    logger.debug('start to get proxy from proxy pool')
    kwargs = {}
    if self.proxy_pool_auth:
        kwargs['auth'] = aiohttp.BasicAuth(
            login=self.proxy_pool_username,
            password=self.proxy_pool_password,
        )
    if self.proxy_pool_timeout:
        # aiohttp expects a ClientTimeout object rather than a bare number.
        # NOTE(review): assumes the setting is a number of seconds for the
        # whole request -- confirm against the settings module.
        kwargs['timeout'] = aiohttp.ClientTimeout(total=self.proxy_pool_timeout)
    # Log only the option names: the auth object carries the pool password.
    logger.debug('get proxy using kwargs %s', sorted(kwargs))

    async with aiohttp.ClientSession() as client:
        # Using the response as a context manager releases the connection
        # even when the status check fails or the extract function raises.
        async with client.get(self.proxy_pool_url, **kwargs) as response:
            if response.status != 200:
                return None
            # ClientResponse.text is a coroutine method; it must be awaited
            # to obtain the body string.
            text = await response.text()

    # from_crawler stores the extract function behind a one-argument lambda
    # on the class, so accessing it through the instance and calling it
    # with no arguments yields the configured extract callable.
    extract = self.proxy_pool_extract_func()
    proxy = extract(text)
    logger.debug('get proxy %s', proxy)
    return proxy
54
67
55
- def process_request (self , request , spider ):
68
+ async def process_request (self , request , spider ):
56
69
"""
57
70
use proxy pool to process request
58
71
:param request:
@@ -75,7 +88,7 @@ def process_request(self, request, spider):
75
88
logger .debug ('random number lager than proxy_pool_random_enable_rate, skip' )
76
89
return None
77
90
78
- proxy = self .get_proxy ()
91
+ proxy = await self .get_proxy ()
79
92
80
93
# skip invalid
81
94
if not proxy :
0 commit comments