26
26
'Optimization Enabled' : 'optimizations' ,
27
27
'Other Settings:' : 'settings' }
28
28
29
# Module-level HTTP session, lazily created by load_session() on first use.
# Starts as an empty dict (falsy) so `if not session` detects the uninitialized state.
session = {}
31
def get_session_from_chromedriver(url):
    """Visit *url* with an undetected-chromedriver browser and build a
    requests.Session carrying the browser's User-Agent and cookies.

    This lets subsequent plain-HTTP requests reuse whatever anti-bot
    cookies the real browser acquired while loading the page.

    Raises:
        Exception: if the browser ended up with no cookies at all,
            i.e. the page load did not produce a usable session.
    """
    driver = uc.Chrome()
    try:
        driver.get(url)

        http_session = requests.Session()
        user_agent = driver.execute_script("return navigator.userAgent;")
        http_session.headers.update({'User-Agent': user_agent})

        cookies = driver.get_cookies()
        # BUG FIX: the original check was inverted — it raised when cookies
        # WERE present. The failure case is an empty cookie jar.
        if not cookies:
            raise Exception('Should have some cookies here')

        for cookie in cookies:
            http_session.cookies.set(cookie['name'], cookie['value'])
    finally:
        # Always shut the browser down; the original leaked one Chrome
        # process per call.
        driver.quit()

    print(f'Cookies loaded from {url} {http_session.cookies}')
    return http_session
48
def load_session(url):
    """Return the shared module-level session, creating it on first call.

    Subsequent calls reuse the cached session regardless of *url*.
    """
    global session
    if session:
        return session
    session = get_session_from_chromedriver(url)
    return session
54
+
29
55
def address_from_tr(td: Any) -> Optional[str]:
    """Extract a contract address from a results-table cell.

    The address lives in the ``data-clipboard-text`` attribute of the
    cell's ``a.js-clipboard`` anchor.

    Args:
        td: a BeautifulSoup-like element exposing ``select_one``.

    Returns:
        The address string, or None when the anchor or its attributes
        are missing (the original annotation claimed ``str`` but the
        code visibly returns None in that case).
    """
    a = td.select_one('a.js-clipboard')
    return a.attrs.get('data-clipboard-text') if (a and a.attrs) else None
@@ -34,7 +60,7 @@ def address_from_tr(td: Any) -> str:
34
60
def parse_page (page : Optional [int ]= None , retry = 3 , retry_delay = 5 ) -> Optional [List [Dict [str , str ]]]:
35
61
url = VERIFIED_CONTRACT_URL if page is None else f'{ VERIFIED_CONTRACT_URL } /{ page } '
36
62
print (f'Crawling { url } ' )
37
- resp = requests .get (url , headers = REQ_HEADER , allow_redirects = False , proxies = proxies )
63
+ resp = session .get (url , allow_redirects = False )
38
64
if resp .status_code != 200 :
39
65
print (f'No results found on page: { page } , http status: { resp .status_code } ' )
40
66
return None
@@ -131,7 +157,7 @@ def download_source(contract: Dict[str, str], retry=3, retry_delay=5, throw_if_f
131
157
address = contract ['Address' ]
132
158
contract_name = contract ['Contract Name' ]
133
159
url = CONTRACT_SOURCE_URL .format (address )
134
- resp = requests .get (url , headers = REQ_HEADER , allow_redirects = False , proxies = proxies )
160
+ resp = session .get (url , allow_redirects = False )
135
161
136
162
def maybe_retry (e = None ):
137
163
if retry > 0 :
@@ -165,25 +191,16 @@ def fetch_all():
165
191
166
192
def download_url_poly(url, retry=3, retry_delay=5, throw_if_fail=False):
    """Fetch one contract page through a fresh browser-backed session and
    parse its verified source.

    The address is the last path segment of *url*, stripped of any ``#``
    fragment. The retry / retry_delay / throw_if_fail parameters are kept
    for interface parity with download_url but are not used here.
    """
    address = url.split('/')[-1].split('#')[0]

    # A brand-new session per call: this site needs the browser's anti-bot
    # cookies for each fetch.
    browser_session = get_session_from_chromedriver(url)
    resp = browser_session.get(url)

    page_soup = BeautifulSoup(resp.content, 'lxml')
    parse_source_soup(page_soup, address)
183
200
184
201
def download_url (url , retry = 3 , retry_delay = 5 , throw_if_fail = False ):
185
202
address = url .split ('/' )[- 1 ].split ('#' )[0 ]
186
- resp = requests .get (url , headers = REQ_HEADER , allow_redirects = False , proxies = proxies )
203
+ resp = session .get (url , allow_redirects = False )
187
204
188
205
if resp .status_code != 200 :
189
206
if retry > 0 :
@@ -198,7 +215,6 @@ def download_url(url, retry=3, retry_delay=5, throw_if_fail=False):
198
215
soup = BeautifulSoup (resp .content , 'lxml' )
199
216
parse_source_soup (soup , address )
200
217
201
-
202
218
if __name__ == '__main__' :
203
219
ap = argparse .ArgumentParser ()
204
220
ap .add_argument ("--web" , default = "etherscan" ,type = str , help = "Choose website, etherscan(default) or bscscan" )
@@ -210,6 +226,7 @@ def download_url(url, retry=3, retry_delay=5, throw_if_fail=False):
210
226
ROOT_DIR = f'{ OUTPUT_DIR } /contracts'
211
227
212
228
web = args .web
229
+
213
230
if web == 'etherscan' :
214
231
VERIFIED_CONTRACT_URL = 'https://etherscan.io/contractsVerified'
215
232
CONTRACT_SOURCE_URL = 'https://etherscan.io/address/{}#code'
@@ -237,6 +254,9 @@ def download_url(url, retry=3, retry_delay=5, throw_if_fail=False):
237
254
print (CONTRACT_SOURCE_URL )
238
255
print (ROOT_DIR )
239
256
url = args .url
257
+
258
+ load_session (VERIFIED_CONTRACT_URL )
259
+
240
260
if url :
241
261
fn (url )
242
262
else :
0 commit comments