Skip to content

Commit 9607167

Browse files
committed
Adicionar proxy
1 parent 52b5d1f commit 9607167

File tree

2 files changed

+18
-4
lines changed

2 files changed

+18
-4
lines changed

pipelines/utils/crawler_tse_eleicoes/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class constants(Enum): # pylint: disable=c0103
1919
WHERE table_id = '{table_id}') AS last_modified_time;"""
2020

2121

22+
PROXY_LINK = "scraperapi.country_code=br.device_type=desktop:c45ba8009496799a77df31e2ef862325@proxy-server.scraperapi.com:8001"
2223

2324
MODE_TO_PROJECT_DICT = {
2425
"prod": "basedosdados",

pipelines/utils/crawler_tse_eleicoes/utils.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,17 +116,30 @@ def download_extract_zip(self, url: str, chunk_size=128) -> None:
116116
"""
117117
self.path_input.mkdir(parents=True, exist_ok=True)
118118

119+
# request_headers = {
120+
# "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
121+
# }
122+
123+
# r = requests.get(url, headers=request_headers, stream=True, timeout=60)
124+
119125
request_headers = {
120-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
126+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
127+
"Connection": "keep-alive"
128+
}
129+
proxies = {
130+
"https": tse_constants.PROXY_LINK.value
121131
}
122132

123-
r = requests.get(url, headers=request_headers, stream=True, timeout=60)
133+
r = requests.get(url, headers=request_headers, proxies=proxies, verify=False, timeout=300)
124134

125135
save_path = self.path_input / url.split("/")[-1]
126136

127137
with open(save_path, "wb") as fd:
128-
for chunk in r.iter_content(chunk_size=chunk_size):
129-
fd.write(chunk)
138+
fd.write(r.content)
139+
140+
# with open(save_path, "wb") as fd:
141+
# for chunk in r.iter_content(chunk_size=chunk_size):
142+
# fd.write(chunk)
130143

131144
with zipfile.ZipFile(save_path) as z:
132145
z.extractall(self.path_input)

0 commit comments

Comments (0)