Skip to content

Commit 9607167

Browse files
committed
Adicionar proxy
1 parent 52b5d1f commit 9607167

File tree

2 files changed

+18
-4
lines changed

2 files changed

+18
-4
lines changed

pipelines/utils/crawler_tse_eleicoes/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class constants(Enum): # pylint: disable=c0103
1919
WHERE table_id = '{table_id}') AS last_modified_time;"""
2020

2121

22+
PROXY_LINK = "scraperapi.country_code=br.device_type=desktop:c45ba8009496799a77df31e2ef862325@proxy-server.scraperapi.com:8001"
2223

2324
MODE_TO_PROJECT_DICT = {
2425
"prod": "basedosdados",

pipelines/utils/crawler_tse_eleicoes/utils.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,17 +116,30 @@ def download_extract_zip(self, url: str, chunk_size=128) -> None:
116116
"""
117117
self.path_input.mkdir(parents=True, exist_ok=True)
118118

119+
# request_headers = {
120+
# "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
121+
# }
122+
123+
# r = requests.get(url, headers=request_headers, stream=True, timeout=60)
124+
119125
request_headers = {
120-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
126+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
127+
"Connection": "keep-alive"
128+
}
129+
proxies = {
130+
"https": tse_constants.PROXY_LINK.value
121131
}
122132

123-
r = requests.get(url, headers=request_headers, stream=True, timeout=60)
133+
r = requests.get(url, headers=request_headers, proxies=proxies, verify=False, timeout=300)
124134

125135
save_path = self.path_input / url.split("/")[-1]
126136

127137
with open(save_path, "wb") as fd:
128-
for chunk in r.iter_content(chunk_size=chunk_size):
129-
fd.write(chunk)
138+
fd.write(r.content)
139+
140+
# with open(save_path, "wb") as fd:
141+
# for chunk in r.iter_content(chunk_size=chunk_size):
142+
# fd.write(chunk)
130143

131144
with zipfile.ZipFile(save_path) as z:
132145
z.extractall(self.path_input)

0 commit comments

Comments (0)