diff --git a/src/gateio_new_coins_announcements_bot/announcement_scrapers/binance_scraper.py b/src/gateio_new_coins_announcements_bot/announcement_scrapers/binance_scraper.py
index c4290ca..9f3a0af 100644
--- a/src/gateio_new_coins_announcements_bot/announcement_scrapers/binance_scraper.py
+++ b/src/gateio_new_coins_announcements_bot/announcement_scrapers/binance_scraper.py
@@ -1,49 +1,61 @@
-import random
-import time
-
-import requests
-
-from gateio_new_coins_announcements_bot.logger import logger
-from gateio_new_coins_announcements_bot.util.random import random_int
-from gateio_new_coins_announcements_bot.util.random import random_str
-
-
-class BinanceScraper:
-    def __init__(self, http_client=requests):
-        self.http_client = http_client
-
-    def fetch_latest_announcement(self):
-        """
-        Retrieves new coin listing announcements from binance.com
-        """
-        logger.debug("Pulling announcement page")
-        request_url = self.__request_url()
-        response = self.http_client.get(request_url)
-
-        # Raise an HTTPError if status is not 200
-        response.raise_for_status()
-
-        if "X-Cache" in response.headers:
-            logger.debug(f'Response was cached. Contains headers X-Cache: {response.headers["X-Cache"]}')
-        else:
-            logger.debug("Hit the source directly (no cache)")
-
-        latest_announcement = response.json()
-        logger.debug("Finished pulling announcement page")
-        return latest_announcement["data"]["catalogs"][0]["articles"][0]["title"]
-
-    def __request_url(self):
-        # Generate random query/params to help prevent caching
-        queries = [
-            "type=1",
-            "catalogId=48",
-            "pageNo=1",
-            f"pageSize={str(random_int(maxInt=200))}",
-            f"rnd={str(time.time())}",
-            f"{random_str()}={str(random_int())}",
-        ]
-        random.shuffle(queries)
-        return (
-            f"https://www.binance.com/gateway-api/v1/public/cms/article/list/query"
-            f"?{queries[0]}&{queries[1]}&{queries[2]}&{queries[3]}&{queries[4]}&{queries[5]}"
-        )
+import random
+import time
+
+import requests
+
+from gateio_new_coins_announcements_bot.logger import logger
+from gateio_new_coins_announcements_bot.rotating_proxy import get_proxy
+from gateio_new_coins_announcements_bot.rotating_proxy import is_ready as rotating_proxy_is_ready
+from gateio_new_coins_announcements_bot.util.random import random_int
+from gateio_new_coins_announcements_bot.util.random import random_str
+
+
+class BinanceScraper:
+    def __init__(self, http_client=requests):
+        self.http_client = http_client
+
+    def fetch_latest_announcement(self):
+        """
+        Retrieves new coin listing announcements from binance.com
+        """
+        logger.debug("Pulling announcement page")
+        request_url = self.__request_url()
+
+        if rotating_proxy_is_ready():
+            proxy = get_proxy()
+            logger.debug(f"Using proxy: {proxy}")
+            try:
+                response = self.http_client.get(request_url, proxies={"http": "socks5://" + proxy})
+
+            except Exception as e:
+                logger.error(e)
+        else:
+            response = self.http_client.get(request_url)
+
+        # Raise an HTTPError if status is not 200
+        response.raise_for_status()
+
+        if "X-Cache" in response.headers:
+            logger.debug(f'Response was cached. Contains headers X-Cache: {response.headers["X-Cache"]}')
+        else:
+            logger.debug("Hit the source directly (no cache)")
+
+        latest_announcement = response.json()
+        logger.debug("Finished pulling announcement page")
+        return latest_announcement["data"]["catalogs"][0]["articles"][0]["title"]
+
+    def __request_url(self):
+        # Generate random query/params to help prevent caching
+        queries = [
+            "type=1",
+            "catalogId=48",
+            "pageNo=1",
+            f"pageSize={str(random_int(maxInt=200))}",
+            f"rnd={str(time.time())}",
+            f"{random_str()}={str(random_int())}",
+        ]
+        random.shuffle(queries)
+        return (
+            f"https://www.binance.com/gateway-api/v1/public/cms/article/list/query"
+            f"?{queries[0]}&{queries[1]}&{queries[2]}&{queries[3]}&{queries[4]}&{queries[5]}"
+        )
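Context for the proxy branch above (a sketch, not part of the patch): the new code only logs a failed proxied request, so `response` can be left unbound when `raise_for_status()` runs, and the `proxies` mapping only covers plain `http` while the announcement URL is `https`. One way to route both schemes through SOCKS5 and fall back to a direct request, assuming `requests` is installed with SOCKS support (`requests[socks]`); the helper name is illustrative:

```python
import requests


def _get_with_optional_proxy(http_client, request_url, proxy=None, timeout=10):
    """Fetch request_url, optionally through a SOCKS5 proxy, with a direct fallback."""
    if proxy:
        # socks5h:// lets the proxy resolve DNS; cover both schemes so the
        # https endpoint is actually routed through the proxy.
        proxies = {"http": f"socks5h://{proxy}", "https": f"socks5h://{proxy}"}
        try:
            return http_client.get(request_url, proxies=proxies, timeout=timeout)
        except requests.RequestException as e:
            print(f"Proxied request failed ({e}); retrying without proxy")
    # Direct request, also used as the fallback when the proxy fails.
    return http_client.get(request_url, timeout=timeout)
```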
diff --git a/src/gateio_new_coins_announcements_bot/new_listings_scraper.py b/src/gateio_new_coins_announcements_bot/new_listings_scraper.py
index 9b9dbc8..ce4f20e 100644
--- a/src/gateio_new_coins_announcements_bot/new_listings_scraper.py
+++ b/src/gateio_new_coins_announcements_bot/new_listings_scraper.py
@@ -14,8 +14,7 @@
 from gateio_new_coins_announcements_bot.load_config import load_config
 from gateio_new_coins_announcements_bot.logger import logger
 from gateio_new_coins_announcements_bot.store_order import load_order
-from gateio_new_coins_announcements_bot.rotating_proxy import is_ready as rotating_proxy_is_ready
-from gateio_new_coins_announcements_bot.rotating_proxy import get_proxy
+
 
 config = load_config("config.yml")
 client = load_gateio_creds("auth/auth.yml")
diff --git a/src/gateio_new_coins_announcements_bot/rotating_proxy.py b/src/gateio_new_coins_announcements_bot/rotating_proxy.py
index 402832e..58bde0f 100644
--- a/src/gateio_new_coins_announcements_bot/rotating_proxy.py
+++ b/src/gateio_new_coins_announcements_bot/rotating_proxy.py
@@ -1,10 +1,11 @@
-from typing import Callable
-import requests
-import threading
-import time
 import itertools
 import socket
 import struct
+import threading
+import time
+from typing import Callable
+
+import requests
 
 import gateio_new_coins_announcements_bot.globals as globals
 from gateio_new_coins_announcements_bot.logger import logger
@@ -23,7 +24,7 @@
 
 def _fetch_proxies():
     logger.info("Fetching proxies...")
-    global _proxy_list
+    _proxy_list
     global _proxy
     threads: list[threading.Thread] = []
     try:
@@ -31,13 +32,14 @@
             "https://www.proxyscan.io/api/proxy?last_check=180&limit=20&type=socks5&format=txt&ping=1000"
         ).text
     except requests.exceptions.RequestException as e:
-        logger.error(e)
+        logger.error(f"Can't fetch proxies. Reason: {e}")
+        return
 
     # Merging old proxies with new ones
-    _list = list(proxy_res[:-1].split("\n") | _proxy_list.keys())
+    _merged_proxies = list(proxy_res[:-1].split("\n") | _proxy_list.keys())
 
-    if len(_list) > 0:
-        for p in _list:
+    if len(_merged_proxies) > 0:
+        for p in _merged_proxies:
             t = threading.Thread(target=checker, args=[p])
             t.start()
             threads.append(t)
@@ -50,7 +52,10 @@
 
 
 def get_proxy() -> str:
-    return next(_proxy)
+    try:
+        return next(_proxy)
+    except StopIteration as exc:
+        raise Exception("No proxies available") from exc
 
 
 def is_ready() -> bool:
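For context (not part of the diff): the `StopIteration` guard in `get_proxy()` matters because the pool is presumably an `itertools.cycle` built from `_proxy_list`, and a cycle over an empty collection raises `StopIteration` on the first `next()`; once the pool has at least one entry the cycle never ends. A standalone illustration — the names mirror the module's `_proxy`/`_proxy_list`, and the address is a placeholder:

```python
import itertools

# Empty pool: the very first next() raises StopIteration.
_proxy_list = {}
_proxy = itertools.cycle(_proxy_list.keys())
try:
    next(_proxy)
except StopIteration:
    print("No proxies available")  # what the new get_proxy() re-raises as Exception

# Non-empty pool: the cycle repeats forever, so next() always succeeds.
_proxy_list = {"203.0.113.7:1080": "203.0.113.7:1080"}
_proxy = itertools.cycle(_proxy_list.keys())
print(next(_proxy))  # 203.0.113.7:1080
```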
Reason: {e}") + return # Merging old proxies with new ones - _list = list(proxy_res[:-1].split("\n") | _proxy_list.keys()) + _merged_proxies = list(proxy_res[:-1].split("\n") | _proxy_list.keys()) - if len(_list) > 0: - for p in _list: + if len(_merged_proxies) > 0: + for p in _merged_proxies: t = threading.Thread(target=checker, args=[p]) t.start() threads.append(t) @@ -50,7 +52,10 @@ def _fetch_proxies(): def get_proxy() -> str: - return next(_proxy) + try: + return next(_proxy) + except StopIteration as exc: + raise Exception("No proxies available") from exc def is_ready() -> bool: @@ -70,8 +75,9 @@ def _every(delay: int, task: Callable): if not globals.stop_threads: try: task() - except Exception: + except Exception as e: logger.error("Problem while fetching proxies") + logger.debug(e) # skip tasks if we are behind schedule: next_time += (time.time() - next_time) // delay * delay + delay logger.info("Proxies fetching thread has stopped.") @@ -82,24 +88,21 @@ def checker(proxy: str): ip, port = proxy.split(":") sen = struct.pack("BBB", 0x05, 0x01, 0x00) - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - s.settimeout(5) - try: - s.connect((ip, int(port))) - s.sendall(sen) - - data = s.recv(2) - version, auth = struct.unpack("BB", data) - - if version == 5 and auth == 0: - _proxy_list[proxy] = proxy - else: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(5) + try: + s.connect((ip, int(port))) + s.sendall(sen) + + data = s.recv(2) + version, auth = struct.unpack("BB", data) + # Check if the proxy is socks5 and it doesn't require authentication + if version == 5 and auth == 0: + _proxy_list[proxy] = proxy + else: + _proxy_list.pop(proxy, None) + + except Exception as e: + logger.info(f"Proxy {proxy} invalid. Reason: {e}") _proxy_list.pop(proxy, None) - s.close() - return - - except Exception as e: - logger.info(f"Proxy {proxy} invalid. Reason: {e}") - _proxy_list.pop(proxy, None) - s.close() - return + return