forked from itsOwen/CyberScraper-2077
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
387 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
class TorException(Exception):
    """Root of the Tor error hierarchy; catch it to handle any Tor failure."""


class TorConnectionError(TorException):
    """Signals that a connection to the Tor network could not be made."""


class TorInitializationError(TorException):
    """Signals that the Tor service did not initialize."""


class TorCircuitError(TorException):
    """Signals a failure while creating or managing a Tor circuit."""


class OnionServiceError(TorException):
    """Signals a failure while accessing an onion service."""


class TorProxyError(TorException):
    """Signals a problem with the Tor SOCKS proxy."""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from dataclasses import dataclass | ||
from typing import List | ||
|
||
@dataclass
class TorConfig:
    """Settings bundle for a Tor connection and the scraping done through it."""

    # Port of the local Tor SOCKS proxy (TorManager connects to 127.0.0.1).
    socks_port: int = 9050
    # Presumably the Tor control port; not read by the code visible here.
    control_port: int = 9051
    # When true, TorManager logs at DEBUG level instead of INFO.
    debug: bool = False
    # Presumably a retry budget; not read by the code visible here.
    max_retries: int = 3
    # Passed as the ``timeout`` of each HTTP request made by TorManager.
    timeout: int = 30
    # Circuit settings; not read by the code visible here.
    circuit_timeout: int = 10
    auto_renew_circuit: bool = True
    # When true, TorManager checks the Tor connection before each fetch.
    verify_connection: bool = True
    # Pool of User-Agent strings; ``None`` selects the built-in defaults.
    user_agents: List[str] = None

    def __post_init__(self) -> None:
        """Fill in the default User-Agent pool when none was supplied."""
        if self.user_agents is not None:
            return

        # A new list is built per instance so configs never share the pool.
        self.user_agents = [
            'Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0',
            'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0',
            'Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0',
        ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
import logging
import random
import socket
from typing import Dict, Optional
from urllib.parse import urlparse

import requests
import socks

from .exceptions import (
    OnionServiceError,
    TorConnectionError,
    TorException,
    TorInitializationError,
    TorProxyError,
)
from .tor_config import TorConfig
|
||
class TorManager:
    """Manage the Tor SOCKS proxy setup and HTTP sessions routed through it.

    Creating an instance has process-wide side effects: it installs a PySocks
    default proxy and replaces ``socket.socket`` (see ``_setup_proxy``).
    """

    def __init__(self, config: Optional[TorConfig] = None):
        """Configure logging and the Tor proxy.

        Args:
            config: Connection settings. When omitted, a fresh ``TorConfig``
                is created for this instance; a ``TorConfig()`` default in the
                signature would be built once and shared by every manager.

        Raises:
            TorProxyError: If the SOCKS proxy cannot be configured.
        """
        if config is None:
            config = TorConfig()
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG if config.debug else logging.INFO)
        self._setup_logging()
        self._setup_proxy()

    def _setup_logging(self):
        """Attach a stream handler to the module logger, at most once."""
        # getLogger(__name__) returns the same logger for every instance, so
        # adding a handler unconditionally would duplicate each log line once
        # per TorManager created.
        if self.logger.handlers:
            return
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )
        self.logger.addHandler(handler)

    def _setup_proxy(self):
        """Configure the SOCKS proxy used to reach Tor.

        Raises:
            TorProxyError: If any step of the proxy setup fails.
        """
        try:
            socks.set_default_proxy(socks.SOCKS5, "127.0.0.1", self.config.socks_port)
            # NOTE(review): this monkey-patches the socket class for the whole
            # process, not just for this manager. Kept because callers may
            # rely on it; confirm that it is intended.
            socket.socket = socks.socksocket
            proxy_url = f'socks5h://127.0.0.1:{self.config.socks_port}'
            self.proxies = {'http': proxy_url, 'https': proxy_url}
        except Exception as e:
            raise TorProxyError(f"Failed to setup Tor proxy: {str(e)}") from e

    def get_headers(self) -> Dict[str, str]:
        """Build browser-like request headers.

        Returns:
            A header dict whose ``User-Agent`` is picked at random from
            ``config.user_agents``; all other headers are fixed.
        """
        return {
            'User-Agent': random.choice(self.config.user_agents),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'DNT': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-User': '?1',
        }

    async def verify_tor_connection(self) -> bool:
        """Check that traffic is leaving through the Tor network.

        Queries ``https://check.torproject.org/api/ip`` through a Tor session
        and reads the ``IsTor`` flag of the JSON reply. The HTTP call is
        synchronous, so it blocks the event loop while it runs.

        Returns:
            ``True`` when the service reports the connection as Tor.

        Raises:
            TorConnectionError: If the check cannot be performed or the
                connection is reported as not using Tor.
        """
        try:
            # The context manager closes the session's pooled connections.
            with self.get_tor_session() as session:
                response = session.get('https://check.torproject.org/api/ip',
                                       timeout=self.config.timeout)
                is_tor = response.json().get('IsTor', False)
        except Exception as e:
            raise TorConnectionError(f"Failed to verify Tor connection: {str(e)}") from e

        # Raised outside the try block so the message is not wrapped a second
        # time by the generic handler above.
        if not is_tor:
            raise TorConnectionError("Connection is not using Tor network")

        self.logger.info("Successfully connected to Tor network")
        return True

    def get_tor_session(self) -> requests.Session:
        """Create a ``requests`` session that routes through Tor.

        Returns:
            A new session with the Tor proxies and fresh headers applied. The
            caller owns it and should close it (it is a context manager).
        """
        session = requests.Session()
        # update() keeps the session's own mapping objects: headers stay
        # case-insensitive and sessions do not share self.proxies.
        session.proxies.update(self.proxies)
        session.headers.update(self.get_headers())
        return session

    @staticmethod
    def is_onion_url(url: str) -> bool:
        """Return whether ``url`` has a hostname ending in ``.onion``.

        Unparseable or non-string input yields ``False`` instead of raising.
        """
        try:
            hostname = urlparse(url).hostname
        except (ValueError, TypeError, AttributeError):
            return False
        return isinstance(hostname, str) and hostname.endswith('.onion')

    async def fetch_content(self, url: str) -> str:
        """Fetch the body of an onion-service page.

        The HTTP call is synchronous, so it blocks the event loop while it
        runs.

        Args:
            url: Address to fetch; its hostname must end in ``.onion``.

        Returns:
            The response body as text.

        Raises:
            OnionServiceError: If ``url`` is not an onion address or the
                request fails (including non-2xx responses).
            TorConnectionError: If ``config.verify_connection`` is set and the
                Tor check fails.
            TorException: For any other unexpected failure.
        """
        if not self.is_onion_url(url):
            raise OnionServiceError("URL is not a valid onion service")

        try:
            if self.config.verify_connection:
                await self.verify_tor_connection()

            with self.get_tor_session() as session:
                response = session.get(url, timeout=self.config.timeout)
                response.raise_for_status()
                content = response.text
        except TorException:
            # Already a domain error (e.g. TorConnectionError from the
            # verification step); do not re-wrap it as "unexpected".
            raise
        except requests.RequestException as e:
            raise OnionServiceError(f"Failed to fetch onion content: {str(e)}") from e
        except Exception as e:
            raise TorException(f"Unexpected error fetching onion content: {str(e)}") from e

        self.logger.info("Successfully fetched content from %s", url)
        return content
Oops, something went wrong.