#!/usr/bin/env python3
"""
Integrated Proxy Tool - Combines scraping and checking functionality
Scrapes proxies from websites and then checks them for working status
"""
import argparse
import os
import sys

from proxy_scraper import ProxyScraper
from proxy_checker import ProxyChecker
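# The companion modules are expected to live alongside this script. Their
# interfaces, as inferred from the calls below:
#   ProxyScraper(max_workers, delay): scrape_urls(urls, max_pages_per_site),
#       display_results(), save_results(), and an .all_proxies collection
#   ProxyChecker(timeout, max_workers): run_check(proxy_file)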

def main():
    parser = argparse.ArgumentParser(description='Integrated Proxy Scraper and Checker')
    parser.add_argument('--mode', choices=['scrape', 'check', 'both'], default='both',
                        help='Mode: scrape only, check only, or both')
    parser.add_argument('--urls', nargs='+', help='URLs to scrape proxies from')
    parser.add_argument('--url-file', help='File containing URLs to scrape')
    parser.add_argument('--proxy-file', help='File containing proxies to check')
    parser.add_argument('--max-workers', type=int, default=50, help='Number of worker threads')
    parser.add_argument('--timeout', type=int, default=10, help='Timeout in seconds for proxy testing')
    parser.add_argument('--max-pages', type=int, default=5, help='Max pages to scrape per site')
    parser.add_argument('--delay', type=float, default=1, help='Delay in seconds between scraping requests')
    args = parser.parse_args()
print("Integrated Proxy Tool v1.0")
print("=" * 40)
# If no mode specified and running interactively, ask user
if len(sys.argv) == 1: # No arguments provided
print("\nSelect mode:")
print("1. Scrape only")
print("2. Check only")
print("3. Both (scrape then check)")
while True:
choice = input("\nEnter choice (1-3): ").strip()
if choice == '1':
args.mode = 'scrape'
break
elif choice == '2':
args.mode = 'check'
break
elif choice == '3':
args.mode = 'both'
break
else:
print("Invalid choice. Please enter 1, 2, or 3.")

    scraped_proxies_file = None

    # Scraping phase
    if args.mode in ['scrape', 'both']:
        print("\n[SCRAPING PHASE]")
        scraper = ProxyScraper(max_workers=args.max_workers, delay=args.delay)

        # URL sources, in order of precedence: --urls, --url-file,
        # a default proxy_sources.txt, then interactive input.
        urls = []
        if args.urls:
            urls = args.urls
        elif args.url_file:
            try:
                with open(args.url_file, 'r') as f:
                    urls = [line.strip() for line in f if line.strip() and not line.startswith('#')]
            except FileNotFoundError:
                print(f"URL file {args.url_file} not found!")
                return
        elif os.path.exists("proxy_sources.txt"):
            print("No URLs specified, using default proxy_sources.txt")
            try:
                with open("proxy_sources.txt", 'r') as f:
                    urls = [line.strip() for line in f if line.strip() and not line.startswith('#')]
                print(f"Loaded {len(urls)} URLs from proxy_sources.txt")
            except Exception as e:
                print(f"Error reading proxy_sources.txt: {e}")
                return
        else:
            # Interactive mode for URL input
            print("\nNo proxy_sources.txt found. Enter URLs to scrape (one per line, empty line to finish):")
            while True:
                url = input("URL: ").strip()
                if not url:
                    break
                if not url.startswith(('http://', 'https://')):
                    url = 'http://' + url
                urls.append(url)

        if not urls:
            print("No URLs provided for scraping!")
            if args.mode == 'scrape':
                return
        else:
            scraper.scrape_urls(urls, max_pages_per_site=args.max_pages)
            scraper.display_results()
            scraper.save_results()

            # Save scraped proxies to a temporary file for the checking phase.
            if scraper.all_proxies:
                scraped_proxies_file = "temp_scraped_proxies.txt"
                with open(scraped_proxies_file, 'w') as f:
                    f.write('\n'.join(sorted(scraper.all_proxies)))
                print(f"\nScraped proxies saved to: {scraped_proxies_file}")

    # Checking phase
    if args.mode in ['check', 'both']:
        print("\n[CHECKING PHASE]")
        checker = ProxyChecker(timeout=args.timeout, max_workers=args.max_workers)

        # Proxy sources, in order of precedence: --proxy-file, freshly
        # scraped proxies, then a default proxy_list.txt.
        proxy_file = None
        if args.proxy_file:
            proxy_file = args.proxy_file
        elif scraped_proxies_file and os.path.exists(scraped_proxies_file):
            proxy_file = scraped_proxies_file
        elif os.path.exists("proxy_list.txt"):
            proxy_file = "proxy_list.txt"
        else:
            print("No proxy file specified and no default found!")
            return

        checker.run_check(proxy_file)

        # Clean up the temporary file from the scraping phase.
        if scraped_proxies_file and os.path.exists(scraped_proxies_file):
            try:
                os.remove(scraped_proxies_file)
                print(f"\nCleaned up temporary file: {scraped_proxies_file}")
            except OSError:
                pass


if __name__ == "__main__":
    main()