@@ -155,7 +155,7 @@ def main(argv):
                scraped_domains.add(parsed.netloc)
            else:
                logger.debug("[~] Domain already added to the scanning queue: "
-                            + str(parsed.netloc))
+                            + SubCrawlHelpers.defang_url(str(parsed.netloc)))
    else:
        logger.info("[ENGINE] Using file input for URL processing...")
        try:
@@ -172,7 +172,8 @@ def main(argv):
                    scrape_urls.add(parsed_url)
                    scraped_domains.add(parsed.netloc)
                else:
-                    logger.debug("[ENGINE] Domain already added to the scanning queue: " + str(parsed.netloc))
+                    logger.debug("[ENGINE] Domain already added to the scanning queue: "
+                                 + str(parsed.netloc))
        except Exception as e:
            logger.error("[ENGINE] Error reading input file for URL processing: " + str(e))
            sys.exit(-1)
@@ -188,7 +189,7 @@ def main(argv):
    for start_url in scrape_urls:
        # This will add the full URL if it ends with an extension, then passes it along for parsing
        if start_url.endswith('.exe'):
-            logger.debug("[ENGINGE] Adding EXE URL directly: " + start_url)
+            logger.debug("[ENGINGE] Adding EXE URL directly: " + SubCrawlHelpers.defang_url(start_url))
            if start_url not in distinct_urls:
                distinct_urls.append(start_url)
                domain_urls.setdefault(parsed.netloc, []).append(start_url)
@@ -205,7 +206,7 @@ def main(argv):
            for path in paths:
                tmp_url = urljoin(tmp_url, path) + "/"

-                logger.debug("Generated new URL: " + tmp_url)
+                logger.debug("Generated new URL: " + SubCrawlHelpers.defang_url(tmp_url))

                if tmp_url not in distinct_urls:
                    distinct_urls.append(tmp_url)
@@ -264,7 +265,7 @@ def scrape_manager(data):
    init_pages = domain_urls
    process_processing_modules = processing_modules

-    logger.debug("[ENGINE] Starting down path... " + domain_urls[0])
+    logger.debug("[ENGINE] Starting down path... " + SubCrawlHelpers.defang_url(domain_urls[0]))

    result_dicts = list()
    for url in domain_urls:
@@ -285,7 +286,7 @@ def scrape(start_url, s_data):
    try:
        scrape_domain = dict()
        request_start = datetime.datetime.now()
-        logger.debug("[ENGINE] Scanning URL: " + start_url)
+        logger.debug("[ENGINE] Scanning URL: " + SubCrawlHelpers.defang_url(start_url))
        resp = requests.get(start_url, timeout=SubCrawlHelpers.get_config(
                            process_cfg, "crawler", "http_request_timeout"),
                            headers=SubCrawlHelpers.get_config(process_cfg, "crawler",
@@ -338,7 +339,7 @@ def scrape(start_url, s_data):

            if next_page not in crawl_pages and next_page not in init_pages \
                    and not next_page.lower().endswith(tuple(SubCrawlHelpers.get_config(process_cfg, "crawler", "ext_exclude"))):
-                logger.debug("[ENGINE] Discovered: " + next_page)
+                logger.debug("[ENGINE] Discovered: " + SubCrawlHelpers.defang_url(next_page))
                crawl_pages.append(next_page)
                scrape(next_page, s_data)
        else:
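
Note: SubCrawlHelpers.defang_url itself is not part of this diff, so the sketch below is only an assumption of what such a helper typically does: rewrite the scheme and bracket the dots so that URLs written to the log are no longer clickable (for example, http://evil.example/a.exe becomes hxxp://evil[.]example/a[.]exe).

# Hypothetical sketch only -- the real SubCrawlHelpers.defang_url is not shown
# in this commit and may differ.
class SubCrawlHelpers:

    @staticmethod
    def defang_url(url):
        # Rewrite the scheme and bracket every dot so the logged URL is
        # neither clickable nor auto-linked by log viewers.
        defanged = str(url)
        defanged = defanged.replace("https://", "hxxps://")
        defanged = defanged.replace("http://", "hxxp://")
        return defanged.replace(".", "[.]")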