Skip to content

Commit 4fe6f0a

Browse files
committed
Exclude .mp3 files
1 parent 52b4ae3 commit 4fe6f0a

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

Search-Engine-and-Crawler/Crawler/crawlerExpand.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ def process_links_from_soup (soup, cur_link, grab_all=False):
378378
# if the link is not in crawledURLsArray then it appends it to urls and crawledURLsArray
379379
if new_link not in crawledURLsArray:
380380
# Keeps only links that start with "http" (which drops mailto: links) and skips any link containing .pdf, .jpg, or .mp3.
381-
if new_link.startswith("http") and '.pdf' not in new_link and '.jpg' not in new_link:
381+
if new_link.startswith("http") and '.pdf' not in new_link and '.jpg' not in new_link and '.mp3' not in new_link:
382382
#???TODO: add checks for www.domain.com and https://
383383
# Adds new link to array
384384
plannedURLsArray.append(new_link)
@@ -426,7 +426,7 @@ def process_links_from_html (html, cur_link, grab_all=False):
426426
# if the link is not in crawledURLsArray then it appends it to urls and crawledURLsArray
427427
if new_link not in crawledURLsArray:
428428
# Keeps only links that start with "http" (which drops mailto: links) and skips any link containing .pdf, .jpg, or .mp3.
429-
if new_link.startswith("http") and '.pdf' not in new_link and '.jpg' not in new_link:
429+
if new_link.startswith("http") and '.pdf' not in new_link and '.jpg' not in new_link and '.mp3' not in new_link:
430430
#???TODO: add checks for www.domain.com and https://
431431
# Adds new link to array
432432
plannedURLsArray.append(new_link)

0 commit comments

Comments
 (0)