11#!/usr/bin/env python
22import argparse
3- import signal
43import sys
54import re
65import csv
1514
1615logger = logging .getLogger (__name__ )
1716
18-
19- class TookTooDamnLongException (Exception ): # Custom exception class
20- pass
21-
22-
23- def toodamnlong_handler (signum , frame ): # Custom signal handler
24- raise TookTooDamnLongException
25-
26-
27- # Change the behavior of SIGALRM
28- signal .signal (signal .SIGALRM , toodamnlong_handler )
29-
3017# current time, used in the names of the folder and the logging file
3118curtime = time .strftime ("%Y-%m-%d-%H-%M-%S" , time .gmtime ())
3219# Create a new log file
@@ -87,9 +74,7 @@ def main():
8774 logger .info ("CSV headers written" )
8875
8976 for idx , txt_file in enumerate (all_txt_files ):
90- # Start the timer.
91- # Once [patience] seconds are over, a SIGALRM signal is sent.
92- signal .alarm (patience )
77+
9378 with open (txt_file ) as fp :
9479 visible_text_list = fp .readlines ()
9580 current_url = visible_text_list [0 ].strip ().rstrip ()
@@ -103,22 +88,11 @@ def main():
10388 # This try/except loop ensures that
10489 # you'll catch TookTooDamnLongException when it's sent.
10590 # https://stackoverflow.com/questions/25027122/break-the-function-after-certain-time
106- try :
107- # counts keywords in page
108- found_count , found_keywords = count_keywords (
109- visible_text_list ,
110- keywords
111- )
112- except TookTooDamnLongException :
113- # TODO: Keep a record of pages that took forever to search
114- tqdm .write ("[{0:0{width}d}] Aarrrgh! "
115- "TOOK TOO DAMN LONG TO SEARCH! {1}" .
116- format (idx + 1 , current_url , width = num_digits ))
117- logger .warn ("TTDL >>> {} <<<" .format (current_url ))
118- pbar .update (1 )
119- # continue the for loop if count_keywords takes more
120- # than [patience] seconds
121- continue
91+ # counts keywords in page
92+ found_count , found_keywords = count_keywords (
93+ visible_text_list ,
94+ keywords
95+ )
12296
12397 logger .info ("Keywords found: {}" .format (found_count ))
12498 found_keywords_as_dict = dict ((x , y ) for x , y in found_keywords )
0 commit comments