haohaom1 · haohaom1 · Nov 13, 2018 · Nov 13, 2018
diff --git a/classifier.py b/classifier.py
@@ -251,7 +251,7 @@ def move_emails(mail, df):
     '''
 
     # converts str to boolean
-    df['moved'].apply(lambda x: x == 'True')
+    df['moved_bool'] = df['moved'].apply(lambda x: x == 'True')
 
     for _, row in df.iterrows():
         folder = row['folder']
@@ -269,4 +269,6 @@ def move_emails(mail, df):
         # byte_id = str.encode('1779')
         # print(byte_id, type(byte_id))
 
+        ### COMMENT OUT the line below to prevent emails from being moved
         reader.move_email_to_folder(mail=mail, orig_folder=folder, target_folder=target_folder, email_uid=email_uid)
+        print('moved emails')
diff --git a/emailreader.py b/emailreader.py
@@ -145,6 +145,9 @@ def get_links(self, email_df):
     # Method for moving an email
     def move_email_to_folder(self, mail, orig_folder, target_folder, email_uid):
 
+        if not mail:
+            mail = self.mail
+
         if orig_folder.lower() == 'inbox':
             mail.select('"INBOX"')
         else:

diff --git a/scraper.py b/scraper.py
@@ -30,10 +30,9 @@ def __init__(self):
 
         self.num_features = 3  # determines the number of features to use
 
-    # exits the function after 60 seconds; throws keyboard interrupt error if happens.
-    @threading_timer.exit_after(60)
+    # @threading_timer.exit_after(60)
     def get_text_from_url(self, url, clean=True):
-        hdr = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A'}
+        hdr = {'User-Agent': 'Mozilla/5.0'}
 
         try:
 
@@ -212,6 +211,7 @@ def scrape_words_from_urls(self, urls, split_up_links):
         # urls is a 1D list of urls
         # returns a list of list of words
         for url in urls:
+            print('url', url)
 
             try:
                 words = self.get_text_from_url(url, clean=False)
@@ -228,9 +228,9 @@ def scrape_words_from_urls(self, urls, split_up_links):
                     return words
                 list_of_words.append(words)
 
-            except (URLError, HTTPError, KeyboardInterrupt) as error:
+            except:# (URLError, HTTPError, KeyboardInterrupt, TimeoutError) as error:
                 warnings.warn('Unable to load {}'.format(url))
-                print(error)
+                # print(error)
                 if split_up_links:
                     return None
                 list_of_words.append('')

diff --git a/test.py b/test.py
@@ -16,12 +16,56 @@
 import os
 import warnings
 import webbrowser
+from urllib.request import Request, urlopen
+from bs4 import BeautifulSoup
+import requests
+
 ##%%
 scraper = Scraper()
 reader = Emailreader()
 username = 'prospectstudent@colby.edu'
 password = 'Student.2017'
 mail = reader.login_email(username, password)
+#%%
+df = pd.read_csv(r'logs\2018-11-08 13.09.54_logs.csv')
+df.head(15)
+#%%
+# print(df.columns)
+classifier.move_emails(mail, df)
+# reader.move_email_to_folder(mail, 'Priority Mail', 'Completed', b'1896')
+#%%
+
+
+url = 'http://www.ifre.com/bobs-big-bet-barclays-purchase-of-lehman-brothers-10-years-later/21356030.fullarticle'
+
+hdr = {'User-Agent': 'Mozilla/5.0'}
+
+try:
+    page = requests.get(url).text
+    soup = BeautifulSoup(page, 'lxml')  # creates a BS4 object
+except:
+    print('asfd')
+
+# try:
+#     r = requests.get(url, timeout=10.0)
+# except requests.Timeout as err:
+#     print(err)
+# except requests.RequestException as err:
+#     print(err)
+
+#%%
+import requests
+MAX_RETRIES = 20
+
+session = requests.Session()
+adapter = requests.adapters.HTTPAdapter(max_retries=MAX_RETRIES)
+session.mount('https://', adapter)
+session.mount('http://', adapter)
+
+r = session.get(url)
+soup = BeautifulSoup(r, 'lxml')
+
+
 #%%
 a = 'b\'123'
 str.encode(a)

diff --git a/tkinter-skeleton.py b/tkinter-skeleton.py
@@ -11,20 +11,18 @@
 '''
 TO DO LIST
 
-- - Record whether the user chose to automatically move all emails in raiser CSV function
-
-- Fix Pathing Error in Windows (control f 'logs/')
-- Debug functionalities on Windows machines, such as key binds, etc
-- Add a waiting animation when Classifying Emails
-- fix bug of why i cant just classify 1 mail
+- Record whether the user chose to automatically move all emails in raiser CSV function: have two booleans
+move and move email: Set the column to move and only move emails if both are true
+- Add arrow keys as shortcut in table
+- When moving emails, concatenate emails based on their id and process each situation accordingly
 
 IDEAS FOR FUTURE
 
 - Fix Anti-scrape policy
     - idea 1: create a separate bot that identifies whether an article is
     - idea 2: rotate IP addresses, delay scrape time
 - optimize retrieving data (maybe line by line) so a crash wont lose all the data
-
+- Add a waiting animation when Classifying Emails
 
 
 TO SWITCH FROM WINDOWS AND MAC
@@ -316,6 +314,7 @@ def setBindings(self):
         # binds control-e to switch the label of an element in the bottom table
         self.bottomFrame.bind('<Control-e>', self.switchLabel)
         self.bottomFrame.bind('<Control-w>', self.switchMovedState)
+        self.bottomFrame.bind('<Control-a>', self.moveAll)
 
 
     def handleQuit(self, event=None):
@@ -393,11 +392,16 @@ def process_text(string, length=50, total_string_size=100):
             self.tree.insert('', i, text=i, values=tuple(row), tag=shaded)
 
         self.tree.tag_configure('even_row', background='lightgrey')
-        self.tree.pack(fill=tk.X, padx=5, pady=5)
+        self.tree.pack(fill=tk.X, side='left', expand=1)
 
         # binds Button1 to the tree
         self.tree.bind('<<TreeviewSelect>>', self.onselect)
 
+        # adds scrollbar to the tree
+        vsb = ttk.Scrollbar(self.bottomFrame, orient="vertical", command=self.tree.yview)
+        vsb.pack(side='right', fill=tk.Y)
+        self.tree.configure(yscrollcommand=vsb.set)
+
     def buildScoresTable(self, curItem):
         '''
         passes in the current selected treeview row to display the score table
@@ -653,7 +657,7 @@ def handleDisplayCSV(self):
             messagebox.showwarning('Warning', 'No data available to export to Raiser\'s Edge')
             return
 
-        d = RaiserDialog(self.root, df=merged_df, merged_df=True, title='Raiser Edge CSV')
+        d = RaiserDialog(self.root, df=merged_df, merged_df=True, main_df=self.df, title='Raiser Edge CSV')
 
     def handleConfidence(self):
         if self.df is None:
@@ -783,7 +787,11 @@ def switchMovedState(self, event=None):
 
         self.df.ix[row_num, 'moved'] = new_label  # updates the new value in the locally stored dataframe
 
-        pass
+    # switches all the states from true to false
+    def moveAll(self, event=None):
+        self.df['moved'] = 'True'
+        self.buildBottomTable()
+
 
     def exportEmails(self, event=None):
         '''
@@ -842,11 +850,12 @@ def cancel(self, event=None):
 # dialog box for displaying CSVs
 class RaiserDialog(simpledialog.Dialog):
 
-    def __init__(self, parent, csv_path=None, df=None, merged_df=False, title='Title'):
+    def __init__(self, parent, csv_path=None, df=None, main_df=None, merged_df=False, title='Title'):
         '''
 
         :param parent: parent widget
         :param csv_path: the csv path of the raiser_df, optional
+        :param main_df: the entire dataframe, containing both received and completed data
         :param df: the actual raiser df object
         :param merged_df: whether or not the passed raiser_df is the merged version, or has already been processed already
         :param title: title of this widget
@@ -857,55 +866,69 @@ def __init__(self, parent, csv_path=None, df=None, merged_df=False, title='Title
                                  value=preset_path,
                                  name='pathVar')    # path of raiser file to be exported
 
+        self.main_df = main_df      # df containing both received and completed data
+
         if df is None:
             self.df = pd.read_csv(csv_path)
         else:
             self.df = df
 
         if merged_df:
             self.merged_df = df
+
             raiser_headers = ['constituent_id', 'date', 'move/status change (or n/a)', 'type',
                               'author', 'description', 'text']
             self.df = self.df.loc[:, raiser_headers].reset_index(drop=True)
 
-
         simpledialog.Dialog.__init__(self, parent, title)
 
     def body(self, master):
         ## Builds the Table to display the CSV
 
         # builds two different frames
-        top_frame = tk.Frame(master=master)
+        top_frame = tk.Frame(master=master)     # used to contain the table
         top_frame.pack(side=tk.TOP, fill=tk.X)
-        bottom_frame = tk.Frame(master=master)
+        bottom_frame = tk.Frame(master=master)  # used to contain the path
         bottom_frame.pack(side=tk.TOP)
         error_frame = tk.Frame(master=master)
         error_frame.pack(side=tk.BOTTOM)
 
-        values = self.df.values
 
-        # builds column and row names
-        for c, col_name in enumerate(self.df.columns, start=1):
-            tk.Label(top_frame, text=col_name).grid(row=0, column=c)
+        # preprocesses the dataframe for displaying
+        df = self.df.rename(index=int, columns={'constituent_id': 'id'})  # make the columns shorter
+        df['text'] = self.df['text'].apply(lambda x: x[:min(len(x), 150)])  # limits the characters in the text column
 
-        for r, index_name in enumerate(self.df.index, start=1):
-            tk.Label(top_frame, text=index_name).grid(row=r, column=0)
+        # # builds column and row names
+        tree = ttk.Treeview(top_frame)
+        num_col = len(df.columns)
+        tree['columns'] = tuple(range(num_col))
+        for i, col in enumerate(self.df.columns):
+            # give extra width to text
+            width = 300 if col == 'text' else 110
+            stretch = col == 'text'
 
-        # adds the values
+            tree.column(i, width=width, minwidth=0, stretch=stretch)
+            tree.heading(i, text=col)
 
-        for i, val_row in enumerate(values, start=1):
-            for j, val in enumerate(val_row, start=1):
+        # builds the contents of the table
+        for i, row in df.iterrows():
+            shaded = 'even_row' if i % 2 == 0 else 'odd_row'
+            tree.insert('', i, text=i, values=tuple(row), tag=shaded)
+
+        tree.tag_configure('even_row', background='lightgrey')
+        tree.pack(fill=tk.X, side='left', expand=1)
 
-                # reformat the string if it is the text column
-                if isinstance(val, str):
-                    val = val[:40] + '...'
-                tk.Label(top_frame, text=val).grid(row=i, column=j)
+        # adds scrollbar to the tree
+        vsb = ttk.Scrollbar(top_frame, orient="vertical", command=tree.yview)
+        vsb.pack(side='right', fill=tk.Y)
+        tree.configure(yscrollcommand=vsb.set)
 
+        # makes the index column smaller
+        tree.column("#0", width=50)
 
         label = tk.Label(bottom_frame, text='Path: ')
         label.pack(side=tk.LEFT)
 
-
         e = tk.Entry(bottom_frame, textvariable=self.path, exportselection=0, width=35)
         e.pack(side=tk.LEFT)
 
@@ -954,9 +977,9 @@ def apply(self):
         print('downloaded at ', self.path.get())
 
         # moves the emails as well
-        classifier.move_emails(mail=mail, df=self.merged_df)
-        print('moved emails')
-
+        classifier.move_emails(mail=mail, df=self.main_df)
+        # print(self.main_df)
+        print('finished moving emails')
 
 class ClassifyDialog(simpledialog.Dialog):