1111scraper = Scraper ()
1212reader = Emailreader ()
1313
14- constituent_df = pd .read_csv (pathlib .PureWindowsPath (pathlib .Path ('datasets/OrganizationRelationships_NickNamesAdded_5.24.2018.csv' ))
15- )
16-
14+ # constituent_df = pd.read_csv(pathlib.PureWindowsPath(pathlib.Path('datasets/OrganizationRelationships_NickNamesAdded_5.24.2018.csv'))
15+ # )
16+ constituent_df = 'datasets/OrganizationRelationships_NickNamesAdded_5.24.2018.csv'
1717
1818
1919def score (df , clf , return_proba = False , remove_nan = True ):
@@ -119,8 +119,6 @@ def classify_mails(mail, folder, clf=None, cap_at=None, latest_first=True, thres
119119 :return: dataframe containing UID of the emails, Scores, probability,
120120 and confidence, decision, and timestamp, constituent info, sorted by confidence,
121121 '''
122- from datetime import datetime
123- print ('starting classify' , datetime .now ())
124122
125123 if not clf :
126124 clf = joblib .load ('Classifiers/LR_7_30.pkl' )
@@ -150,10 +148,17 @@ def classify_mails(mail, folder, clf=None, cap_at=None, latest_first=True, thres
150148
151149 df = classify_mails_from_data (mail = mail , df = scores_df , folder = folder , threshold = threshold , move = move )
152150
151+ # turn constituent_id to int
152+ df ['constituent_id' ] = df ['constituent_id' ].astype (np .int64 )
153+
153154 # gets the actual words from the urls
154155 # IF TIME PERMITTED USE WORDS FROM THE START TO BE MORE EFFICIENT
155156 df ['text' ] = df ['url' ].apply (lambda x : ' ' .join (scraper .get_text_from_url (x , clean = False )))
156157
158+ # documents the mail source
159+ df ['folder' ] = folder
160+
161+
157162 # sorts df by probability
158163 # df.sort_values(['proba'], inplace=True)
159164
@@ -166,7 +171,8 @@ def classify_mails(mail, folder, clf=None, cap_at=None, latest_first=True, thres
166171 date = datetime .strftime (datetime .now (), '%Y-%m-%d %H.%M.%S' )
167172
168173 # saves to the logs
169- windows_path = pathlib .PureWindowsPath (pathlib .Path ('logs/{}_logs.csv' .format (date )))
174+ # windows_path = pathlib.PureWindowsPath(pathlib.Path('logs/{}_logs.csv'.format(date)))
175+ windows_path = 'logs/{}_logs.csv' .format (date )
170176 df .to_csv (windows_path , index = False )
171177
172178 # if to_raiser is true AND there is an available data from logs, then return the data to be
@@ -195,6 +201,8 @@ def create_csv_for_raiser(logs=None, df=None, return_merged_df=False):
195201
196202 # returns null if there are no values in the log to be moved
197203 if df .empty :
204+ if return_merged_df :
205+ return df , df
198206 return df
199207
200208 dates = datetime .strftime (datetime .now (), '%m/%d/%Y' )
@@ -232,6 +240,33 @@ def get_description(fname, lname, arg):
232240
233241 if return_merged_df :
234242
235- return df
243+ return raisers_df , df
236244
237245 return raisers_df
246+
247+
def move_emails(mail, df):
    '''
    Uses the Raiser CSV to determine which emails to move to which folder.

    Every row of df describes one email. The target folder is chosen as:
        label == 0 and moved  -> 'Received'
        label == 1 and moved  -> 'Completed'
        anything else         -> 'Further Review Needed'

    :param mail: mail connection, passed through unchanged to
        reader.move_email_to_folder
    :param df: dataframe with the columns 'id' (email UID), 'folder'
        (folder the email currently sits in), 'label' and 'moved'
    :return: None; the only effect is one reader.move_email_to_folder call
        per row. df itself is not modified.
    '''

    # Normalises 'moved' to real booleans. The column may hold actual booleans
    # or the strings 'True'/'False'; a bare truthiness check would treat the
    # string 'False' as True. The flags are kept in a separate list because
    # Series.apply returns a new Series and never changes the column in place.
    moved_flags = [str(value) == 'True' for value in df['moved']]

    for (_, row), was_moved in zip(df.iterrows(), moved_flags):
        folder = row['folder']
        # the UID is handed to the reader as bytes
        email_uid = str(row['id']).encode()

        if was_moved and row['label'] == 0:
            target_folder = 'Received'
        elif was_moved and row['label'] == 1:
            target_folder = 'Completed'
        else:
            target_folder = 'Further Review Needed'

        reader.move_email_to_folder(mail=mail, orig_folder=folder,
                                    target_folder=target_folder, email_uid=email_uid)
0 commit comments