-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclean.py
46 lines (31 loc) · 1.58 KB
/
clean.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from dateutil import parser
from Column import Column
def clean(df, date_column_name, amount_column_name, transaction_name_column):
    """Normalise a raw transaction frame in place.

    Steps, in order:

    1. Drop the ``index`` column.
    2. Rename the caller-named amount, date and transaction-name columns to
       the labels held in ``Column.AMOUNT``, ``Column.DATE`` and
       ``Column.NAME``.
    3. Reformat the date column, then the amount column. Each of those
       helpers re-inserts its column at position 0.

    Args:
        df: pandas-style DataFrame to clean. It is mutated; nothing is
            returned.
        date_column_name: Current label of the date column.
        amount_column_name: Current label of the amount column.
        transaction_name_column: Current label of the transaction-name column.
    """
    df.drop(columns=["index"], inplace=True)

    # Renames run one after another, in this fixed order.
    rename_steps = (
        (__set_amount_column_name, amount_column_name),
        (__set_date_column_name, date_column_name),
        (__set_transaction_name_column_name, transaction_name_column),
    )
    for rename_step, current_label in rename_steps:
        rename_step(df, current_label)

    # Formatting happens only after every column carries its final label.
    for format_step in (__format_date, __format_amount):
        format_step(df)
def __set_amount_column_name(df, amount_column_name):
    """Rename ``amount_column_name`` to ``Column.AMOUNT.value`` in ``df``.

    The rename is applied in place; nothing is returned.
    """
    label_mapping = {amount_column_name: Column.AMOUNT.value}
    df.rename(columns=label_mapping, inplace=True)
def __set_date_column_name(df, date_column_name):
    """Rename ``date_column_name`` to ``Column.DATE.value`` in ``df``.

    The rename is applied in place; nothing is returned.
    """
    label_mapping = {date_column_name: Column.DATE.value}
    df.rename(columns=label_mapping, inplace=True)
def __set_transaction_name_column_name(df, transaction_name_column):
    """Rename ``transaction_name_column`` to ``Column.NAME.value`` in ``df``.

    The rename is applied in place; nothing is returned.
    """
    label_mapping = {transaction_name_column: Column.NAME.value}
    df.rename(columns=label_mapping, inplace=True)
def __format_date(df):
    """Rewrite the date column of ``df`` as ``YYYY-MM-DD`` strings, in place.

    Every value in the ``Column.DATE.value`` column is parsed with
    ``dateutil.parser.parse`` and rendered back with ``strftime``. The column
    is then dropped and re-inserted at position 0, so it also moves to the
    front of the frame.

    Args:
        df: DataFrame whose ``Column.DATE.value`` column holds values that
            ``dateutil.parser.parse`` accepts. It is mutated; nothing is
            returned.

    NOTE(review): ``parser.parse`` is called with its defaults, so ambiguous
    day/month inputs follow dateutil's default rules -- confirm that this
    matches the incoming data.
    """
    date_label = Column.DATE.value
    # Named ``output_format`` so the built-in ``format`` is not shadowed.
    output_format = "%Y-%m-%d"

    # Parse everything before touching ``df`` so a bad value leaves the frame
    # intact. Reading the column directly avoids ``iterrows``, which builds a
    # whole Series per row just to fetch one cell.
    iso_dates = [
        parser.parse(raw_date).strftime(output_format)
        for raw_date in df[date_label]
    ]

    # Drop + insert (rather than assigning) keeps the move to position 0.
    df.drop(columns=[date_label], inplace=True)
    df.insert(0, date_label, iso_dates)
def __format_amount(df):
    """Convert the amount column of ``df`` from text to floats, in place.

    Commas, spaces and dots are stripped from every value in the
    ``Column.AMOUNT.value`` column. The remaining text is read as a whole
    number of hundredths and divided by 100. The column is then dropped and
    re-inserted at position 0, so it also moves to the front of the frame.

    Args:
        df: DataFrame whose ``Column.AMOUNT.value`` column holds strings.
            It is mutated; nothing is returned.

    NOTE(review): because every separator is removed before scaling, this is
    only correct when each amount is written with exactly two decimal digits
    (``"1,234.56"`` or ``"1.234,56"``); ``"100"`` would become ``1.0``.
    Confirm against the input files.
    """
    amount_label = Column.AMOUNT.value
    # One pass that deletes ',', ' ' and '.' instead of three chained replaces.
    strip_separators = str.maketrans("", "", ", .")

    # Divide by 100 instead of multiplying by 0.01: 0.01 has no exact binary
    # representation, so the product picks up error
    # (35 * 0.01 == 0.35000000000000003) while the quotient is correctly
    # rounded (35 / 100 == 0.35).
    amounts = [
        float(raw_amount.translate(strip_separators)) / 100
        for raw_amount in df[amount_label]
    ]

    # Drop + insert (rather than assigning) keeps the move to position 0.
    df.drop(columns=[amount_label], inplace=True)
    df.insert(0, amount_label, amounts)