forked from explainX/explainx
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrescale_numeric_feature.py
123 lines (83 loc) · 2.82 KB
/
rescale_numeric_feature.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from .imports import *
"""
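Helpers that locate the original feature columns of a DataFrame (columns that have a matching "<name>_impact" column), split them into numeric and categorical columns, and add "<name>_rescaled" columns.
Input: a pandas DataFrame.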
"""
class get_cols():
    """Column-discovery helpers for frames that pair features with ``_impact`` columns.

    An "original" column is one whose name does not contain ``"_impact"`` and
    for which the frame also holds a ``<name>_impact`` column.  A "categorical"
    column is one whose dtype, after ``DataFrame.convert_dtypes()``, equals the
    dtype a small all-string sample frame receives from the same conversion.
    """

    def __init__(self):
        super().__init__()
        # Placeholder attribute; no method of this class reads it.
        self.param = None

    def get_all_cols(self, df):
        """Return the original feature columns of ``df`` in frame order.

        Args:
            df: pandas DataFrame whose column labels are strings.

        Returns:
            List of column names that do not contain ``"_impact"`` and that
            have a ``<name>_impact`` counterpart in ``df``.
        """
        all_columns = df.columns
        return [
            c for c in all_columns
            if "_impact" not in c and c + "_impact" in all_columns
        ]

    def get_cate_numer_col(self, df):
        """Split the original columns of ``df`` into numeric and categorical.

        Args:
            df: pandas DataFrame.

        Returns:
            Tuple ``(numeric_columns, categorical_columns)``.
            ``numeric_columns`` holds the unique original columns that are not
            categorical, in frame order.  ``categorical_columns`` is the
            result of ``cate_col(df)`` and therefore covers every string
            column of ``df``, not only the original ones.
        """
        original_columns = self.get_all_cols(df)
        categorical_columns = self.cate_col(df)
        categorical_lookup = set(categorical_columns)
        # dict.fromkeys de-duplicates while keeping first-seen order; a plain
        # set difference would return the names in an arbitrary order.
        numeric_columns = list(dict.fromkeys(
            c for c in original_columns if c not in categorical_lookup
        ))
        return numeric_columns, categorical_columns

    def cate_col(self, df):
        """Return the names of the string-typed columns of ``df``.

        Args:
            df: pandas DataFrame.

        Returns:
            List of column names, in frame order.
        """
        cate, _ = self.cate_col_with_index(df)
        return cate

    def cate_col_with_index(self, df):
        """Return the string-typed columns of ``df`` and their positions.

        Args:
            df: pandas DataFrame.

        Returns:
            Tuple ``(cate, index)`` of two parallel lists: the column names and
            their zero-based positions within ``df.columns``.
        """
        converted = df.convert_dtypes()
        # Reference dtype: whatever convert_dtypes assigns to pure-string data
        # in the installed pandas version.
        string_dtype = pd.DataFrame(["a", "b", "c"]).convert_dtypes().dtypes.iloc[0]
        cate = []
        index = []
        # Iterating the dtypes Series judges every column by its own dtype,
        # even when column labels are duplicated.
        for position, (name, dtype) in enumerate(converted.dtypes.items()):
            if self._same_dtype(dtype, string_dtype):
                cate.append(name)
                index.append(position)
        return cate, index

    @staticmethod
    def _same_dtype(dtype, reference):
        """Return True when ``dtype == reference``; a failing comparison counts as False."""
        try:
            return bool(dtype == reference)
        except TypeError:
            # Best-effort: a dtype pair that cannot be compared is simply
            # treated as "not a string column" instead of aborting the scan.
            return False
class rescale_numeric_features():
    """Add ``<name>_rescaled`` columns to a frame of features.

    Numeric feature columns are min-max scaled onto the range 0..10;
    categorical feature columns receive a constant ``0``.
    """

    def __init__(self):
        super().__init__()
        # Placeholder attribute; no method of this class reads it.
        self.param = None

    def get_min_max(self, df_describe, variable_name):
        """Return ``(min, max)`` of one column from a ``DataFrame.describe()`` result.

        Args:
            df_describe: summary frame as produced by ``DataFrame.describe()``.
            variable_name: name of the column to look up in ``df_describe``.

        Returns:
            Tuple ``(mini, maxi)``.
        """
        summary = df_describe[variable_name]
        # Look the rows up by label: integer keys on a label-indexed Series
        # are deprecated in pandas, and the row positions move when the
        # summary has extra rows (e.g. describe(include="all")).
        if "min" in summary.index and "max" in summary.index:
            return summary.loc["min"], summary.loc["max"]
        # Summaries without those labels: fall back to the positions used by
        # the default describe() layout (4th row = min, last row = max).
        return summary.iloc[3], summary.iloc[-1]

    def add_col_rescaled(self, df):
        """Add a ``<name>_rescaled`` column for every feature column of ``df``.

        ``df`` is modified in place and also returned.

        Args:
            df: pandas DataFrame holding feature columns and their
                ``<name>_impact`` counterparts.

        Returns:
            The same DataFrame object with the new columns added.
        """
        df_describe = df.describe()
        # Find the numeric and categorical columns.
        column = get_cols()
        numeric_columns, categorical_columns = column.get_cate_numer_col(df)
        for nc in numeric_columns:
            mini, maxi = self.get_min_max(df_describe, nc)
            span = maxi - mini
            if span == 0:
                # Constant column: dividing by the zero range would turn every
                # value into NaN.  Multiplying by 0.0 yields 0.0 for present
                # values and keeps missing values missing.
                df[nc + "_rescaled"] = (df[nc] - mini) * 0.0
            else:
                df[nc + "_rescaled"] = (df[nc] - mini) / span * 10
        for cc in categorical_columns:
            # Categorical values have no numeric scale; use a constant.
            df[cc + "_rescaled"] = 0
        return df

    def rescale(self, df):
        """Return ``df`` with rescaled feature columns added (see ``add_col_rescaled``).

        Args:
            df: pandas DataFrame; it is modified in place.

        Returns:
            The same DataFrame object with ``<name>_rescaled`` columns added.
        """
        return self.add_col_rescaled(df)