-
Notifications
You must be signed in to change notification settings - Fork 0
/
load_data.py
46 lines (40 loc) · 1.32 KB
/
load_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.utils import class_weight
def load_img(path, size=(520, 520)):
    """Load an image file as a resized grayscale NumPy array.

    Args:
        path: Location of the image file, passed to ``PIL.Image.open``.
        size: Target ``(width, height)`` in pixels handed to
            ``Image.resize``. Defaults to ``(520, 520)``.

    Returns:
        A NumPy array built from the single-channel ("L" mode) image after
        resizing.
    """
    # Image.open is lazy and keeps the underlying file open; the context
    # manager guarantees the handle is released even if conversion fails.
    with Image.open(path) as img:
        # convert() returns an independent in-memory image, so it stays
        # usable after the source file has been closed.
        gray = img.convert("L")
    return np.array(gray.resize(size))
def load_to_dataframe(path_y, y_label=1, split=1, shuffle=1, weights=0,
                      val_fraction=0.05):
    """Read the label CSV and optionally shuffle, split and weight it.

    Args:
        path_y: Path (or file-like object) of the CSV, passed straight to
            ``pd.read_csv``. A ``class_number`` column is required whenever
            ``y_label`` or ``weights`` is 1.
        y_label: If 1, cast the ``class_number`` column to ``str``.
        split: If 1, return separate train and validation frames; if 0,
            return the whole frame.
        shuffle: If 1, shuffle the rows with a fixed seed (42) and reset the
            index, so the result is reproducible between runs.
        weights: If 1, additionally return "balanced" class weights computed
            over the whole frame (before splitting).
        val_fraction: Fraction of rows placed in the validation frame when
            ``split`` is 1. The row count is truncated towards zero.

    Returns:
        Depending on the flags:
          * ``split=0, weights=0``: ``df``
          * ``split=0, weights=1``: ``(df, class_weights)``
          * ``split=1, weights=0``: ``(df_train, df_val)``
          * ``split=1, weights=1``: ``(df_train, df_val, class_weights)``
        ``class_weights`` is a dict mapping the position of each class in
        the sorted unique labels to its weight.

    Raises:
        ValueError: If a split is requested and ``val_fraction`` is not
            within ``[0, 1]``.
    """
    if split == 1 and not 0 <= val_fraction <= 1:
        raise ValueError(
            f"val_fraction must be between 0 and 1, got {val_fraction!r}"
        )

    df = pd.read_csv(path_y)

    if y_label == 1:
        df['class_number'] = df['class_number'].astype(str)

    if shuffle == 1:
        df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    class_weights = None
    if weights == 1:
        labels = df['class_number']
        # Keyword arguments are mandatory in recent scikit-learn releases
        # and are accepted by older ones as well.
        balanced = class_weight.compute_class_weight(
            class_weight='balanced',
            classes=np.unique(labels),
            y=labels,
        )
        # Keys are positions in the sorted unique labels, for however many
        # classes the data contains.
        # NOTE: with string labels the sort is lexicographic, so positions
        # only coincide with numeric class ids while there are <= 10 classes.
        class_weights = dict(enumerate(balanced))

    if split == 0:
        if weights == 1:
            return df, class_weights
        return df

    # The first `val_fraction` of the (possibly shuffled) rows is held out
    # for validation; everything after it is used for training.
    split_idx = int(df.shape[0] * val_fraction)
    df_train = df.iloc[split_idx:, :]
    df_val = df.iloc[:split_idx, :]

    if weights == 1:
        return df_train, df_val, class_weights
    return df_train, df_val
if __name__ == "__main__":
    # Manual smoke run: load the labels, show the default train/validation
    # split and plot the class distribution of each part.
    labels_csv = "./mldata/train_y.csv"

    df_train, df_val = load_to_dataframe(labels_csv)
    print(df_train, df_val)

    # Imported here so the plotting module is only needed when this file is
    # executed as a script.
    from data_exploration import plot_classes

    for frame in (df_train, df_val):
        plot_classes(frame, 'class_number')

    # Also exercise the unsplit path, which returns the whole frame.
    df = load_to_dataframe(labels_csv, split=0)