-
Notifications
You must be signed in to change notification settings - Fork 0
/
Utility.py
27 lines (22 loc) · 1.03 KB
/
Utility.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from sklearn.model_selection import train_test_split
def get_train_test_X_y(df_train, df_test, n_features=100):
    """Extract feature matrices and target vectors from train/test frames.

    The feature columns are named ``"F0"`` through ``"F{n_features - 1}"`` and
    the label column is named ``"target"``; both frames must contain all of
    them (a missing column surfaces as the indexing error raised by the
    frame itself).

    Args:
        df_train: Frame holding the training rows (expected to support
            column-list indexing and ``.to_numpy()``, e.g. a pandas
            DataFrame).
        df_test: Frame holding the test rows, with the same columns.
        n_features: Number of leading ``F{i}`` columns to select.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)`` of arrays produced by
        ``.to_numpy()``.
    """
    # Build the column list once so both frames are guaranteed to be sliced
    # with exactly the same features in the same order.
    feature_columns = [f"F{i}" for i in range(n_features)]

    X_train = df_train[feature_columns].to_numpy()
    y_train = df_train["target"].to_numpy()
    X_test = df_test[feature_columns].to_numpy()
    y_test = df_test["target"].to_numpy()
    return X_train, X_test, y_train, y_test
def _train_test_splitting(df, train_size=0.7, random_state=None):
    """Split ``df`` into train and test parts, stratifying as finely as possible.

    Stratification is attempted in order of decreasing strictness:

    1. jointly on the ``"group"`` and ``"target"`` columns,
    2. on ``"group"`` only,
    3. on ``"target"`` only,
    4. no stratification at all.

    Whenever ``train_test_split`` raises ``ValueError`` for an attempt, the
    error message is printed and the next, looser option is tried.

    Args:
        df: Frame to split; must contain ``"group"`` and ``"target"`` columns.
        train_size: Passed through to ``train_test_split`` as ``train_size``.
        random_state: Passed through to ``train_test_split``; the default
            ``None`` keeps the original unseeded behavior, a fixed value
            makes the split reproducible.

    Returns:
        A tuple ``(df_train, df_test)``.

    Raises:
        KeyError: If a stratification column is missing from ``df``. Only
            ``ValueError`` triggers the fallback chain.
    """
    # Ordered from strictest to loosest; each entry is used to index ``df``.
    stratify_candidates = (["group", "target"], "group", "target")

    for columns in stratify_candidates:
        try:
            df_train, df_test = train_test_split(
                df,
                train_size=train_size,
                stratify=df[columns],
                random_state=random_state,
            )
        except ValueError as e:
            # Stratification not feasible with these columns; report the
            # reason and fall through to the next, looser candidate.
            print(e)
        else:
            return df_train, df_test

    # Last resort: plain random split without stratification.
    df_train, df_test = train_test_split(
        df, train_size=train_size, random_state=random_state
    )
    return df_train, df_test