forked from vsmolyakov/ml_algo_in_depth
-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot_smote_regular.py
49 lines (40 loc) · 1.56 KB
/
plot_smote_regular.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE
def plot_resampling(ax, X, y, title, *, xlim=(-6, 8), ylim=(-6, 6)):
    """Scatter a two-class data set on ``ax`` and tidy up the axes.

    Rows of ``X`` whose label in ``y`` equals 0 are drawn with marker ``"o"``
    and rows whose label equals 1 with marker ``"s"``; the first two columns
    of ``X`` are used as the x and y coordinates.

    Args:
        ax: Axes-like object to draw on. It must provide ``scatter``,
            ``set_title``, ``spines``, ``get_xaxis``, ``get_yaxis``,
            ``set_xlim`` and ``set_ylim``.
        X: Array supporting ``X[mask, column]`` indexing, with at least two
            columns.
        y: Array of class labels aligned with the rows of ``X``; comparing it
            with ``==`` must yield a boolean mask.
        title: Title placed on the axes.
        xlim: ``(left, right)`` limits passed to ``ax.set_xlim``. Pass
            ``None`` to skip the call and keep whatever limits ``ax`` has.
        ylim: ``(bottom, top)`` limits passed to ``ax.set_ylim``. Pass
            ``None`` to skip the call.

    Returns:
        A ``(c0, c1)`` tuple holding the objects returned by ``ax.scatter``
        for class 0 and class 1, suitable as legend handles.
    """
    # (label value, legend text, marker) for each class, in drawing order.
    class_styles = ((0, "Class #0", "o"), (1, "Class #1", "s"))
    handles = []
    for label, text, marker in class_styles:
        mask = y == label
        handles.append(
            ax.scatter(X[mask, 0], X[mask, 1],
                       label=text, marker=marker, alpha=0.5)
        )
    ax.set_title(title)

    # Drop the top/right frame lines and keep ticks on the remaining sides.
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()

    # Detach the remaining spines from the data area.
    for side in ("left", "bottom"):
        ax.spines[side].set_position(("outward", 10))

    # The limits used to be hard-coded; the defaults keep the old view
    # while letting callers widen it or opt out for other data sets.
    if xlim is not None:
        ax.set_xlim(xlim)
    if ylim is not None:
        ax.set_ylim(ylim)

    c0, c1 = handles
    return c0, c1
def main():
    """Show an imbalanced toy data set before and after SMOTE oversampling.

    Generates an 80-sample, 20-feature, two-class data set with class
    weights 0.3 / 0.7, oversamples it with SMOTE, projects both versions
    onto the two PCA components fitted on the original data, and displays
    them side by side in a blocking matplotlib window.
    """
    # One seed for both the data generator and SMOTE: with SMOTE left
    # unseeded the synthetic points (and so the right-hand panel) changed
    # on every run even though the data set itself was fixed.
    seed = 10

    # generate the dataset
    X, y = make_classification(
        n_classes=2, class_sep=2, weights=[0.3, 0.7],
        n_informative=3, n_redundant=1, flip_y=0,
        n_features=20, n_clusters_per_class=1,
        n_samples=80, random_state=seed,
    )

    # fit PCA for visualization
    pca = PCA(n_components=2)
    X_vis = pca.fit_transform(X)

    # apply regular SMOTE in the full feature space, then reuse the PCA
    # fitted on the original data so both panels share the same projection
    method = SMOTE(random_state=seed)
    X_res, y_res = method.fit_resample(X, y)
    X_res_vis = pca.transform(X_res)

    # generate plots; the figure handle itself is not needed
    _, (ax1, ax2) = plt.subplots(1, 2)
    c0, c1 = plot_resampling(ax1, X_vis, y, 'Original')
    plot_resampling(ax2, X_res_vis, y_res, 'SMOTE')
    # Only the left panel carries the legend; the markers are the same in both.
    ax1.legend((c0, c1), ('Class #0', 'Class #1'))
    plt.tight_layout()
    plt.show()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()