Skip to content

Commit 978ed51

Browse files
author
东凡
committed
modify elbow.py for better n_clusters
1 parent 1bbf6bc commit 978ed51

File tree

1 file changed

+13
-3
lines changed

1 file changed

+13
-3
lines changed

elbow.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55

66

77
def get_distance(data):
8+
"""
9+
把数据聚成多个类别,输出各个类别下的组内方差和,画出"肘型图"
10+
"""
811
distance = []
912
k = []
1013
for n_clusters in range(1, 10):
@@ -27,17 +30,24 @@ def get_distance(data):
2730

2831

2932
def get_elbow(distance):
    """
    Return the best number of clusters, read off the "elbow" of the curve.

    ``distance`` holds one within-cluster dispersion value per cluster count.
    ``distance[0]`` is taken to be the value for a single cluster, which is
    what the ``+ 2`` offset below relies on (``get_distance`` appears to start
    at ``n_clusters=1`` -- confirm if that range changes).

    The elbow is the cluster count ``k`` at which the curve drops sharply on
    the way in and barely drops afterwards, i.e. where the ratio of
    consecutive drop ratios ``(d[k-1] / d[k]) / (d[k] / d[k+1])`` is largest.

    Args:
        distance: sequence of at least three numeric dispersion values.

    Returns:
        The selected number of clusters as a plain ``int`` (always >= 2).

    Raises:
        ValueError: if fewer than three values are supplied, because the
            second-order ratio cannot be formed.
    """
    values = np.asarray(distance, dtype=float)
    if values.size < 3:
        raise ValueError(
            "get_elbow needs at least three distance values, got %d" % values.size
        )

    # Zero dispersion values yield inf/nan ratios; keep that silent, as the
    # pandas-based arithmetic this replaces did, instead of emitting warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        # drop[i] = d[i] / d[i + 1]: improvement when going from i+1 to i+2 clusters.
        drop = values[:-1] / values[1:]
        # sharpness[i] compares the drop into i+2 clusters with the drop out of it.
        sharpness = drop[:-1] / drop[1:]

    # sharpness[0] describes 2 clusters, hence the offset of 2.
    return int(np.argmax(sharpness)) + 2
3646

3747

3848
if __name__ == '__main__':
    # Demo data: 15 points in four groups near 10, 20, 30 and 40, plus
    # uniform noise in [0, 1).
    a = pd.DataFrame(np.array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4]) * 10 + np.random.rand(15))
    distance, k = get_distance(a)
    best_n_cluster = get_elbow(distance)
    # A bare expression shows nothing outside a REPL/notebook, so print the
    # result explicitly when the file is run as a script.
    print(best_n_cluster)
4353

0 commit comments

Comments
 (0)