-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathmy_KMeans.py
55 lines (41 loc) · 1.69 KB
/
my_KMeans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import pandas as pd
import numpy as np
class my_KMeans:
    """K-means clustering using Lloyd's algorithm with Euclidean distance.

    The model is fitted ``n_init`` times from independent initialisations and
    the run with the lowest inertia (sum of squared distances of samples to
    their closest centroid) is kept.
    """

    def __init__(self, n_clusters=8, init="k-means++", n_init=10,
                 max_iter=300, tol=1e-4, random_state=None):
        """Store the hyper-parameters; no computation happens here.

        Args:
            n_clusters: Number of clusters / centroids (coerced to ``int``).
            init: Seeding strategy, either ``"k-means++"`` or ``"random"``.
            n_init: Number of independent runs; the best one is kept.
            max_iter: Maximum number of centroid updates per run.
            tol: A run stops once the decrease of the inertia between two
                consecutive iterations is smaller than this value.
            random_state: Optional seed (anything accepted by
                ``np.random.default_rng``) for reproducible results.
        """
        self.n_clusters = int(n_clusters)
        self.init = init
        self.n_init = n_init
        self.max_iter = int(max_iter)
        self.tol = tol
        self.random_state = random_state
        # Built from the coerced value so that e.g. n_clusters=3.0 works.
        self.classes_ = range(self.n_clusters)
        # Centroids, array of shape (n_clusters, n_features) once fitted.
        self.cluster_centers_ = None
        # Sum of squared distances of samples to their closest cluster center.
        self.inertia_ = None

    @staticmethod
    def _as_matrix(X):
        """Convert ``X`` (DataFrame or array-like) to a 2-D float array."""
        data = np.asarray(X, dtype=float)
        if data.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (n_samples, n_features), "
                f"got {data.ndim} dimension(s)"
            )
        return data

    @staticmethod
    def _distances(data, centers):
        """Return the (n_samples, n_centers) matrix of Euclidean distances."""
        diff = data[:, np.newaxis, :] - centers[np.newaxis, :, :]
        return np.sqrt((diff ** 2).sum(axis=2))

    def _assign(self, data, centers):
        """Return (label of the closest center per sample, inertia)."""
        dists = self._distances(data, centers)
        labels = dists.argmin(axis=1)
        closest = dists[np.arange(data.shape[0]), labels]
        return labels, float((closest ** 2).sum())

    def _initial_centers(self, data, rng):
        """Pick ``n_clusters`` starting centroids according to ``self.init``."""
        n_samples = data.shape[0]
        if self.init == "random":
            # Distinct samples chosen uniformly at random.
            picked = rng.choice(n_samples, size=self.n_clusters, replace=False)
            return data[picked].copy()
        if self.init == "k-means++":
            first = rng.integers(n_samples)
            centers = [data[first]]
            # Squared distance of every sample to its closest chosen center.
            closest_sq = ((data - data[first]) ** 2).sum(axis=1)
            for _ in range(1, self.n_clusters):
                total = closest_sq.sum()
                if total > 0:
                    # Sample proportionally to the squared distance.
                    nxt = rng.choice(n_samples, p=closest_sq / total)
                else:
                    # Every sample coincides with a center already chosen.
                    nxt = rng.integers(n_samples)
                centers.append(data[nxt])
                closest_sq = np.minimum(
                    closest_sq, ((data - data[nxt]) ** 2).sum(axis=1)
                )
            return np.array(centers, dtype=float)
        raise ValueError(
            f"init must be 'k-means++' or 'random', got {self.init!r}"
        )

    def _single_run(self, data, rng):
        """Run Lloyd's algorithm once and return (centers, inertia)."""
        centers = self._initial_centers(data, rng)
        previous = None
        for _ in range(self.max_iter):
            labels, inertia = self._assign(data, centers)
            if previous is not None and previous - inertia < self.tol:
                break
            previous = inertia
            updated = centers.copy()
            for k in range(self.n_clusters):
                members = data[labels == k]
                # An empty cluster keeps its previous centroid.
                if members.shape[0]:
                    updated[k] = members.mean(axis=0)
            centers = updated
        else:
            # Iteration budget exhausted right after an update (or
            # max_iter == 0): compute the inertia of the final centers.
            _, inertia = self._assign(data, centers)
        return centers, inertia

    def _check_fitted(self):
        """Raise ``RuntimeError`` when the model has not been fitted yet."""
        if self.cluster_centers_ is None:
            raise RuntimeError("my_KMeans instance is not fitted; call fit first")

    def fit(self, X):
        """Fit the centroids to ``X``.

        Repeats the algorithm ``self.n_init`` times and keeps the run
        (``self.cluster_centers_``, ``self.inertia_``) with the lowest inertia.

        Args:
            X: pd.DataFrame (or 2-D array-like) of float features.

        Raises:
            ValueError: If ``X`` is not 2-D, has fewer samples than
                ``n_clusters``, or if ``n_clusters`` / ``n_init`` is below 1
                or ``init`` is unknown.
        """
        data = self._as_matrix(X)
        if self.n_clusters < 1:
            raise ValueError("n_clusters must be at least 1")
        if data.shape[0] < self.n_clusters:
            raise ValueError(
                f"n_samples={data.shape[0]} must be >= "
                f"n_clusters={self.n_clusters}"
            )
        runs = int(self.n_init)
        if runs < 1:
            raise ValueError("n_init must be at least 1")

        rng = np.random.default_rng(self.random_state)
        best_centers, best_inertia = None, None
        for _ in range(runs):
            centers, inertia = self._single_run(data, rng)
            if best_inertia is None or inertia < best_inertia:
                best_centers, best_inertia = centers, inertia
        self.cluster_centers_ = best_centers
        self.inertia_ = best_inertia
        return

    def predict(self, X):
        """Return, as a list, the index of the closest centroid per sample.

        Args:
            X: pd.DataFrame (or 2-D array-like) of float features.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        self._check_fitted()
        dists = self._distances(self._as_matrix(X), self.cluster_centers_)
        predictions = dists.argmin(axis=1).tolist()
        return predictions

    def transform(self, X):
        """Transform ``X`` to cluster-distance space.

        Args:
            X: pd.DataFrame (or 2-D array-like) of float features.

        Returns:
            A list with one entry per sample:
            ``[dist to centroid 1, dist to centroid 2, ...]``.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        self._check_fitted()
        dists = self._distances(
            self._as_matrix(X), self.cluster_centers_
        ).tolist()
        return dists

    def fit_predict(self, X):
        """Fit on ``X`` and return the cluster index of every sample."""
        self.fit(X)
        return self.predict(X)

    def fit_transform(self, X):
        """Fit on ``X`` and return its cluster-distance representation."""
        self.fit(X)
        return self.transform(X)