forked from TheAlgorithms/Python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathk_nearest_neighbours.py
58 lines (47 loc) · 1.86 KB
/
k_nearest_neighbours.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from collections import Counter
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
# Load the iris dataset shipped with scikit-learn and keep the feature matrix,
# the integer class labels and the human-readable class names at module level.
data = datasets.load_iris()
X, y = np.array(data["data"]), np.array(data["target"])
classes = data["target_names"]

# Hold out part of the samples for testing. No split options (and no
# random_state) are passed here, so the library defaults apply.
X_train, X_test, y_train, y_test = train_test_split(X, y)
def euclidean_distance(a, b) -> float:
    """
    Return the Euclidean distance between two points.

    Params:
    :a: Coordinates of the first point (any array-like of numbers)
    :b: Coordinates of the second point, same length as ``a``

    Returns:
    The distance as a built-in ``float``.

    >>> euclidean_distance([0, 0], [3, 4])
    5.0
    >>> euclidean_distance([1, 2, 3], [1, 8, 11])
    10.0
    """
    # np.linalg.norm returns a NumPy scalar; NumPy >= 2 prints those as
    # ``np.float64(5.0)``, which breaks the doctests above. Converting to a
    # plain float keeps the value and gives a stable repr on every version.
    return float(np.linalg.norm(np.asarray(a) - np.asarray(b)))
def classifier(train_data, train_target, classes, point, k=5):
    """
    Classify ``point`` with the k-nearest-neighbours (KNN) algorithm.

    The k training points closest to ``point`` (ranked in ascending order of
    euclidean distance) are found, and the class that occurs most often among
    them is returned.

    Params:
    :train_data: Set of points that are classified into two or more classes
    :train_target: List of classes (indices into ``classes``) in the order of
        the train_data points
    :classes: Labels of the classes
    :point: The data point that needs to be classified
    :k: Number of nearest neighbours that vote; must be at least 1

    Returns:
    The entry of ``classes`` selected by the winning class index.

    Raises:
    ValueError: if ``k`` is smaller than 1 or there is no training data.

    >>> X_train = [[0, 0], [1, 0], [0, 1], [0.5, 0.5], [3, 3], [2, 3], [3, 2]]
    >>> y_train = [0, 0, 0, 0, 1, 1, 1]
    >>> classes = ['A','B']; point = [1.2,1.2]
    >>> classifier(X_train, y_train, classes,point)
    'A'
    >>> classifier(X_train, y_train, classes, point, k=0)
    Traceback (most recent call last):
        ...
    ValueError: k must be a positive integer, got 0
    """
    # A negative k would make the slice below drop neighbours from the far end
    # instead of keeping the nearest ones, and k == 0 leaves nothing to vote.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # Convert the query point once rather than once per training sample.
    target = np.asarray(point)

    # (distance, label) pairs sorted ascending: distance decides the order and
    # exact distance ties fall back to comparing the labels.
    neighbours = sorted(
        (np.linalg.norm(np.asarray(sample) - target), label)
        for sample, label in zip(train_data, train_target)
    )
    if not neighbours:
        raise ValueError("train_data and train_target must not be empty")

    # Labels of the 'k' points with the least distances.
    votes = [label for _, label in neighbours[:k]]

    # The most commonly occurring class among them is the class into which
    # the point is classified.
    result = Counter(votes).most_common(1)[0][0]
    return classes[result]
if __name__ == "__main__":
    # Demo: classify one hand-written four-feature sample against the
    # training split built at module level and print the predicted class name.
    sample = [4.4, 3.1, 1.3, 1.4]
    prediction = classifier(X_train, y_train, classes, sample)
    print(prediction)