-
Notifications
You must be signed in to change notification settings - Fork 1
/
glvq_utilities.py
138 lines (103 loc) · 4.57 KB
/
glvq_utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#
# Utility functions (distance metrics, sigmoid, 2D plotting) to support the Glvq class
#
#
# By: Akash Anand
########################################
import numpy as np
#
#
########################################
def distances(xData, wPrototypes, metric="squared-euclidean"):
    """
    Calculate the pairwise distances between data points and prototypes.

    Parameters:
        xData: Input data points. (n,m) matrix. n is number of data points. m is the number of
            features. Any array-like accepted by numpy.asarray.
        wPrototypes: Prototypes to which distance has to be calculated. (k,m) matrix. k is number
            of prototypes. m is the number of features (same as xData).
        metric: Distance calculation metric. {"squared-euclidean", "euclidean", "manhattan"}
    Returns:
        Distance matrix. (n,k) float matrix. Entry [i, j] is the distance between data point i
        and prototype j.
    Raises:
        ValueError: If the inputs are not 2-D, if their feature counts differ, or if the metric
            is not one of the supported names.
    """
    xData = np.asarray(xData)
    wPrototypes = np.asarray(wPrototypes)

    # Both inputs must be matrices sharing the same number of features (columns)
    if (xData.ndim != 2 or wPrototypes.ndim != 2
            or xData.shape[1] != wPrototypes.shape[1]):
        raise ValueError("Invalid inputs. Shapes for the passed arguments do not match.")

    # Reject unknown metrics up front instead of silently returning a non-array value
    supported = ("squared-euclidean", "euclidean", "manhattan")
    if metric not in supported:
        raise ValueError(
            f"Unsupported metric '{metric}'. Expected one of {supported}."
        )

    # Insert a prototype axis so that (n,1,m) - (k,m) broadcasts to (n,k,m)
    differences = np.expand_dims(xData, axis=1) - wPrototypes

    if metric == "manhattan":
        result = np.sum(np.abs(differences), axis=2)
    else:
        result = np.sum(np.power(differences, 2), axis=2)
        if metric == "euclidean":
            result = np.sqrt(result)

    return result.astype(float)
#
#
########################################
def squared_euclidean(xData, wPrototypes):
    """
    Squared euclidean distance from every data point to every prototype.

    For data point x and prototype w the value is sum((x - w) ^ 2) over the features.

    Parameters:
        xData: Input data points. (n,m) matrix. n is number of data points. m is the number of
            features.
        wPrototypes: Prototypes to which distance has to be calculated. (k,m) matrix. k is number
            of prototypes. m is the number of features (same as xData).
    Returns:
        Distance matrix. (n,k) float matrix. Distance between each of the n data points and each
        of the k prototypes.
    Raises:
        ValueError: If xData and wPrototypes do not have the same number of columns.
    """
    # Guard clause: the feature counts (second axis) have to agree
    feature_count = xData.shape[1]
    if wPrototypes.shape[1] != feature_count:
        raise ValueError("Invalid inputs. Shapes for the passed arguments do not match.")

    # (n,1,m) - (k,m) broadcasts to (n,k,m): one difference vector per point/prototype pair
    deltas = np.expand_dims(xData, 1) - wPrototypes

    # Square element-wise, then collapse the feature axis
    squared_sums = (deltas ** 2).sum(axis=2)
    return squared_sums.astype(float)
#
#
########################################
def sigmoid(xData, theta=1):
    """
    Apply the sigmoid function element-wise to the provided data.

    Sigmoid (x) = 1 / (1 + e^(-theta * x))

    Parameters:
        xData: Input data. The docstring contract is an (n,m) matrix, n data points by m features.
        theta: Theta (steepness) parameter in the calculation of sigmoid. Defaults to 1.
    Returns:
        The sigmoid of each element, with the same shape as xData.
    """
    exponent = -1 * theta * xData
    denominator = 1 + np.exp(exponent)
    return 1 / denominator
#
#
########################################
def plot2d(plotObject, figure, xData, xLabels, wData, wLabels, dimensions=(0, 1)):
    """
    Plot a 2D slice of an N dimensional dataset together with its prototypes. The slice is taken
    using the dimensions provided, else the 1st and 2nd dimensions are chosen by default.

    Parameters:
        plotObject: Matplotlib.pyplot object to be used to plot in
        figure: Name or Id for the figure.
        xData: Data to be plotted. (n, m) matrix. n is number of data points and m is the number
            of features.
        xLabels: Labels for the dataset. Used as the colour values of the data points.
        wData: Prototype data to be plotted. (k, m) matrix.
        wLabels: Labels for the prototypes. Used as the colour values of the prototype markers.
        dimensions: A pair of feature (column) indices to plot. Defaults to dimensions 0 and 1.
    Raises:
        ValueError: If dimensions does not hold exactly 2 entries, or if an entry is not a valid
            column index for both xData and wData.
    """
    if len(dimensions) != 2:
        # Exactly two axes are needed for a 2D scatter plot
        raise ValueError("Only 2 dimensions are allowed.")

    # A dimension is a column index, so it is bounded by the number of features (columns),
    # not by the total element count. Negative indices that numpy can resolve stay valid.
    n_features = min(xData.shape[1], wData.shape[1])
    for dms in dimensions:
        if not -n_features <= dms < n_features:
            # Invalid dimension passed
            raise ValueError(f"Dimension value {dms} overflows the size for the given dataset.")

    fig = plotObject.figure(figure)
    chart = fig.add_subplot(1, 2, 1)

    # Data points coloured by label; prototypes drawn on top as diamond markers
    chart.scatter(xData[:, dimensions[0]], xData[:, dimensions[1]], c=xLabels, cmap='viridis')
    chart.scatter(wData[:, dimensions[0]], wData[:, dimensions[1]], c=wLabels, marker='D')
    plotObject.show()