-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
70 lines (63 loc) · 2.2 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
if __name__ == "__main__":
    # Script entry point: fit a PCA -> LDA -> k-NN activity classifier on
    # data/train.csv and evaluate it on data/test.csv.
    # The `# %%` markers look like interactive cell delimiters and are kept.

    # %%
    # Load data and show distribution of activities
    # and importing the libraries
    #
    import pandas as pd
    import numpy as np
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score, confusion_matrix

    import tools
    import plots

    train = pd.read_csv("data/train.csv")

    activity_counts = train["Activity"].value_counts()
    activities = {
        "index": activity_counts.index,
        "count": activity_counts.values,
    }
    plots.activities_distribution(activities)

    # Every column except the subject id and the target is used as a feature.
    x_train = train.drop(["subject", "Activity"], axis=1).values

    # Rename the activities to numbers
    labels = {
        'LAYING': 0,
        'SITTING': 1,
        'STANDING': 2,
        'WALKING': 3,
        'WALKING_DOWNSTAIRS': 4,
        'WALKING_UPSTAIRS': 5,
    }
    y_train = train["Activity"].map(labels)
    # Series.map yields NaN for names missing from `labels`; fail here with a
    # clear message instead of deep inside the plotting/fitting code.
    if y_train.isna().any():
        raise ValueError(
            "data/train.csv contains an Activity value missing from `labels`"
        )

    # %%
    # [task] Apply PCA to the data and visualize the results
    #
    n_eigenvectors = 154
    # tools.PCA is a project helper; its result is applied as `x @ proj.T`,
    # so it is presumably shaped (n_eigenvectors, n_features) - TODO confirm.
    pca_proj = tools.PCA(x_train, n_eigenvectors)
    pca_data = x_train @ pca_proj.T
    # Plot the first three principal components
    plots.scatter_with_labels(pca_data, y_train, list(labels.keys()))

    # %%
    # [task] Apply LDA to the data and visualize the results
    #
    # The class count is derived from `labels` so the two cannot drift apart.
    lda_proj = tools.LDA(pca_data, y_train, n_classes=len(labels))
    lda_data = pca_data @ lda_proj.T
    plots.scatter_with_labels(lda_data, y_train, list(labels.keys()))

    # %%
    # [task] Use k-nearest neighbors to predict the activity of the test dataset
    #
    # 1. Create a KNN classifier
    knn = KNeighborsClassifier(n_neighbors=20)
    # 2. Fit the classifier to the training data
    knn.fit(lda_data, y_train)

    # %%
    # [task] Use the classifier to predict the activity of the test dataset
    test = pd.read_csv("data/test.csv")
    x_test = test.drop(["subject", "Activity"], axis=1).values
    y_test = test["Activity"].map(labels)
    if y_test.isna().any():
        raise ValueError(
            "data/test.csv contains an Activity value missing from `labels`"
        )
    y_test = y_test.values

    # Project the test features with the projections computed from the
    # training data (PCA first, then LDA), mirroring the training pipeline.
    x_test = x_test @ pca_proj.T
    x_test = x_test @ lda_proj.T
    y_pred = knn.predict(x_test)

    # Show the confusion matrix
    plots.confusion_matrix(confusion_matrix(y_test, y_pred))

    # Show the accuracy of the classifier
    # Reuse y_pred: knn.score(x_test, y_test) would run the whole k-NN
    # prediction a second time only to compute this same accuracy.
    print("Accuracy:", accuracy_score(y_test, y_pred))