Commit
Subject-wise LDA, subject-wise scaling and PCA, and folder renaming.
LDA results have been added as comments in the LDA file. The new PCA outputs are stored in the training/pca_data_v2 and testing/pca_data_v2 folders; the dimensions of the PCA output differ per subject.
1 parent 416c4de · commit bfb1ef8
Showing 20 changed files with 55,842 additions and 10,494 deletions.
This file was deleted.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
data/Testing/pca_subject2.csv → data/testing/pca_data_v1/pca_subject2.csv (6,946 changes: 3,473 additions & 3,473 deletions)
data/Testing/pca_subject3.csv → data/testing/pca_data_v1/pca_subject3.csv (6,978 changes: 3,489 additions & 3,489 deletions)
data/testing/test_subject1_psd04.csv (mode 100644 → 100755; 3,505 changes: 3,504 additions & 1 deletion)
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import Normalizer


def removeCorrelation(input_data):
    # Columns found using correlation analysis. Check rough-sanket.py and the R code for code and results.
    cor = [24, 72]
    vec = list(input_data.columns)
    for c in cor:
        vec.remove(c)
    new_data = input_data[vec]
    return new_data
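

# A minimal sketch of how such highly correlated columns could be identified;
# the helper name and the 0.95 threshold are assumptions, not part of the
# original analysis (which lives in rough-sanket.py and the R code):
def findCorrelatedColumns(features, threshold=0.95):
    # Absolute pairwise correlations between feature columns.
    corr = features.corr().abs()
    cols = list(corr.columns)
    drop = set()
    for i, a in enumerate(cols):
        for b in cols[i + 1:]:
            if corr.loc[a, b] > threshold:
                drop.add(b)  # keep the first column of each correlated pair
    return sorted(drop)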


def runPCA(input_data, test, d):
    input_data = removeCorrelation(input_data)
    test = removeCorrelation(test)

    # Scale each sample to unit norm before PCA.
    normZ = Normalizer()
    scaledX = normZ.fit_transform(input_data.iloc[:, :-1])
    scaledTestX = normZ.transform(test)

    pca = PCA()
    pcaX = pca.fit_transform(scaledX)
    pcaX = pd.DataFrame(pcaX)
    print('Approx 98% variance explained by ' + str(d) + ' features: ' + str(pca.explained_variance_ratio_[:d].sum()))

    # The last column of the training data is the class label.
    trainY = input_data.iloc[:, -1]
    trainY = pd.DataFrame(trainY.values.reshape(len(trainY), 1))
    trainY.columns = ['Class']

    trainDataAfterPCA = pd.concat([pcaX.iloc[:, :d], trainY], axis=1)

    testDataAfterPCA = pca.transform(scaledTestX)
    testDataAfterPCA = pd.DataFrame(testDataAfterPCA)
    testDataAfterPCA = testDataAfterPCA.iloc[:, :d]

    return trainDataAfterPCA, testDataAfterPCA


def mainPCA():
    d1 = pd.read_csv('../data/Training/train_subject1_psd01.csv', header=None)
    d2 = pd.read_csv('../data/Training/train_subject1_psd02.csv', header=None)
    d3 = pd.read_csv('../data/Training/train_subject1_psd03.csv', header=None)
    input_data_s1 = pd.concat([d1, d2, d3], axis=0)
    test_s1 = pd.read_csv('../data/Testing/test_subject1_psd04.csv', header=None)

    d1 = pd.read_csv('../data/Training/train_subject2_psd01.csv', header=None)
    d2 = pd.read_csv('../data/Training/train_subject2_psd02.csv', header=None)
    d3 = pd.read_csv('../data/Training/train_subject2_psd03.csv', header=None)
    input_data_s2 = pd.concat([d1, d2, d3], axis=0)
    test_s2 = pd.read_csv('../data/Testing/test_subject2_psd04.csv', header=None)

    d1 = pd.read_csv('../data/Training/train_subject3_psd01.csv', header=None)
    d2 = pd.read_csv('../data/Training/train_subject3_psd02.csv', header=None)
    d3 = pd.read_csv('../data/Training/train_subject3_psd03.csv', header=None)
    input_data_s3 = pd.concat([d1, d2, d3], axis=0)
    test_s3 = pd.read_csv('../data/Testing/test_subject3_psd04.csv', header=None)

    # Per-subject component counts; each retains approximately 98% of the variance.
    train1, test1 = runPCA(input_data_s1, test_s1, 40)
    train2, test2 = runPCA(input_data_s2, test_s2, 50)
    train3, test3 = runPCA(input_data_s3, test_s3, 60)

    train1.to_csv('../data/Training/pca_data_v2/pca_subject1.csv', index=False)
    train2.to_csv('../data/Training/pca_data_v2/pca_subject2.csv', index=False)
    train3.to_csv('../data/Training/pca_data_v2/pca_subject3.csv', index=False)

    test1.to_csv('../data/Testing/pca_data_v2/pca_subject1.csv', index=False)
    test2.to_csv('../data/Testing/pca_data_v2/pca_subject2.csv', index=False)
    test3.to_csv('../data/Testing/pca_data_v2/pca_subject3.csv', index=False)


if __name__ == '__main__':
    mainPCA()
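
The component counts in mainPCA (40, 50, 60) are hardcoded per subject. A minimal sketch of how such counts could instead be derived from the cumulative explained-variance ratio; the chooseComponents name and the 0.98 target are assumptions, not part of this commit:

import numpy as np
from sklearn.decomposition import PCA

def chooseComponents(scaledX, target=0.98):
    # Smallest number of components whose cumulative explained variance reaches the target.
    cumulative = np.cumsum(PCA().fit(scaledX).explained_variance_ratio_)
    return int(min(np.searchsorted(cumulative, target) + 1, len(cumulative)))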
import pandas as pd
import numpy as np
import sys
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.model_selection import cross_val_score


def LDA(input_data, test, actual):
    model = LinearDiscriminantAnalysis()
    # 10-fold cross-validation on the training data; the last column is the class label.
    scores = cross_val_score(model, input_data.iloc[:, :-1], input_data.iloc[:, -1], cv=10)
    print('Cross Validation Accuracy = ' + str(scores.mean()))
    model.fit(input_data.iloc[:, :-1], input_data.iloc[:, -1])
    pred = model.predict(test)
    accuracy = sum(pred == actual) / float(len(actual))
    print('Test Accuracy for the subject is = ' + str(accuracy))


def mainRawData():
    # Loading input data (training)
    d1 = pd.read_csv('../data/Training/train_subject1_psd01.csv', header=None)
    d2 = pd.read_csv('../data/Training/train_subject1_psd02.csv', header=None)
    d3 = pd.read_csv('../data/Training/train_subject1_psd03.csv', header=None)
    input_data_s1 = pd.concat([d1, d2, d3], axis=0)

    d1 = pd.read_csv('../data/Training/train_subject2_psd01.csv', header=None)
    d2 = pd.read_csv('../data/Training/train_subject2_psd02.csv', header=None)
    d3 = pd.read_csv('../data/Training/train_subject2_psd03.csv', header=None)
    input_data_s2 = pd.concat([d1, d2, d3], axis=0)

    d1 = pd.read_csv('../data/Training/train_subject3_psd01.csv', header=None)
    d2 = pd.read_csv('../data/Training/train_subject3_psd02.csv', header=None)
    d3 = pd.read_csv('../data/Training/train_subject3_psd03.csv', header=None)
    input_data_s3 = pd.concat([d1, d2, d3], axis=0)

    # Loading test data (all subjects)
    test_s1 = pd.read_csv('../data/Testing/test_subject1_psd04.csv', header=None)
    actual_s1 = pd.read_csv('../data/Testing/ActualLables/labels_subject1_psd.csv', header=None)
    actual_s1 = actual_s1[0]

    test_s2 = pd.read_csv('../data/Testing/test_subject2_psd04.csv', header=None)
    actual_s2 = pd.read_csv('../data/Testing/ActualLables/labels_subject2_psd.csv', header=None)
    actual_s2 = actual_s2[0]

    test_s3 = pd.read_csv('../data/Testing/test_subject3_psd04.csv', header=None)
    actual_s3 = pd.read_csv('../data/Testing/ActualLables/labels_subject3_psd.csv', header=None)
    actual_s3 = actual_s3[0]

    LDA(input_data_s1, test_s1, actual_s1)
    LDA(input_data_s2, test_s2, actual_s2)
    LDA(input_data_s3, test_s3, actual_s3)


# Results:
# Cross Validation Accuracy = 0.69484335286
# Test Accuracy for the subject is = 0.714611872146
#
# Cross Validation Accuracy = 0.539059371463
# Test Accuracy for the subject is = 0.581221198157
#
# Cross Validation Accuracy = 0.461690546146
# Test Accuracy for the subject is = 0.491685779817
||
def mainPCAData(): | ||
input_data_s1 = pd.read_csv('../data/Training/pca_data_v2/pca_subject1.csv') | ||
|
||
input_data_s2 = pd.read_csv('../data/Training/pca_data_v2/pca_subject2.csv') | ||
|
||
input_data_s3 = pd.read_csv('../data/Training/pca_data_v2/pca_subject3.csv') | ||
|
||
#Loading Test data (all subjects) | ||
test_s1 = pd.read_csv('../data/Testing/pca_data_v2/pca_subject1.csv') | ||
actual_s1 = pd.read_csv('../data/Testing/ActualLables/labels_subject1_psd.csv', header=None) | ||
actual_s1 = actual_s1[0] | ||
|
||
test_s2 = pd.read_csv('../data/Testing/pca_data_v2/pca_subject2.csv') | ||
actual_s2 = pd.read_csv('../data/Testing/ActualLables/labels_subject2_psd.csv', header=None) | ||
actual_s2 = actual_s2[0] | ||
|
||
test_s3 = pd.read_csv('../data/Testing/pca_data_v2/pca_subject3.csv') | ||
actual_s3 = pd.read_csv('../data/Testing/ActualLables/labels_subject3_psd.csv', header=None) | ||
actual_s3 = actual_s3[0] | ||
|
||
LDA(input_data_s1, test_s1, actual_s1) | ||
LDA(input_data_s2, test_s2, actual_s2) | ||
LDA(input_data_s3, test_s3, actual_s3) | ||
|
||
# Results: | ||
#Cross Validation Accuracy = 0.719154247244 | ||
#Test Accuracy for the subject is = 0.732591324201 | ||
#Cross Validation Accuracy = 0.565501871535 | ||
#Test Accuracy for the subject is = 0.613479262673 | ||
#Cross Validation Accuracy = 0.482587359569 | ||
#Test Accuracy for the subject is = 0.505447247706 | ||
|
||
if __name__ == '__main__': | ||
mainPCAData() |
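
QuadraticDiscriminantAnalysis is imported above but never used. A minimal sketch of swapping it into the same evaluation path; the QDA helper name and the reg_param value are assumptions, and some regularization may be needed since QDA fits a separate covariance matrix per class:

def QDA(input_data, test, actual):
    # Same protocol as LDA above, with a quadratic decision boundary.
    model = QuadraticDiscriminantAnalysis(reg_param=0.1)  # reg_param value is a guess
    scores = cross_val_score(model, input_data.iloc[:, :-1], input_data.iloc[:, -1], cv=10)
    print('Cross Validation Accuracy = ' + str(scores.mean()))
    model.fit(input_data.iloc[:, :-1], input_data.iloc[:, -1])
    pred = model.predict(test)
    print('Test Accuracy for the subject is = ' + str(sum(pred == actual) / float(len(actual))))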