Skip to content

Commit

Permalink
Adding data files and R scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
iankurgarg committed Apr 13, 2017
0 parents commit 301df96
Show file tree
Hide file tree
Showing 26 changed files with 73,472 additions and 0 deletions.
27 changes: 27 additions & 0 deletions Rscripts/eda.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Exploratory data analysis of the subject-1 PSD training file:
#   1. per-class density plots for a few selected feature columns,
#   2. a listing of strongly correlated feature pairs,
#   3. the covariance matrix and the per-column variances.

# NOTE(review): the label column is called `X7.00E.00`, which looks like a data
# value (7.00E+00) that read.csv() promoted to a column name. Confirm whether
# the CSV really has a header row; if not, it should be read with
# header = FALSE (and the label column addressed by position instead).
dataset.test1 <- read.csv("train_subject1_psd01.csv")

# Overlay the kernel density of one feature column for each class label.
#
# data      : data frame holding the features and the label column.
# feature   : column index (or name) of the feature to plot.
# labels    : class labels to compare; one curve is drawn per label.
# label_col : name of the column that stores the class label.
#
# The axis limits are computed from *all* curves so that none of them is
# clipped, and every class gets its own colour plus a legend entry.
# Returns the list of density objects invisibly.
plot_class_densities <- function(data, feature, labels = c(7, 2, 3),
                                 label_col = "X7.00E.00") {
  densities <- lapply(labels, function(lbl) {
    density(data[data[[label_col]] == lbl, feature])
  })
  x_range <- range(unlist(lapply(densities, function(d) d$x)))
  y_range <- range(unlist(lapply(densities, function(d) d$y)))
  colours <- seq_along(labels)

  plot(densities[[1]], xlim = x_range, ylim = y_range, col = colours[1],
       main = paste("Density of feature", feature, "by class"))
  for (k in seq_along(densities)[-1]) {
    lines(densities[[k]], col = colours[k])
  }
  legend("topright", legend = paste("class", labels), col = colours, lty = 1)
  invisible(densities)
}

# Same three features the original script inspected.
# (Previously disabled experiment: lines(density(dataset.test1[, 20])).)
for (feature in c(1, 50, 72)) {
  plot_class_densities(dataset.test1, feature)
}

# Pairwise correlations between all columns except column 97.
correlationMatrix <- cor(dataset.test1[, -97])
correlationDataframe <- data.frame(correlationMatrix)

# Report every pair (i < j) whose absolute correlation exceeds the threshold.
# which() silently drops NA entries (cor() yields NA for zero-variance
# columns), so a constant column no longer aborts the scan.
cor_threshold <- 0.6
strong_pairs <- which(
  upper.tri(correlationMatrix) & abs(correlationMatrix) > cor_threshold,
  arr.ind = TRUE
)
# which() walks the matrix column by column; re-sort so pairs are printed in
# the row-then-column order of the original nested loop.
strong_pairs <- strong_pairs[
  order(strong_pairs[, "row"], strong_pairs[, "col"]), , drop = FALSE
]
for (k in seq_len(nrow(strong_pairs))) {
  i <- strong_pairs[k, "row"]
  j <- strong_pairs[k, "col"]
  print(paste(i, j, correlationMatrix[i, j]))
}

# Full covariance matrix, then the variance of each column on its own.
var(dataset.test1)
vapply(dataset.test1, var, numeric(1))
263 changes: 263 additions & 0 deletions data/.Rapp.history
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
# ---- Leftover arithmetic --------------------------------------------------
# q, c and q2 are not defined anywhere in the visible history.
# NOTE(review): p +/- 1.96 * q has the shape of a 95% normal interval -
# confirm against the session that defined these values.
q
c*q2
p = c*q2
p + (1.96*q)
p - (1.96*q)
p + (1.96*q)
# ---- Murder-rate regression on state.x77 ----------------------------------
# Keep five columns of state.x77, split rows 1-25 / 26-50 into train / test,
# and fit a linear model of Murder on the other four columns.
states <- as.data.frame(#
state.x77[,c("Murder","Population",#
"Illiteracy", "Income", "Frost")])
states
state
state.x77
dim(states)
t(states[1,])
t(states[1,])#
dtrain <- states[1:25,]#
dtest <- states[26:50,]
murderModel <- lm (Murder ~ Population + Illiteracy #
+ Income + Frost, data=dtrain)
summary (murderModel)
# ---- Getting gvlma loaded and called --------------------------------------
# Several entries here are failed attempts: `import gvlma` is not R syntax and
# `libarary` is a misspelling of `library`.
gvlma
import gvlma
library(gvlma)
install.packages('gvlma')
libarary(gvlma)
library(gvlma)
gvlma(lm(Murder ~ Population + Illiteracy #
+ Income + Frost), data)
data
states <- as.data.frame(#
state.x77[,c("Murder","Population",#
"Illiteracy", "Income", "Frost")])#
dim(states)#
t(states[1,])#
dtrain <- states[1:25,]#
dtest <- states[26:50,]
gvlma(lm(Murder ~ Population + Illiteracy #
+ Income + Frost), dtrain)
dtrain
gvlma(lm(Murder ~ Population + Illiteracy + Income + Frost), dtrain)
gvlma((Murder ~ Population + Illiteracy + Income + Frost), dtrain)
# ---- Diagnostics from the car package and qqplot experiments --------------
# The dtrain[...] entries below try different ways of selecting several
# columns; dtrain[c('Population','Illiteracy')] is the form that takes a
# vector of column names.
install.packages('car')
library(car)
crPlots
crPlots(murderModel)
summary(murderModel)
qqplot(dtrain)
qqplot(dtrain['Murder'])
dtrain['Murder']
qqplot(dtrain['Population','Illiteracy','Income','Frost'], dtrain['Murder'])
train['Population','Illiteracy','Income','Frost']
dtrain['Population','Illiteracy','Income','Frost']
dtrain['Population','Illiteracy','Income']
dtrain['Population']
dtrain['Population','Illiteracy']
dtrain['Population. Illiteracy']
dtrain['Population, Illiteracy']
dtrain['Population + Illiteracy']
dtrain[c('Population','Illiteracy')]
qqplot(dtrain[c('Population','Illiteracy','Income','Frost')],dtrain['Murder'])
qqplot(dtrain['Income'], dtrain['Murder'])
dtrain['Income']
# Store the gvlma result (note: `ans` is the name chosen here) and inspect it,
# then run further checks on murderModel.
gvlma((Murder ~ Population + Illiteracy + Income + Frost), dtrain)
ans = gvlma((Murder ~ Population + Illiteracy + Income + Frost), dtrain)
summary(ans)
plot(ans)
summary(deletion.gvlma(ans))
plot(ans)
summary(ans)
durbinWatsonTest(murderModel)
crPlots(murderModel)
summary(ans)
vif(murderModel)
vif(dtrain)
vif(murderModel)
outlierTest(murderModel)
# ---- chisq.test experiments -----------------------------------------------
# 'Muder' is a misspelled column name; the next entry uses 'Murder'.
library(MASS)
chisq.test(dtrain)
chisq.test(dtrain['Muder'])
chisq.test(dtrain['Murder'])
chisq.test(dtrain['Population'])
??chisq.test
?chisq.test
# ---- Scratch arithmetic and normal densities ------------------------------
# `a = [95,85,90]` is not valid R; the following entry builds the vector with
# c(). P1 and P2 are the normal densities at 95 under the sample mean/sd of
# a and b; the final ratio weights them by 3 and 7.
2^2
2**2
2**3
a = [95,85,90]
a = c(95,85,90)
a
mean(a)
sd(a)
pnorm(95,mean(a), sd(a))
exp(-0.5)/(sqrt(2*pi)*5)
var(a)
dnorm(95,mean(a), sd(a))
b = c(125, 100,70,120,60,220,75)
dnorm(95, mean(a), sd(a))
dnorm(95, mean(b), sd(b))
P2 = dnorm(95, mean(b), sd(b))
P1 = dnorm(95, mean(a), sd(a))
(P1*3)/(P2*7)
# ---- Eight 2-D points: plotting, manual centroids, kmeans ------------------
# The 16 numbers are reshaped row-wise into an 8 x 2 matrix `d`. Note that `t`
# is used as a variable name here although t() is also the transpose function.
c(c(2,10), c(2,5), c(8,4), c(5,8), c(7,5), c(6,4), c(1,2), c(4,9))
t = c(c(2,10), c(2,5), c(8,4), c(5,8), c(7,5), c(6,4), c(1,2), c(4,9))
t
cast(t)
matrix(t, ncol=2)
matrix(t, ncol=2, byrow=TRUE)
d = matrix(t, ncol=2, byrow=TRUE)
d
plot(d)
# `color=` and 'bluw' are mistaken attempts; the entries after them use `col=`
# and 'blue'.
points(2,10, color='red')
points(2,10)
points(2,10, col='red')
points(5,8, col='bluw')
points(5,8, col='blue')
points(1,2, col='green')
d[1]
d[1,]
d[c(3,4,5,6,8),]
# Column means of selected row subsets; rowmean/rowmeans/means are attempts
# made before the session settles on colMeans(). `d[c(1,),]` does not parse.
mean(d[c(3,4,5,6,8),])
rowmean(d[c(3,4,5,6,8),])
rowmeans(d[c(3,4,5,6,8),])
help(means)
??means
colMeans(d[c(3,4,5,6,8),])
colMeans(d[c(1),])
colMeans(d[c(1,),])
d[1, ]
c(1)
d[c(1), ]
colMeans(d[c(1), ])
colMeans(d[c(2,7), ])
colMeans(d[c(1,8), ])
colMeans(d[c(3,4,5,6), ])
colMeans(d[c(2,7), ])
# kmeans on d; `ceners=` is a misspelling of `centers=`, fixed in the next
# entry, which seeds the clustering with rows 1, 4 and 7 of d.
kmeans(d)
kmeans(d, 3)
kmeans(d, ceners=d[c(1,4,7), ])
kmeans(d, centers=d[c(1,4,7), ])
colMeans(d[c(1,4,8), ])
colMeans(d[c(3,5,6), ])
# Five 2-D points. matrix() fills column by column, so the first five numbers
# are the x coordinates and the last five the y coordinates.
# (The variable is named `c`; calls to the function c() keep working because R
# looks up functions separately when a name is used in call position.)
c <- c(1, 2, 2, 3, 5, 1, 1, 3, 4, 3)
c
matrix(c, ncol = 2)
d <- matrix(c, ncol = 2)

# Worked example of the distance for the first two points.
d[1, ]
d[2, ]
d[1, ] - d[2, ]
abs(d[1, ] - d[2, ])
max(abs(d[1, ] - d[2, ]))

# Max-norm (L-infinity) distance between two points: the largest absolute
# coordinate-wise difference.
#
# p1, p2 : numeric vectors of equal length.
# Returns a single number.
maxnorm <- function(p1, p2) {
  max(abs(p1 - p2))
}

# First allocation has the wrong shape and is overwritten just below; it is
# kept so the session output stays the same.
p <- matrix(NA, nrow = 5, ncol = 2)
p

# Pairwise max-norm distance matrix, sized from the data instead of a
# hard-coded 5. Both loops are now properly closed; in the original the outer
# loop was missing its closing brace.
n_points <- nrow(d)
p <- matrix(NA, nrow = n_points, ncol = n_points)
for (i in seq_len(n_points)) {
  for (j in seq_len(n_points)) {
    p[i, j] <- maxnorm(d[i, ], d[j, ])
  }
}
p
# ---- Hierarchical clustering attempts --------------------------------------
# Successive attempts ending with dist() output being passed to hclust();
# `distr` is a misspelling of `dist`.
hclust(d)
hclust(mtcars)
hclust(distr(mtcars))
hclust(dist(mtcars))
plot(hclust(dist(mtcars)))
# ---- Simulated normal samples and their density estimates ------------------
# d1 and d2 are redefined several times below; the last definitions are 20
# draws each from N(6, 1.5) and N(4, 0.8). `d2 = rnorm(n=1000, )` is an
# incomplete call.
d1 = rnorm(1000,mean=6, sd=1.5)
d1
plot(d1)
plot(1:1000, d1)
plot(density(d1))
d2 = rnorm(n=1000, )
d1 = rnorm(n=20, mean=6, sd=1.5)
d2 = rnorm(n=20, mean=4, sd=0.8)
plot(density(d1), col='red')
plot(density(d2), col='blue')
plot(density(d1), col='red')
# `true` is not an R constant; the next entry uses TRUE.
par(new=true)
par(new=TRUE)
plot(density(d2), col='blue')
density(d1)
density(d1)*density(d2)
density(d1)
# Overlaying with lines() instead of par(new=TRUE).
plot(density(d1), col='red')
lines(density(d2), col='blue')
type(density(d1))
typeof(density(d1))
help(dnorm)
dnorm(mean=0, sd = 1)
dnorm(20, mean=0, sd = 1)
curve(d1)
curve(density(d1))
curve(rnorm(20, 6, 1.5))
# ---- Poking at the components of a density object --------------------------
# Positional and `$` access to the list returned by density(); the plot at the
# end of this run draws its x component against its y component.
density(d1)[0]
density(d1)[1]
density(d1)[2]
density(d1)[3]
density(d1)[4]
density(d1)[5]
density(d1)[6]
density(d1)$data
density(d1)$data.name
plot(density(d1)[1])
plot(density(d1)[1], density(d1)[2])
density(d1)
density(d1)[2]
plot(1:512, density(d1)[1])
plot(1:512, density(d1)[2])
plot(0:512, density(d1)[1])
dim(512, density(d1)[1])
dim(density(d1)[1])
length(density(d1)[1])
(density(d1)[1])
(density(d1)[2])
plot(density(d1)[1]$x, density(d1)[2]$y)
# Checks whether the two density estimates were evaluated on the same x grid.
density(d2)[1]$x
density(d1)[1]$x
density(d2)[1]$x
density(d1)[1]$x == density(d2)[1]$x
sum(density(d1)[1]$x == density(d2)[1]$x)
# Observed data points and 10,000 draws from a standard normal prior.
obs <- c(0.4, 0.5, 0.8, 0.1)
pri <- rnorm(10000, 0, 1)

# Likelihood of the observations under a normal model, evaluated at each
# candidate mean.
#
# theta        : numeric vector of candidate means.
# observations : numeric vector of data points; defaults to the global `obs`
#                (looked up when the function is called).
# obs_sd       : standard deviation of the normal model; defaults to 0.5.
#
# Returns a numeric vector the same length as `theta`, where each element is
# the product of the normal densities of all observations at that mean.
# vapply() guarantees a numeric result even for empty `theta`, where sapply()
# would return an empty list.
likfun <- function(theta, observations = obs, obs_sd = 0.5) {
  vapply(
    theta,
    function(t) prod(dnorm(observations, mean = t, sd = obs_sd)),
    numeric(1)
  )
}
# ---- Likelihood of every prior draw ----------------------------------------
tmp <- likfun(pri)
tmp
# ---- Weighted resampling with n = 20 ----------------------------------------
# d1 is drawn from N(mean.pri, sd.pri); tmp holds the N(mean.likli, sd.likli)
# density at each draw and is used as the sampling weight for `post`.
# The first sample() call pairs 10,000 prior draws with 20 weights; the entry
# after length(pri) samples from d1 instead.
n = 20
mean.pri = 4
mean.likli = 6
sd.pri = 0.8
sd.likli = 1.5
d1 = rnorm(n, mean.pri, sd.pri)
tmp = dnorm(d1, mean.likli, sd.likli)
tmp
post = sample(pri, 20, prob = tmp)
length(pri)
post = sample(d1, 20, prob = tmp)
post
plot(density(post))
density(post)
mean(post)
sd(post)
# sigma = 1 / (n / sd.likli^2 + 1 / sd.pri^2), then its square root.
# NOTE(review): this has the form of the posterior variance for a normal prior
# with a normal likelihood of known variance - confirm that is the intent.
sigma = (n/(sd.likli^2)) + (1/(sd.pri)^2)
sigma = 1/sigma
sigma
sqrt(sigma)
# ---- Same resampling with 10,000 draws --------------------------------------
# `temp` is not defined in the visible history; the next entry plots `tmp`.
d1 = rnorm(10000, mean.pri, sd.pri)
tmp = dnorm(d1, mean.likli, sd.likli)
tmp
plot(temp)
plot(tmp)
plot(density(tmp))
post = sample(d1, 20, prob = tmp)
plot(density(post))
var(post)
sd(post)
post = sample(d1,5000, prob = tmp)
sd(post)
# ---- Loading a project data file --------------------------------------------
# The working directory is given as a relative path, so it depends on where
# the session was started. A bare `ls` prints the function; `ls()` lists
# objects.
setwd('Google Drive/Courses/Sem 2/CSC 591 - ML/Project/data/')
ls
ls()
d1 = read.csv(file='test_subject1_psd04.csv')
summary(d1)
names(d1)
Loading

0 comments on commit 301df96

Please sign in to comment.