# JagsCensoringExample.R -- from boboppie/kruschke-doing_bayesian_data_analysis
# This program illustrates estimation of parameters for right-censored data in
# JAGS. To produce the graphics at the end, you must have openGraphSaveGraph.R,
# plotPost.R, and HDIofMCMC.R in the same folder as this program (and tell R that this is the
# working directory). Run the program as-is to see that JAGS produces good
# estimates of the true underlying parameter values, even for censored data.
# To see what happens when you do *not* tell JAGS that the data are censored,
# just comment out the following four lines (and run it again):
# In the model specification:
# isCensored[i] ~ dinterval( y[i] , censorLimitVec[i] )
# In the dataList:
# , isCensored = as.numeric(isCensored) # JAGS dinterval needs 0,1 not T,F
# , censorLimitVec = censorLimitVec
# In the initsList:
# , y=yInit
# Notice that the resulting estimated parameters are way too low!
graphics.off()
rm(list=ls(all=TRUE))
fileNameRoot="JagsCensoringExample" # for constructing output filenames
source("openGraphSaveGraph.R")
source("plotPost.R")
require(rjags) # Kruschke, J. K. (2011). Doing Bayesian Data Analysis:
# A Tutorial with R and BUGS. Academic Press / Elsevier.
#------------------------------------------------------------------------------
# THE MODEL.
modelstring = "
model {
  for ( i in 1:N ) {
    isCensored[i] ~ dinterval( y[i] , censorLimitVec[i] )
    y[i] ~ dnorm( mu , tau )
  }
  tau <- 1/pow(sigma,2)
  sigma ~ dunif(0,100)
  mu ~ dnorm(0,1E-6)
}
" # close quote for modelstring
writeLines(modelstring,con="model.txt")
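# The dinterval line is what tells JAGS about the censoring: when y[i] is NA
# and isCensored[i] is 1, JAGS treats y[i] as a latent value constrained to lie
# above censorLimitVec[i]. As a sketch (not run), the naive model produced by
# commenting out the censoring lines listed in the header would reduce to:
# model {
#   for ( i in 1:N ) {
#     y[i] ~ dnorm( mu , tau )
#   }
#   tau <- 1/pow(sigma,2)
#   sigma ~ dunif(0,100)
#   mu ~ dnorm(0,1E-6)
# }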
#------------------------------------------------------------------------------
# THE DATA.
# For real data, censored values should be recorded as NA, not as the censoring
# limit, and the indicator value isCensored should be set to TRUE.
# Generate some random data:
set.seed(47405)
y = rnorm( n=500 )
desiredMean = 100 ; desiredSD = 15
actualMean = mean(y) ; actualSD = sd(y)
y = desiredSD*(y-actualMean)/actualSD + desiredMean
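# After this linear rescaling, the sample mean of y is exactly 100 and the
# sample SD is exactly 15; these are the values the posterior should recover.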
N = length(y)
# Now artificially right-censor the data.
# First, construct the censoring limits. There is a censoring limit defined
# for every observation, put in the vector censorLimitVec.
censorLimit = quantile( y , probs=c(pnorm(1)) ) # cuts at +1SD
censorLimitVec = rep( censorLimit , length(y) )
# Define indicator vector for which observations are censored:
isCensored = ( y > censorLimitVec ) # Note that this has T,F logical values.
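# Because censorLimit is the empirical quantile at probs=pnorm(1) (about the
# 84.1th percentile), roughly 1-pnorm(1), i.e. about 16% of the 500
# observations (around 79 of them), exceed the limit and will be censored.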
# Now cut the data above the censor limits. For real data, censored values
# should be recorded as NA, not as the censor limit, and the indicator value
# isCensored should be set to TRUE.
y[isCensored] = NA
# Specify the data in a form that is compatible with model, as a list:
dataList = list( y = y , N = N
                 , isCensored = as.numeric(isCensored) # JAGS dinterval needs 0,1 not T,F
                 , censorLimitVec = censorLimitVec
)
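# For real censored data, the same list could be assembled from, e.g., a
# hypothetical data frame myData with columns yObs (NA where censored), cens
# (TRUE/FALSE censoring indicator), and censLimit (the known limit for each
# observation). A sketch, not run:
# dataList = list( y = myData$yObs , N = nrow(myData)
#                  , isCensored = as.numeric(myData$cens)
#                  , censorLimitVec = myData$censLimit )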
#------------------------------------------------------------------------------
# INITIALIZE THE CHAINS.
sigmaInit = 15
muInit = 100
# initial values of censored data:
yInit = rep( NA , length(y) )
yInit[isCensored] = censorLimitVec[isCensored]+1
initsList = list( mu=muInit , sigma = sigmaInit
                  , y=yInit
)
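# The censored y values must be initialized to legal values above their
# censoring limits; an initial value at or below the limit is inconsistent with
# the observed isCensored[i]==1 in dinterval, and JAGS will typically stop with
# an error about an inconsistent node. The uncensored y values are observed
# data, so they get no initial value (NA).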
#------------------------------------------------------------------------------
# RUN THE CHAINS
parameters = c( "mu" , "sigma" )
adaptSteps = 1000 # overkill, just shows general form
burnInSteps = 2000 # overkill, just shows general form
nChains = 3
numSavedSteps=50000
thinSteps=1
nIter = ceiling( ( numSavedSteps * thinSteps ) / nChains )
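# With these settings, nIter = ceiling( 50000*1 / 3 ) = 16667 iterations per
# chain, so 3 chains x 16667 steps = 50001 saved steps in total.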
# Create, initialize, and adapt the model:
jagsModel = jags.model( "model.txt" , data=dataList , inits=initsList ,
                        n.chains=nChains , n.adapt=adaptSteps )
# Burn-in:
cat( "Burning in the MCMC chain...\n" )
update( jagsModel , n.iter=burnInSteps )
# The saved MCMC chain:
cat( "Sampling final MCMC chain...\n" )
codaSamples = coda.samples( jagsModel , variable.names=parameters ,
                            n.iter=nIter , thin=thinSteps )
# resulting codaSamples object has these indices:
# codaSamples[[ chainIdx ]][ stepIdx , paramIdx ]
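# For example, codaSamples[[1]][1:5,"mu"] would show the first five saved
# values of mu from chain 1 (a quick sanity check on the sampled object).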
#------------------------------------------------------------------------------
# EXAMINE THE RESULTS
checkConvergence = FALSE
if ( checkConvergence ) {
  openGraph(width=7,height=7)
  autocorr.plot( codaSamples[[1]] , ask=FALSE )
  show( gelman.diag( codaSamples ) )
  effectiveChainLength = effectiveSize( codaSamples )
  show( effectiveChainLength )
}
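# When checkConvergence is TRUE, these diagnostics show the within-chain
# autocorrelation, the Gelman-Rubin shrink factors (values near 1 suggest the
# chains have converged to a common distribution), and the effective sample
# size of the chains.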
# Convert coda-object codaSamples to matrix object for easier handling.
# But note that this concatenates the different chains into one long chain.
# Result is mcmcChain[ stepIdx , paramIdx ]
mcmcChain = as.matrix( codaSamples )
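# A quick numerical summary can be read directly off the matrix, e.g.
# (assuming HDIofMCMC.R, required by the header, has also been sourced):
# show( mean( mcmcChain[,"mu"] ) )
# show( HDIofMCMC( mcmcChain[,"mu"] ) )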
openGraph(width=10,height=3.0)
layout(matrix(1:3,nrow=1))
par( mar=c(3.5,1.5,3.0,0.5) , mgp=c(2,0.7,0) )
# plot the data:
xLim = desiredMean+c(-3.5*desiredSD,3.5*desiredSD)
xBreaks = seq( xLim[1] , xLim[2] , desiredSD/2 )
xGrid = seq( xLim[1] , xLim[2] , length=201 )
histInfo = hist( y , main="Data" , xlab="y" , freq=F ,
                 xlim=xLim , breaks=c(min(y,na.rm=T),xBreaks) ,
                 col="grey" , border="white" )
lines( xGrid , dnorm( xGrid , mean=desiredMean , sd=desiredSD )/pnorm(1) , lwd=3 )
abline(v=censorLimit,lty="dotted",col="red")
# plot the posterior:
histInfo = plotPost( mcmcChain[,"mu"] , main="Mu" , xlab=expression(mu) ,
                     showMode=FALSE )
histInfo = plotPost( mcmcChain[,"sigma"] , main="Sigma" , xlab=expression(sigma) ,
                     showMode=TRUE )
# save plot:
saveGraph( file=fileNameRoot , type="eps" )
saveGraph( file=fileNameRoot , type="jpg" )