diff --git a/chap11/fig11-11-left.R b/chap11/fig11-11-left.R index 39ca9be..d607e80 100644 --- a/chap11/fig11-11-left.R +++ b/chap11/fig11-11-left.R @@ -4,17 +4,17 @@ load('output/result-model11-8.RData') ms <- rstan::extract(fit_vb) probs <- c(0.1, 0.25, 0.5, 0.75, 0.9) -idx <- expand.grid(1:K, 1:I) +idx <- expand.grid(tag=1:K, item=1:I) -d_qua <- t(apply(idx, 1, function(x) quantile(ms$phi[,x[1],x[2]], probs=probs))) -d_qua <- data.frame(idx, d_qua) -colnames(d_qua) <- c('tag', 'item', paste0('p', probs*100)) +qua <- apply(idx, 1, function(x) quantile(ms$phi[,x[1],x[2]], probs=probs)) +d_est <- data.frame(idx, t(qua), check.names=FALSE) -p <- ggplot(data=d_qua, aes(x=item, y=p50)) -p <- p + theme_bw(base_size=18) -p <- p + facet_wrap(~tag, ncol=3) -p <- p + coord_flip() -p <- p + scale_x_reverse(breaks=c(1, seq(20, 120, 20))) -p <- p + geom_bar(stat='identity') -p <- p + labs(x='ItemID', y='phi[k,y]') +p <- ggplot(data=d_est, aes(x=item, y=`50%`)) + + theme_bw(base_size=18) + + theme(axis.text.x=element_text(angle=40, vjust=1, hjust=1)) + + facet_wrap(~tag, ncol=3) + + coord_flip() + + scale_x_reverse(breaks=c(1, seq(20, 120, 20))) + + geom_bar(stat='identity') + + labs(x='ItemID', y='phi[k,y]') ggsave(file='output/fig11-11-left.png', plot=p, dpi=300, w=7, h=5) diff --git a/chap11/fig11-11-right.R b/chap11/fig11-11-right.R index 4551a9a..966ffdd 100644 --- a/chap11/fig11-11-right.R +++ b/chap11/fig11-11-right.R @@ -4,18 +4,17 @@ load('output/result-model11-8.RData') ms <- rstan::extract(fit_vb) probs <- c(0.1, 0.25, 0.5, 0.75, 0.9) -idx <- expand.grid(1:N, 1:K) +idx <- expand.grid(person=1:N, tag=1:K) -d_qua <- t(apply(idx, 1, function(x) quantile(ms$theta[,x[1],x[2]], probs=probs))) -d_qua <- data.frame(idx, d_qua) -colnames(d_qua) <- c('person', 'tag', paste0('p', probs*100)) +qua <- apply(idx, 1, function(x) quantile(ms$theta[,x[1],x[2]], probs=probs)) +d_est <- data.frame(idx, t(qua), check.names=FALSE) -p <- ggplot(data=subset(d_qua, person %in% c(1,50)), aes(x=tag, y=p50)) -p <- p + theme_bw(base_size=18) -p <- p + facet_wrap(~person, nrow=2) -p <- p + coord_flip() -p <- p + scale_x_reverse(breaks=1:6) -p <- p + geom_bar(stat='identity') -p <- p + geom_errorbar(aes(ymin=p25, ymax=p75), width=0.25) -p <- p + labs(x='tag', y='theta[n,k]') +p <- ggplot(data=subset(d_est, person %in% c(1,50)), aes(x=tag, y=`50%`)) + + theme_bw(base_size=18) + + facet_wrap(~person, nrow=2) + + coord_flip() + + scale_x_reverse(breaks=1:6) + + geom_bar(stat='identity') + + geom_errorbar(aes(ymin=`25%`, ymax=`75%`), width=0.25) + + labs(x='tag', y='theta[n,k]') ggsave(file='output/fig11-11-right.png', plot=p, dpi=300, w=5, h=5) diff --git a/chap11/fig11-2.R b/chap11/fig11-2.R index 42001bb..5f68a0c 100644 --- a/chap11/fig11-2.R +++ b/chap11/fig11-2.R @@ -3,10 +3,10 @@ library(ggplot2) d <- read.csv(file='input/data-mix1.txt') dens <- density(d$Y) -p <- ggplot(data=d, aes(x=Y)) -p <- p + theme_bw(base_size=18) -p <- p + geom_histogram(binwidth=0.5, colour='black', fill='white') -p <- p + geom_density(aes(y=..count.. * 0.5), alpha=0.35, colour='black', fill='gray20') -p <- p + scale_y_continuous(breaks=seq(from=0, to=10, by=2)) -p <- p + labs(x='Y') + xlim(range(dens$x)) +p <- ggplot(data=d, aes(x=Y)) + + theme_bw(base_size=18) + + geom_histogram(binwidth=0.5, colour='black', fill='white') + + geom_density(aes(y=..count.. * 0.5), alpha=0.35, colour='black', fill='gray20') + + scale_y_continuous(breaks=seq(from=0, to=10, by=2)) + + labs(x='Y') + xlim(range(dens$x)) ggsave(file='output/fig11-2.png', plot=p, dpi=300, w=4, h=3) diff --git a/chap11/fig11-3.R b/chap11/fig11-3.R index b1d0bde..a02b25c 100644 --- a/chap11/fig11-3.R +++ b/chap11/fig11-3.R @@ -3,9 +3,9 @@ library(ggplot2) d <- read.csv(file='input/data-mix2.txt') dens <- density(d$Y) -p <- ggplot(data=d, aes(x=Y)) -p <- p + theme_bw(base_size=18) -p <- p + geom_histogram(color='black', fill='white') -p <- p + geom_density(aes(y=..count..), alpha=0.35, colour='black', fill='gray20') -p <- p + labs(x='Y') + xlim(range(dens$x)) +p <- ggplot(data=d, aes(x=Y)) + + theme_bw(base_size=18) + + geom_histogram(color='black', fill='white') + + geom_density(aes(y=..count..), alpha=0.35, colour='black', fill='gray20') + + labs(x='Y') + xlim(range(dens$x)) ggsave(file='output/fig11-3.png', plot=p, dpi=300, w=4, h=3) diff --git a/chap11/fig11-4.R b/chap11/fig11-4.R index 030ae11..4a23b4a 100644 --- a/chap11/fig11-4.R +++ b/chap11/fig11-4.R @@ -8,17 +8,17 @@ d$Sake <- as.factor(d$Sake) N_col <- ncol(d) ggp <- ggpairs(d, upper='blank', diag='blank', lower='blank') -for(i in 1:N_col) { +for (i in 1:N_col) { x <- d[,i] - p <- ggplot(data.frame(x), aes(x=x)) - p <- p + theme_bw(base_size=14) - p <- p + theme(axis.text.x=element_text(angle=40, vjust=1, hjust=1)) + p <- ggplot(data.frame(x), aes(x=x)) + + theme_bw(base_size=14) + + theme(axis.text.x=element_text(angle=40, vjust=1, hjust=1)) if (class(x) == 'factor') { p <- p + geom_bar(color='grey5', fill='grey80') } else { bw <- ifelse(colnames(d)[i] == 'Y', 1, (max(x)-min(x))/10) - p <- p + geom_histogram(binwidth=bw, color='grey5', fill='grey80') - p <- p + geom_line(eval(bquote(aes(y=..count..*.(bw)))), stat='density') + p <- p + geom_histogram(binwidth=bw, color='grey5', fill='grey80') + + geom_line(eval(bquote(aes(y=..count..*.(bw)))), stat='density') } p <- p + geom_label(data=data.frame(x=-Inf, y=Inf, label=colnames(d)[i]), aes(x=x, y=y, label=label), hjust=0, vjust=1) ggp <- putPlot(ggp, p, i, i) @@ -27,44 +27,42 @@ for(i in 1:N_col) { zcolat <- seq(-1, 1, length=81) zcolre <- c(zcolat[1:40]+1, rev(zcolat[41:81])) -for(i in 1:(N_col-1)) { - for(j in (i+1):N_col) { +for (i in 1:(N_col-1)) { + for (j in (i+1):N_col) { x <- as.numeric(d[,i]) y <- as.numeric(d[,j]) r <- cor(x, y, method='spearman', use='pairwise.complete.obs') zcol <- lattice::level.colors(r, at=zcolat, col.regions=grey(zcolre)) textcol <- ifelse(abs(r) < 0.4, 'grey20', 'white') ell <- ellipse::ellipse(r, level=0.95, type='l', npoints=50, scale=c(.2, .2), centre=c(.5, .5)) - p <- ggplot(data.frame(ell), aes(x=x, y=y)) - p <- p + theme_bw() + theme( + p <- ggplot(data.frame(ell), aes(x=x, y=y)) + theme_bw() + theme( plot.background=element_blank(), panel.grid.major=element_blank(), panel.grid.minor=element_blank(), - panel.border=element_blank(), axis.ticks=element_blank() - ) - p <- p + geom_polygon(fill=zcol, color=zcol) - p <- p + geom_text(data=NULL, x=.5, y=.5, label=100*round(r, 2), size=6, col=textcol) + panel.border=element_blank(), axis.ticks=element_blank()) + + geom_polygon(fill=zcol, color=zcol) + + geom_text(data=NULL, x=.5, y=.5, label=100*round(r, 2), size=6, col=textcol) ggp <- putPlot(ggp, p, i, j) } } -for(j in 1:(N_col-1)) { - for(i in (j+1):N_col) { +for (j in 1:(N_col-1)) { + for (i in (j+1):N_col) { x <- d[,j] y <- d[,i] if (class(x) == 'factor' && class(y) == 'factor') { - p <- ggplot(reshape2::melt(table(x,y)), aes(x=x, y=y)) - p <- p + theme_bw(base_size=14) - p <- p + geom_point(aes(size=value), color='grey80') - p <- p + geom_text(aes(label=value)) - p <- p + scale_size_area(max_size=8) - p <- p + scale_x_continuous(breaks=0:1, limits=c(-0.5,1.5)) + scale_y_continuous(breaks=0:1, limits=c(-0.5,1.5)) + p <- ggplot(reshape2::melt(table(x,y)), aes(x=x, y=y)) + + theme_bw(base_size=14) + + geom_point(aes(size=value), color='grey80') + + geom_text(aes(label=value)) + + scale_size_area(max_size=8) + + scale_x_continuous(breaks=0:1, limits=c(-0.5,1.5)) + scale_y_continuous(breaks=0:1, limits=c(-0.5,1.5)) } else { - p <- ggplot(data.frame(x, y, Y=d$Y), aes(x=x, y=y, color=Y)) - p <- p + theme_bw(base_size=14) - p <- p + theme(axis.text.x=element_text(angle=40, vjust=1, hjust=1)) + p <- ggplot(data.frame(x, y, Y=d$Y), aes(x=x, y=y, color=Y)) + + theme_bw(base_size=14) + + theme(axis.text.x=element_text(angle=40, vjust=1, hjust=1)) if (class(x) == 'factor') { - p <- p + geom_boxplot(aes(group=x), alpha=3/6, outlier.size=0, fill='white') - p <- p + geom_point(position=position_jitter(w=0.4, h=0), size=1) + p <- p + geom_boxplot(aes(group=x), alpha=3/6, outlier.size=0, fill='white') + + geom_point(position=position_jitter(w=0.4, h=0), size=1) } else { p <- p + geom_point(size=1) } diff --git a/chap11/fig11-5.R b/chap11/fig11-5.R index 52c5759..7a81771 100644 --- a/chap11/fig11-5.R +++ b/chap11/fig11-5.R @@ -7,8 +7,8 @@ d <- subset(data.frame(table(d_ori)), Freq >= 1) d$PersonID <- as.integer(as.character(d$PersonID)) d$ItemID <- as.integer(as.character(d$ItemID)) -p <- ggplot(data=d, aes(x=ItemID, y=PersonID, fill=Freq)) -p <- p + theme_bw(base_size=22) + theme(axis.text=element_blank(), axis.ticks=element_blank()) -p <- p + geom_point(shape=21, color='white', size=1.5) -p <- p + scale_fill_gradient(breaks=c(1,5,10), low='grey80', high='black') +p <- ggplot(data=d, aes(x=ItemID, y=PersonID, fill=Freq)) + + theme_bw(base_size=22) + theme(axis.text=element_blank(), axis.ticks=element_blank()) + + geom_point(shape=21, color='white', size=1.5) + + scale_fill_gradient(breaks=c(1,5,10), low='grey80', high='black') ggsave(file='output/fig11-5.png', plot=p, dpi=300, w=8, h=6) diff --git a/chap11/fig11-6.R b/chap11/fig11-6.R index 85e1e0b..721ecb9 100644 --- a/chap11/fig11-6.R +++ b/chap11/fig11-6.R @@ -1,19 +1,21 @@ +library(dplyr) library(ggplot2) N <- 50 I <- 120 d <- read.csv(file='input/data-lda.txt') -d_cast <- reshape2::dcast(d, PersonID ~ .) -p <- ggplot(data=d_cast, aes(x=.)) -p <- p + theme_bw(base_size=18) -p <- p + geom_bar(color='grey5', fill='grey80') -p <- p + scale_x_continuous(breaks=seq(10,40,10), limits=c(10,40)) -p <- p + labs(x='count by PersonID', y='count') + +d_cast <- d %>% group_by(PersonID) %>% summarise(n=n()) %>% ungroup() +p <- ggplot(data=d_cast, aes(x=n)) + + theme_bw(base_size=18) + + geom_bar(color='grey5', fill='grey80') + + scale_x_continuous(breaks=seq(10,40,10), limits=c(10,40)) + + labs(x='count by PersonID', y='count') ggsave(file='output/fig11-6-left.png', plot=p, dpi=300, w=4, h=3) -d_cast <- reshape2::dcast(d, ItemID ~ .) -p <- ggplot(data=d_cast, aes(x=.)) -p <- p + theme_bw(base_size=18) -p <- p + geom_bar(color='grey5', fill='grey80') -p <- p + labs(x='count by ItemID', y='count') +d_cast <- d %>% group_by(ItemID) %>% summarise(n=n()) %>% ungroup() +p <- ggplot(data=d_cast, aes(x=n)) + + theme_bw(base_size=18) + + geom_bar(color='grey5', fill='grey80') + + labs(x='count by ItemID', y='count') ggsave(file='output/fig11-6-right.png', plot=p, dpi=300, w=4, h=3) diff --git a/chap11/output/fig11-11-left.png b/chap11/output/fig11-11-left.png index 4ee2253..32de247 100644 Binary files a/chap11/output/fig11-11-left.png and b/chap11/output/fig11-11-left.png differ diff --git a/chap11/output/fig11-11-right.png b/chap11/output/fig11-11-right.png index df336d8..e8c7c5a 100644 Binary files a/chap11/output/fig11-11-right.png and b/chap11/output/fig11-11-right.png differ diff --git a/chap11/output/fig11-11.png b/chap11/output/fig11-11.png index 390aeec..da56e48 100644 Binary files a/chap11/output/fig11-11.png and b/chap11/output/fig11-11.png differ diff --git a/chap11/output/fig11-2.png b/chap11/output/fig11-2.png index d8d8852..456e503 100644 Binary files a/chap11/output/fig11-2.png and b/chap11/output/fig11-2.png differ diff --git a/chap11/output/fig11-3.png b/chap11/output/fig11-3.png index ea50c79..3d7a16b 100644 Binary files a/chap11/output/fig11-3.png and b/chap11/output/fig11-3.png differ diff --git a/chap11/output/fig11-4.png b/chap11/output/fig11-4.png index 418f3ed..747b10f 100644 Binary files a/chap11/output/fig11-4.png and b/chap11/output/fig11-4.png differ diff --git a/chap11/output/fig11-5.png b/chap11/output/fig11-5.png index aca5118..447a40e 100644 Binary files a/chap11/output/fig11-5.png and b/chap11/output/fig11-5.png differ diff --git a/chap11/output/fig11-6-left.png b/chap11/output/fig11-6-left.png index 58b35a5..d395636 100644 Binary files a/chap11/output/fig11-6-left.png and b/chap11/output/fig11-6-left.png differ diff --git a/chap11/output/fig11-6-right.png b/chap11/output/fig11-6-right.png index e7528b9..7b31ec7 100644 Binary files a/chap11/output/fig11-6-right.png and b/chap11/output/fig11-6-right.png differ diff --git a/chap11/output/fig11-6.png b/chap11/output/fig11-6.png index 77c2496..8c1b5d2 100644 Binary files a/chap11/output/fig11-6.png and b/chap11/output/fig11-6.png differ diff --git a/chap11/run-model11-7.R b/chap11/run-model11-7.R index 9a1fb0f..ed3fc56 100644 --- a/chap11/run-model11-7.R +++ b/chap11/run-model11-7.R @@ -12,5 +12,5 @@ N_mcmc <- length(ms$lp__) r <- sapply(1:N_mcmc, function(i) cor(ms$lambda[i,], ms$q[i,], method='spearman')) quantile(r, prob=c(0.025, 0.25, 0.5, 0.75, 0.975)) -# 2.5% 25% 50% 75% 97.5% -# -0.7990 -0.6955 -0.6496 -0.6034 -0.4713 \ No newline at end of file +# 2.5% 25% 50% 75% 97.5% +# -0.8063798 -0.6972511 -0.6501928 -0.6052952 -0.4610613 diff --git a/chap11/run-model11-8.R b/chap11/run-model11-8.R index 66abea9..0439e82 100644 --- a/chap11/run-model11-8.R +++ b/chap11/run-model11-8.R @@ -14,4 +14,4 @@ stanmodel <- stan_model(file='model/model11-8.stan') # fit_nuts <- sampling(stanmodel, data=data, seed=123) fit_vb <- vb(stanmodel, data=data, seed=123) -save.image('output/result-model11-8.RData') \ No newline at end of file +save.image('output/result-model11-8.RData')