diff --git a/chap04/exercise/ex1.R b/chap04/exercise/ex1.R index 5b85c0b..68489fb 100644 --- a/chap04/exercise/ex1.R +++ b/chap04/exercise/ex1.R @@ -7,8 +7,7 @@ d2 <- data.frame(group=2, Y=Y2) d <- rbind(d1, d2) d$group <- as.factor(d$group) -p <- ggplot(data=d, aes(x=group, y=Y, group=group, col=group)) -p <- p + geom_boxplot(outlier.size=0) -p <- p + geom_point(position=position_jitter(w=0.4, h=0), size=2) +p <- ggplot(data=d, aes(x=group, y=Y, group=group, col=group)) + + geom_boxplot(outlier.shape=NA, alpha=0.3) + + geom_point(position=position_jitter(w=0.1, h=0), size=2) ggsave(file='fig-ex1.png', plot=p, dpi=300, w=4, h=3) - diff --git a/chap04/exercise/fig-ex1.png b/chap04/exercise/fig-ex1.png index f74506d..5551343 100644 Binary files a/chap04/exercise/fig-ex1.png and b/chap04/exercise/fig-ex1.png differ diff --git a/chap04/fig4-2.R b/chap04/fig4-2.R index f7fc01e..313e5f0 100644 --- a/chap04/fig4-2.R +++ b/chap04/fig4-2.R @@ -1,7 +1,7 @@ library(ggplot2) d <- read.csv(file='input/data-salary.txt') -p <- ggplot(data=d, aes(x=X, y=Y)) -p <- p + theme_bw(base_size=18) -p <- p + geom_point(shape=1, size=3) +p <- ggplot(data=d, aes(x=X, y=Y)) + + theme_bw(base_size=18) + + geom_point(shape=1, size=3) ggsave(file='output/fig4-2.png', plot=p, dpi=300, w=4, h=3) diff --git a/chap04/fig4-3.R b/chap04/fig4-3.R index e82423e..2293718 100644 --- a/chap04/fig4-3.R +++ b/chap04/fig4-3.R @@ -13,22 +13,22 @@ pred_95 <- data.frame(X_new, pred_95) pred_50 <- predict(res_lm, X_new, interval='prediction', level=0.50) pred_50 <- data.frame(X_new, pred_50) -p <- ggplot() -p <- p + theme_bw(base_size=18) -p <- p + geom_ribbon(data=conf_95, aes(x=X, ymin=lwr, ymax=upr), alpha=1/6) -p <- p + geom_ribbon(data=conf_50, aes(x=X, ymin=lwr, ymax=upr), alpha=2/6) -p <- p + geom_line(data=conf_50, aes(x=X, y=fit), size=1) -p <- p + geom_point(data=d, aes(x=X, y=Y), shape=1, size=3) -p <- p + labs(x='X', y='Y') + coord_cartesian(xlim=c(22, 61), ylim=c(200, 1400)) -p <- p + scale_y_continuous(breaks=seq(from=200, to=1400, by=400)) +p <- ggplot() + + theme_bw(base_size=18) + + geom_ribbon(data=conf_95, aes(x=X, ymin=lwr, ymax=upr), alpha=1/6) + + geom_ribbon(data=conf_50, aes(x=X, ymin=lwr, ymax=upr), alpha=2/6) + + geom_line(data=conf_50, aes(x=X, y=fit), size=1) + + geom_point(data=d, aes(x=X, y=Y), shape=1, size=3) + + labs(x='X', y='Y') + coord_cartesian(xlim=c(22, 61), ylim=c(200, 1400)) + + scale_y_continuous(breaks=seq(from=200, to=1400, by=400)) ggsave(p, file='output/fig4-3-left.png', dpi=300, w=4, h=3) -p <- ggplot() -p <- p + theme_bw(base_size=18) -p <- p + geom_ribbon(data=pred_95, aes(x=X, ymin=lwr, ymax=upr), alpha=1/6) -p <- p + geom_ribbon(data=pred_50, aes(x=X, ymin=lwr, ymax=upr), alpha=2/6) -p <- p + geom_line(data=pred_50, aes(x=X, y=fit), size=1) -p <- p + geom_point(data=d, aes(x=X, y=Y), shape=1, size=3) -p <- p + labs(x='X', y='Y') + coord_cartesian(xlim=c(22, 61), ylim=c(200, 1400)) -p <- p + scale_y_continuous(breaks=seq(from=200, to=1400, by=400)) +p <- ggplot() + + theme_bw(base_size=18) + + geom_ribbon(data=pred_95, aes(x=X, ymin=lwr, ymax=upr), alpha=1/6) + + geom_ribbon(data=pred_50, aes(x=X, ymin=lwr, ymax=upr), alpha=2/6) + + geom_line(data=pred_50, aes(x=X, y=fit), size=1) + + geom_point(data=d, aes(x=X, y=Y), shape=1, size=3) + + labs(x='X', y='Y') + coord_cartesian(xlim=c(22, 61), ylim=c(200, 1400)) + + scale_y_continuous(breaks=seq(from=200, to=1400, by=400)) ggsave(p, file='output/fig4-3-right.png', dpi=300, w=4, h=3) diff --git a/chap04/fig4-4.R b/chap04/fig4-4.R index 516124c..96dda7e 100644 --- a/chap04/fig4-4.R +++ b/chap04/fig4-4.R @@ -2,8 +2,8 @@ library(ggmcmc) load('output/result-model4-5.RData') -p <- ggs_traceplot(ggs(fit, inc_warmup=TRUE, stan_include_auxiliar=TRUE)) -p <- p + theme_bw(base_size=18) -p <- p + scale_colour_manual(values=c('#dcdcdc','#a9a9a9','#696969','#000000')) -p <- p + labs(color='Chain') +p <- ggs_traceplot(ggs(fit, inc_warmup=TRUE, stan_include_auxiliar=TRUE)) + + theme_bw(base_size=18) + + scale_colour_manual(values=c('#dcdcdc','#a9a9a9','#696969','#000000')) + + labs(color='Chain') ggsave(p, file='output/fig4-4.png', dpi=300, w=7, h=10) diff --git a/chap04/fig4-7.R b/chap04/fig4-7.R index 8e3e438..35946df 100644 --- a/chap04/fig4-7.R +++ b/chap04/fig4-7.R @@ -1,45 +1,46 @@ library(ggplot2) -source('../common.R') +library(patchwork) load('output/result-model4-5.RData') ms <- rstan::extract(fit) df_mcmc <- data.frame(a=ms$a, b=ms$b, sigma=ms$sigma) -p_xy <- ggplot(df_mcmc,aes(x=a,y=b)) + - my_theme() + +x_range <- c(-420, 210) +y_range <- c(14.5, 29) +x_breaks <- seq(-400, 200, 200) +y_breaks <- seq(15, 25, 5) + +p_xy <- ggplot(df_mcmc, aes(x=a,y=b)) + + theme_bw(base_size=18) + + coord_cartesian(xlim = x_range, ylim = y_range) + geom_point(alpha=1/4, size=2, shape=1) + - scale_x_continuous(breaks=seq(-400, 200, 200), limits=c(-420, 210)) + - scale_y_continuous(breaks=seq(15, 25, 5), limits=c(14.5, 29)) + scale_x_continuous(breaks=x_breaks) + + scale_y_continuous(breaks=y_breaks) p_x <- ggplot(df_mcmc, aes(x=a)) + - my_theme0() + + theme_bw(base_size=18) + + theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.text.y=element_blank(), axis.ticks.x=element_blank(), axis.ticks.y=element_blank()) + + coord_cartesian(xlim=x_range) + geom_histogram(aes(y=..density..), colour='black', fill='white') + geom_density(alpha=0.3, fill='gray20') + - scale_x_continuous(breaks=seq(-400, 200, 200), limits=c(-420, 210)) + + scale_x_continuous(breaks=x_breaks) + labs(x='', y='') p_y <- ggplot(df_mcmc, aes(x=b)) + - my_theme0() + - coord_flip() + + theme_bw(base_size=18) + + theme(axis.title.y=element_blank(), axis.text.x=element_blank(), axis.text.y=element_blank(), axis.ticks.x=element_blank(), axis.ticks.y=element_blank()) + + coord_flip(xlim=y_range) + geom_histogram(aes(y=..density..), colour='black', fill='white') + geom_density(alpha=0.3, fill='gray20') + - scale_x_continuous(breaks=seq(15, 25, 5), limits=c(14.5, 29)) + + scale_x_continuous(breaks=y_breaks) + labs(x='', y='') -p_emp <- ggplot(data.frame(0,0)) + theme_bw() + theme(rect=element_rect(fill='white'), panel.border=element_blank()) - -g_xy <- ggplotGrob(p_xy) -g_x <- ggplotGrob(p_x) -g_y <- ggplotGrob(p_y) -g_emp <- ggplotGrob(p_emp) - -g1 <- cbind(g_x, g_emp, size='first') -g2 <- cbind(g_xy, g_y, size='first') -g <- rbind(g1, g2, size='first') -g$widths[1:3] <- grid::unit.pmax(g1$widths[1:3], g2$widths[1:3]) -g$heights[8:13] <- g$widths[6:11] <- rep(unit(0.5,'mm'), 6) -g$heights[7] <- g$widths[14] <- unit(3,'cm') +p <- wrap_plots( + p_x, plot_spacer(), + p_xy, p_y, + nrow = 2, + widths = c(1, 0.3), + heights = c(0.3, 1) +) -png(file='output/fig4-7.png', res=300, w=1800, h=1800) -grid::grid.draw(g) -dev.off() +ggsave(p, file='output/fig4-7.png', dpi=250, w=6, h=6) diff --git a/chap04/fig4-8.R b/chap04/fig4-8.R index 7c417bd..596c6f5 100644 --- a/chap04/fig4-8.R +++ b/chap04/fig4-8.R @@ -1,5 +1,4 @@ library(ggplot2) -source('../common.R') load('output/result-model4-5.RData') ms <- rstan::extract(fit) @@ -16,21 +15,41 @@ for (i in 1:N_X) { y_mcmc[,i] <- rnorm(n=N_mcmc, mean=y_base_mcmc[,i], sd=ms$sigma) } -customize.ggplot.axis <- function(p) { - p <- p + labs(x='X', y='Y') - p <- p + scale_y_continuous(breaks=seq(from=200, to=1400, by=400)) - p <- p + coord_cartesian(xlim=c(22, 61), ylim=c(200, 1400)) - return(p) -} -d_est <- data.frame.quantile.mcmc(x=X_new, y_mcmc=y_base_mcmc) -p <- ggplot.5quantile(data=d_est) -p <- p + geom_point(data=d, aes(x=X, y=Y), shape=1, size=3) -p <- customize.ggplot.axis(p) -ggsave(file='output/fig4-8-left.png', plot=p, dpi=300, w=4, h=3) - -d_est <- data.frame.quantile.mcmc(x=X_new, y_mcmc=y_mcmc) -p <- ggplot.5quantile(data=d_est) -p <- p + geom_point(data=d, aes(x=X, y=Y), shape=1, size=3) -p <- customize.ggplot.axis(p) -ggsave(file='output/fig4-8-right.png', plot=p, dpi=300, w=4, h=3) +## プロットするために50%信用区間と95%信用区間を作ります(ここが1番大切です) +# y_base_mcmcは行方向がMCMCサンプル、列方向がX方向です。 +# applyの2つめの引数である「2」は, 列方向「2」を残して行方向「1」についてまとめるという意味です。 +# data.frame関数の引数「check.names = FALSE」は数字はじまりや%を含む文字列もそのまま列名にするという意味です。 +# 個人的には変な変換されるよりそのままの列名の方が分かりやすいのでそのようにしています。 +# ただし、数字始まりや%を含む列名を指定するときはバッククォートで囲む必要があります(後述)。 +qua <- apply(y_base_mcmc, 2, quantile, probs=c(0.025, 0.25, 0.50, 0.75, 0.975)) +d_est <- data.frame(X=X_new, t(qua), check.names = FALSE) + +## plot +# ggplot(data = d_est, aes(...)) のようにggplot関数の中でdataを指定する方法もありますが、 +# 複数種類以上のデータを使う場合には、各geom_xxx関数の中でdataを指定する方が分かりやすいかもしれないと思ったのでそのようにしました。 +p <- ggplot() + + theme_bw(base_size=18) + + geom_ribbon(data=d_est, aes(x=X, ymin=`2.5%`, ymax=`97.5%`), fill='black', alpha=1/6) + + geom_ribbon(data=d_est, aes(x=X, ymin=`25%`, ymax=`75%`), fill='black', alpha=2/6) + + geom_line(data=d_est, aes(x=X, y=`50%`), size=1) + + geom_point(data=d, aes(x=X, y=Y), shape=1, size=3) + + coord_cartesian(xlim=c(22, 61), ylim=c(200, 1400)) + + scale_y_continuous(breaks=seq(from=200, to=1400, by=400)) + + labs(y='Y') +ggsave(p, file='output/fig4-8-left.png', dpi=300, w=4, h=3) + + +qua <- apply(y_mcmc, 2, quantile, probs=c(0.025, 0.25, 0.50, 0.75, 0.975)) +d_est <- data.frame(X=X_new, t(qua), check.names = FALSE) + +p <- ggplot() + + theme_bw(base_size=18) + + geom_ribbon(data=d_est, aes(x=X, ymin=`2.5%`, ymax=`97.5%`), fill='black', alpha=1/6) + + geom_ribbon(data=d_est, aes(x=X, ymin=`25%`, ymax=`75%`), fill='black', alpha=2/6) + + geom_line(data=d_est, aes(x=X, y=`50%`), size=1) + + geom_point(data=d, aes(x=X, y=Y), shape=1, size=3) + + coord_cartesian(xlim=c(22, 61), ylim=c(200, 1400)) + + scale_y_continuous(breaks=seq(from=200, to=1400, by=400)) + + labs(y='Y') +ggsave(p, file='output/fig4-8-right.png', dpi=300, w=4, h=3) diff --git a/chap04/output/fig4-2.png b/chap04/output/fig4-2.png index 365fd28..c0526e6 100644 Binary files a/chap04/output/fig4-2.png and b/chap04/output/fig4-2.png differ diff --git a/chap04/output/fig4-3-left.png b/chap04/output/fig4-3-left.png index e7bd085..c3e7da6 100644 Binary files a/chap04/output/fig4-3-left.png and b/chap04/output/fig4-3-left.png differ diff --git a/chap04/output/fig4-3-right.png b/chap04/output/fig4-3-right.png index 351426b..c544ebd 100644 Binary files a/chap04/output/fig4-3-right.png and b/chap04/output/fig4-3-right.png differ diff --git a/chap04/output/fig4-3.png b/chap04/output/fig4-3.png index 15322ed..907b71b 100644 Binary files a/chap04/output/fig4-3.png and b/chap04/output/fig4-3.png differ diff --git a/chap04/output/fig4-4.png b/chap04/output/fig4-4.png index 4b90a9e..8f1c3e1 100644 Binary files a/chap04/output/fig4-4.png and b/chap04/output/fig4-4.png differ diff --git a/chap04/output/fig4-7.png b/chap04/output/fig4-7.png index 17c9c6d..7bdaece 100644 Binary files a/chap04/output/fig4-7.png and b/chap04/output/fig4-7.png differ diff --git a/chap04/output/fig4-8-left-2.png b/chap04/output/fig4-8-left-2.png index 47562b3..0807b50 100644 Binary files a/chap04/output/fig4-8-left-2.png and b/chap04/output/fig4-8-left-2.png differ diff --git a/chap04/output/fig4-8-left.png b/chap04/output/fig4-8-left.png index 0152a74..71f329e 100644 Binary files a/chap04/output/fig4-8-left.png and b/chap04/output/fig4-8-left.png differ diff --git a/chap04/output/fig4-8-right-2.png b/chap04/output/fig4-8-right-2.png index f377507..fd60be3 100644 Binary files a/chap04/output/fig4-8-right-2.png and b/chap04/output/fig4-8-right-2.png differ diff --git a/chap04/output/fig4-8-right.png b/chap04/output/fig4-8-right.png index c9283ed..061e22d 100644 Binary files a/chap04/output/fig4-8-right.png and b/chap04/output/fig4-8-right.png differ diff --git a/chap04/output/fig4-8.png b/chap04/output/fig4-8.png index 52861b6..c6afccc 100644 Binary files a/chap04/output/fig4-8.png and b/chap04/output/fig4-8.png differ diff --git a/chap04/rstan-save-diagnostics.R b/chap04/rstan-save-diagnostics.R index 59a3093..a8adf29 100644 --- a/chap04/rstan-save-diagnostics.R +++ b/chap04/rstan-save-diagnostics.R @@ -1,14 +1,18 @@ library(rstan) -library(ggmcmc) load('output/result-model4-5.RData') -write.table(data.frame(summary(fit)$summary), - file='output/fit-summary.txt', sep='\t', quote=FALSE, col.names=NA) +write.table(data.frame(summary(fit)$summary, check.names=FALSE), + file='output/fit-summary.csv', sep=',', quote=TRUE, col.names=NA) + +library(ggmcmc) ggmcmc(ggs(fit, inc_warmup=TRUE, stan_include_auxiliar=TRUE), file='output/fit-traceplot.pdf', plot='traceplot') - ggmcmc(ggs(fit), file='output/fit-ggmcmc.pdf') -# ggmcmc(ggs(fit), file='output/fit-ggmcmc.pdf', -# plot=c('traceplot', 'density', 'running', 'autocorrelation')) + + +library(coda) +pdf(file='output/fit-traceplot-coda.pdf') +plot(As.mcmc.list(fit)) +dev.off() diff --git a/chap04/run-model4-4.R b/chap04/run-model4-4.R index fcff967..870b995 100644 --- a/chap04/run-model4-4.R +++ b/chap04/run-model4-4.R @@ -7,38 +7,31 @@ data <- list(N=nrow(d), X=d$X, Y=d$Y, N_new=length(X_new), X_new=X_new) fit <- stan(file='model/model4-4.stan', data=data, seed=1234) ms <- rstan::extract(fit) -data.frame.quantile.mcmc <- function(x, y_mcmc, - probs=c(2.5, 25, 50, 75, 97.5)/100) { - qua <- apply(y_mcmc, 2, quantile, probs=probs) - d <- data.frame(X=x, t(qua)) - colnames(d) <- c('X', paste0('p', probs*100)) - return(d) -} +qua <- apply(ms$y_base_new, 2, quantile, probs=c(0.025, 0.25, 0.50, 0.75, 0.975)) +d_est <- data.frame(X=X_new, t(qua), check.names = FALSE) -ggplot.5quantile <- function(data) { - p <- ggplot(data=data, aes(x=X, y=p50)) - p <- p + theme_bw(base_size=18) - p <- p + geom_ribbon(aes(ymin=p2.5, ymax=p97.5), fill='black', alpha=1/6) - p <- p + geom_ribbon(aes(ymin=p25, ymax=p75), fill='black', alpha=2/6) - p <- p + geom_line(size=1) - return(p) -} +p <- ggplot() + + theme_bw(base_size=18) + + geom_ribbon(data=d_est, aes(x=X, ymin=`2.5%`, ymax=`97.5%`), fill='black', alpha=1/6) + + geom_ribbon(data=d_est, aes(x=X, ymin=`25%`, ymax=`75%`), fill='black', alpha=2/6) + + geom_line(data=d_est, aes(x=X, y=`50%`), size=1) + + geom_point(data=d, aes(x=X, y=Y), shape=1, size=3) + + coord_cartesian(xlim=c(22, 61), ylim=c(200, 1400)) + + scale_y_continuous(breaks=seq(from=200, to=1400, by=400)) + + labs(y='Y') +ggsave(p, file='output/fig4-8-left-2.png', dpi=300, w=4, h=3) -customize.ggplot.axis <- function(p) { - p <- p + labs(x='X', y='Y') - p <- p + scale_y_continuous(breaks=seq(from=200, to=1400, by=400)) - p <- p + coord_cartesian(xlim=c(22, 61), ylim=c(200, 1400)) - return(p) -} -d_est <- data.frame.quantile.mcmc(x=X_new, y_mcmc=ms$y_base_new) -p <- ggplot.5quantile(data=d_est) -p <- p + geom_point(data=d, aes(x=X, y=Y), shape=1, size=3) -p <- customize.ggplot.axis(p) -ggsave(file='output/fig4-8-left-2.png', plot=p, dpi=300, w=4, h=3) +qua <- apply(ms$y_new, 2, quantile, probs=c(0.025, 0.25, 0.50, 0.75, 0.975)) +d_est <- data.frame(X=X_new, t(qua), check.names = FALSE) -d_est <- data.frame.quantile.mcmc(x=X_new, y_mcmc=ms$y_new) -p <- ggplot.5quantile(data=d_est) -p <- p + geom_point(data=d, aes(x=X, y=Y), shape=1, size=3) -p <- customize.ggplot.axis(p) -ggsave(file='output/fig4-8-right-2.png', plot=p, dpi=300, w=4, h=3) +p <- ggplot() + + theme_bw(base_size=18) + + geom_ribbon(data=d_est, aes(x=X, ymin=`2.5%`, ymax=`97.5%`), fill='black', alpha=1/6) + + geom_ribbon(data=d_est, aes(x=X, ymin=`25%`, ymax=`75%`), fill='black', alpha=2/6) + + geom_line(data=d_est, aes(x=X, y=`50%`), size=1) + + geom_point(data=d, aes(x=X, y=Y), shape=1, size=3) + + coord_cartesian(xlim=c(22, 61), ylim=c(200, 1400)) + + scale_y_continuous(breaks=seq(from=200, to=1400, by=400)) + + labs(y='Y') +ggsave(p, file='output/fig4-8-right-2.png', dpi=300, w=4, h=3)