-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmaster_presentation.tex
354 lines (314 loc) · 12.1 KB
/
master_presentation.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
% !TEX program = xelatex
% Beamer presentation at 10pt. The global `dvipsnames` option requests the dvips
% colour names (Blue, RoyalBlue, YellowGreen, ForestGreen) used by the listings setup.
\documentclass[10pt, dvipsnames]{beamer}
% Metropolis theme; the progress bar is attached to the frame title.
\usetheme[progressbar=frametitle]{metropolis}
\usepackage{appendixnumberbeamer} % loaded exactly once (a redundant second load was removed)
\usepackage{booktabs}
\usepackage[ngerman]{babel}
\usepackage[ngerman]{varioref}
\usepackage[scale=2]{ccicons}
\usepackage[absolute,overlay]{textpos}
\usepackage{graphicx}
\usepackage{amsmath}
\usepackage[justification=centering]{caption}
\usepackage[autostyle]{csquotes}
\usepackage{natbib}
\usepackage{listings} %% for R Code
% White slide background.
\setbeamercolor{background canvas}{bg=white}
%%% BibTeX style
% natbib citation style: author-year citations; the `open`/`close` keys name the
% opening and closing bracket characters.
% NOTE(review): the key list contains spaces around `=` and inside the braces;
% confirm natbib parses these keys as intended.
\setcitestyle{authoryear, open = { ( }, close = { ) }}
% Font declaration applied to the bibliography.
\def\bibfont{\small} % smaller bibliography
%%% Listings options
% Defines the listings language `Renhanced` on top of the base language `R`.
% It is selected as the default language by the \lstset call that follows.
\lstdefinelanguage{Renhanced}[]{R}{
% Extra operator-like tokens to be treated as keywords.
otherkeywords={!,!=,~,*,\&,\%/\%,\%*\%,\%\%,<-,<<-, ::},
% No additional word keywords are added.
morekeywords={},
% Names removed from the keyword list of the base language.
deletekeywords={hist, runif, plot, data, family , read.table, read, check, text, file, attributes, quote, missing, c, list, any, which, na, deparse, structure, install},
% Character classes: `.` and `%` count as letters, `:`, `_` and `$` as other characters.
alsoletter={.\%},%
alsoother={:_\$}}
% Global listings defaults applied to every code chunk on the slides.
% The named colours (Blue, RoyalBlue, YellowGreen, ForestGreen) are dvips names,
% requested through the `dvipsnames` class option.
\lstset{
  language=Renhanced,               % language of the code (the R dialect defined above)
  basicstyle=\small\ttfamily,       % font size and family used for the code
  numbers=left,                     % put line numbers on the left
  numberstyle=\tiny\color{Blue},    % style of the line numbers
  stepnumber=1,                     % step between two line numbers; 1 numbers every line
  numbersep=10pt,                   % distance between line numbers and code
  backgroundcolor=\color{white},    % background colour of the listing
  showspaces=false,                 % do not mark spaces with visible underscores
  showstringspaces=false,           % do not mark spaces within strings
  showtabs=false,                   % do not mark tabs
  frame=none,                       % no frame; `none` is the documented value (was the undocumented `false`)
  rulecolor=\color{black},          % frame colour, relevant only if a frame is enabled
  tabsize=2,                        % default tab size of 2 spaces
  captionpos=b,                     % caption position: bottom
  breaklines=true,                  % automatic line breaking
  breakatwhitespace=false,          % automatic breaks are not restricted to whitespace
  keywordstyle=\color{RoyalBlue},   % keyword style
  commentstyle=\color{YellowGreen}, % comment style
  stringstyle=\color{ForestGreen}   % string literal style
}
% Label used in listing captions.
\renewcommand{\lstlistingname}{Code-Chunk}
%%% New commands
% \li: short alias for listings' inline-code command \lstinline.
\newcommand*{\li}{\lstinline}
% Override selected caption strings of the babel language `ngerman`.
% Every line ends in `%` so that no space tokens end up in the captions hook.
\addto\captionsngerman{%
  \renewcommand*{\contentsname}{Inhalt}%
  \renewcommand*{\listfigurename}{Figures}%
  \renewcommand*{\listtablename}{Table}%
  \renewcommand*{\figurename}{Figure}%
}
% Sans-serif font: Fira Sans Book, with Fira Sans SemiBold as its bold face.
% The file header selects XeLaTeX as the engine.
\setsansfont[BoldFont={Fira Sans SemiBold}]{Fira Sans Book}
\usepackage{pgfplots}
\usepgfplotslibrary{dateplot}
\usepackage{xspace}
% Theme name in bold small caps, followed by \xspace.
% NOTE(review): no use of \themename is visible in this file; confirm before removing.
\newcommand{\themename}{\textbf{\textsc{metropolis}}\xspace}
% Title-page metadata.
\title{bamlss.vis}
\subtitle{An R Package to Interactively Analyze and Visualize Bayesian Additive Models for Location, Scale and Shape (bamlss) Using the Shiny Framework}
\date{7. December 2017}
\author{Stanislaus Stadlmann}
\institute{Georg-August University of Göttingen}
% Logo on the title page, shifted by the manual vertical and horizontal offsets below.
\titlegraphic{
\vspace{-0.2cm}~%
\hspace*{1.25cm}~%
\includegraphics[height = 1.4cm]{images/00_logo.png}
}
\begin{document}
\maketitle
\begin{frame}{Table of contents}
\setbeamertemplate{section in toc}[sections numbered]
\tableofcontents
\end{frame}
\section{Introduction}
\begin{frame}{Introduction}
\textbf{Distributional Regression} \\
\begin{itemize}
\item An emerging field in regression methods
\item Each parameter of a response distribution beyond the mean can be modeled using a set of predictors
\pause
\item Notable frameworks:
\begin{enumerate}
\item Generalized Additive Models for Location, Scale and Shape, coined by \citet*{gamlss2001}
\item Bayesian Additive Models for Location, Scale and Shape, coined by \citet*{bamlss2017}
\end{enumerate}
\item Differences: Estimation techniques -- Likelihood/Bayes
\end{itemize}
\end{frame}
\begin{frame}{Introduction}
\textbf{bamlss.vis} \\
\begin{itemize}
\item R package based on the Shiny framework
\item Built upon R package \li{bamlss}
\item Requires a fitted \li{bamlss} object
\item Provides the ability to
\begin{enumerate}
\item visualize predictions for user-chosen covariate combinations
\item visualize the influence of a certain covariate on distributional moments
\end{enumerate}
\end{itemize}
\end{frame}
\section{BAMLSS ancestry}
\begin{frame}{Additive Models (AM)}
\textbf{Overview} \\
\begin{itemize}
\item Proposed by \citet*{friedman1981projection}
\item Dependent variable $y$ is related to non-parametric effects in an additive way
\end{itemize}
\pause
\textbf{Model specification}
\begin{equation*}
\begin{split}
y_i & = f_1(z_{i1}) + f_2(z_{i2}) + \ldots + f_K(z_{iK}) + \epsilon_i \quad \text{(only nonparametric effects)} \\
y_i & = \sum\limits_{j = 1}^K f_j(z_{ij}) + \sum\limits_{l = 1}^Q \beta_l x_{il} + \epsilon_i \quad \text{(with parametric effects)}
\end{split}
\end{equation*}
\pause
\textbf{Why additive?}
\begin{itemize}
\item Curse of dimensionality
\item Easier to separate covariate effects
\end{itemize}
\end{frame}
\begin{frame}{Structured Additive Regression (STAR) Models}
\textbf{Motivation} \\
\begin{itemize}
\item AM allow for non-parametric effects, but sometimes even more flexibility is needed
\item STAR \citep{fahrmeir2003} also support structured terms, which include:
\pause
\begin{enumerate}
\item Nonlinear effects of a single variable
\item Spatial effects of location index $s$
\item Interactions between a continuous covariate and a categorical variable
\item Nonlinear interactions between two continuous covariates
\item Random Effects with intercept $\nu_0$ and slope $\nu_j$ deviations from main effects
\end{enumerate}
\end{itemize}
\pause
\textbf{Model specification}
\begin{equation*}
y_i = \underbrace{\kappa_i^{\text{add}}}_{\text{AM components}} + \: f_{\text{struc}}(\mathbf{z}_{iF}) + \epsilon_i
\end{equation*}
where $\mathbf{z}_{F}$ can be a one- or multidimensional variable.
\end{frame}
\begin{frame}{Generalized STAR Models}
\textbf{Motivation} \\
\begin{itemize}
\item AM and STAR assume normality and directly model $E(y)$
\item Generalized STAR models use link function $g(\cdot)$ of Generalized Linear Models
\item Adds the ability to model $E(y)$ for any exponential family distribution, e.g.\ the binomial or Poisson distribution
\end{itemize}
\pause
\textbf{Model specification}
\begin{equation*}
\begin{split}
g(\mu_i) & = \eta_i \\
\eta_i & = f_1(\mathbf{z}_{i1}) + \ldots + f_J(\mathbf{z}_{iJ})
\end{split}
\end{equation*}
where $f_j(\cdot)$ can be any structured effect.
\end{frame}
\begin{frame}{Structured Additive Distributional Regression}
\textbf{Motivation} \\
\begin{itemize}
\item Often, more than just the location (Expected Value) of a distribution is of interest
\item Scale/Shape (Variance, Kurtosis) might also be dependent on covariates
\item Structured Additive Distributional Regression allows modeling of all distributional parameters $\theta_l$
\end{itemize}
\pause
\textbf{Model specification}\\
Let $y \sim D(\theta_1, \ldots, \theta_L)$. Then:
\begin{equation*}
\begin{split}
g_{l}(\theta_{il}) & = \eta_{il} \\
\eta_{il} & = f_{1l}(\mathbf{z}_{i1l}) + \ldots + f_{J_{l}l}(\mathbf{z}_{iJ_{l}l})
\end{split}
\end{equation*}
where every $\theta_l$ can be modeled with effect types of different subsets of $\mathbf{Z}$.
\end{frame}
\begin{frame}{Bayesian Additive Models for Location, Scale and Shape}
\textbf{Overview} \\
\begin{itemize}
\item Coined by \citet*{bamlss2017}
\item Bayesian variant of Structured Additive Distributional Regression
\item \enquote{Full} Bayesian inference with
\begin{enumerate}
\item Posterior distribution maximisation and
\item Markov Chain Monte Carlo Sampling
\end{enumerate}
\end{itemize}
\pause
\textbf{Differences/Advantages over GAMLSS} \\
\begin{itemize}
\item Valid credible intervals, as opposed to confidence intervals based on asymptotics
\item Structured Additive Effects
\item Support of Multivariate Distributions
\end{itemize}
\textbf{but} \\
\begin{itemize}
\item Slower estimation
\end{itemize}
\end{frame}
\section{Motivation for bamlss.vis}
\begin{frame}{Motivation for bamlss.vis}
\textbf{Problem} \\
\begin{itemize}
\item Often, distribution parameters $\theta_l$ do not directly equate to $E(y)$ or $\operatorname{Var}(y)$
\pause
\item Therefore, it is hard to assess the influence of covariates on the moments because:
\begin{enumerate}
\item Link function $g_l(\cdot)$ transforms effects
\item Transformed effects are for parameters $\theta_l$, which are often not directly moments
\end{enumerate}
\end{itemize}
\end{frame}
% Slide: censored-normal example in two columns. The left column explains that
% the distribution parameters are not its moments; the right column shows the
% density plot with the expected value marked.
\begin{frame}{Motivation for bamlss.vis}
  \textbf{An example} \\
  Consider the censored normal distribution $y^* \sim CN(\mu = 0, \sigma^2 = 1)$ with cut-off point $a = 0$. \\[0.5cm]
  \begin{columns}[T, onlytextwidth]
    \column{0.5\textwidth}
    \textbf{The Problem} \\
    \begin{itemize}
      \item Blue line depicts the expected value
      \item Parameters $\mu$ and $\sigma^2$ are not the first two moments of the CN distribution!
    \end{itemize}
    % The hat belongs on sigma only; the square is applied to the estimate.
    $\Rightarrow$ Any predicted parameters $\hat{\mu}$ and $\hat{\sigma}^2$ need transformation to $E(y^*) / V(y^*)$
    \column{0.5\textwidth}
    \begin{figure}
      \centering % \centering is a declaration, not an environment
      \includegraphics[scale = 0.65]{images/030_cnorm_plot.pdf}
      \caption{PDF of CN with expected value as blue line.}
      \label{cnorm_plot}
    \end{figure}
  \end{columns}
\end{frame}
\begin{frame}[t]{Motivation for bamlss.vis}
\textbf{Solution} \\
\begin{itemize}
\item Thus: Package needed which
\begin{enumerate}
\item Makes it easy to graphically display and compare predicted distributions
\item Displays the influence of a covariate on the distributional moments
\end{enumerate}
\item $\Rightarrow$ \li{bamlss.vis} was born, solving these problems in R with a Shiny App.
\end{itemize}
\end{frame}
\section{Case-Study}
\begin{frame}{Case-Study}
$\Rightarrow$ Use real data to illustrate \li{bamlss.vis}' capabilities \\[0.5cm]
\textbf{The Data}
\begin{itemize}
\item \li{Wage} dataset, by \citet*{uscensus}
\item Depicts yearly income in 1000\$ of males from the US East Coast based on:
\begin{enumerate}
\item \textbf{age}
\item \textbf{year}
\item \textbf{race}
\item \textbf{education}
\item \textbf{health}
\end{enumerate}
\end{itemize}
\end{frame}
% Slide: first look at the wage data, showing density estimates by education
% level and the resulting modelling decision.
\begin{frame}{Case-Study}
  \textbf{First look}
  \begin{figure}
    \centering % \centering is a declaration, not an environment
    \includegraphics[scale = 0.55]{images/030_wage_plot.pdf}
    \caption{Gaussian kernel density estimates for wages split up by education level.}
    \label{wage_density}
  \end{figure}
  $\Rightarrow$ Model both $\mu$ and $\sigma^2$ depending on education level
\end{frame}
\begin{frame}[fragile]{Case-Study}
\textbf{The model}
\begin{lstlisting}[caption = R code for fitting the bamlss based on Wage dataset, label = bamlss_fitting]
cnorm_model <- bamlss(
list(wage ~ s(age) + race + year + education + health,
sigma ~ s(age) + race + year + education + health),
data = wage_sub,
family = cnorm_bamlss()
)
\end{lstlisting}
\end{frame}
\section{bamlss.vis}
\begin{frame}{bamlss.vis}
\centering
Let's start up \li{bamlss.vis}!
\end{frame}
\begin{frame}[fragile]{bamlss.vis}
\textbf{Installation} \\
You can install \li{bamlss.vis} today! Run the following code:
\begin{lstlisting}
if (!require(devtools))
install.packages("devtools")
devtools::install_github("Stan125/bamlss.vis")
\end{lstlisting}
\end{frame}
\begin{frame}{Thanks!}
\centering
Thanks for your attention!
\end{frame}
\appendix
\begin{frame}[allowframebreaks]{References}
\bibliography{bibliography}
\bibliographystyle{plainnat}
\end{frame}
\end{document}