-
Notifications
You must be signed in to change notification settings - Fork 1
/
variance_structures.Rmd
306 lines (255 loc) · 15.1 KB
/
variance_structures.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
---
title: "Variance Structures"
output:
html_document:
includes:
after_body: footer.html
---
```{r, echo=FALSE, warning=FALSE, message=FALSE}
options(knitr.kable.NA = '')
# packages for better formatting tables for html output
library(kableExtra)
library(formattable)
library(htmltools)
pacman::p_load(dplyr, purrr, tibble, tidyr, # data handling
ggplot2, viridis, # plot
nlme, lme4, glmmTMB, sommer, # mixed modelling
AICcmodavg, broom.mixed) # mixed model extractions
{
dat <- agridat::mcconway.turnip %>%
as_tibble() %>%
mutate(densf = density %>% as.factor) %>%
mutate(date_densf = interaction(date, densf)) # needed for mod5
mod1.nlme <- nlme::lme(fixed = yield ~ gen * date * densf,
random = ~ 1|block,
weights = NULL, # default, i.e. homoscedastic errors
data = dat)
mod4.nlme <- mod1.nlme %>%
update(weights = varComb(varIdent(form=~1|date),
varIdent(form=~1|densf)))
mod5.nlme <- mod1.nlme %>%
update(weights = varIdent(form=~1|date_densf))
mod1.nlme.VC <- tibble(grp="homoscedastic", varStruct=1) %>%
mutate(sigma = mod1.nlme$sigma) %>%
mutate(StandardError = sigma*varStruct) %>%
mutate(Variance = StandardError^2)
mod4.nlme.varStruct.A <- mod4.nlme$modelStruct$varStruct$A %>%
coef(unconstrained=FALSE, allCoef=TRUE) %>%
enframe(name="grpA", value="varStructA")
mod4.nlme.varStruct.B <- mod4.nlme$modelStruct$varStruct$B %>%
coef(unconstrained=FALSE, allCoef=TRUE) %>%
enframe(name="grpB", value="varStructB")
mod4.nlme.VC <- expand.grid(mod4.nlme.varStruct.A$grpA,
mod4.nlme.varStruct.B$grpB,
stringsAsFactors=FALSE) %>%
rename(grpA=Var1, grpB=Var2) %>%
left_join(x=., y=mod4.nlme.varStruct.A, by="grpA") %>%
left_join(x=., y=mod4.nlme.varStruct.B, by="grpB") %>%
mutate(sigma = mod4.nlme$sigma) %>%
mutate(StandardError = sigma*varStructA*varStructB) %>%
mutate(Variance = StandardError^2)
mod5.nlme.VC <- mod5.nlme$modelStruct$varStruct %>%
coef(unconstrained=FALSE, allCoef=TRUE) %>%
enframe(name="grp", value="varStruct") %>%
separate(grp, sep="[.]", into=c("grpA","grpB")) %>%
mutate(sigma = mod5.nlme$sigma) %>%
mutate(StandardError = sigma*varStruct) %>%
mutate(Variance = StandardError^2)
} # get nlme VC from het diag example
setup <- dat %>%
filter(block=="B1") %>%
arrange(date, density) %>%
dplyr::select(-yield) %>%
mutate(label=paste0(date_densf,".",block),
nID=1:n())
{
mat1 <- left_join(x = setup %>% mutate(grp="homoscedastic"),
y = mod1.nlme.VC %>% dplyr::select(-StandardError) ,
by = c("grp"="grp")) %>%
left_join(x = expand.grid(X=.$nID, Y=.$nID),
y = .,
by = c("X"="nID")) %>% as_tibble() %>%
mutate(Variance = case_when(X==Y~Variance, TRUE~NA_real_))
mat4 <- left_join(x = setup,
y = mod4.nlme.VC %>% dplyr::select(-StandardError),
by = c("date"="grpA", "densf"="grpB")) %>%
left_join(x = expand.grid(X=.$nID, Y=.$nID),
y = .,
by = c("X"="nID")) %>% as_tibble() %>%
mutate(Variance = case_when(X==Y~Variance, TRUE~NA_real_))
mat5 <- left_join(x = setup,
y = mod5.nlme.VC %>% dplyr::select(-StandardError),
by = c("date"="grpA", "densf"="grpB")) %>%
left_join(x = expand.grid(X=.$nID, Y=.$nID),
y = .,
by = c("X"="nID")) %>% as_tibble() %>%
mutate(Variance = case_when(X==Y~Variance, TRUE~NA_real_))
} # get mat1 - mat5
```
Here you will find a list of variance structures (*a.k.a* covariance structures, variance-covariance-structures, correlation structures) with short explanations and possibly examples.
# IID - Independent & Identically Distributed
**This covariance structure has homogeneous variances and zero correlation between elements.**
Number of parameters = 1 (*i.e.* $\sigma$)
$$
\left(\begin{array}{cc}
\sigma^2 & 0 & 0 & 0\\
& \sigma^2 & 0 & 0\\
& & \sigma^2 & 0\\
& & & \sigma^2\\
\end{array}\right) =
\left(\begin{array}{cc}
1 & 0 & 0 & 0\\
& 1 & 0 & 0\\
& & 1 & 0\\
& & & 1\\
\end{array}\right) \sigma^2
$$
This is the simplest covariance structure and the default setting in most, if not all, linear modelling packages.
# Diagonal
**This covariance structure has heterogenous variances and zero correlation between elements.**
Number of parameters: $t$ (*i.e.* $\sigma_1$, $\sigma_2$, ..., $\sigma_t$), which is the overall dimension of the covariance matrix (*e.g.* number of treatement levels).
$$
\left(\begin{array}{cc}
\sigma_1^2 & 0 & 0 & 0\\
& \sigma_2^2 & 0 & 0\\
& & \sigma_3^2 & 0\\
& & & \sigma_4^2\\
\end{array}\right) =
\left(\begin{array}{cc}
1 & 0 & 0 & 0\\
& k_1 & 0 & 0\\
& & k_2 & 0\\
& & & k_3\\
\end{array}\right) \sigma^2
$$
In our [chapter on heterogeneous error variances](heterogeneous_error_variance.html){target="_blank"}, we fit models in which we allow for different error variances for two of the treatments. Thus, the off-diagonals are all 0, but there are multiple variances on the diagonal. More specifically, in `mod5` we allows for 8 different error variances - one for each factor-level-combination of the respective factor effects `date` and `density`. This is visualized in the plot below as 8 different colors. Speaking in the [syntax of `nlme`](https://cran.r-project.org/web/packages/nlme/nlme.pdf#Rfn.nlmeStruct.1){target="_blank"}, we obtain 8 parameter estimates: 1 estimate for the [model-object's `sigma`](https://cran.r-project.org/web/packages/nlme/nlme.pdf#Rfn.nlmeObject.1){target="_blank"} (= standard deviation for error term) and 7 estimates (that are different from 1) in the [model-object's `varStruct`](https://cran.r-project.org/web/packages/nlme/nlme.pdf#Rfn.nlmeStruct.1){target="_blank"} as can be seen on the y-axis:
```{r, echo=FALSE}
MAT <- mat5
labs <- MAT %>%
dplyr::select(X, date_densf, gen, varStruct) %>% unique
ggplot(data=MAT, aes(x=X, y=Y)) + coord_fixed() +
geom_tile(aes( fill=Variance), color="grey") +
scale_fill_viridis(option="D", na.value="white",
guide ="legend", name="Variance\nEstimate",
breaks = MAT %>% pull(Variance) %>% unique %>% sort,
labels = scales::number_format(accuracy = 0.01)) +
scale_x_continuous(name = "date-density-combination",
expand = c(0,0),
breaks = labs %>% pull(X),
labels = labs %>% pull(date_densf),
sec.axis = sec_axis(~ .,
name = "genotype",
breaks = labs %>% pull(X),
labels = labs %>% pull(gen))) +
scale_y_continuous(name = "VarStruct date-density-combination",
trans = "reverse",
expand = c(0,0),
breaks = labs %>% pull(X),
labels = labs %>% pull(varStruct) %>% round(2)) +
theme(legend.text.align = 1,
axis.ticks.x = element_line(color="grey"),
axis.title.x = element_text(color="grey"),
axis.text.x = element_text(angle=90, hjust=1, color="grey"),
legend.position = "right")
```
> In order to give a clearer picture, the variance matrix presented here was reduced to data of a single block in order to have dimensions 16x16. Since there were [4 complete blocks in the dataset](heterogeneous_error_variance.html){target="_blank"}, the entire variance matrix of the error term has dimensions 64x64. However, given that data/errors are sorted accordingly, our presented matrix is simply 1 out of 4 blocks in a [block diagonal matrix](https://www.wikiwand.com/en/Block_matrix#/Block_diagonal_matrices){target="_blank"}.
# First order autoregressive AR(1)
**This covariance structure has homogeneous variances, while the correlation between any two elements gets smaller the further apart they are separated (e.g. in terms of time or space).**
Number of parameters: 2 (*i.e.* $\sigma$ and $\rho$).
$$
\left(\begin{array}{cc}
\sigma^2 & \sigma^2\rho & \sigma^2\rho^2 & \sigma^2\rho^3\\
& \sigma^2 & \sigma^2\rho & \sigma^2\rho^2\\
& & \sigma^2 & \sigma^2\rho\\
& & & \sigma^2\\
\end{array}\right) =
\left(\begin{array}{cc}
1 & \rho & \rho^2 & \rho^3\\
& 1 & \rho & \rho^2\\
& & 1 & \rho\\
& & & 1\\
\end{array}\right) \sigma^2
$$
As can be seen, the correlation between any two elements can be described more speficically as: it is equal to $\rho$ for adjacent elements, $\rho^2$ for elements that are separated by a third, and so on. Note that since it is a correlation, we have -1 < $\rho$ < 1 and therefore $\rho$ indeed gets smaller when squared etc. This correlation model is useful, if all time points are equally spaced.
As an explicit example, take the correlation between errors of two adjacent time points, in this case weeks, to be $\rho=0.9$. The correlation between errors that are two weeks apart is then $\rho^2=0.9^2=0.81$ and when they are three weeks apart it is $\rho^3=0.9^3=0.729$ and so on. Thus, the advantage here is that correlations become smaller over time and thus we have different correlation estimates, yet only a single correlation parameter $\rho$ is fitted in the model.
# Multiplicative
**It is possible to combine any two or more variance structures via direct multiplication *a.k.a.* the [Kronecker product](https://www.wikiwand.com/en/Kronecker_product){target="_blank"}.**
$$
\left(\begin{array}{cc}
1 & 0 \\
& k_1 \\
\end{array}\right)
\otimes
\left(\begin{array}{cc}
1 & 0 & 0 \\
& k_2 & 0 \\
& & k_3 \\
\end{array}\right) \sigma^2
=
\left(\begin{array}{cc}
1\cdot1 & 0 & 0 & 0 & 0 & 0 \\
& 1\cdot k_2 & 0 & 0 & 0 & 0 \\
& & 1\cdot k_3 & 0 & 0 & 0 \\
& & & k_1\cdot 1 & 0 & 0 \\
& & & & k_1\cdot k_2 & 0 \\
& & & & & k_1\cdot k_3 \\
\end{array}\right) \sigma^2
$$
This operation on two matrices of arbitrary size resulting in a block matrix is sometimes denoted by $\otimes$. To give an example, we refer to `mod4` in the [chapter on heterogeneous error variances](heterogeneous_error_variance.html){target="_blank"}. Here, a multiplicative variance structure results from the kronecker product of two diagonal variance structures. The first diagonal variance structure allows for different variances for the 2 levels of `date`, while the second diagonal variance structure allows for different variances for the 4 levels of `density`. Their Kronecker product therefore results in 8 different variances, visualized in the plot below as 8 different colors.
```{r, echo=FALSE}
MAT <- mat4
labs <- MAT %>%
dplyr::select(X, date_densf, gen, varStructA, varStructB) %>% unique
ggplot(data=MAT, aes(x=X, y=Y)) + coord_fixed() +
geom_tile(aes( fill=Variance), color="grey") +
scale_fill_viridis(option="D", na.value="white",
guide ="legend", name="Variance\nEstimate",
breaks = MAT %>% pull(Variance) %>% unique %>% sort,
labels = scales::number_format(accuracy = 0.01)) +
scale_x_continuous(name = "date-density-combination",
expand = c(0,0),
breaks = labs %>% pull(X),
labels = labs %>% pull(date_densf),
sec.axis = sec_axis(~ .,
name = "genotype",
breaks = labs %>% pull(X),
labels = labs %>% pull(gen))) +
scale_y_continuous(name = "VarStruct density",
trans = "reverse",
expand = c(0,0),
breaks = labs %>% pull(X),
labels = labs %>% pull(varStructB) %>% round(2),
sec.axis = sec_axis(~ .,
name = "VarStruct date",
breaks = labs %>% pull(X),
labels = labs %>% pull(varStructA) %>% round(2))) +
theme(legend.text.align = 1,
axis.ticks.x = element_line(color="grey"),
axis.title.x = element_text(color="grey"),
axis.text.x = element_text(angle=90, hjust=1, color="grey"),
legend.position = "right")
```
## Advantage
One may now ask where the difference lies between this multiplicative variance structure for `mod4` on the one hand, and the simple diagonal variance structure for all 8 `date`-`density`-combinations in `mod5` (see [diagonal section above](#Diagonal)) on the other hand. The question comes intuitively, since both lead to obtaining 8 different variance estimates for the error term. However, while the combinations for which the 8 estimates are obtained are the same, the estimates themselves are different between `mod4` and `mod5`. In order to understand this, one must realize that fewer parameters need to be estimated here for `mod4` (= 6 parameters) compared to the simple diagonal variance structure for [`mod5`](#Diagonal) (= 8 parameters) - even though both result in 8 different variance estimates! One can retrace this manually by counting the number of `varStruct` values on the y-axes of the two plots. There should be 5 values for `mod4` and 7 values for `mod5` **that are not equal to 1** and in addition, [`sigma`](https://cran.r-project.org/web/packages/nlme/nlme.pdf#Rfn.nlmeObject.1){target="_blank"} (= standard deviation for error term) itself is the *missing* parameter here.
Therefore, **direct multiplication can lead to the desired structure with fewer parameters needing to be estimated**. Notice that the number of parameters penalizes the AIC and therefore has a direct impact on model selection decisions. In the underlying [chapter on heterogeneous error variances](heterogeneous_error_variance.html){target="_blank"}, `mod4` (= multiplicative) is indeed chosen over `mod5` based on the AIC.
# Summary
```{r, echo=FALSE}
tribble(
~`Variance Structure`, ~total, ~var, ~cor,~nlme, ~lme4, ~glmmTMB, ~sommer, ~SAS,
"Identitiy", "1", "1", "0" ,"default" ,"default","default","default","VC",
"Diagonal", "t", "t", "0" ,"varIdent", "--", "diag", "ds","UN(1)",
"First order autoregressive ", "2", "1", "1" ,"corAR1" , "--", "ar1", "AR1","AR(1)"
) %>%
kable(escape = FALSE) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = FALSE) %>%
add_header_above(c(" ",
"number of parameters"=3,
"name in package"=5)) %>%
footnote(general = "t = overall dimension of the covariance matrix (e.g. number of treatement levels).")
```
# More on this
[ASReml-R documentation](https://asreml.kb.vsni.co.uk/wp-content/uploads/sites/3/2018/02/ASReml-R-Reference-Manual-4.pdf#section.4.2){target="_blank"}
[SAS documentation](https://documentation.sas.com/?docsetId=statug&docsetTarget=statug_mixed_syntax14.htm&docsetVersion=14.3&locale=en#statug.mixed.repeatedstmt_type){target="_blank"}
[SPSS documentation](https://www.ibm.com/support/knowledgecenter/SSLVMB_23.0.0/spss/advanced/covariance_structures.html){target="_blank"}
<span style="color:red">in progress</span>