Skip to content

Commit

Permalink
work on compare pairs
Browse files Browse the repository at this point in the history
  • Loading branch information
tjmahr committed Nov 3, 2017
1 parent f31c3cc commit 4d7aa70
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 51 deletions.
2 changes: 1 addition & 1 deletion R/dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ compare_pairs <- function(data, levels, values, f = `-`) {
create_pairs()

wide <- data %>%
tidyr::spread(data, !! levels, !! values)
tidyr::spread(key = !! levels, value = !! values)

for (row_i in seq_len(nrow(pairs))) {
pair_i <- pairs[row_i, ]
Expand Down
118 changes: 74 additions & 44 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -109,50 +109,80 @@ to_compare %>%
mutate_if(is.numeric, round, 1)
```

<!-- I use it to compute posterior differences in Bayesian models. For example, let's -->
<!-- fit a Bayesian model of average sepal length for each species in `iris`. -->

<!-- ```{r, results = "hide"} -->
<!-- library(rstanarm) -->
<!-- m <- stan_glm( -->
<!-- Sepal.Length ~ Species - 1, -->
<!-- iris, -->
<!-- family = gaussian, -->
<!-- prior = normal(0, 1), -->
<!-- prior_intercept = normal(0, 1)) -->
<!-- ``` -->

<!-- Now, we have a posterior distributions of species means. -->

<!-- ```{r} -->
<!-- newdata <- data.frame(Species = unique(iris$Species)) -->

<!-- p_means <- posterior_linpred(m, newdata = newdata) %>% -->
<!-- as.data.frame() %>% -->
<!-- tibble::as_tibble() %>% -->
<!-- setNames(newdata$Species) %>% -->
<!-- tibble::rowid_to_column("draw") %>% -->
<!-- tidyr::gather(species, mean, -draw) %>% -->
<!-- print() -->
<!-- ``` -->

<!-- For each posterior sample, we can compute pairwise differences of means. -->

<!-- ```{r pairs, fig.width = 4, fig.height = 2.5} -->
<!-- pair_diffs <- compare_pairs(data, species, mean) %>% -->
<!-- print() -->

<!-- library(ggplot2) -->

<!-- ggplot(pair_diffs) + -->
<!-- aes(x = pair, y = value) + -->
<!-- stat_summary(fun.data = median_hilow, geom = "linerange") + -->
<!-- stat_summary(fun.data = median_hilow, fun.args = list(conf.int = .8), -->
<!-- size = 2, geom = "linerange") + -->
<!-- stat_summary(fun.y = median, size = 5, shape = 3, geom = "point") + -->
<!-- labs(x = NULL, y = "Difference in posterior means") + -->
<!-- coord_flip() -->
<!-- ``` -->
I have used it to compute posterior differences in Bayesian models. For the sake
of example, let's fit a Bayesian model of average sepal length for each species
in `iris`.

```{r, results = "hide"}
library(rstanarm)
m <- stan_glm(
Sepal.Length ~ Species - 1,
iris,
family = gaussian,
prior = normal(0, 1),
prior_intercept = normal(0, 1))
```

Now, we have a posterior distribution of species means.

```{r}
newdata <- data.frame(Species = unique(iris$Species))
p_means <- posterior_linpred(m, newdata = newdata) %>%
as.data.frame() %>%
tibble::as_tibble() %>%
setNames(newdata$Species) %>%
tibble::rowid_to_column("draw") %>%
tidyr::gather(species, mean, -draw) %>%
print()
```

For each posterior sample, we can compute pairwise differences of means with
`compare_means()`.

```{r pairs, fig.width = 4, fig.height = 2.5}
pair_diffs <- compare_pairs(p_means, species, mean) %>%
print()
library(ggplot2)
ggplot(pair_diffs) +
aes(x = pair, y = value) +
stat_summary(fun.data = median_hilow, geom = "linerange") +
stat_summary(fun.data = median_hilow, fun.args = list(conf.int = .8),
size = 2, geom = "linerange") +
stat_summary(fun.y = median, size = 5, shape = 3, geom = "point") +
labs(x = NULL, y = "Difference in posterior means") +
coord_flip()
```

...which should look like the effect ranges in the dummy-coded models.

```{r, results = "hide"}
m2 <- update(m, Sepal.Length ~ Species)
m3 <- update(m, Sepal.Length ~ Species,
data = iris %>% mutate(Species = forcats::fct_rev(Species)))
```

```{r}
# setosa verus others
m2 %>%
posterior_interval(regex_pars = "Species") %>%
round(2)
# virginica versus others
m3 %>%
rstanarm::posterior_interval(regex_pars = "Species") %>%
round(2)
# differences from compare_pairs()
pair_diffs %>%
tidyr::spread(pair, value) %>%
select(-draw) %>%
as.matrix() %>%
posterior_interval() %>%
round(2)
```


### Et cetera
Expand Down
24 changes: 18 additions & 6 deletions tests/testthat/test-dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,29 @@ test_that("sample_n_of() samples n rows if no groups given", {


test_that("compare_pairs() calculates differences in pairs", {
testthat::skip("test not finished")
means <- iris %>%
dplyr::group_by(Species) %>%
summarise(Sepal.Length = mean(Sepal.Length))

dplyr::summarise(Sepal.Length = mean(Sepal.Length))

result <- compare_pairs(means, Species, Sepal.Length)

by_hand <- tapply(iris$Sepal.Length, iris$Species, mean, simplify = FALSE)

pairs <- result$pair %>%
as.character() %>%
strsplit("-") %>%
setNames(result$pair)

for (pair in names(pairs)) {
x1 <- pairs[[pair]][1]
x2 <- pairs[[pair]][2]
by_hand_diff <- by_hand[[x1]] - by_hand[[x2]]
diff <- result[result$pair == pair, "value"] %>%
unlist(use.names = FALSE) %>%
expect_equal(by_hand_diff)
}

# Check pair names
make_pairs_hard_way <- function(xs) {
indices <- seq_along(xs)
heads <- rev(seq_along(xs)[-1])
Expand All @@ -67,8 +80,7 @@ test_that("compare_pairs() calculates differences in pairs", {
}
results
}
pairs <- make_pairs_hard_way(names(by_hand))

result

pair_names <- make_pairs_hard_way(names(by_hand))
expect_true(all(pair_names %in% as.character(result$pair)))
})

0 comments on commit 4d7aa70

Please sign in to comment.