work on compare pairs

amrcode1 · Nov 3, 2017 · 4d7aa70 · 4d7aa70
1 parent f31c3cc
commit 4d7aa70
Show file tree

Hide file tree

Showing 3 changed files with 93 additions and 51 deletions.
diff --git a/R/dplyr.R b/R/dplyr.R
@@ -83,7 +83,7 @@ compare_pairs <- function(data, levels, values, f = `-`) {
     create_pairs()
 
   wide <- data %>%
-    tidyr::spread(data, !! levels, !! values)
+    tidyr::spread(key = !! levels, value = !! values)
 
   for (row_i in seq_len(nrow(pairs))) {
     pair_i <- pairs[row_i, ]

diff --git a/README.Rmd b/README.Rmd
@@ -109,50 +109,80 @@ to_compare %>%
   mutate_if(is.numeric, round, 1)
 ```
 
-<!-- I use it to compute posterior differences in Bayesian models. For example, let's -->
-<!-- fit a Bayesian model of average sepal length for each species in `iris`. -->
-
-<!-- ```{r, results = "hide"} -->
-<!-- library(rstanarm) -->
-<!-- m <- stan_glm( -->
-<!--   Sepal.Length ~ Species - 1, -->
-<!--   iris, -->
-<!--   family = gaussian, -->
-<!--   prior = normal(0, 1), -->
-<!--   prior_intercept = normal(0, 1)) -->
-<!-- ``` -->
-
-<!-- Now, we have a posterior distributions of species means. -->
-
-<!-- ```{r} -->
-<!-- newdata <- data.frame(Species = unique(iris$Species)) -->
-
-<!-- p_means <- posterior_linpred(m, newdata = newdata) %>% -->
-<!--   as.data.frame() %>% -->
-<!--   tibble::as_tibble() %>% -->
-<!--   setNames(newdata$Species) %>% -->
-<!--   tibble::rowid_to_column("draw") %>% -->
-<!--   tidyr::gather(species, mean, -draw) %>% -->
-<!--   print() -->
-<!-- ``` -->
-
-<!-- For each posterior sample, we can compute pairwise differences of means. -->
-
-<!-- ```{r pairs, fig.width = 4, fig.height = 2.5} -->
-<!-- pair_diffs <- compare_pairs(data, species, mean) %>% -->
-<!--   print() -->
-
-<!-- library(ggplot2) -->
-
-<!-- ggplot(pair_diffs) + -->
-<!--   aes(x = pair, y = value) + -->
-<!--   stat_summary(fun.data = median_hilow, geom = "linerange") + -->
-<!--   stat_summary(fun.data = median_hilow, fun.args = list(conf.int = .8), -->
-<!--                size = 2, geom = "linerange") + -->
-<!--   stat_summary(fun.y = median, size = 5, shape = 3, geom = "point") + -->
-<!--   labs(x = NULL, y = "Difference in posterior means") + -->
-<!--   coord_flip() -->
-<!-- ``` -->
+I have used it to compute posterior differences in Bayesian models. For the sake
+of example, let's fit a Bayesian model of average sepal length for each species
+in `iris`.
+
+```{r, results = "hide"}
+library(rstanarm)
+m <- stan_glm(
+  Sepal.Length ~ Species - 1,
+  iris,
+  family = gaussian,
+  prior = normal(0, 1),
+  prior_intercept = normal(0, 1))
+```
+
+Now, we have a posterior distribution of species means.
+
+```{r}
+newdata <- data.frame(Species = unique(iris$Species))
+
+p_means <- posterior_linpred(m, newdata = newdata) %>%
+  as.data.frame() %>%
+  tibble::as_tibble() %>%
+  setNames(newdata$Species) %>%
+  tibble::rowid_to_column("draw") %>%
+  tidyr::gather(species, mean, -draw) %>%
+  print()
+```
+
+For each posterior sample, we can compute pairwise differences of means with
+`compare_means()`.
+
+```{r pairs, fig.width = 4, fig.height = 2.5}
+pair_diffs <- compare_pairs(p_means, species, mean) %>%
+  print()
+
+library(ggplot2)
+
+ggplot(pair_diffs) +
+  aes(x = pair, y = value) +
+  stat_summary(fun.data = median_hilow, geom = "linerange") +
+  stat_summary(fun.data = median_hilow, fun.args = list(conf.int = .8),
+               size = 2, geom = "linerange") +
+  stat_summary(fun.y = median, size = 5, shape = 3, geom = "point") +
+  labs(x = NULL, y = "Difference in posterior means") +
+  coord_flip()
+```
+
+...which should look like the effect ranges in the dummy-coded models.
+
+```{r, results = "hide"}
+m2 <- update(m, Sepal.Length ~ Species)
+m3 <- update(m, Sepal.Length ~ Species, 
+             data = iris %>% mutate(Species = forcats::fct_rev(Species)))
+```
+
+```{r}
+# setosa verus others
+m2 %>% 
+  posterior_interval(regex_pars = "Species") %>% 
+  round(2)
+
+# virginica versus others
+m3 %>% 
+  rstanarm::posterior_interval(regex_pars = "Species") %>% 
+  round(2)
+
+# differences from compare_pairs()
+pair_diffs %>% 
+  tidyr::spread(pair, value) %>% 
+  select(-draw) %>% 
+  as.matrix() %>% 
+  posterior_interval() %>% 
+  round(2)
+```
 
 
 ### Et cetera

diff --git a/tests/testthat/test-dplyr.R b/tests/testthat/test-dplyr.R
@@ -47,16 +47,29 @@ test_that("sample_n_of() samples n rows if no groups given", {
 
 
 test_that("compare_pairs() calculates differences in pairs", {
-  testthat::skip("test not finished")
   means <- iris %>%
     dplyr::group_by(Species) %>%
-    summarise(Sepal.Length = mean(Sepal.Length))
-
+    dplyr::summarise(Sepal.Length = mean(Sepal.Length))
 
   result <- compare_pairs(means, Species, Sepal.Length)
 
   by_hand <- tapply(iris$Sepal.Length, iris$Species, mean, simplify = FALSE)
 
+  pairs <- result$pair %>%
+    as.character() %>%
+    strsplit("-") %>%
+    setNames(result$pair)
+
+  for (pair in names(pairs)) {
+    x1 <- pairs[[pair]][1]
+    x2 <- pairs[[pair]][2]
+    by_hand_diff <- by_hand[[x1]] - by_hand[[x2]]
+    diff <- result[result$pair == pair, "value"] %>%
+      unlist(use.names = FALSE) %>%
+      expect_equal(by_hand_diff)
+  }
+
+  # Check pair names
   make_pairs_hard_way <- function(xs) {
     indices <- seq_along(xs)
     heads <- rev(seq_along(xs)[-1])
@@ -67,8 +80,7 @@ test_that("compare_pairs() calculates differences in pairs", {
     }
     results
   }
-  pairs <- make_pairs_hard_way(names(by_hand))
-
-  result
 
+  pair_names <- make_pairs_hard_way(names(by_hand))
+  expect_true(all(pair_names %in% as.character(result$pair)))
 })