-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
97 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#' Normal No-Pooling model | ||
#' | ||
#' This function implements the Normal No-Pooling model from \cite{AA}. | ||
#' @param data \code{data.frame} | ||
#' @param ln Unquoted variable indicating the nominal list (number of potential | ||
#' voters) at each polling station. | ||
#' @param stratum Unquoted variable indicating the stratum for each polling | ||
#' station. | ||
#' @param data_stratum Data frame with stratum variable (named exactly as in | ||
#' \code{data}) and number of polling stations per strata. | ||
#' @param n_stratum Unquoted variable indicating the number of polling stations | ||
#' in each stratum. | ||
#' @param ... Unquoted variables indicating the number of votes in each polling | ||
#' station for each candidate. | ||
#' @param std_errors Logical value indicating whether to compute standard errors | ||
#' (using bootstrap), defaults to TRUE. | ||
#' @param B Number of bootstrap replicates used to compute standard errors, | ||
#' defaults to 50. | ||
#' @param seed integer value used to set the state of the random number | ||
#' generator (optional). It will only be used when computing standard errors. | ||
#' @return A list with two entries: | ||
#' \enumerate{ | ||
#' \item lambdas_summary \code{data.frame} including posterior mean, median, | ||
#' standard error, and quantiles (0.025 and 0.975) for each party. | ||
#' \item lamdas_sim \code{data.frame} with simulations for each party. | ||
#' } | ||
#' @examples | ||
#' # count number of polling stations per stratum | ||
#' gto_stratum_sizes <- gto_2012 %>% | ||
#' dplyr::group_by(distrito_loc_17) %>% | ||
#' dplyr::summarise(n_stratum = n()) | ||
#' gto_sample <- select_sample_prop(gto_2012, stratum = distrito_loc_17, 0.06) | ||
#' gto_nnp <- nnp_estimation(data = gto_sample, ln = ln_total, | ||
#' stratum = distrito_loc_17, | ||
#' data_stratum = gto_stratum_sizes, n_stratum = n_stratum, | ||
#' pri_pvem:otros, n_sims = 100) | ||
#' gto_nnp$lambdas_summary | ||
#' @importFrom magrittr %>% | ||
#' @importFrom rlang !! !!! := | ||
#' @export | ||
nnp_estimation <- function(data, ln, stratum, data_stratum, n_stratum, | ||
..., n_sims = 1000){ | ||
stratum_enquo <- dplyr::enquo(stratum) | ||
n_stratum_enquo <- dplyr::enquo(n_stratum) | ||
ln_enquo <- dplyr::enquo(ln) | ||
parties_enquo <- dplyr::quos(...) | ||
data_long <- data %>% | ||
dplyr::mutate(casilla_id = 1:dplyr::n()) %>% | ||
dplyr::rename(strata = !!stratum_enquo, ln_total = !!ln_enquo) %>% | ||
dplyr::select(casilla_id, strata, ln_total, !!!parties_enquo) %>% | ||
tidyr::gather(party, votes, !!!parties_enquo) | ||
data_stratum <- data_stratum %>% | ||
dplyr::rename(strata = !!stratum_enquo, n_strata = !!n_stratum_enquo) | ||
theta_sims <- data_long %>% | ||
split(list(.$strata, .$party)) %>% | ||
purrr::map_df(~sims_posterior(x_j = .$votes, n = .$ln_total, | ||
n_sims = n_sims)$theta) %>% | ||
dplyr::mutate(n_sim = 1:dplyr::n()) %>% | ||
tidyr::gather(estrato_partido, theta, -n_sim) %>% | ||
tidyr::separate(estrato_partido, into = c("strata", "party"), | ||
sep = "[.]") %>% | ||
dplyr::mutate(strata = as.numeric(strata)) %>% | ||
dplyr::left_join(data_stratum, by = "strata") | ||
lambdas <- theta_sims %>% | ||
dplyr::group_by(party, n_sim) %>% | ||
dplyr::summarise(theta_wgt = sum(n_strata * theta / sum(n_strata))) %>% | ||
dplyr::group_by(n_sim) %>% | ||
dplyr::mutate(lambda = 100 * theta_wgt / sum(theta_wgt)) %>% | ||
dplyr::ungroup() %>% | ||
dplyr::select(-theta_wgt) | ||
|
||
lambdas_summary <- lambdas %>% | ||
dplyr::group_by(party) %>% | ||
dplyr::summarise( | ||
mean_post = mean(lambda), | ||
median_post = median(lambda), | ||
std_error = sd(lambda), | ||
q_low = quantile(lambda, 0.025), | ||
q_sup = quantile(lambda, 0.975) | ||
) %>% | ||
dplyr::ungroup() | ||
return(list(lambdas_summary = lambdas_summary, lambdas_sim = lambdas)) | ||
} | ||
sims_posterior <- function(x_j, n, n_sims = 200){ | ||
if(length(n) < 2){ | ||
theta_sims <- runif(n_sims) | ||
return(list(theta = theta_sims, tau = NA)) | ||
} | ||
a_gamma <- (length(n) - 1) / 2 | ||
b_gamma <- 1/ 2 * (sum(x_j ^ 2 / n) - sum(x_j) ^ 2 / sum(n)) | ||
b_gamma <- ifelse(b_gamma == 0, 0.05, b_gamma) | ||
tau_sims <- rgamma(n_sims, shape = a_gamma, rate = b_gamma) | ||
media_normal <- sum(x_j) / sum(n) | ||
desv_normal <- purrr::map_dbl(tau_sims, ~ sqrt(1 / (. * sum(n)))) | ||
theta_sims <- purrr::map_dbl(desv_normal, ~MCMCglmm::rtnorm(1, media_normal, ., 0, 1)) | ||
return(list(theta = theta_sims, tau = tau_sims)) | ||
} |