Skip to content

Commit

Permalink
Add model from Nieto Mendoza
Browse files Browse the repository at this point in the history
  • Loading branch information
tereom committed Jan 24, 2019
1 parent 3e16120 commit f3f902f
Showing 1 changed file with 97 additions and 0 deletions.
97 changes: 97 additions & 0 deletions R/nnp_estimation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#' Normal No-Pooling model
#'
#' This function implements the Normal No-Pooling model from \cite{AA}.
#' @param data \code{data.frame}
#' @param ln Unquoted variable indicating the nominal list (number of potential
#' voters) at each polling station.
#' @param stratum Unquoted variable indicating the stratum for each polling
#' station.
#' @param data_stratum Data frame with stratum variable (named exactly as in
#' \code{data}) and number of polling stations per strata.
#' @param n_stratum Unquoted variable indicating the number of polling stations
#' in each stratum.
#' @param ... Unquoted variables indicating the number of votes in each polling
#' station for each candidate.
#' @param std_errors Logical value indicating whether to compute standard errors
#' (using bootstrap), defaults to TRUE.
#' @param B Number of bootstrap replicates used to compute standard errors,
#' defaults to 50.
#' @param seed integer value used to set the state of the random number
#' generator (optional). It will only be used when computing standard errors.
#' @return A list with two entries:
#' \enumerate{
#' \item lambdas_summary \code{data.frame} including posterior mean, median,
#' standard error, and quantiles (0.025 and 0.975) for each party.
#' \item lamdas_sim \code{data.frame} with simulations for each party.
#' }
#' @examples
#' # count number of polling stations per stratum
#' gto_stratum_sizes <- gto_2012 %>%
#' dplyr::group_by(distrito_loc_17) %>%
#' dplyr::summarise(n_stratum = n())
#' gto_sample <- select_sample_prop(gto_2012, stratum = distrito_loc_17, 0.06)
#' gto_nnp <- nnp_estimation(data = gto_sample, ln = ln_total,
#' stratum = distrito_loc_17,
#' data_stratum = gto_stratum_sizes, n_stratum = n_stratum,
#' pri_pvem:otros, n_sims = 100)
#' gto_nnp$lambdas_summary
#' @importFrom magrittr %>%
#' @importFrom rlang !! !!! :=
#' @export
nnp_estimation <- function(data, ln, stratum, data_stratum, n_stratum,
..., n_sims = 1000){
stratum_enquo <- dplyr::enquo(stratum)
n_stratum_enquo <- dplyr::enquo(n_stratum)
ln_enquo <- dplyr::enquo(ln)
parties_enquo <- dplyr::quos(...)
data_long <- data %>%
dplyr::mutate(casilla_id = 1:dplyr::n()) %>%
dplyr::rename(strata = !!stratum_enquo, ln_total = !!ln_enquo) %>%
dplyr::select(casilla_id, strata, ln_total, !!!parties_enquo) %>%
tidyr::gather(party, votes, !!!parties_enquo)
data_stratum <- data_stratum %>%
dplyr::rename(strata = !!stratum_enquo, n_strata = !!n_stratum_enquo)
theta_sims <- data_long %>%
split(list(.$strata, .$party)) %>%
purrr::map_df(~sims_posterior(x_j = .$votes, n = .$ln_total,
n_sims = n_sims)$theta) %>%
dplyr::mutate(n_sim = 1:dplyr::n()) %>%
tidyr::gather(estrato_partido, theta, -n_sim) %>%
tidyr::separate(estrato_partido, into = c("strata", "party"),
sep = "[.]") %>%
dplyr::mutate(strata = as.numeric(strata)) %>%
dplyr::left_join(data_stratum, by = "strata")
lambdas <- theta_sims %>%
dplyr::group_by(party, n_sim) %>%
dplyr::summarise(theta_wgt = sum(n_strata * theta / sum(n_strata))) %>%
dplyr::group_by(n_sim) %>%
dplyr::mutate(lambda = 100 * theta_wgt / sum(theta_wgt)) %>%
dplyr::ungroup() %>%
dplyr::select(-theta_wgt)

lambdas_summary <- lambdas %>%
dplyr::group_by(party) %>%
dplyr::summarise(
mean_post = mean(lambda),
median_post = median(lambda),
std_error = sd(lambda),
q_low = quantile(lambda, 0.025),
q_sup = quantile(lambda, 0.975)
) %>%
dplyr::ungroup()
return(list(lambdas_summary = lambdas_summary, lambdas_sim = lambdas))
}
sims_posterior <- function(x_j, n, n_sims = 200){
if(length(n) < 2){
theta_sims <- runif(n_sims)
return(list(theta = theta_sims, tau = NA))
}
a_gamma <- (length(n) - 1) / 2
b_gamma <- 1/ 2 * (sum(x_j ^ 2 / n) - sum(x_j) ^ 2 / sum(n))
b_gamma <- ifelse(b_gamma == 0, 0.05, b_gamma)
tau_sims <- rgamma(n_sims, shape = a_gamma, rate = b_gamma)
media_normal <- sum(x_j) / sum(n)
desv_normal <- purrr::map_dbl(tau_sims, ~ sqrt(1 / (. * sum(n))))
theta_sims <- purrr::map_dbl(desv_normal, ~MCMCglmm::rtnorm(1, media_normal, ., 0, 1))
return(list(theta = theta_sims, tau = tau_sims))
}

0 comments on commit f3f902f

Please sign in to comment.