Nothing
# ============================================
# Authors: PA
# Maintainers: PA, MG
# Copyright: 2023, HRDAG, GPL v2 or later
# ============================================
#' Calculate the proportions of each level of a variable after
#' applying `combine_replicates` to completed data (includes imputed values).
#'
#' @param complete_data A data frame containing the output from `combine_replicates`.
#' @param strata_vars A vector of column names identifying the variables to be
#' used for stratification.
#' @param digits Number of decimal places to round the results to. Default value
#' is 2.
#'
#' @return A data frame that contains the proportions after applying
#' `combine_replicates`.
#' @export
#' @examples
#' local_dir <- system.file("extdata", "right", package = "verdata")
#' replicates_data <- read_replicates(replicates_dir = local_dir,
#' violation = "reclutamiento", replicate_nums = c(1, 2), version = "v1",
#' crash = TRUE)
#' replicates_obs_data <- summary_observed("reclutamiento", replicates_data,
#' strata_vars = "sexo", conflict_filter = FALSE, forced_dis_filter = FALSE,
#' edad_minors_filter = FALSE, include_props = FALSE)
#' tab_combine <- combine_replicates("reclutamiento", replicates_obs_data,
#' replicates_data, strata_vars = 'sexo', conflict_filter = TRUE,
#' forced_dis_filter = FALSE, edad_minors_filter = FALSE, include_props = FALSE)
#' prop_data_complete <- proportions_imputed(tab_combine, strata_vars = "sexo",
#' digits = 2)
proportions_imputed <- function(complete_data,
strata_vars,
digits = 2) {
if (!is.data.frame(complete_data)) {
stop("This argument must be a data.frame")
}
if (digits < 0) {stop("Cannot round to negative decimal places")}
proportions_data <- complete_data %>%
dplyr::mutate(imp_lo_p = round(imp_lo / sum(imp_mean, na.rm = TRUE),
digits = digits),
imp_mean_p = round(imp_mean / sum(imp_mean, na.rm = TRUE),
digits = digits),
imp_hi_p = round(imp_hi / sum(imp_mean, na.rm = TRUE),
digits = digits))
proportions_data <- proportions_data %>%
dplyr::select(all_of({{strata_vars}}),
imp_lo, imp_mean, imp_hi,
imp_lo_p, imp_mean_p, imp_hi_p)
return(proportions_data)
}
#' Combine imputed replicates according to calculate totals. Combination
#' is done using the standard approach that makes use of the laws of total
#' expectation and total variance.
#'
#' @param replicates_obs_data The data frame that results from applying `summary_observed`.
#' @param replicates_data A data frame containing replicates data.
#' @param strata_vars Variable with all observations (without missing values).
#' @param conflict_filter Filter that indicates if the data is filtered using
#' the "is_conflict" rule.
#' @param violation Violation to be analyzed. Options are "homicidio", "secuestro",
#' "reclutamiento" and "desaparicion".
#' @param forced_dis_filter Filter that indicates if the data is filtered using the
#' "is_forced_dis" rule.
#' @param edad_minors_filter Optional filter by age (`edad`) < 18.
#' @param include_props A logical value indicating whether or not to include
#' the proportions from the calculations before merging with summary_observed's output.
#' @param digits Number of decimal places to round the results to. Default value
#' is 2.
#' @return A data frame with 5 or more columns: name of variable(s), `observed`
#' the number of observations in each category for every variable, `imp_lo` the
#' lower bound of the 95% confidence interval, `imp_hi` the upper bound of the
#' 95% confidence interval, and `imp_mean` the point estimate of the mean value.
#'
#' @export
#' @importFrom dplyr %>%
#'
#' @examples
#' \donttest{
#' local_dir <- system.file("extdata", "right", package = "verdata")
#' replicates_data <- read_replicates(local_dir, "reclutamiento", c(1, 2),
#' version = "v1")
#' replicates_obs_data <- summary_observed("reclutamiento", replicates_data,
#' strata_vars = "sexo", conflict_filter = FALSE, forced_dis_filter = FALSE,
#' edad_minors_filter = FALSE, include_props = FALSE, digits = 2)
#' tab_combine <- combine_replicates("reclutamiento", replicates_obs_data,
#' replicates_data, strata_vars = 'sexo', conflict_filter = TRUE,
#' forced_dis_filter = FALSE, edad_minors_filter = FALSE, include_props = FALSE,
#' digits = 2)
#' }
combine_replicates <- function(violation,
replicates_obs_data,
replicates_data,
strata_vars = NULL,
conflict_filter = TRUE,
forced_dis_filter = FALSE,
edad_minors_filter = FALSE,
include_props = FALSE,
digits = 2) {
if (!(violation %in% c("homicidio", "secuestro", "reclutamiento", "desaparicion"))) {
stop("Violation argument incorrectly specified. Please put any of the following
violations (in quotes and in lower case): homicidio, secuestro,
reclutamiento or desaparicion")
}
if (!is.data.frame(replicates_obs_data)) {
stop("The argument 'replicates_obs_data' must be a data frame")
}
if (!is.data.frame(replicates_data)) {
stop("The argument 'replicates_data' must be a data frame")
}
if (!is.null(strata_vars)) {
strata_vars_missing <- setdiff(strata_vars, names(replicates_data))
if (length(strata_vars_missing) > 0) {
stop("This variable is not found in the replicates. Please check if
it exists or if it has another name.")
}
}
if (forced_dis_filter == TRUE & violation != "desaparicion") {
stop("This argument only applies to 'desaparicion'. Please change the
TRUE option to FALSE")
}
num_replicates <- dplyr::n_distinct(replicates_data$replica)
if (num_replicates == 1) {
stop("Results cannot be calculated using only 1 replicate. For more
consistent results please work with more replicates.")
}
else {
logger::log_info("You are working with {num_replicates} replicates according to filter")
}
if (digits < 0) {stop("Cannot round to negative decimal places")}
if (conflict_filter == TRUE) {
logger::log_info("Analyzing victims related to armed conflict")
prep_data <- replicates_data %>%
dplyr::mutate(is_conflict = as.integer(is_conflict)) %>%
dplyr::filter(is_conflict == 1)
} else {
logger::log_info("You are working with all victims (related and not related to is_conflict)")
prep_data <- replicates_data %>%
dplyr::mutate(is_conflict = as.integer(is_conflict))
}
if (edad_minors_filter == TRUE) {
logger::log_info("Analyzing victims under 18 years of age")
prep_data <- prep_data %>%
dplyr::filter(edad_jep == "INFANCIA" |
edad_jep == "ADOLESCENCIA")
} else {
logger::log_info("Analyzing victims of all ages")
prep_data <- prep_data
}
if (violation == "desaparicion" & forced_dis_filter == TRUE) {
logger::log_info("Analyzing the documented victims who were victims of forced disappearance")
prep_data <- prep_data %>%
dplyr::mutate(is_forced_dis = as.integer(is_forced_dis)) %>%
dplyr::filter(is_forced_dis == 1)
} else {
logger::log_info("Not filtering in is_forced_dis")
}
prep_data <- prep_data %>%
dplyr::mutate(dplyr::across(all_of({{strata_vars}}), as.character)) %>%
dplyr::group_by(replica, dplyr::across(all_of({{strata_vars}}))) %>%
dplyr::summarise(Freq = dplyr::n()) %>%
dplyr::ungroup()
theta <- prep_data %>%
dplyr::group_by(dplyr::across(all_of({{strata_vars}}))) %>%
dplyr::summarize(theta = round(mean(Freq), 0)) %>%
dplyr::ungroup()
rep_data <- prep_data %>%
dplyr::left_join(theta) %>%
dplyr::mutate(vb1 = (Freq - theta)^2) %>%
dplyr::group_by(dplyr::across(all_of({{strata_vars}}))) %>%
dplyr::summarize(between_variance = (1 / (num_replicates - 1)) * sum(vb1)) %>%
dplyr::ungroup() %>%
dplyr::mutate(total_variance = 0 + ((num_replicates + 1) / num_replicates) * between_variance) %>%
dplyr::mutate(se_b = sqrt(total_variance)) %>%
dplyr::select(-total_variance) %>%
dplyr::inner_join(theta) %>%
dplyr::mutate(lower_ci = round(theta - (1.96 * se_b), 0)) %>%
dplyr::mutate(upper_ci = round(theta + (1.96 * se_b), 0)) %>%
dplyr::select(all_of({{strata_vars}}), lower_ci, theta, upper_ci) %>%
dplyr::rename(imp_mean = theta, imp_lo = lower_ci, imp_hi = upper_ci)
if (include_props == TRUE) {
logger::log_info("Including the proportions")
rep_data <- proportions_imputed(rep_data, strata_vars, digits = digits)
rep_data <- rep_data %>%
dplyr::mutate(imp_lo_p = dplyr::if_else(imp_lo_p < 0, 0, imp_lo_p))
} else {
logger::log_info("Don't include the proportions")
}
final_data <- rep_data %>%
dplyr::mutate(dplyr::across(all_of(strata_vars), as.character))
replicates_obs_data <- replicates_obs_data %>%
dplyr::mutate(dplyr::across(all_of(strata_vars), as.character))
final_data <- dplyr::full_join(rep_data,
replicates_obs_data,
by = {{strata_vars}}) %>%
dplyr::mutate(imp_lo = dplyr::if_else(imp_lo < observed,
observed, imp_lo))
final_data <- final_data %>%
dplyr::select(all_of({{strata_vars}}), observed,
dplyr::everything()) %>%
dplyr::arrange(dplyr::desc(imp_mean))
return(final_data)
}
# --- Done
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.