#' get_max_1
#'
#' @param wide_data
#' @param var
#'
get_max_1 <- function(wide_data, var) {
temp <- wide_data[complete.cases(wide_data %>% dplyr::select(!! var)),] %>%
dplyr::group_by(division_numeric_code) %>%
dplyr::summarise(max = max(!!var))
return(temp)
}
#' method_divs
#'
#' @param wide_data
#' @param var
#'
#' @return a list of two vectors of div codes, one vector of div codes which require method 1 imputation and another for method 2 imputation
method_divs <- function(wide_data, var) {
#var <- rlang::ensym(var)
wide_data$ones <- 1
temp <- wide_data %>%
dplyr::group_by(division_numeric_code) %>%
dplyr::mutate(sum_na = sum(is.na(!! var))) %>%
dplyr::mutate(sum_obs = sum(ones))
div_impute_1 <- temp$division_numeric_code[temp$sum_na == temp$sum_obs] %>%
unique() %>%
unlist()
div_impute_2 <- temp$division_numeric_code[temp$sum_na != temp$sum_obs] %>%
unique() %>%
unlist()
div_list <- list(div_impute_1 = div_impute_1, div_impute_2 = div_impute_2)
return(div_list)
}
#' impute_1
#'
#' @param wide_data
#' @param var
#'
#' @return data frame with inputed values for var specified, implements method 1
impute_1 <- function(wide_data, var, method_divs) {
#var <- rlang::ensym(var)
med <- wide_data %>% get_max_1(var) %>%
dplyr::select("max") %>%
unlist() %>%
as.numeric() %>%
median()
core_var <- wide_data %>%
dplyr::select(!! var) %>% # a vector of the SE of interest
dplyr::ungroup() %>%
unlist()
core_var[wide_data$division_numeric_code %in% method_divs$div_impute_1] <- med
wide_data[paste(var)] <- core_var
return(wide_data)
}
#wide_data <- impute_1(wide_data, se_log_r_modern_no_use, div_list)
#' impute_2
#'
#' @param wide_data
#' @param var
#' @param method_divs
#' @param char
#'
#' @return data frame with imputed values for var specified, implements method 2
impute_2 <- function(wide_data, var, method_divs) {
#var <- rlang::ensym(var)
char <- rlang::quo_text(var)
#var <- rlang::enquo(var)
div <- method_divs$div_impute_2
var_vec <- wide_data %>% dplyr::select(!! var)
imp_i <- wide_data %>%
dplyr::filter(division_numeric_code %in% div) %>%
dplyr::group_by(division_numeric_code) %>%
dplyr::summarize(med = max(!! var, na.rm=TRUE))
for(i in 1:dim(wide_data)[1]) {
if (is.na(var_vec[i,]) &
wide_data$division_numeric_code[i] %in% div) {
imp <- imp_i %>%
dplyr::filter(division_numeric_code == wide_data$division_numeric_code[i]) %>%
dplyr::ungroup() %>%
dplyr::select(med) %>%
unlist()
wide_data[char][i,] <- imp
}
}
return(wide_data)
}
#' get_global_med_se
#'
#' @param wide_data
#' @param var
#'
#' @return
#'
get_global_med_se <- function(wide_data, var) {
med <- wide_data %>% get_max_1(var) %>%
dplyr::select("max") %>%
unlist() %>%
as.numeric() %>%
median()
return(med)
}
#' impute_global_se
#'
#' @param wide_data
#'
#' @return
#'
impute_global_se <- function(wide_data) {
vars <- list(
rlang::quo(se_log_r_unmet_no_need),
rlang::quo(se_log_r_traditional_no_use),
rlang::quo(se_log_r_modern_no_use)
)
ints <- as.integer(NA)
medians <- list(
se_log_r_unmet_no_need = ints,
se_log_r_traditional_no_use = ints,
se_log_r_modern_no_use = ints
)
for(i in 1:length(vars)) {
medians[[i]] <- get_global_med_se(wide_data, vars[[i]])
}
return(medians)
}
#' Impute se
#'
#' @param wide_data \emph{'Data.frame'} A data.frame from \code{\link[fpemdata:contraceptive_use]{fpemdata::contraceptive_use}}
#'
#' @return \emph{'Data.frame'} The input data with se imputation from Cahill et al 2017
#'
impute_se <- function(wide_data) {
vars <- list(
rlang::quo(se_log_r_unmet_no_need),
rlang::quo(se_log_r_traditional_no_use),
rlang::quo(se_log_r_modern_no_use)
)
for(i in 1:length(vars)) {
temp <- method_divs(wide_data, var = vars[[i]])
wide_data <- impute_1(wide_data, var = vars[[i]], method_divs = temp)
wide_data <- impute_2(wide_data, var = vars[[i]], method_divs = temp)
}
wide_data$`~` <- NULL
return(wide_data)
}
#' Generate max se data
#'
#' @param wide_data \emph{'Data.frame'} A data.frame from \code{\link[fpemdata:contraceptive_use]{fpemdata::contraceptive_use}}
#'
#' @return \emph{'Data.frame'} The input data augmented with imputed max se
#'
gen_max_se_data <- function(wide_data) {
vars <- list(
rlang::quo(se_log_r_unmet_no_need),
rlang::quo(se_log_r_traditional_no_use),
rlang::quo(se_log_r_modern_no_use)
)
ints <- rep(as.integer(NA),nrow(unique(wide_data["division_numeric_code"])))
df <- data.frame(division_numeric_code = ints,
se_log_r_unmet_no_need = ints,
se_log_r_traditional_no_use = ints,
se_log_r_modern_no_use = ints
)
for(i in 1:length(vars)) {
char <- rlang::quo_text(vars[[i]])
temp <- wide_data %>% get_max_1(vars[[i]])
df[,1] <- temp["division_numeric_code"] %>%
unlist() %>%
as.numeric()
df[,i+1] <- temp["max"] %>%
unlist() %>%
as.numeric()
}
return(df)
}
#' impute_user_se
#'
#' @param user_data
#' @param subnational
#' @param is_in_union
#'
#' @return
#'
impute_user_se <- function(user_data, subnational, is_in_union) {
user_data <- user_data %>% as.data.frame()
div <- user_data["division_numeric_code"]
div <- div[1,]
if (subnational) {
imputed_max_se <- fpemdata::imputed_data$medians %>%
dplyr::filter(is_in_union == !!is_in_union)
vars <- names(imputed_max_se)[1:3]
} else {
imputed_max_se <- fpemdata::imputed_data$imputed_max_se %>% #comes from gen_max_se
dplyr::filter(division_numeric_code == div) %>%
dplyr::filter(is_in_union == !!is_in_union)
vars <- colnames(imputed_max_se)[2:4]
}
user_data %>%
dplyr::mutate(se_log_r_modern_no_use_impute_ind := is.na(!!rlang::sym(vars[1]))) %>%
dplyr::mutate(se_log_r_traditional_no_use_impute_ind := is.na(!!rlang::sym(vars[2]))) %>%
dplyr::mutate(se_log_r_unmet_no_need_impute_ind := is.na(!!rlang::sym(vars[3])))
if (all(!(vars %in% colnames(user_data)))) {
warning("user_data does not have sampling error columns")
for(i in 1:length(vars)) {
user_data[vars[i]] <- rep(imputed_max_se[vars[i]], nrow(user_data))
}
} else {
for(i in 1:length(vars)) {
user_data[vars[i]][is.na(user_data[vars[i]])] <- unlist(imputed_max_se[vars[i]])
}
}
return(user_data)
}
#' Impute indicator
#'
#' @param wide_data \emph{'Data.frame'} A data.frame from \code{\link[fpemdata:contrapcetive_use]{fpemdata::contrapcetive_use}}
#'
#' @return \emph{'Data.frame'} A data.frame with logical indicator TRUE indicating it is imputed FALSE being a core value
impute_indicator <- function(data) {
var_names <- c(
"se_log_r_modern_no_use",
"se_log_r_traditional_no_use",
"se_log_r_unmet_no_need"
)
data %>%
dplyr::mutate(se_log_r_modern_no_use_impute_ind := is.na(!!rlang::sym(var_names[1]))) %>%
dplyr::mutate(se_log_r_traditional_no_use_impute_ind := is.na(!!rlang::sym(var_names[2]))) %>%
dplyr::mutate(se_log_r_unmet_no_need_impute_ind := is.na(!!rlang::sym(var_names[3])))
}
#' impute_package_data
#'
#' @param is_in_union
#'
#' @return \emph{'List'}
impute_package_data <- function(is_in_union) {
wide_data_imputed <- fpemdata::contraceptive_use %>%
dplyr::filter(is_in_union == !!is_in_union) %>%
dplyr::filter(age_range == "15-49") %>%
fpemdata:::impute_indicator() %>%
fpemdata:::impute_se()
imputed_max_se <-
fpemdata:::gen_max_se_data(wide_data = wide_data_imputed)
medians <-
fpemdata:::impute_global_se(fpemdata::contraceptive_use %>%
dplyr::filter(is_in_union == !!is_in_union))
return(
list(
imputed_max_se = imputed_max_se,
medians = medians,
contraceptive_use_imputed = wide_data_imputed
)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.