#' Quick impute
#'
#' Given a dataset, singly imputes specified variables within that dataset. Meant for
#' tasks that need data to be filled in as an intermediate step, such as filling in a
#' small amount of missing values in variables that will be used for raking. Not meant
#' to be used for applications for which measuring the uncertainty due to imputation
#' is important.
#'
#' @param .data The \code{data.frame} to be imputed.
#' @param to_impute The variables in the dataset for which missing data should be imputed.
#' Can be a \code{character} vector of names, a \code{numeric} vector of column positions,
#' or a list of columns generated by \code{dplyr::vars}. (See \code{help("select_at")} for
#' details.) Must have at least two variables.
#' Defaults to \code{NULL}, in which case this function will search for variables with the
#' prefix "rk_", meant to fit in a workflow where missing data is imputed for variables to
#' be used in weighting.
#' @param method The imputation method, passed to \code{mice()}.
#' The default method is random forest imputation via the \code{ranger} package,
#' implemented as a custom \code{mice} method that comes with the \code{pewmethods}
#' package. Other methods built into the \code{mice} package will also work.
#' @param seed Ensures that the missing values will be filled in the exact same way when
#' rerun. No seed is set by default.
#' @param ... Other arguments passed to \code{mice::mice}.
#'
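#' @return The data frame with missing values filled in only for the variables selected
#' for imputation (\code{to_impute}, or the "rk_"-prefixed variables by default). All other
#' columns are left as they were, and the original column order is preserved.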
#'
#' @details This function is a wrapper around \code{mice::mice} that does only one imputation
#' and does not output any diagnostics. The main use of this function is to quickly impute
#' only some variables in a dataset. Quick imputation is useful for some limited purposes
#' such as the need to fill in the generally small amounts of missing data in variables
#' to be used in raking.
#'
#' Note that the imputation model will only use data from the variables you pass to this
#' function. If you pass only two variables, then only those two variables and nothing else
#' will be used to fill in missing values. If there are other variables in your data that are
#' strongly related to the variables to be imputed, they should be specified in
#' \code{to_impute}, even if they have no missing data.
#'
#' @examples
#' library(dplyr)
#' # We can use dk_to_na to create new versions of variables where certain factor labels
#' # are recoded as missing, then impute those variables. If the to_impute argument is not
#' # specified, the function will by default look for variables starting with "rk_".
#' dec13_excerpt_raking <- dec13_excerpt %>%
#' mutate(rk_sex = sex,
#' rk_recage = dk_to_na(recage, pattern = "DK/Ref"),
#' rk_receduc = dk_to_na(receduc, pattern = "DK/Ref"),
#' rk_racethn2 = dk_to_na(racethn2, pattern = "Ref")) %>%
#' impute_vars(.)
#' # We can also pass specific variables to impute
#' # In this example, only q1 has missing data, but we want to fill in q1 based on values of
#' # age, education, gender and race/ethnicity, so we have to pass those variables in as well
#' dec13_excerpt_raking <- dec13_excerpt %>%
#' mutate(q1 = dk_to_na(q1, pattern = "Refused")) %>%
#' impute_vars(to_impute = c("q1", "recage", "receduc", "racethn2", "sex"))
#'
#' @import dplyr
#' @importFrom mice mice complete
#' @export
impute_vars <- function(.data, to_impute = NULL, method = "ranger", seed = NA, ...) {
  # Singly impute a subset of columns of `.data` with mice and return `.data`
  # with those columns replaced by their imputed versions.
  #
  # Validate the input first so that a wrong argument type fails with a clear
  # message instead of an obscure error further down.
  if (!is.data.frame(.data)) {
    stop(".data must be a data frame.", call. = FALSE)
  }
  if (!requireNamespace("mice", quietly = TRUE)) {
    stop("Package \"mice\" needed for this function to work. Please install it.",
         call. = FALSE)
  }

  # Remember the original column order so it can be restored at the end.
  varnames <- names(.data)

  if (is.null(to_impute)) {
    # Default workflow: impute every column whose name starts with "rk_".
    message("No input to to_impute argument found. Imputing variables with prefix rk_ by default.")
    rk_vars <- .data %>% select(starts_with("rk_"))
    if (ncol(rk_vars) == 0) {
      stop("No variables with prefix rk_ found. If you want to impute other variables, please provide input to the to_impute argument in this function.")
    }
  } else {
    # select_at() is kept (rather than select()) because to_impute may be a
    # character vector, numeric positions, or a list built with dplyr::vars().
    rk_vars <- .data %>% select_at(.vars = to_impute)
  }

  # The imputation model only uses the selected columns, so at least two are
  # required. Fail here with an actionable message rather than inside mice.
  if (ncol(rk_vars) < 2) {
    stop("At least two variables are needed for imputation, but ",
         ncol(rk_vars), " selected. Please pass at least two variables ",
         "(including predictors without missing data) to to_impute.",
         call. = FALSE)
  }

  # m = 1: a single imputed dataset; complete() extracts it as a data frame.
  imputed <- mice::complete(
    mice::mice(rk_vars, m = 1, method = method, seed = seed, ...)
  )

  # Drop the un-imputed versions of the selected columns, append the imputed
  # versions, then put the columns back in their original order.
  .data %>%
    select_at(.vars = setdiff(varnames, names(rk_vars))) %>%
    bind_cols(imputed) %>%
    select_at(.vars = varnames)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.