R/selectVariables.R

Defines functions selectVariables

#' @title select imputation variables
#' @description Selects the variables relevant to the imputation process:
#'   columns that contain some, but not only, missing values and that were
#'   not listed in \code{ignore}.
#' @importFrom md.log md.log
#' @param data data.frame
#' @param ignore character vector of column names that must neither be
#'   imputed nor used as predictors; \code{NULL} (default) ignores nothing
#' @param verbose logical; currently not used by this function
#' @param report if not \code{NULL}, the names of the variables to impute are
#'   written to the log via \code{md.log}
#' @return list with the elements
#'   \code{dataNA} (logical missingness matrix of the variables to impute,
#'   columns in the order they appear in \code{data}),
#'   \code{allPredictors} (column names of \code{data} not in \code{ignore}),
#'   \code{vars2impute} (names of the variables to impute, ordered by
#'   increasing number of missing values) and
#'   \code{X} (predictors that are not themselves imputed)
#' @keywords Internal
#' @noRd
selectVariables <- function(data, ignore=NULL, verbose=FALSE, report=NULL) {

  # NOTE: the result of requireNamespace() is not checked here
  suppressPackageStartupMessages({requireNamespace("md.log")})

  # flag the variables with missing values, excluding fully missing vars
  hasSomeNA <- vapply(data[, , drop = FALSE], FUN.VALUE = TRUE,
                      function(z) anyNA(z) && !all(is.na(z)))

  # make sure that these variables were not meant to be ignored.
  # The mask must span ALL columns of `data`: a mask built only from the
  # flagged columns is shorter than `hasSomeNA` and would be recycled,
  # switching off the wrong variables.
  isIgnored <- colnames(data) %in% ignore
  toImpute <- hasSomeNA & !isIgnored

  if (!is.null(report)) {
    md.log(paste("Variables to impute:", paste(colnames(data)[toImpute],
                                               collapse = ", ")))
  }

  # Get missing indicators and order variables by number of missings
  dataNA <- is.na(data[, toImpute, drop = FALSE])
  vars2impute <- names(sort(colSums(dataNA)))

  # specify the list of all predictors, which were not ignored by the user
  allPredictors <- colnames(data)[!isIgnored]
  X <- setdiff(allPredictors, vars2impute)

  list(
    dataNA = dataNA,
    allPredictors = allPredictors,
    vars2impute = vars2impute,
    X = X
  )
}

Try the mlim package in your browser

Any scripts or data that you put into this service are public.

mlim documentation built on Dec. 28, 2022, 2:33 a.m.