R/drop_const.R

Defines functions drop_const

Documented in drop_const

#' Drop constant columns
#'
#' @param df A dataframe
#' @param verbose
#' @export
#' @importFrom dplyr select
#' @importFrom dplyr select_if
#' @importFrom magrittr %>%
#' @importFrom Rcpp compileAttributes




drop_const <- function(df, verbose = T){
  
  # perform basic column check on dataframe input
  check_df_cols(df)
  unique_1 <- function(v) length(unique(v)) == 1
  
  # cols for removal
  names_to_drop <- c()
  na_binarise <- c()
  # first pass_df
  n_first_pass <- min(200, nrow(df))
  
  # numeric constant colums to remove
  df_num <- df %>% select_if(is.numeric)
  if(ncol(df_num) > 0){
    df_num_first_pass <- df_num[1:n_first_pass, ]
    rm_inds <- which(sapply(df_num_first_pass, sd, na.rm = T) == 0)
    # if the first pass surfaces anything, then run a full pass
    if(length(rm_inds) > 0){
      rm_inds_full <- which(sapply(df_num[, rm_inds], sd, na.rm = T) == 0)
      # if the full pass returns anything, check whether there 
      # any of them have missingness over a threshold that could be useful
      # this will be recoded as binary
      if(length(rm_inds_full) > 0){
        
      }
    }
    names_to_drop <- c(names_to_drop, colnames(df_numeric)[rm_inds])
  }
  
  
  df_int <- df %>% select_if(is.integer)
  if(ncol(df_int) > 0){
    df_int_first_pass <- df_int[1:n_first_pass, ]
    rm_inds <- which(sapply(df_int_first_pass, sd) == 0)
    # first pass
    rm_inds <- which(sapply(df_integer, sd) == 0)
    names_to_drop <- c(names_to_drop, colnames(df_integer)[rm_inds])
  }

  
  # character constant columns to remove
  df_character <- df %>% select_if(is.character)
  if(ncol(df_character) > 0){
    rm_inds <- which(sapply(df_character, unique_1))
    names_to_drop <- c(names_to_drop, colnames(df_character)[rm_inds])
  }
  
  # factor constant columns to remove
  df_factor <- df %>% select_if(is.factor)
  if(ncol(df_factor) > 0){
    rm_inds <- which(sapply(df_factor, unique_1))
    names_to_drop <- c(names_to_drop, colnames(df_factor)[rm_inds])
  }
  
  # return a message, if requested
  if(verbose){
    if(length(names_to_drop) > 1){
      names_to_drop <- sort(unique(names_to_drop))
      column_drop_console(names_to_drop = names_to_drop, type = "Constant columns dropped:")
      df <- df %>% select(-names_to_drop)
    } else {
      column_drop_console(type = "Constant columns dropped:")
    }
  }
  
  # invisibly return the df for further summaries
  invisible(df)
}
alastairrushworth/mlblitz documentation built on Nov. 1, 2019, 9:06 p.m.