R/drop_imb.R

Defines functions drop_imb

Documented in drop_imb

#' Drop highly imbalanced columns
#'
#' Runs [inspectdf::inspect_imb()] on `df` and removes every column whose
#' reported `pcnt` value exceeds `thresh * 100`. The result is always
#' returned as a data frame, even when only a single column (or none)
#' remains.
#'
#' @param df A dataframe. It is passed through `check_df_cols()` before any
#'   other work is done.
#' @param thresh Numeric proportion used as the cut-off. A column is dropped
#'   when the `pcnt` value reported for it by `inspect_imb()` is strictly
#'   greater than `thresh * 100`. Defaults to `0.95`.
#' @param verbose Logical. If `TRUE` (the default), the names of the dropped
#'   columns are reported through `column_drop_console()`. If `FALSE`, nothing
#'   is reported.
#' @return `df` with the imbalanced columns removed, returned invisibly.
#' @export
#' @importFrom inspectdf inspect_imb
#' @importFrom tibble as.tibble
#' @importFrom dplyr filter
#' @importFrom dplyr select
#' @importFrom dplyr select_if
#' @importFrom magrittr %>%
drop_imb <- function(df, thresh = 0.95, verbose = TRUE) {

  # perform basic column check on dataframe input
  check_df_cols(df)

  # get the imbalance summary; only filter it when it has rows, since an
  # empty summary means there is nothing to compare against the threshold
  df_imb <- df %>% inspect_imb()
  drop_names <- character(0)
  if (nrow(df_imb) > 0) {
    df_imb <- df_imb %>% filter(pcnt > thresh * 100)
    drop_names <- df_imb$col_name
  }

  if (length(drop_names) > 0) {
    # Use a logical keep-mask rather than negative indices: it cannot pick up
    # an NA index from an unmatched name. `drop = FALSE` stops a base
    # data.frame from collapsing to a vector when one column is left.
    keep <- !(colnames(df) %in% drop_names)
    df <- df[, keep, drop = FALSE]
    if (isTRUE(verbose)) {
      column_drop_console(type = "Imbalanced columns dropped:", drop_names)
    }
  } else if (isTRUE(verbose)) {
    # nothing exceeded the threshold; report an empty drop list
    column_drop_console(type = "Imbalanced columns dropped:")
  }

  # invisibly return the df for further summaries
  invisible(df)
}
alastairrushworth/mlblitz documentation built on Nov. 1, 2019, 9:06 p.m.