
#' Drop static and mostly-missing columns
#'
#' Removes every column in which more than 95% of the rows are `NA`, or in
#' which a single value accounts for more than 95% of the rows. Columns whose
#' most common value covers more than 80% (but at most 95%) of the rows are
#' kept, and a notice is printed for each of them. The names of the dropped
#' columns are printed before the result is returned.
#'
#' @param df A data frame.
#' @return `df` without the dropped columns. The result is always a data
#'   frame, even when one or zero columns remain. A data frame with no rows
#'   or no columns is returned unchanged.
#' @export
#' @examples
#' Static_Missing_Vars(mtcars)
Static_Missing_Vars <- function(df) {
  n_rows <- nrow(df)
  col_names <- colnames(df)

  # With no rows every share would be 0 / 0 (NaN), and with no columns there
  # is nothing to inspect, so hand the input back untouched.
  if (n_rows == 0L || length(col_names) == 0L) {
    return(df)
  }

  # One flag per column; columns are addressed by position so duplicated
  # column names cannot make us inspect or drop the wrong column.
  drop_flags <- logical(length(col_names))

  for (i in seq_along(col_names)) {
    values <- df[[i]]

    # Drop variables that are > 95% missing.
    missing_share <- sum(is.na(values)) / n_rows
    if (missing_share > 0.95) {
      drop_flags[i] <- TRUE
      next
    }

    # Share of rows taken by the most frequent non-missing value. table()
    # ignores NA, so the denominator is deliberately the full row count.
    counts <- table(values)
    top_share <- if (length(counts) > 0L) max(counts) / n_rows else 0

    if (top_share > 0.95) {
      # Drop variables that are close to static (warning: sometimes the
      # minority class still helps explain variance).
      drop_flags[i] <- TRUE
    } else if (top_share > 0.8) {
      print(paste(
        "Variable", col_names[i], "was not dropped, but", top_share,
        "of the cases are static"
      ))
    }
  }

  print("The following variables will be dropped")
  print(col_names[drop_flags])

  # Subset once, after the scan, so positions stay valid during the loop;
  # drop = FALSE keeps a data frame even when a single column survives.
  df[, !drop_flags, drop = FALSE]
}
# Source: moone009/tmp_preprocess documentation built on May 23, 2019, 6:10 a.m.