# R/fix_types.R

# Defines functions: check_num_to_int, check_char_to_num, convert_num_to_int, convert_char_to_num

#' Find double columns that contain only whole numbers
#'
#' Looks at every column of `df` except `target` and reports the names of
#' those stored as plain (unclassed) doubles whose non-missing values can all
#' be represented exactly as integers. The first 100 rows are tested before
#' the whole column so that most columns are ruled out cheaply.
#'
#' @param df A dataframe
#' @param target Optional character vector of column names to leave out of
#'   the check. `NULL` (the default) checks every column.
#' @return A character vector with the names of the columns that can be
#'   converted to integer, or `NULL` when there are none.
#' @export
#' @importFrom dplyr select_if
#' @importFrom tibble rownames_to_column
#' @importFrom crayon silver
#' @importFrom magrittr %>%
check_num_to_int <- function(df, target = NULL){
  # Drop the target column(s) by name. Negative indexing through match()
  # selects zero columns when target is NULL and fails on unknown names.
  df <- df[!colnames(df) %in% target]
  check_df_cols(df)

  # Only plain doubles are candidates: classed doubles such as Date or
  # POSIXct also satisfy is.double() but abs() is not defined for them.
  is_plain_dbl <- vapply(
    df, function(v) is.double(v) && !is.object(v), logical(1)
  )
  df_dbl <- df[is_plain_dbl]

  # TRUE when every non-missing value survives as.integer() unchanged.
  # Values that turn into NA on coercion (outside the integer range, Inf)
  # disqualify the column instead of being silently ignored.
  is_int <- function(v){
    v_int <- suppressWarnings(as.integer(v))
    present <- !is.na(v)
    !anyNA(v_int[present]) && all(v[present] == v_int[present])
  }

  # Cheap check on the leading rows first; && short-circuits so the full
  # column is only scanned when the sample already looks integer-like.
  head_rows <- seq_len(min(100, nrow(df_dbl)))
  convertible <- vapply(
    df_dbl, function(v) is_int(v[head_rows]) && is_int(v), logical(1)
  )

  names_to_convert <- colnames(df_dbl)[convertible]
  if(length(names_to_convert) == 0){
    return(NULL)
  }
  return(names_to_convert)
}


# Find character columns that can be read as numbers.
#
# Looks at every column of `df` except `target` and returns the names of the
# character columns for which as.numeric() introduces no new missing values.
# The first 100 rows are tested before the whole column.
#
# df:     a dataframe
# target: optional character vector of column names to leave out of the
#         check; NULL (the default) checks every column
# Returns a character vector of column names, or NULL when there are none.
check_char_to_num <- function(df, target = NULL){
  # Drop the target column(s) by name. Negative indexing through match()
  # selects zero columns when target is NULL and fails on unknown names.
  df <- df[!colnames(df) %in% target]
  check_df_cols(df)
  df_character <- df[vapply(df, is.character, logical(1))]

  # TRUE when coercion to numeric leaves the number of NAs unchanged, i.e.
  # every non-missing string parsed as a number.
  is_num <- function(v){
    v_num <- suppressWarnings(as.numeric(v))
    sum(is.na(v_num)) == sum(is.na(v))
  }

  # Cheap check on the leading rows first; && short-circuits so the full
  # column is only parsed when the sample already looks numeric. Columns are
  # handed over as vectors, so tibbles and base data frames behave alike.
  head_rows <- seq_len(min(100, nrow(df_character)))
  convertible <- vapply(
    df_character, function(v) is_num(v[head_rows]) && is_num(v), logical(1)
  )

  names_to_convert <- colnames(df_character)[convertible]
  if(length(names_to_convert) == 0){
    return(NULL)
  }
  return(names_to_convert)
}


# Coerce the named columns of `df` to integer and return the modified data
# frame invisibly. `names_to_convert` is a vector of column names; when it is
# empty, `df` is returned unchanged.
convert_num_to_int <- function(df, names_to_convert) {
  # Nothing requested: hand the input straight back.
  if (length(names_to_convert) == 0) {
    return(invisible(df))
  }
  # Translate the names into column positions, then overwrite each column
  # with its integer form. Coercion warnings are deliberately silenced.
  col_positions <- match(names_to_convert, colnames(df))
  for (pos in col_positions) {
    as_int <- suppressWarnings(as.integer(df[[pos]]))
    df[, pos] <- as_int
  }
  invisible(df)
}
# Coerce the named columns of `df` to double and return the modified data
# frame invisibly. `names_to_convert` is a vector of column names; when it is
# empty, `df` is returned unchanged.
convert_char_to_num <- function(df, names_to_convert) {
  # With no names supplied there is nothing to coerce.
  if (length(names_to_convert) == 0) {
    return(invisible(df))
  }
  # Resolve each name to its column position and replace the column with its
  # double form. Coercion warnings are deliberately silenced.
  for (pos in match(names_to_convert, colnames(df))) {
    as_dbl <- suppressWarnings(as.double(df[[pos]]))
    df[, pos] <- as_dbl
  }
  invisible(df)
}
# alastairrushworth/mlblitz documentation built on Nov. 1, 2019, 9:06 p.m.