# R/prepare.R
#
# Defines functions: prepare

prepare <- function(df, prepared_df = NULL, target = NULL) {
  # Apply a three-step preparation to `df` and return the result with the
  # steps recorded in its "prepare_recipe" attribute.
  #
  # Arguments:
  #   df          Data to prepare; handed to the conversion/drop helpers.
  #   prepared_df Optional object returned by an earlier call. When supplied,
  #               its "prepare_recipe" attribute is replayed on `df` instead
  #               of deriving a new recipe.
  #   target      Forwarded to the check_* helpers when a new recipe is
  #               derived; not used when `prepared_df` is supplied.
  #
  # Returns:
  #   The transformed `df`, carrying a "prepare_recipe" attribute with the
  #   elements `char_to_num_fields`, `num_to_int_fields` and `drop_fields_na`.

  # Replay path: a previous preparation was supplied, so reuse its recipe.
  if (!is.null(prepared_df)) {
    stored_recipe <- attr(prepared_df, "prepare_recipe")
    result <- convert_char_to_num(df, stored_recipe$char_to_num_fields)
    result <- convert_num_to_int(result, stored_recipe$num_to_int_fields)
    result <- drop_na(result, stored_recipe$drop_fields_na)
    attr(result, "prepare_recipe") <- stored_recipe
    return(result)
  }

  # Derivation path: each check runs on the output of the previous step, so
  # the order below matters.

  # Step 1: character fields that can be converted to numeric.
  to_numeric <- check_char_to_num(df, target = target)
  result <- convert_char_to_num(df, to_numeric)

  # Step 2: numeric fields that can be converted to integer.
  to_integer <- check_num_to_int(result, target = target)
  result <- convert_num_to_int(result, to_integer)

  # Step 3: fields flagged by the missingness check, which are then dropped.
  to_drop <- check_na(result, target = target)
  result <- drop_na(result, to_drop)

  # Record what was done so the same steps can be replayed on other data.
  attr(result, "prepare_recipe") <- list(
    char_to_num_fields = to_numeric,
    num_to_int_fields = to_integer,
    drop_fields_na = to_drop
  )
  result
}
# alastairrushworth/mlblitz documentation built on Nov. 1, 2019, 9:06 p.m.