# R/adjust_grid_for_regression.r

#' @title adjust_grid_for_regression.r
#'
#' @description adjusts a \code{\link[base]{data.frame}} to be more conducive to dredging. It automatically:
#' \enumerate{
#'   \item converts \code{\link[base]{character}} columns into \code{\link[base]{factor}} columns
#'   \item applies \code{\link[mnmacros]{apply_bcskew0}} to \code{\link[base]{numeric}} and \code{\link[base]{integer}} columns
#'   \item applies \code{\link[base]{scale}} to \code{\link[base]{numeric}} and \code{\link[base]{integer}} columns
#' }
#'
#' @details
#' A column is selected by its \code{\link[base]{class}}: only columns whose
#' class is exactly \code{"character"}, \code{"numeric"} or \code{"integer"}
#' are touched. Columns of any other class, including columns that carry
#' several classes (for example ordered factors or date-times), are returned
#' unchanged.
#'
#' A number column is only transformed when it holds more than one distinct
#' non-missing value. \code{\link[mnmacros]{apply_bcskew0}} is applied only
#' when \code{data} has at least 8 rows, and \code{\link[base]{scale}} only
#' when \code{data} has at least 2 rows.
#'
#' @export
#'
#' @param data a \code{\link[base]{data.frame}} to operate over
#' @return the adjusted \code{\link[base]{data.frame}}
#'
#' @author Mark Newman, \email{mark@trinetteandmark.com}
#' @keywords utilities
#' @family utilities
#'
#' @examples
#'   \dontshow{
#'     library(magrittr)
#'     library(mndredge) }
#'   set.seed(0)
#'   data.frame(
#'     a = 1:10,
#'     b = rep("a", 10),
#'     c = rnorm(10, mean = 4),
#'     stringsAsFactors = FALSE) %>%
#'     adjust_grid_for_regression()
#'
adjust_grid_for_regression <- function(data) {

  # TRUE when `column` has exactly one class and it is one of `classes`.
  # Checking the length first keeps multi-class columns (ordered factors,
  # POSIXct, ...) from producing a non-scalar comparison.
  has_exact_class <- function(column, classes) {
    column_class <- class(column)
    length(column_class) == 1L && column_class %in% classes
  }

  # vapply() always yields a logical vector, even for a zero-column input,
  # so the result is always a valid subscript for the column names.
  column_names <- colnames(data)
  is_character <- vapply(
    data, has_exact_class, logical(1),
    classes = "character", USE.NAMES = FALSE)
  is_number <- vapply(
    data, has_exact_class, logical(1),
    classes = c("numeric", "integer"), USE.NAMES = FALSE)
  character_columns <- column_names[is_character]
  number_columns <- column_names[is_number]

  adjusted_data <- data

  for (column in character_columns) {
    adjusted_data[[column]] <- factor(adjusted_data[[column]])
  }

  n <- nrow(data)
  for (column in number_columns) {
    values <- adjusted_data[[column]]

    # Count distinct observed values only: a constant column that also
    # contains NA has zero variance and would be turned into NaN by scale().
    distinct_count <- length(unique(values[!is.na(values)]))
    if (distinct_count > 1) {
      if (n >= 8) {
        values <- apply_bcskew0(values)
      }
      if (n >= 2) {
        # scale() returns a one-column matrix; keep it as a plain vector.
        values <- scale(values)[, 1]
      }
      adjusted_data[[column]] <- values
    }
  }

  adjusted_data
}
# markanewman/mndredge documentation built on May 9, 2019, 5:52 a.m.