R/helpers.R

Defines functions df_infos Q1 Q3 Q1.numeric Q3.numeric Q1.Date Q3.Date sum_na prop_na is_quantitative is_categorical var_type dispatch_bivar fact_reorder_freq

pkg_env <- new.env()
pkg_env$univar_color <- "#337ab7"

df_infos <- function(x) {
  # general description of the data frame
  df <- tibble::tibble(
    rows = dim(x)[1],
    columns = dim(x)[2],
    complete = sum(complete.cases(x))
  )
  df <- dplyr::mutate(df, prop_complete = complete / rows)

  # description of the variables
  vars <- data.frame()

  for (name in colnames(x)) {
    line <- list(name = name,
                 type = class(x[[name]]),
                 missing = sum(is.na(x[[name]])))

    vars <- rbind(vars, line, stringsAsFactors = FALSE)
  }

  vars$prop_missing <- vars$missing / df$rows

  list("df" = df, "vars" = vars)
}

Q1 <- function(x, ...) UseMethod("Q1", x)
Q3 <- function(x, ...) UseMethod("Q3", x)

Q1.numeric <- function(x, ...) quantile(x, probs = 0.25, na.rm = T, names = F)
Q3.numeric <- function(x, ...) quantile(x, probs = 0.75, na.rm = T, names = F)

Q1.Date <- function(x, ...) as.Date(Q1(as.numeric(x)), origin = "1970-01-01")
Q3.Date <- function(x, ...) as.Date(Q3(as.numeric(x)), origin = "1970-01-01")

sum_na <- function(x, ...) sum(is.na(x))
prop_na <- function(x, ...) sum(is.na(x)) / length(x)

is_quantitative <- function(x) is.numeric(x) | is(x, "Date")
is_categorical <- function(x) is.factor(x) | is.character(x) | is.logical(x)

var_type <- function(x) {
  if (is_quantitative(x)) v_type <- "quantitative"
  else if (is_categorical(x)) v_type <- "categorical"
  else {
    stop("This variable is not defined as categorical or quantitative.
          Report an issue on github if need be.")
  }
  
  v_type
}

dispatch_bivar <- function(x, y, f_type) {
  # get the type of x and y (categorical or quantitative)
  x_type <- var_type(x)
  y_type <- var_type(y)
  
  # dispatch to the corect function
  if (x_type != y_type) {
    # when they are of a different type
    # make sure x is categorical and y quantitative
    if (x_type == "quantitative") {
      tmp <- x
      x <- y
      y <- tmp
    } 
    f <- paste0(f_type, "_bivar_both")
  } else {
    if (x_type == "quantitative") {
      f <- paste0(f_type, "_bivar_quant")
    } else {
      f <- paste0(f_type, "_bivar_categ")
    }
  }
  
  f <- get(f)
  list(x, y, f)
}

#' Reassign levels in a factor using frequency
#' 
#' @param x a factor
fact_reorder_freq <- function(x) {
  reorder(x, x, FUN = function(x) -length(x))
}
AdrienLeGuillou/descriptor documentation built on May 22, 2019, 7:55 p.m.