R/univar_summaries.R

Defines functions describe describe.data.frame desc_quant describe.numeric describe.Date describe.factor describe.character describe.logical describe_grp

describe <- function(x, ...) UseMethod("describe", x)

describe.data.frame <- function(x) {
  out <- list()

### description of the data frame
  infos <- df_infos(x)
  df <- infos$df
  vars <- infos$vars

### individual description of variables
  
  desc_var_type <- function(names) {
    Reduce(
      function(init, name) {
        desc <- dplyr::mutate(describe(x[[name]]), name = name)
        
        dplyr::bind_rows(
          init, 
          dplyr::select(desc, name, dplyr::everything())
        )
      }, 
      names, 
      init = tibble::tibble()
    )
  }
  
  # vectors with numerical and categorical variables
  num_vars <- colnames(x)[vapply(x, is.numeric, T)]
  date_vars <- colnames(x)[vapply(x, function(x) is(x, "Date"), T)]
  quali_vars <- colnames(x)[vapply(x, is_categorical, T)]

  quantitatives <- desc_var_type(num_vars)
  temporals <- desc_var_type(date_vars)
  categoricals <- desc_var_type(quali_vars)
  
  # output
  list("df" = df, "vars" = vars,
       "quantitatives" = quantitatives,
       "categoricals" = categoricals,
       "temporals" = temporals)
}


#' Make quantitative summary statistics
#' 
#' @param x a `quantitative` vector to describe
#' @param default should the default metric be used
#' @param funs a list of named functions to describe `x`. Each function should 
#'        return a vector of length one
desc_quant <- function(x, default = TRUE, funs = list()) {
  default_fun <- list(
    n = length,
    mean = function(x) mean(x, na.rm = TRUE), 
    sd = function(x) sd(x, na.rm = TRUE),
    min = function(x) min(x, na.rm = TRUE), 
    q1 = function(x) Q1(x, na.rm = TRUE), 
    median = function(x) median(x, na.rm = TRUE), 
    q3 = function(x) Q3(x, na.rm = TRUE), 
    max = function(x) max(x, na.rm = TRUE),
    sum_na = function(x) sum_na(x, na.rm = TRUE), 
    prop_na = function(x) prop_na(x, na.rm = TRUE)
  )
  
  if (default) funs <- c(default_fun, funs)
  
  out_list <- lapply(funs, function(f) f(x))
  tibble::as_tibble(out_list, optional = TRUE)
}

describe.numeric <- function(x) desc_quant(x)

describe.Date <- function(x) desc_quant(x)

describe.factor <- function(x) {
  x <- fact_reorder_freq(x)

  tibble::tibble(
    n = length(x),
    levels = length(levels(x)),
    mode = levels(x)[1],
    sum_na = sum_na(x),
    prop_na = prop_na(x)
  )
}


describe.character <- function(x) describe(as.factor(x))

describe.logical <- function(x) describe(as.factor(x))

describe_grp <- function(x, grp) {
  lvls <- unique(grp)
  # out <- lapply(lvls, function(grp_lvl) x[which(grp == grp_lvl)])
  # out <- lapply(lvls, function(grp_lvl) describe(x[which(grp == grp_lvl)]))
  out <- Reduce(
    function(init, grp_lvl) {
      elt <- dplyr::mutate(describe(x[which(grp == grp_lvl)]), grp_lvl = grp_lvl)
      dplyr::bind_rows(init, elt)
    }, 
    lvls, 
    init = tibble::tibble()
  )
  
  dplyr::select(out, grp_lvl, dplyr::everything())
}
AdrienLeGuillou/descriptor documentation built on May 22, 2019, 7:55 p.m.