# R/df_sum.R

#' Summary of a data.frame
#'
#' Calculate summary statistics for quantitative variables and return them as
#' a data.frame with one row per group/variable combination and one column
#' per statistic.
#'
#' @param df An object of class \code{data.frame}.
#' @param groups A character vector of variable names used for grouping, or
#'   \code{NULL} (default) for no grouping.
#' @param vars A character vector of names of the variables to summarise. If
#'   \code{NULL} (default), every column of a permitted class that is not
#'   listed in \code{groups} is used. Named variables of any other class are
#'   silently dropped.
#' @param logic If \code{FALSE} (default), logical variables will be excluded.
#' @param stats A character vector of function names. When several statistics
#'   are computed for several variables, the summary columns are split into
#'   variable and statistic at the \emph{last} underscore, so the function
#'   names themselves should not contain an underscore in that case.
#' @return A data.frame holding the grouping columns, a \code{variable}
#'   column and one column for each element of \code{stats}.
#' @importFrom dplyr %>% select_ group_by_ summarise_all
#' @importFrom tidyr gather_ spread separate
#' @export

df_sum = function(df, groups = NULL, vars = NULL, logic = FALSE,
                  stats  = c("mean", "median", "var", "sd", "length")) {

  stopifnot(inherits(df, "data.frame"))

  # Fail early with a readable message instead of an obscure selection error.
  unknown = setdiff(c(groups, vars), names(df))
  if (length(unknown) > 0)
    stop("Columns not found in `df`: ", paste(unknown, collapse = ", "),
         call. = FALSE)

  classes = c("integer", "numeric")
  if (logic)
    classes = c(classes, "logical")

  if (is.null(vars)) {

    vars = names(df)[
      vapply(df, inherits, what = classes, FUN.VALUE = logical(1))
      ]
    vars = vars[!vars %in% groups]

  } else {

    # List-style indexing (`df[vars]`) always returns a data.frame. The matrix
    # form `df[, vars]` collapses to a bare vector for a single column of a
    # base data.frame, which would make vapply() iterate over the values.
    vars = vars[vapply(df[vars], inherits, what = classes,
                       FUN.VALUE = logical(1))]

  }

  if (length(vars) == 0)
    stop("No variables of class ", paste(classes, collapse = "/"),
         " to summarise", call. = FALSE)

  df_stats = df %>%
    select_(.dots = c(groups, vars)) %>%
    group_by_(.dots = groups) %>%
    summarise_all(.funs = stats)

  # Gather the columns that summarise_all() actually produced rather than
  # predicting their names: depending on how many variables and functions are
  # involved they are named after the variable, the function, or both.
  stat_cols = setdiff(names(df_stats), groups)

  df_long = gather_(df_stats, key_col = "variable", value_col = "value",
                    gather_cols = stat_cols)

  if (length(stats) == 1) {

    # One statistic: the columns kept the variable names.
    # `!!` inlines the value so a data column named "stats" cannot mask it.
    df_long = dplyr::mutate(df_long, stat = !!stats)

  } else if (length(vars) == 1 && all(stat_cols %in% stats)) {

    # One variable, several statistics: the columns are named after the
    # functions only, so the gathered key holds the statistic.
    df_long = dplyr::mutate(df_long, stat = variable, variable = !!vars)

  } else {

    # Columns are named "<variable>_<stat>". Split on the last underscore only
    # so that variable names containing "." or "_" stay intact (the default
    # separator would split on every non-alphanumeric character).
    df_long = separate(df_long, variable, into = c("variable", "stat"),
                       sep = "_(?=[^_]*$)")
  }

  spread(df_long, stat, value)
}

#' Combine vectors
#'
#' Create a new vector that contains all combinations of the elements of the
#' initial vectors, pasted together. The first vector varies fastest.
#' @param ... Initial vectors
#' @param sep Separator (char scalar)
#' @return A character vector with one element per combination; an empty
#'   character vector if no initial vectors are supplied.
#' @export
#' @examples
#' vec1 = c("look", "step", "jump")
#' vec2 = c("left", "right")
#' combine_vectors(vec1, vec2)

combine_vectors = function(..., sep = "_") {

  grid = expand.grid(
    ...,
    KEEP.OUT.ATTRS = FALSE,
    stringsAsFactors = FALSE
  )

  # Paste the grid columns row-wise. The columns are unnamed first so that a
  # column name (e.g. "collapse") can never be taken for an argument of
  # paste(). Working on the column list also copes with a grid that has no
  # columns, where an index sequence such as 1:ncol(grid) would be c(1, 0).
  do.call(paste, c(unname(as.list(grid)), list(sep = sep)))
}
# jchrom/jcmisc documentation built on May 18, 2019, 10:23 p.m.