R/descriptive_stats.R

Defines functions descriptive_stats

Documented in descriptive_stats

#' Compute descriptive statistics
#'
#' @description Computes a set of descriptive statistics that are relevant for
#'   item analysis, for a single numeric variable or for several numeric
#'   variables at once.
#'
#' @param .data A data frame.
#' @param ... \code{<tidy-select>} The name(s) or column number(s) of the
#'   numeric variable(s) to compute the descriptive statistics for. Several
#'   selections may be supplied; they are combined.
#'
#' @return A tibble with one row per selected variable and the following
#'   columns:
#' \describe{
#'   \item{\code{variable}}{Name of the variable}
#'   \item{\code{n}}{Number of valid (non-missing) observations}
#'   \item{\code{miss}}{Proportion of missing values (between 0 and 1)}
#'   \item{\code{mean}}{Mean}
#'   \item{\code{sd}}{Standard deviation}
#'   \item{\code{min}}{Minimum}
#'   \item{\code{p25}}{25th percentile}
#'   \item{\code{median}}{Median}
#'   \item{\code{p75}}{75th percentile}
#'   \item{\code{max}}{Maximum}
#'   \item{\code{skewness}}{Skewness, as computed by \code{moments::skewness()}}
#'   \item{\code{kurtosis}}{Kurtosis, as computed by \code{moments::kurtosis()}}
#' }
#'   All statistics except \code{n} and \code{miss} are computed after removing
#'   missing values.
#'
#' @export
#'
#' @examples
#' descriptive_stats(studach, read)
#' descriptive_stats(studach, 9:12)
#' descriptive_stats(studach, where(is.numeric))
descriptive_stats <- function(.data, ...) {

  # Bind the column names that are referenced through non-standard evaluation
  # further down, so that R CMD check does not flag them as undefined globals.
  fn <- name <- value <- NULL
  cols <- rlang::quos(...)

  # One summary column per statistic/variable pair, named "<stat>.<variable>".
  # The statistic names contain no dot, so the first dot in a column name
  # always separates the statistic from the (possibly dotted) variable name.
  # `c(!!!cols)` combines all user selections into one tidy-select expression.
  out <- dplyr::summarize(
    .data,
    dplyr::across(
      c(!!!cols),
      list(
        n = ~ sum(!is.na(.x)),
        miss = ~ sum(is.na(.x)) / length(.x),
        mean = ~ mean(.x, na.rm = TRUE),
        sd = ~ stats::sd(.x, na.rm = TRUE),
        min = ~ min(.x, na.rm = TRUE),
        p25 = ~ stats::quantile(.x, probs = .25, na.rm = TRUE),
        median = ~ stats::median(.x, na.rm = TRUE),
        p75 = ~ stats::quantile(.x, probs = .75, na.rm = TRUE),
        max = ~ max(.x, na.rm = TRUE),
        skewness = ~ moments::skewness(.x, na.rm = TRUE),
        kurtosis = ~ moments::kurtosis(.x, na.rm = TRUE)
      ),
      .names = "{.fn}.{.col}"
    )
  )

  # Reshape from one wide row to one row per variable: stack all summary
  # columns, split each column name at its first dot into statistic (`fn`)
  # and variable (`name`), then spread the statistics back out as columns.
  out <- tidyr::pivot_longer(out, dplyr::everything())
  out <- dplyr::mutate(
    out,
    fn = stringr::str_extract(name, "^[^.]+"),
    name = stringr::str_extract(name, "(?<=[.]).*")
  )
  out <- tidyr::pivot_wider(out, names_from = fn, values_from = value)
  out <- dplyr::rename(out, variable = name)
  out
}
sophiestallasch/mdRtools documentation built on Jan. 20, 2022, 12:09 a.m.