R/descriptive_stats_by.R

Defines functions descriptive_stats_by

Documented in descriptive_stats_by

#' Compute descriptive statistics by group
#'
#' @description Computes a range of descriptive statistics that are relevant
#'   for item analysis, for one numeric variable or a series of numeric
#'   variables, separately for each level of a grouping variable.
#'
#' @param .data A data frame.
#' @param ... \code{<tidy-select>} The name/s or column number/s of the numeric
#'   variable/s to compute the descriptive statistics for. Several selections
#'   may be supplied (e.g. \code{x, y, a:c}); they are combined. At least one
#'   selection is required.
#' @param group The (unquoted) name of the grouping variable.
#'
#' @return A tibble in long format with one row per combination of
#'   \code{group} level and variable. The first column is the grouping
#'   variable, the second column (\code{variable}) holds the name of the
#'   summarized variable, followed by one column per statistic:
#' \describe{
#'   \item{\code{n}}{Number of valid (non-missing) observations}
#'   \item{\code{mean}}{Mean}
#'   \item{\code{sd}}{Standard deviation}
#'   \item{\code{min}}{Minimum}
#'   \item{\code{p25}}{25th percentile}
#'   \item{\code{median}}{Median}
#'   \item{\code{p75}}{75th percentile}
#'   \item{\code{max}}{Maximum}
#'   \item{\code{skewness}}{Skewness}
#'   \item{\code{kurtosis}}{Kurtosis}
#' }
#' All statistics are computed after removing missing values.
#'
#' @export
#'
#' @importFrom magrittr %>%
#' @examples
#' descriptive_stats_by(studach, gender:read, group = ts)
descriptive_stats_by <- function(.data, ..., group) {

  # Bind the bare column names used in the tidy-eval calls below so that
  # R CMD check does not report them as undefined global variables.
  fn <- name <- value <- NULL

  cols <- rlang::quos(...)
  group <- rlang::enquo(group)

  if (length(cols) == 0L) {
    stop("At least one variable must be selected via `...`.", call. = FALSE)
  }

  out <- dplyr::group_by(.data, !!group)

  # `c(!!!cols)` combines every selection passed through `...`; splicing into
  # a bare parenthesis is only well-formed for exactly one selection.
  # Summary columns are named "<statistic>.<column>" so that both parts can
  # be separated again after reshaping.
  out <- dplyr::summarize(
    out,
    dplyr::across(
      c(!!!cols),
      list(
        n = ~ sum(!is.na(.x)),
        mean = ~ mean(.x, na.rm = TRUE),
        sd = ~ stats::sd(.x, na.rm = TRUE),
        min = ~ min(.x, na.rm = TRUE),
        p25 = ~ stats::quantile(.x, probs = .25, na.rm = TRUE),
        median = ~ stats::median(.x, na.rm = TRUE),
        p75 = ~ stats::quantile(.x, probs = .75, na.rm = TRUE),
        max = ~ max(.x, na.rm = TRUE),
        skewness = ~ moments::skewness(.x, na.rm = TRUE),
        kurtosis = ~ moments::kurtosis(.x, na.rm = TRUE)
      ),
      .names = "{.fn}.{.col}"
    )
  )

  # Reshape: one row per group x statistic x variable ...
  out <- tidyr::pivot_longer(out, -(!!group))

  # ... split "<statistic>.<column>" at the FIRST dot. Statistic names never
  # contain a dot, so column names that do contain dots are kept intact.
  out <- dplyr::mutate(
    out,
    fn = stringr::str_extract(name, "^[^.]+"),
    name = stringr::str_extract(name, "(?<=[.]).*")
  )

  # ... and spread the statistics into columns again.
  out <- tidyr::pivot_wider(out, names_from = fn, values_from = value)
  out <- dplyr::rename(out, variable = name)
  out
}
sophiestallasch/mdRtools documentation built on Jan. 20, 2022, 12:09 a.m.