#' Compute descriptive statistics
#'
#' @description Computes a range of descriptive statistics that are relevant
#'   for item analysis, for one numeric variable or a series of numeric
#'   variables of a data frame.
#' @param .data A data frame.
#' @param ... \code{<tidy-select>} The name(s) or column number(s) of the
#'   numeric variable(s) to compute the descriptive statistics for. Several
#'   selections may be supplied; they are combined.
#'
#' @return A tibble with one row per selected variable. The column
#'   \code{variable} holds the variable name; the remaining columns are:
#' \describe{
#' \item{\code{n}}{Number of valid (non-missing) observations}
#' \item{\code{miss}}{Proportion of missing values (between 0 and 1)}
#' \item{\code{mean}}{Arithmetic mean}
#' \item{\code{sd}}{Standard deviation}
#' \item{\code{min}}{Minimum}
#' \item{\code{p25}}{25th percentile}
#' \item{\code{median}}{Median}
#' \item{\code{p75}}{75th percentile}
#' \item{\code{max}}{Maximum}
#' \item{\code{skewness}}{Skewness}
#' \item{\code{kurtosis}}{Kurtosis}
#' }
#' All statistics except \code{n} and \code{miss} are computed after removing
#' missing values.
#'
#' @export
#'
#' @examples
#' descriptive_stats(studach, read)
#' descriptive_stats(studach, 9:12)
#' descriptive_stats(studach, where(is.numeric))
descriptive_stats <- function(.data, ...) {
  # Bind the column names that are referenced through non-standard evaluation
  # below, so that R CMD check does not report undefined global variables.
  fn <- name <- value <- NULL

  cols <- rlang::quos(...)

  # One summary column per statistic/variable pair, named "<stat>.<variable>".
  # The selections are spliced into c() so that every expression passed via
  # `...` contributes to the selection, not only a single one.
  out <- dplyr::summarize(
    .data,
    dplyr::across(
      c(!!!cols),
      list(
        n = ~ sum(!is.na(.x)),
        miss = ~ sum(is.na(.x)) / length(.x),
        mean = ~ mean(.x, na.rm = TRUE),
        sd = ~ stats::sd(.x, na.rm = TRUE),
        min = ~ min(.x, na.rm = TRUE),
        p25 = ~ stats::quantile(.x, probs = .25, na.rm = TRUE),
        median = ~ stats::median(.x, na.rm = TRUE),
        p75 = ~ stats::quantile(.x, probs = .75, na.rm = TRUE),
        max = ~ max(.x, na.rm = TRUE),
        skewness = ~ moments::skewness(.x, na.rm = TRUE),
        kurtosis = ~ moments::kurtosis(.x, na.rm = TRUE)
      ),
      .names = "{.fn}.{.col}"
    )
  )

  # Reshape the single wide row into one row per variable.
  out <- tidyr::pivot_longer(out, dplyr::everything())

  # Split "<stat>.<variable>" at the first dot only: statistic names never
  # contain a dot, whereas variable names may.
  out <- dplyr::mutate(
    out,
    fn = stringr::str_extract(name, "^[^.]+"),
    name = stringr::str_extract(name, "(?<=[.]).*")
  )

  out <- tidyr::pivot_wider(out, names_from = fn, values_from = value)
  out <- dplyr::rename(out, variable = name)
  out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.