#' Descriptive statistics
#'
#' Returns descriptive statistics for a numeric vector.
#'
#' @param vector a numeric vector
#' @param output_type if \code{output_type = "vector"}, return a vector
#' of descriptive statistics; if \code{output_type = "dt"}, return a
#' data.table of descriptive statistics (default = "vector")
#' @param sigfigs number of significant digits to round to (default = 3)
#' @param se_of_mean logical. Should the standard errors around
#' the mean be included in the descriptive stats?
#' (default = FALSE)
#' @param ci logical. Should 95% CI be included in the descriptive stats?
#' (default = FALSE)
#' @param pi logical. Should 95% PI be included in the descriptive stats?
#' (default = FALSE)
#' @param skewness logical. Should the skewness statistic be included
#' in the descriptive stats?
#' (default = FALSE)
#' @param kurtosis logical. Should the kurtosis statistic be included
#' in the descriptive stats?
#' (default = FALSE)
#' @param notify_na_count if \code{TRUE}, notify how many observations
#' were removed due to missing values. By default, NA count will be printed
#' only if there are any NA values.
#' @param print_dt if \code{TRUE}, print the descriptive stats data.table
#' @return if \code{output_type = "vector"}, the output will be a
#' named numeric vector of descriptive statistics;
#' if \code{output_type = "dt"}, the output will be data.table of
#' descriptive statistics.
#' @examples
#' desc_stats(1:100)
#' desc_stats(1:100, ci = TRUE, pi = TRUE, sigfigs = 2)
#' desc_stats(1:100, se_of_mean = TRUE,
#' ci = TRUE, pi = TRUE, sigfigs = 2,
#' skewness = TRUE, kurtosis = TRUE)
#' desc_stats(c(1:100, NA))
#' example_dt <- desc_stats(vector = c(1:100, NA), output_type = "dt")
#' example_dt
#' @export
desc_stats <- function(
vector = NULL,
output_type = "vector",
sigfigs = 3,
se_of_mean = FALSE,
ci = FALSE,
pi = FALSE,
skewness = FALSE,
kurtosis = FALSE,
notify_na_count = NULL,
print_dt = FALSE) {
# deal with NA values
v_no_na <- vector[!is.na(vector)]
na_count <- length(vector) - length(v_no_na)
# by default, notify only if NA values are present
if (is.null(notify_na_count)) {
notify_na_count <- ifelse(na_count > 0, TRUE, FALSE)
}
if (notify_na_count == TRUE) {
message(paste0(
"\n", na_count,
" observation(s) were removed due to missing values.\n"
))
}
# get stats
n <- length(v_no_na)
mean <- mean(v_no_na)
sd <- stats::sd(v_no_na)
median <- stats::median(v_no_na)
min <- min(v_no_na)
max <- max(v_no_na)
if (ci == TRUE) {
ci_95_ll <- tryCatch(
as.numeric(stats::t.test(v_no_na)[["conf.int"]][1]),
warning = function(w) NA_real_, error = function(e) NA_real_)
ci_95_ul <- tryCatch(
as.numeric(stats::t.test(v_no_na)[["conf.int"]][2]),
warning = function(w) NA_real_, error = function(e) NA_real_)
}
if (pi == TRUE) {
pi_95_ll <- tryCatch(
as.numeric(mean(v_no_na) + stats::sd(v_no_na) *
stats::qt(0.025, length(v_no_na) - 1)),
warning = function(w) NA_real_, error = function(e) NA_real_)
pi_95_ul <- tryCatch(
as.numeric(mean(v_no_na) + stats::sd(v_no_na) *
stats::qt(0.975, length(v_no_na) - 1)),
warning = function(w) NA_real_, error = function(e) NA_real_)
}
# stats to report
stats_to_report <- list(
n = n,
mean = mean,
sd = sd,
median = median,
min = min,
max = max)
# add se of mean
if (se_of_mean == TRUE) {
stats_to_report <- c(
stats_to_report,
se_of_mean = kim::se_of_mean(v_no_na, notify_na_count = FALSE))
}
# add ci
if (ci == TRUE) {
stats_to_report <- c(
stats_to_report,
ci_95_ll = ci_95_ll,
ci_95_ul = ci_95_ul)
}
# add pi
if (pi == TRUE) {
stats_to_report <- c(
stats_to_report,
pi_95_ll = pi_95_ll,
pi_95_ul = pi_95_ul)
}
# add skewness
if (skewness == TRUE) {
stats_to_report <- c(
stats_to_report,
skewness = kim::skewness(v_no_na))
}
# add kurtosis
if (kurtosis == TRUE) {
stats_to_report <- c(
stats_to_report,
kurtosis = kim::kurtosis(v_no_na))
}
# stats in a data table format
dt <- data.table::setDT(stats_to_report)
# round
stats_to_round <- c(
"mean", "sd", "median", "se_of_mean", "ci_95_ll", "ci_95_ul",
"pi_95_ll", "pi_95_ul", "skewness", "kurtosis")
for (j in stats_to_round) {
if (j %in% names(dt)) {
data.table::set(
dt, j = j,
value = kim::round_flexibly(dt[[j]], sigfigs))
}
}
# print the data table
if (print_dt == TRUE) {
print(dt)
cat("\n")
}
# return data table
if (output_type == "dt") {
invisible(dt)
} else {
output_vector <- vapply(dt, as.numeric, FUN.VALUE = numeric(1L))
return(output_vector)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.