Nothing
#' Calculates descriptive statistics for a continuous variable
#'
#'
#' @description
#' This function calculates basic descriptive statistics for a numerical
#' variable. It can calculate an overall summary, or broken down by
#' the levels of a grouping variable. Inputs can be summary data,
#' vectors, or a data frame.
#'
#'
#' @details
#' If equal variance is not assumed, each group is treated independently. In
#' that case, the estimated mean, CI, and SE is from [statpsych::ci.mean1()],
#' and the estimated median, CI, and SE is from [statpsych::ci.median1()]. If
#' equal variance is assumed, each group CI is calculated as with respect to all
#' group data, using [statpsych::ci.lc.mean.bs()] and
#' [statpsych::ci.lc.median.bs()]
#'
#'
#' @param data - for raw data, a data frame or tibble
#' @param outcome_variable - for raw data, either a vector containing numerical
#' data or the name of a data-frame column containing a factor
#' @param grouping_variable - optional; for raw data either a vector containing
#' a factor or the name of a data frame column containing a factor
#' @param means For summary data - A vector of 1 or more numerical means
#' @param sds For summary data - A vector of standard deviations, same length as
#' means
#' @param ns For summary data - A vector of sample sizes, same length as means
#' @param grouping_variable_levels For summary data - An optional vector of
#' group labels, same length as means. If not passed, auto-generated.
#' @param outcome_variable_name Optional friendly name for the outcome variable.
#' Defaults to 'My Outcome Variable'. Ignored if a data-frame is passed,
#' this argument is ignored.
#' @param grouping_variable_name Optional friendly name for the grouping
#' variable. If a data frame is passed, this argument is ignored.
#' @param conf_level The confidence level for the confidence interval. Given in
#' decimal form. Defaults to 0.95.
#' @param assume_equal_variance Defaults to FALSE
#'
#'
#' @return Returns a table of descriptive statistics
#' - **overview**
#' - *outcome_variable_name* -
#' - *grouping_variable_name* -
#' - *grouping_variable_level* -
#' - *mean* -
#' - *mean_LL* -
#' - *mean_UL* -
#' - *median* -
#' - *median_LL* -
#' - *median_UL* -
#' - *sd* -
#' - *min* -
#' - *max* -
#' - *q1* -
#' - *q3* -
#' - *n* -
#' - *missing* -
#' - *df* -
#' - *mean_SE* -
#' - *median_SE* -
#'
#'
#' @examples
#' # example code
#' esci::overview(data_latimier_3groups, "Test%", "Group")
#'
#'
#' @export
overview <- function(
data = NULL,
outcome_variable = NULL,
grouping_variable = NULL,
means = NULL,
sds = NULL,
ns = NULL,
grouping_variable_levels = NULL,
outcome_variable_name = "My Outcome Variable",
grouping_variable_name = NULL,
conf_level = 0.95,
assume_equal_variance = FALSE
) {
# I wanted this function to be flexible for different inputs
# But I also found use_method to be klunky and hard to follow.
# So this function is a self-rolled dispatcher--it inspects the
# arguments passed and then dispatches to handler functions.
# I guess time will tell if this is a reasonable thing to have done.
analysis_type <- "Undefined"
# Check to see if summary data has been passed
if (!is.null(means)) {
# Summary data is passed, so check to make sure raw data not included
if(!is.null(data)) stop(
"You have passed summary statistics,
so don't pass the 'data' parameter used for raw data.")
if(!is.null(grouping_variable)) stop(
"You have passed summary statistics,
so don't pass the 'grouping_variable' parameter used for raw data.")
if(!is.null(outcome_variable)) stop(
"You have passed summary statistics,
so don't pass the 'grouping_variable' parameter used for raw data.")
# Looks good, we can pass on to summary data
analysis_type <- "summary"
} else {
# Raw data has been passed, first sure summary data is not passed
if(!is.null(means)) stop(
"You have passed raw data,
so don't pass the 'means' parameter used for summary data.")
if(!is.null(sds)) stop(
"You have passed raw data,
so don't pass the 'sds' parameter used for summary data.")
if(!is.null(ns)) stop(
"You have passed raw data,
so don't pass the 'ns' parameter used for summary data.")
if(!is.null(grouping_variable_levels)) stop(
"You have passed raw data,
so don't pass the 'grouping_variable_levels' parameter used for summary data.")
# Check grouping_variable -- if it is an unquoted column name
# turn it into a string and store back to grouping_variable
is_column_name <- try(grouping_variable, silent = TRUE)
if(is(is_column_name, "try-error")) {
grouping_variable_enquo <- rlang::enquo(grouping_variable)
grouping_variable_enquo_name <- try(
eval(rlang::as_name(grouping_variable_enquo)), silent = TRUE
)
if (!is(grouping_variable_enquo_name, "try-error")) {
# This only succeeds if the columns were passed unquoted
# So now replace grouping_variable with a quoted version
grouping_variable <- grouping_variable_enquo_name
}
}
# Now we have to figure out what type of raw data:
# could be tidy column names, string column names, or vectors
# We check to see if we have a tidy column name by trying to evaluate it
is_column_name <- try(outcome_variable, silent = TRUE)
if(is(is_column_name, "try-error")) {
# Column names have been passed, check if need to be quoted up
outcome_variable_enquo <- rlang::enquo(outcome_variable)
outcome_variable_quoname <- try(
eval(rlang::as_name(outcome_variable_enquo)), silent = TRUE
)
if (!is(outcome_variable_quoname, "try-error")) {
# This only succeeds if outcome_variable was passed unquoted
# Reset outcome_variable to be fully quoted
outcome_variable <- outcome_variable_quoname
}
# Ready to be analyzed as a list of string column names
analysis_type <- "data.frame"
} else if (is(outcome_variable, "numeric")) {
# At this stage, we know that outcome_variable was not a tidy column name,
# so it should be either a vector of raw data (class = numeric)
# or a vector of column names passed as strings
analysis_type <- "vector"
} else if (is(outcome_variable, "character")) {
# Ok, must have been string column names
if (length(outcome_variable) == 1) {
analysis_type <- "data.frame"
} else {
analysis_type <- "jamovi"
}
}
}
# At this point, we've figured out the type of data passed
# so we can dispatch
# I put all the dispatches here, at the end, to make it easier to
# update in case the underlying function parameters change
if(analysis_type == "data.frame") {
if (!is.null(grouping_variable)) {
grouping_variable <- grouping_variable
}
return(
overview.data.frame(
data = data,
grouping_variable = grouping_variable,
outcome_variable = outcome_variable,
conf_level = conf_level,
assume_equal_variance = assume_equal_variance
)
)
} else if (analysis_type == "jamovi") {
return(
overview.jamovi(
data = data,
grouping_variable = grouping_variable,
outcome_variables = outcome_variable,
conf_level = conf_level,
assume_equal_variance = assume_equal_variance
)
)
} else if (analysis_type == "summary") {
return(
overview.summary(
means = means,
sds = sds,
ns = ns,
grouping_variable_levels = grouping_variable_levels,
outcome_variable_name = outcome_variable_name,
grouping_variable_name = grouping_variable_name,
conf_level = conf_level,
assume_equal_variance = assume_equal_variance
)
)
} else if (analysis_type == "vector") {
if (is.null(grouping_variable_name)) {
grouping_variable_name <- deparse(substitute(grouping_variable))
}
if (outcome_variable_name == "My Outcome Variable") {
outcome_variable_name <- deparse(substitute(outcome_variable))
}
return(
overview.vector(
grouping_variable = grouping_variable,
outcome_variable = outcome_variable,
outcome_variable_name = outcome_variable_name,
grouping_variable_name = grouping_variable_name,
conf_level = conf_level,
assume_equal_variance = assume_equal_variance
)
)
}
stop("Something went wrong dispatching this function")
}
# Takes an overview table and fills in mean CI, df, se, and MoE
# If assume_equal_variance, uses a pooled sd and total df for estimating means
# Otherwise, just uses group sd and df for estimating each group mean
# Uses CI_mdiff_contrast_bs as the base function for calculating CIs
overview.base <- function(
overview_table,
conf_level = 0.95,
assume_equal_variance = FALSE
) {
# Input checks -------------------------
# This is the base function for generating an estimated contrast
# It expects:
# overview_table - has more than 1 row
# conf_level should be a numeric value > 0 and <1
# assume_equal_variance should be a logical value, TRUE or FALSE
# Check conf.level
esci_assert_type(conf_level, "is.numeric")
esci_assert_range(
conf_level,
lower = 0,
upper = 1,
lower_inclusive = FALSE,
upper_inclusive = FALSE
)
# Check assume_equal_variance
esci_assert_type(assume_equal_variance, "is.logical")
# Prep --------------------------------------------
n_means <- nrow(overview_table)
contrasts <- matrix(data = 0, nrow = n_means, ncol = n_means)
diag(contrasts) <- 1
statpsych_row <- if (assume_equal_variance | n_means == 1) 1 else 2
statpsych_cnames <- c("LL", "UL", "df", "SE")
esci_column_names <- c("mean_LL", "mean_UL", "df", "mean_SE")
statpsych_cnames_mdn <- c("LL", "UL")
esci_column_names_mdn <- c("median_LL", "median_UL")
# Analysis ----------------------------------------
if (n_means == 1) {
res <- as.data.frame(
wrapper_ci.mean (
alpha = 1 - conf_level,
m = overview_table$mean,
sd = overview_table$sd,
n = overview_table$n
)
)
res$df <- overview_table$n - 1
overview_table[1, esci_column_names] <- res[1, statpsych_cnames]
} else {
for (x in 1:n_means) {
res <- statpsych::ci.lc.mean.bs(
alpha = 1 - conf_level,
m = overview_table$mean,
s = overview_table$sd,
n = overview_table$n,
v = contrasts[x,]
)
overview_table[x, esci_column_names] <- res[statpsych_row, statpsych_cnames]
if (!is.null(overview_table$median_SE)) {
res_median <- statpsych::ci.lc.median.bs(
alpha = 1 - conf_level,
m = overview_table$median,
s = overview_table$median_SE,
v = contrasts[x, ]
)
overview_table[x, esci_column_names_mdn ] <- res_median[1, statpsych_cnames_mdn]
}
}
}
return(overview_table)
}
# Produces an overview table from summary data
overview.summary <- function(
means,
sds,
ns,
grouping_variable_levels = NULL,
outcome_variable_name = "My Outcome Variable",
grouping_variable_name = NULL,
conf_level = 0.95,
assume_equal_variance = FALSE
) {
# Input checks ---------------------------------------------------------
# This function expects:
# means - vector of numeric data with >2 elements, no NAs
# sds - vector of numeric data >0 of same length as means, no NAs
# ns - vector of integers >= 2, of same length as means, no NAs
# outcome_variable_name -
# grouping_variable_levels -
# if passed: vector of characters same length of means
# if not passed: auto-generated (Group1, Group2, etc.) and warning issued
# The base function will check:
# conf_level is >0 and <1
# assume_equal_variance is logical
# Check means
esci_assert_type(means, "is.numeric")
row_report <- esci_assert_vector_valid_length(
means,
lower = 1,
lower_inclusive = TRUE,
na.invalid = TRUE
)
cells <- row_report$total
# Check sds
esci_assert_type(sds, "is.numeric")
row_report <- esci_assert_vector_valid_length(
sds,
lower = cells,
upper = cells,
lower_inclusive = TRUE,
upper_inclusive = TRUE,
na.invalid = TRUE
)
for (sd_value in sds) {
esci_assert_type(sd_value, "is.numeric")
esci_assert_range(sd_value,
lower = 0,
lower_inclusive = TRUE)
}
# Check ns
esci_assert_type(ns, "is.numeric")
row_report <- esci_assert_vector_valid_length(
ns,
lower = cells,
upper = cells,
lower_inclusive = TRUE,
upper_inclusive = TRUE,
na.invalid = TRUE
)
for (n_value in ns) {
esci_assert_type(n_value, "is.numeric")
esci_assert_type(n_value, "is.whole.number")
esci_assert_range(n_value,
lower = 2,
lower_inclusive = TRUE)
}
# Check outcome_variable_name
esci_assert_type(outcome_variable_name, "is.character")
outcome_variable_names <- rep(outcome_variable_name, length(means))
# Check grouping_variable_name
if (!is.null(grouping_variable_name)) {
esci_assert_type(grouping_variable_name, "is.character")
} else {
grouping_variable_name = "My Grouping Variable"
}
grouping_variable_names <- rep(grouping_variable_name, length(means))
# Check grouping_variable_levels
if(!is.null(grouping_variable_levels)) {
esci_assert_type(grouping_variable_levels, "is.character")
row_report <- esci_assert_vector_valid_length(
grouping_variable_levels,
lower = length(means),
upper = length(means),
lower_inclusive = TRUE,
upper_inclusive = TRUE,
na.rm = FALSE
)
levels_passed <- TRUE
} else {
grouping_variable_levels <- rep("All", length(means))
levels_passed <- FALSE
}
overview_table <- data.frame(
outcome_variable_name = outcome_variable_names,
grouping_variable_name = grouping_variable_names,
grouping_variable_level = grouping_variable_levels,
mean = means,
mean_LL = NA,
mean_UL = NA,
sd = sds,
n = ns,
df = NA,
mean_SE = NA
)
if (!levels_passed) overview_table$grouping_variable_name <- NULL
if (!levels_passed) overview_table$grouping_variable_level <- NULL
return(
overview.base(
overview_table = overview_table,
conf_level = conf_level,
assume_equal_variance = assume_equal_variance
)
)
}
# Produces an overview table from vector data
overview.vector <- function(
outcome_variable,
grouping_variable = NULL,
outcome_variable_name = NULL,
grouping_variable_name = NULL,
conf_level = 0.95,
assume_equal_variance = TRUE
) {
# Input checks ----------------------------
if(is.null(outcome_variable_name)) {
outcome_variable_name <- deparse(substitute(outcome_variable))
} else {
# Check that it is a character
esci_assert_type(outcome_variable_name, "is.character")
}
# If no grouping variable, just summarize whole vector
if (is.null(grouping_variable)) {
grouping_variable <- rep("All", length(outcome_variable))
grouping_variable_name <- "My Grouping Variable"
group_passed <- FALSE
} else {
if(is.null(grouping_variable_name)) {
grouping_variable_name <- deparse(substitute(grouping_variable))
} else {
esci_assert_type(grouping_variable_name, "is.character")
}
group_passed <- TRUE
}
# Deal with NA values in grouping variable------------------------------------
grouping_variable_report <- esci_assert_vector_valid_length(
grouping_variable,
lower = 2,
lower_inclusive = FALSE)
grouping_variable_all <-grouping_variable_report$total
grouping_variable_valid <- grouping_variable_report$valid
grouping_variable_missing <- grouping_variable_report$missing
# Setup ---------------------
groups <- levels(addNA(grouping_variable))
outcome_variable_names <- rep(outcome_variable_name, length(groups))
grouping_variable_names <- rep(grouping_variable_name, length(groups))
# Build the overview table ---------------------------------------------------
overview_table <- data.frame(
outcome_variable_name = outcome_variable_names,
grouping_variable_name = grouping_variable_names,
grouping_variable_level = groups
)
overview_table[ , c("mean", "mean_SE_temp", "mean_LL", "mean_UL")] <- aggregate(
outcome_variable,
by = list(addNA(grouping_variable)),
FUN = wrap_ci_mean1,
drop = FALSE,
conf_level = conf_level,
na.rm = TRUE
)[, 2]
overview_table[ , c("median", "median_SE_temp", "median_LL", "median_UL")] <- aggregate(
outcome_variable,
by = list(addNA(grouping_variable)),
FUN = wrap_ci_median1,
drop = FALSE,
conf_level = conf_level,
na.rm = TRUE
)[, 2]
overview_table$sd <- aggregate(
outcome_variable,
by = list(addNA(grouping_variable)),
FUN = sd,
drop = FALSE,
na.rm = TRUE)[, 2]
overview_table$min <- aggregate(
outcome_variable,
by = list(addNA(grouping_variable)),
FUN = min,
drop = FALSE,
na.rm = TRUE)[, 2]
overview_table$max <- aggregate(
outcome_variable,
by = list(addNA(grouping_variable)),
FUN = max,
drop = FALSE,
na.rm = TRUE)[, 2]
overview_table[ , c("q1", "q3")] <-aggregate(
outcome_variable,
by = list(addNA(grouping_variable)),
FUN = quantile,
drop = FALSE,
probs = c(0.25, 0.75),
na.rm = TRUE)[, 2]
overview_table$n <- aggregate(
outcome_variable,
by = list(addNA(grouping_variable)),
drop = FALSE,
FUN = length)[, 2]
overview_table$missing <-aggregate(
outcome_variable,
by = list(addNA(grouping_variable)),
drop = FALSE,
function(x) { sum(is.na(x)) })[, 2]
overview_table$n <- overview_table$n - overview_table$missing
overview_table$df <- NA
overview_table$mean_SE <- overview_table$mean_SE_temp
overview_table$mean_SE_temp <- NULL
overview_table$median_SE <- overview_table$median_SE_temp
overview_table$median_SE_temp <- NULL
if (!group_passed) overview_table$grouping_variable_name <- NULL
if (!group_passed) overview_table$grouping_variable_level <- NULL
# Cleanup - Deal with invalid rows and missing data rows-----------------
if (nrow(overview_table[is.na(overview_table$n), ]) > 0) {
overview_table[is.na(overview_table$n), ]$n <- 0
}
if (nrow(overview_table[is.na(overview_table$missing), ]) > 0) {
overview_table[is.na(overview_table$missing), ]$missing <- 0
}
na_level <- NULL
if(overview_table[nrow(overview_table), "n"] == 0 & overview_table[nrow(overview_table), "missing"] == 0) {
overview_table <- head(overview_table, -1)
} else {
na_level <- "Missing"
while (na_level %in% overview_table$group) {
na_level <- paste(na_level, "*", sep ="")
}
overview_table[nrow(overview_table), "grouping_variable_level"] <- na_level
row.names(overview_table)[nrow(overview_table)] <- "missing"
}
overview_no_miss <- overview_table[row.names(overview_table) != 'missing', ]
overview_valid <- overview_no_miss[overview_no_miss$n > 1, ]
overview_valid <- overview.base(
overview_table = overview_valid,
conf_level = conf_level,
assume_equal_variance = assume_equal_variance
)
overview_all <- rbind(
overview_valid,
overview_table[row.names(overview_table) != "missing" & overview_table$n < 2, ],
overview_table[row.names(overview_table) == "missing", ]
)
return(overview_all)
}
# Overview from a data frame
overview.data.frame <- function(
data,
outcome_variable,
grouping_variable = NULL,
conf_level = 0.95,
assume_equal_variance = TRUE
) {
# Input Checks -------------------------------------------------------------
# This function expects:
# data to be a data frame
# grouping_variable to be null a factor with more than 2 valid rows
# outcome_variable to be a numeric column in data, with more than 2 rows
esci_assert_type(data, "is.data.frame")
if (!is.null(grouping_variable)) {
esci_assert_valid_column_name(data, grouping_variable)
esci_assert_column_type(data, grouping_variable, "is.factor")
esci_assert_column_has_valid_rows(
data,
grouping_variable,
lower = 2,
na.rm = TRUE
)
}
# Validate this outcome variable
esci_assert_valid_column_name(data, outcome_variable)
esci_assert_column_type(data, outcome_variable, "is.numeric")
esci_assert_column_has_valid_rows(
data,
outcome_variable,
lower = 2,
na.rm = TRUE
)
if (!is.null(grouping_variable)) {
grouping_variable_name <- grouping_variable
grouping_variable <- data[[grouping_variable]]
} else {
grouping_variable_name <- NULL
}
# Now pass along to the .vector version of this function
overview_table <- overview.vector(
outcome_variable = data[[outcome_variable]],
grouping_variable = grouping_variable,
outcome_variable_name = outcome_variable,
grouping_variable_name = grouping_variable_name,
conf_level = conf_level,
assume_equal_variance = assume_equal_variance
)
return(overview_table)
}
# Overview from a data frame with a list of outcome variables
overview.jamovi <- function(
data,
outcome_variables,
grouping_variable = NULL,
conf_level = 0.95,
assume_equal_variance = TRUE
) {
res <- NULL
# Cycle through the list of columns;
# for each call estimate_mean_one.character, which handles 1 column
for (outcome_variable in outcome_variables) {
# Now pass along to the data_frame version
res <- rbind(
res,
overview.data.frame(
data = data,
outcome_variable = outcome_variable,
grouping_variable = grouping_variable,
conf_level = conf_level,
assume_equal_variance = assume_equal_variance
)
)
} # Finish cycle through variables
return(res)
}
wrap_ci_median1 <- function(
x,
conf_level,
na.rm = TRUE,
drop = FALSE
) {
if(length(na.omit(x)) < 2) return(c(median(x), NA, NA, NA))
res <- wrapper_ci.median(
alpha = 1 - conf_level,
y = if(na.rm) x[!is.na(x)] else x
)
return(res)
}
wrap_ci_mean1 <- function(
x,
conf_level,
na.rm = TRUE,
drop = FALSE
) {
if(length(x) < 2) return(c(mean(x), NA, NA, NA))
res <- wrapper_ci.mean(
alpha = 1 - conf_level,
m = mean(x, na.rm = na.rm),
sd = sd(x, na.rm = na.rm),
n = if(na.rm) length(x[!is.na(x)]) else length(x)
)
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.