##' Calculate Custom Summary Statistics
##'
##' This function computes a selection of summary statistics on a data frame which
##' contains response variables and categorical variables. If there is only one
##' response variable, the results are returned in a data frame. For more than one
##' response variable, the results are returned in a list of data frames. See the examples.
##'
##' @param data A data frame containing the variables.
##'
##' @param groupVars Character. A vector of the categorical variable names.
##'
##' @param respVars Character. A vector of the response variable names.
##'
##' @param ... Other parameters to be passed downstream.
##'
##' @param ci Numeric. The confidence interval desired.
##'
##' @return Either a data frame (one response variable) or a list of data frames
##' (more than one response variable). Each data frame row has summary statistics
##' for a given \code{groupVars}. The summary includes the number of observations,
##' the standard deviation and standard error, the mean, and upper and lower
##' confidence intervals.
##'
##' @name sumStatDeluxe
##' @rdname sumStatDeluxe
##' @export
##' @keywords summary
##'
##' @examples
##' require('plyr')
##'
##' ### One response variable returns a data frame:
##'
##' tst <- sumStatDeluxe(chickwts, groupVars = "feed", respVars = "weight")
##' tst
##'
##' ### Two response variables returns a list of data frames:
##'
##' tst <- sumStatDeluxe(iris, groupVars = "Species",
##' respVars = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"))
##' tst
##'
##' ### Two categorical variables, one response:
##'
##' tst <- sumStatDeluxe(warpbreaks, groupVars = c("wool", "tension"), respVars = "breaks")
##'
##' # One categorical, multiple response variables:
##'
##' tst <- sumStatDeluxe(airquality, groupVars = "Month",
##' respVars = c("Ozone", "Solar.R", "Wind"))
##' tst
##'
sumStatDeluxe <- function(data = NULL, groupVars = NULL, respVars = NULL, ci = 0.95, ...) {
# read ?var carefully for role of na.rm - it must always be true for numeric vectors
if (!requireNamespace("plyr", quietly = TRUE)) {
stop("You need to install package plyr to use this function")
}
# local function to compute std error
se <- function(x) { stats::sd(x, na.rm = TRUE)/sqrt(length(stats::na.omit(x))) }
# local function to compute lower ci of mean
Lci <- function(x) {
m <- mean(stats::na.omit(x))
s <- se(x)
f <- stats::qt(ci/2 + .5, length(stats::na.omit(x)))
l <- m - f*s
}
# local function to compute upper ci of mean
Uci <- function(x) {
m <- mean(stats::na.omit(x))
s <- se(x)
f <- stats::qt(ci/2 + .5, length(stats::na.omit(x)))
l <- m + f*s
}
# local function - acts on a vector, returns a vector
processVec <- function(df, i) {
M <- mean(df[,i], na.rm = TRUE)
S <- stats::sd(df[,i], na.rm = TRUE)
SE <- se(df[,i])
N <- length(stats::na.omit(df[,i]))
L <- Lci(df[,i])
U <- Uci(df[,i])
res <- c(N, round(S, 2), round(SE,2),
round(L, 2), round(M, 2), round(U,2))
}
# local function - acts on df, returns a df
# used when length(respVars >= 2)
processDF <- function(df, respVars) {
df <- df[,respVars]
ans <- processVec(df, 1)
for (n in 2:ncol(df)) {
ans <- rbind(ans, processVec(df, n))
}
ans <- as.data.frame(ans)
rownames(ans) <- names(df)
colnames(ans) <- c("N", "StdDev", "StdError", "lowerCI", "mean", "upperCI")
return(ans)
}
if (length(respVars) == 1) {
DF <- plyr::ddply(data, groupVars, processVec, respVars, ...)
names(DF) <- c(groupVars, "N", "StdDev", "StdError", "lowerCI", "mean", "upperCI")
return(DF)
}
if (length(respVars) >= 2) {
L <- plyr::dlply(data, groupVars, processDF, respVars, ...)
return(L)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.