#' @title Number summary statistics by groups
#' @description
#' \code{isum.by} generates seven number summary statistics and tests normality
#' on the fly grouped by a categorical variable.
#' @param x a numeric object
#' @param by a factor or character object
#' @param data a data frame object (Optional)
#' @param rnd specify rounding of numbers. See \code{\link{round}}.
#' @param na.rm A logical value to specify missing values, <NA> in the table
#' @details
#' Normality test is perfomed by Shapiro-Wilk Normality Test. See more at
#' \code{\link{shapiro.test}}.
#'
#' If the second variable has two levels, it performs either Student's t-test
#' \code{\link{t.test}} or Wilcoxon test (Mann-Whitney's test)
#' \code{\link{wilcox.test}}. If more than two levels, ANOVA
#' \code{\link{aov}} or Kruskal-Wallis rank sum test \code{\link{kruskal.test}}
#' is carried out to test the difference between different groups.
#' @import stats
#' @seealso \code{\link{isum}}, \code{\link{tab}}, \code{\link{xtab}}
#' @keywords number summary
#' @author Myo Minn Oo (Email: \email{dr.myominnoo@@gmail.com} |
#' Website: \url{https://myominnoo.github.io/})
#' @examples
#' str(infert)
#' isum.by(infert$age, infert$education)
#' isum.by(age, education, data = infert)
#' isum.by(age, spontaneous, data = infert)
#' isum.by(age, case, data = infert)
#' @export
isum.by <- function(x, by, data = NULL, rnd = 1, na.rm = FALSE)
{
if (!is.null(data)) {
arguments <- as.list(match.call())
x <- eval(substitute(x), data)
by <- eval(substitute(by), data)
x.name <- arguments$x
by.name <- arguments$by
} else {
x.name <- deparse(substitute(x))
by.name <- deparse(substitute(by))
}
if (na.rm) {
l <- as.character(unique(na.omit(by)))
x <- x[!is.na(x)]
} else
l <- as.character(unique(by))
l <- ifelse(is.na(l), "<NA>", l)
i <- NA
f <- do.call(
rbind,
lapply(l, function(z) {
if (z == "<NA>") {i = x[which(is.na(by))]} else {i = x[which(by == z)]}
len = length(i)
na = length(i[is.na(i)])
mu <- mean(i, na.rm = TRUE)
std <- sd(i, na.rm = TRUE)
q <- quantile(i, probs = c(0, .25, .5, .75, 1), na.rm = TRUE)
v <- round(c(mu, std, q), rnd)
pvalue <- tryCatch({
suppressWarnings(shapiro.test(i)$p.value)
}, error = function(err) {
return(NA)
})
pvalue <- ifelse(pvalue < 0.00001, "< 0.00001", round(pvalue, 5))
f <- data.frame(Obs. = len, NA. = na, Mean = v[1], Std.Dev = v[2],
Median = v[5], Q1 = v[4], Q3 = v[6],
Min = v[3], Max = v[7], Normality = pvalue,
stringsAsFactors = FALSE)
row.names(f) <- z
f
})
)
pvalue <- tryCatch({
suppressWarnings(shapiro.test(i)$p.value)
}, error = function(err) {
return(NA)
})
pvalue <- ifelse(pvalue < 0.00001, "< 0.00001", round(pvalue, 5))
f <- rbind(
f,
"-" = rep("-", 10),
combined = c(length(x), length(x[is.na(x)]),
round(mean(x, na.rm = TRUE), rnd),
round(sd(x, na.rm = TRUE), rnd),
round(quantile(x, probs = c(0, .25, .5, .75, 1), na.rm = TRUE), rnd),
pvalue)
)
if (length(l) > 2) {
pvalue <- tryCatch({
suppressWarnings(summary(aov(x ~ by))[[1]][1,5])
}, error = function(err) {
return(NA)})
pvalue.name <- 'ANOVA'
pvalue <- c(
pvalue,
tryCatch({
suppressWarnings(kruskal.test(x ~ by)$p.value)
}, error = function(err) {
return(NA)})
)
pvalue.name <- c(pvalue.name, 'Kruskal-Wallis')
} else {
pvalue <- tryCatch({
suppressWarnings(t.test(x ~ by)$p.value)
}, error = function(err) {
return(NA)
})
pvalue.name <- 't-test'
pvalue <- c(
pvalue,
tryCatch({
suppressWarnings(wilcox.test(x ~ by)$p.value)
}, error = function(err) {
return(NA)})
)
pvalue.name <- c(pvalue.name, 'Wilcoxon')
}
cat("\nNumber summary: ", x.name, " x ", by.name, "\np-value (",
pvalue.name[1], ") : ", pvalue[1], "\np-value (",
pvalue.name[2], ") : ", pvalue[2], "\n\n", sep = "")
return(f)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.