#' Lisa's Summary Table Function
#'
#' This function creates a nice looking summary table.
#' It returns a dataframe and prints an html table by default for use in R markdown documents.
#' @param df Dataset, a dataframe object (REQUIRED).
#' @param covs Character vector of variable names to include in table (REQUIRED).
#' @param type Numeric vector indicating variable type for each covariate: 1 for continuous and 2 for categorical (REQUIRED).
#' @param labels Labels for covariates. Default = NA, variable names will be used.
#' @param by Variable name (character) to stratify by. Defaults to NA, no stratifying variable and no tests.
#' @param bylab Label (character) for stratification variable to use in warning statement. Default is NA.
#' @param warnmissby Indicator (logical) to warn user of missing values in stratification variable. Default = FALSE.
#' @param allcol Indicator (logical) to diplay the "All Data" column. Default = TRUE.
#' @param alllab Character label for "All" column. Default = "All (N = )".
#' @param orderfreq Indicator (logical) to order (non-factor) categorical variables by descending frequency.
#' Ordered factor variables will retain original ordering. Default = FALSE.
#' @param percent Should row or column percents be used: 2 for row and 1 for column percents. Default is row percentages (2).
#' @param perc.dec Number of decimal places for percentages (categorcal variables). Default = 1.
#' @param stats Statistics to display for continuous variables, a character vector of the following options
#' (Default = mean_sd, median_q1q3, minmax): mean, sd, median, iqr, q1, q3, q1q3, min, max, minmax, range, sem,
#' mean_sd, mean_sem, median_iqr, median_range, median_q1q3, median_minmax.
#' @param cont.dec Number of decimal placess for continuous variable summary stats (mean, median, sd, iqr, etc.). Default = 2.
#' @param dispmiss Indicator (logical) to display number of missing values. Default = TRUE.
#' @param dispN Indicator (logical) to display number of non-missing values. Default = FALSE.
#' @param tests Character vector of tests to calculate p-values. If only one is entered it will apply to all variables.
#' If NA (default), no tests will be done.
#' Parametric ("p": T-test, Chi-squared, Anova, etc.),
#' Non-Parametric ("np": Rank-sum, Fisher's exact, Kruskal-Wallis, etc.),
#' T-test ("ttest"), Wilcoxon/Mann-Whitney Rank-sum ("ranksum"),
#' Chi-squared ("chisq"), Fisher's Exact test ("fe"),
#' Anova ("anova"), and Kruskal-Wallis ("kw") are currently supported.
#' @param exact Logical indicator to use exact version of Wilcoxon rank-sum test (using coin package). Default = FALSE.
#' @param pval.dec Number of decimal places for p-values. Default = 3.
#' @param testcol Indicator (logical) to display column with statistical test names. Default = TRUE.
#' @param pvalcol Indicator (logical) to display column with p-values. Default = TRUE.
#' @param mingroup Minimum non-missing group size required to report p-value (0 by default to report all p-values).
#' @param mincell Minimum non-missing cell size required to report p-value (0 by default to report all p-values).
#' @param paired Indicator (logical) to use a test for paired data (only available for ttest and ranksum). Default = FALSE.
#' @param altp Numeric vector for alternative p-values to use (default is NA, use regular p-values).
#' @param fisher.test.workspace Number. Use this to change the default workspace for fisher.test if desired. Default = 200000.
#' @param kable Indicator (logical) to use kable to display table. Default = TRUE.
#' @param htmlTable Indicator (logical) to use htmlTable package to display table instead of kable Default = FALSE.
#' @param use_flextable Indicator (logical) to use flextable package to display table. Default = FALSE.
#' @param htmltitle Character label for htmlTable variable names column. Default = " ".
#' @param caption Character title for htmlTable or kable table. Default = " ".
#' @param color Character Hex color to use for htmlTable striping. Default = "#EEEEEE" (light grey).
#' @param byref Indicator (logical) to include reference "by" category column be included (default = TRUE).
#' @keywords summary table consulting Lisa
#' @importFrom knitr kable
#' @importFrom htmlTable htmlTable
#' @importFrom coin wilcox_test wilcoxsign_test pvalue
#' @importFrom MASS polr
#' @importFrom clinfun jonckheere.test
#' @importFrom multiCA multiCA.test
#' @import flextable
#' @export
nicetable <- function(df
### REQUIRED inputs
,covs
,type
### covariate labels
,labels = NA
### stratification specifications
,by = NA
,bylab = NA
,warnmissby = FALSE
,allcol = TRUE
,alllab = NA
### categorical covariate options
,orderfreq = FALSE
,percent = 2
,perc.dec = 1
### continuous covariate options
,stats = c("mean_sd", "median_q1q3", "minmax")
,cont.dec = 2
### missing data reporting options
,dispmiss = TRUE
,dispN = FALSE
### statistical tests and options
,tests = NA
,exact = FALSE
,pval.dec = 3
,testcol = TRUE
,pvalcol = TRUE
,mingroup = 0
,mincell = 0
,paired = FALSE
,altp = NA
,fisher.test.workspace = 200000
### table formatting and options
,kable = TRUE
,htmlTable = FALSE
,use_flextable = FALSE
,htmltitle = ""
,caption = ""
,color = "#EEEEEE"
,byref = TRUE
){
# check required user inputs ---------------------------------------
try(if (class(df[1]) != "data.frame") stop("df must be a dataframe\n"))
### if only one variable type is entered, repeat for all variables
if (length(type) == 1) {
type <- rep(type, length(covs))
}
## remove any covs that do not appear in dataset
covs2 <- covs[covs %in% names(df)]
if (length(covs2) != length(covs)) cat("Warning! Covariate(s) do not exist:", covs[!(covs %in% names(df))],"\n")
covs <- covs2
type <- type[covs %in% names(df)]
try(if (length(covs) == 0) stop("No valid covs\n"))
## check that all types are valid
if (length(type[!(type %in% c(1,2))]) > 0) cat("Warning! Invalid type for covariate(s): ", covs[!(type %in% c(1,2))], "\n")
## check that by variable appears in dataset
if (!is.na(by)){
by2 <- by[by %in% names(df)]
try(if (length(by2) != 1) stop("Grouping variable: ", by[!(by %in% names(df))]," does not exist in dataset\n"))
by <- by2
}
simpleCap <- function(x) {
s <- strsplit(x, " ")[[1]]
paste(toupper(substring(s, 1,1)), substring(s, 2), sep="", collapse=" ")
}
if (sum(!is.na(altp)) > 0){
if (length(altp) != length(covs)) altp <- NA
if (length(altp) == length(covs)){
testcol <- FALSE
tests <- "p"
}
}
if (is.na(by)){
byref = TRUE
}
blanks <- TRUE
htmlcaption <- caption
if (is.na(bylab) & !is.na(by)) bylab <- by
noby <- 0
noby[is.na(by)] <- 1
if (is.na(by)){
df$Allcol <- "All"
if (!is.na(alllab)) df$Allcol <- alllab
by <- "Allcol"
allcol <- FALSE
}
### if data is paired, do not print a column for combined data
if (paired == TRUE){
allcol <- FALSE
}
pvalf <- paste("%.", pval.dec, "f", sep="")
percf <- paste("%.", perc.dec, "f", sep="")
contf <- paste("%.", cont.dec, "f", sep="")
### define functions for continuous summary stats
sem <- function(x) sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))
nice_sem <- function(x) sprintf(contf, round(sem(x), cont.dec))
nice_mean <- function(x) sprintf(contf, round( mean(x, na.rm=TRUE), cont.dec))
nice_sd <- function(x) sprintf(contf, round( sd(x, na.rm=TRUE), cont.dec))
nice_median <- function(x) sprintf(contf, round(median(x, na.rm=TRUE), cont.dec))
nice_iqr <- function(x) sprintf(contf, round( IQR(x, na.rm=TRUE), cont.dec))
nice_min <- function(x) sprintf(contf, round( min(x, na.rm=TRUE), cont.dec))
nice_max <- function(x) sprintf(contf, round( max(x, na.rm=TRUE), cont.dec))
nice_q1 <- function(x) sprintf(contf, round(quantile(x, c(0.25), na.rm=T), cont.dec))
nice_q3 <- function(x) sprintf(contf, round(quantile(x, c(0.75), na.rm=T), cont.dec))
nice_range <- function(x) sprintf(contf, round(diff(range(x, na.rm=TRUE)), cont.dec))
nice_minmax <- function(x) paste(nice_min(x), ", ", nice_max(x), sep="")
nice_q1q3 <- function(x) paste(nice_q1(x), ", ", nice_q3(x), sep="")
nice_mean_sd <- function(x) paste(nice_mean(x), " (", nice_sd(x), ")", sep="")
nice_mean_sem <- function(x) paste(nice_mean(x), " (", nice_sem(x), ")", sep="")
nice_median_iqr <- function(x) paste(nice_median(x), " (", nice_iqr(x), ")", sep="")
nice_median_range <- function(x) paste(nice_median(x), " (", nice_range(x), ")", sep="")
nice_median_q1q3 <- function(x) paste(nice_median(x), " [", nice_q1q3(x) , "]", sep="")
nice_median_minmax <- function(x) paste(nice_median(x), " [", nice_minmax(x),"]", sep="")
nicelab <- data.frame("nicefun" = c("nice_mean",
"nice_sd",
"nice_sem",
"nice_median",
"nice_iqr",
"nice_q1",
"nice_q3",
"nice_q1q3",
"nice_min",
"nice_max",
"nice_minmax",
"nice_range",
"nice_mean_sd",
"nice_median_range",
"nice_median_iqr",
"nice_median_q1q3",
"nice_median_minmax"),
"label" = c("* Mean",
"* SD",
"* SEM",
"* Median",
"* IQR",
"* Q1",
"* Q3",
"* Q1, Q3",
"* Min",
"* Max",
"* Min, Max",
"* Range",
"* Mean (SD)",
"* Median (Range)",
"* Median (IQR)",
"* Median [Q1, Q3]",
"* Median [Min, Max]"),
stringsAsFactors = FALSE)
rownames(nicelab) <- nicelab$nicefun
nicelab$fun <- unlist(lapply(nicelab[,"nicefun"], function(x) strsplit(x, "nice_")[[1]][2]))
### Use same stats for all continuous variables
### Trouble-shoot user error by removing bad values
### For categorical variables, stats = "NA"
if (stats[1] == "mean_sd_median_range") stats <- c("mean_sd", "median_minmax") ## for backwards compatability
stats <- tolower(stats)
stats <- unique(stats)
stats <- stats[stats %in% nicelab$fun]
stats <- rep(list(stats), length(covs))
stats[which(type == 2)] <- "NA"
### Trouble-shoot user entered tests
tests <- tolower(tests)
if (length(tests) == 1){
tests <- rep(tests, length(covs))
}
if (length(percent) == 1){
percent <- rep(percent, length(covs))
}
if (!is.na(tests[1]) & tests[1] == "p"){
tests[type == 1] <- "ttest"
tests[type == 2] <- "chisq"
}
if (!is.na(tests[1]) & tests[1] == "np"){
tests[type == 1] <- "ranksum"
tests[type == 2] <- "fe"
}
testlabs <- tests
### if labels are NA, use variable names
if (length(labels) == 1){
labels <- rep(labels, length(covs))
}
labels[is.na(labels)] <- covs[is.na(labels)]
### if tests is blank do not calculate a p-value
if (length(tests) == 1){
tests <- rep(tests, length(covs))
}
pval <- rep(TRUE, length(covs))
pval[is.na(tests)] <- FALSE
### treat the by variables as a factor if not already
### subset dataframe for non missing 'by' values
df[,by] <- as.factor(df[,by])
total_levels <- 0
if (sum(is.na(df[,by])) > 0){
if (warnmissby == TRUE){
if (sum(is.na(df[,by])) ==1){
cat("* Note there was 1 entry missing", bylab, "which was excluded from the table. \n\n")
}
if (sum(is.na(df[,by])) > 1){
cat("* Note there were", sum(is.na(df[,by])), "entries missing", bylab, "which were excluded from the table. \n\n")
}
}
}
df <- df[!is.na(df[,by]),]
## save the number of rows per variable to use for table striping
rgroup <- NULL
## placeholder for final table
sum_table <- NULL
for (k in 1:length(covs)){
p <- NA
if (type[k] == 2){
### treat categorical covariates as factors
### if orderfreq = TRUE then reorder all unordered factor levels by descending frequency
### if covariate is already a factor keep the original ordering
if (is.factor(df[,covs[k]]) == FALSE){
df[,covs[k]] <- factor(df[,covs[k]])
if (orderfreq == TRUE) {
tb <- table(df[,covs[k]])
df[,covs[k]] <- factor(df[,covs[k]],
levels = names(tb[order(tb, decreasing = TRUE)]))
}
}
### create a mini table for this variable alone
### the number of columns is the number of groups
### + 1 names column + 1 pvalue column + 1 test name column
### the number of rows is the number of levels + 1 for varname/label
### if there are missing values add another row for missing totals
tmp <- matrix(data = NA,
ncol = nlevels(df[,by]) + 4,
nrow = 1 + nlevels(df[,covs[k]]))
### add first column variable label and names of levels
tmp[1,1] <- labels[k]
tmp[2:(1+nlevels(df[,covs[k]])),1] <- paste("* ",levels(df[,covs[k]]), "\n", sep="")
### create temporary dataset with NA's removed
df.complete <- df[!is.na(df[,covs[k]]),]
### if group size (NA's removed) is less than min, do not report p-values
mingroup_tab <- as.matrix(table(df.complete[,by]))
if (sum(mingroup_tab < mingroup, na.rm=T) > 0) {
pval[k] <- TRUE
tests[k] <- "--"
testlabs[k] <- "--"
}
### if cell size (NA's removed) is less than min, do not report p-values
mincell_tab <- as.matrix(table(df.complete[,covs[k]], df.complete[,by]))
if (sum(mincell_tab < mincell, na.rm=T) > 0) {
pval[k] <- TRUE
tests[k] <- "--"
testlabs[k] <- "--"
}
freq.all <- table(df.complete[,covs[k]])
perc.all <- 100*(table(df.complete[,covs[k]])/nrow(df.complete))
tmp[2:(1+nlevels(df[,covs[k]])),2] <-
paste(freq.all, " (", sprintf(percf, round(perc.all,perc.dec)), "%)", sep="")
## calculate column percents
if (percent[k] == 2){
freq <- as.matrix(table(df.complete[,covs[k]], df.complete[,by]))
coltots <- table(df.complete[,by])
nmat <- NULL
for (i in 1:nlevels(df.complete[,covs[k]])){
nmat <- rbind(nmat, coltots)
}
nvals <- as.matrix(nmat)
perc <- (freq/nvals)*100
}
## calculate row percents
if (percent[k] == 1){
freq <- as.matrix(table(df.complete[,covs[k]], df.complete[,by]))
rowtots <- table(df.complete[,covs[k]])
nmat <- NULL
for (i in 1:nlevels(df.complete[,by])){
nmat <- cbind(nmat, rowtots)
}
nvals <- as.matrix(nmat)
perc <- (freq/nvals)*100
}
perc2 <- perc
perc2[!is.nan(perc2)] <- sprintf(percf, round(perc2[!is.nan(perc2)],perc.dec))
perc2[perc2 == "NaN"] <- "--"
tmp[2:(1+nlevels(df.complete[,covs[k]])), 3:(2+nlevels(df.complete[,by]))] <-
paste(freq, " (", perc2, "%)", sep="")
if (dispN) {
tmp <- rbind(tmp, rep(NA, ncol(tmp)))
tmp[nrow(tmp), 1] <- "* N (non-missing)"
### calculate N for "All column"
tmp[nrow(tmp), 2] <- sum(!is.na(df[,covs[k]]))
### calculate N by subgroup
for (l in 1:nlevels(df[,by])) {
tmp[nrow(tmp), (2 + l)] <- sum(!is.na(df[df[,by] == levels(df[,by])[l] ,covs[k]]))
}
}
if (dispmiss) {
tmp <- rbind(tmp, rep(NA, ncol(tmp)))
tmp[nrow(tmp), 1] <- "* Freq Missing"
### calculate missing for "All column"
tmp[nrow(tmp), 2] <- sum(is.na(df[,covs[k]]))
### calculate missing by subgroup
for (l in 1:nlevels(df[,by])) {
tmp[nrow(tmp), (2 + l)] <- sum(is.na(df[df[,by] == levels(df[,by])[l] ,covs[k]]))
}
}
if (pval[k] == TRUE){
freq <- table(df.complete[,covs[k]], df.complete[,by])
if (tests[k] == "fe" & (nrow(freq) > 1)) {
try_fe <- try(fisher.test(freq, workspace = fisher.test.workspace), silent = TRUE)
if (length(try_fe) > 1) {
p <- fisher.test(freq, workspace = fisher.test.workspace)$p.value
testlabs[k] <- "Fisher's exact"
}
### if fisher's exact test will not run try chisq
if (length(try_fe) == 1) tests[k] <- "chisq"
}
if (tests[k] == "chisq" & (nrow(freq) > 1)) {
try_chisq <- try(chisq.test(freq))
if (length(try_chisq) > 1){
p <- chisq.test(freq)$p.value
testlabs[k] <- "Chi-squared"
}
if (length(try_chisq) == 1){
p <- NA
testlabs[k] <- "NA"
}
}
if (tests[k] == "olr"){
form <- as.formula(paste(covs[k], "~", by))
form.null <- as.formula(paste(covs[k], "~ 1"))
t <- polr(form, data = df.complete)
null <- polr(form.null, data = df.complete)
save <- lrtest(t, null)
p <- save[["Pr(>Chisq)"]][[2]]
}
if (tests[k] %in% c("ranksum","kw")){
if (noby==0 & nlevels(df[,by]) == 2){
form <- as.formula(paste("as.numeric(", covs[k], ") ~", by, sep=""))
try_wilcox <- try(wilcox.test(form, data = df))
if (length(try_wilcox) > 1){
if (is.finite(wilcox.test(form, data= df)$p.value)){
p <- wilcox.test(form, data= df)$p.value
testlabs[k] <- "Wilcoxon rank-sum"
if (exact) {
p <- pvalue(wilcox_test(form, data = df, distribution = "exact"))
testlabs[k] <- "Wilcoxon rank-sum (Ex)"
}
}
if (!is.finite(wilcox.test(form, data= df)$p.value)){
p <- NA
testlabs[k] <- NA
}
}
if (length(try_wilcox) == 1){
p <- NA
testlabs[k] <- NA
}
}
if (noby==0 & nlevels(df[,by]) > 2){
form <- as.formula(paste("as.numeric(", covs[k], ") ~", by, sep=""))
try_kruskal <- try(kruskal.test(form, data = df))
if (length(try_kruskal) > 1){
if (is.finite(kruskal.test(form, data=df)$p.value)){
p <- kruskal.test(form, data=df)$p.value
testlabs[k] <- "Kruskal-Wallis"
}
if (!is.finite(kruskal.test(form, data= df)$p.value)){
p <- NA
testlabs[k] <- NA
}
}
if (length(try_kruskal) == 1){
p <- NA
testlabs[k] <- NA
}
}
if (noby==1){
try_wilcox <- try(wilcox.test(as.numeric(df[,covs[k]]),
mu = (nlevels(df[,covs[k]])+1)/2))
if (length(try_wilcox) > 1 &
is.finite(wilcox.test(as.numeric(df[,covs[k]]),
mu = (nlevels(df[,covs[k]])+1)/2))){
p <- wilcox.test(as.numeric(df[,covs[k]]),
mu = (nlevels(df[,covs[k]])+1)/2)$p.value
testlabs[k] <- "Wilcoxon signed-rank"
# print(p)
}
if (length(try_wilcox) == 1 |
!is.finite(wilcox.test(as.numeric(df[,covs[k]]),
mu = (nlevels(df[,covs[k]])+1)/2)$p.value)){
p <- NA
testlabs[k] <- NA
}
}
}
if (tests[k] == "jt"){
## create numeric versions of covariate by by variable
numcovk <- as.numeric(df[,covs[k]])
numby <- as.numeric(df[,by])
# form <- as.formula(paste("as.numeric(", covs[k], ") ~", by, sep=""))
# try_jt <- try(jt.test(form, data = df))
try_jt <- try(jonckheere.test(x = numcovk, g = numby))
p <- NA
testlabs[k] <- NA
if ((length(try_jt) > 1)) {
jtres <- jonckheere.test(x = numcovk, g = numby)
if (is.finite(jtres$p.value)) {
p <- jtres$p.value
testlabs[k] <- "Jonckheere-Terpstra trend"
}
}
# kSamples package function jt.test
# if (length(try_jt) > 1 & is.finite(jt.test(form, data= df)[[6]][4])){
# p <- jt.test(form, data= df)[[6]][4]
# testlabs[k] <- "Jonckheere-Terpstra trend"
# }
#
# if (length(try_jt) == 1 | !is.finite(jt.test(form, data= df)[[6]][4])){
# p <- NA
# testlabs[k] <- NA
# }
}
if (tests[k] == "multica"){
form <- as.formula(paste(covs[k], "~", by, sep=""))
try_mca <- try(multiCA.test(form, data = df))
if (length(try_mca) > 1 &
is.finite(as.numeric(multiCA.test(form, data= df)[[1]][3]))){
p <- as.numeric(multiCA.test(form, data= df)[[1]][3])
testlabs[k] <- "Cochran-Armitage trend"
}
if (length(try_mca) == 1 |
!is.finite(as.numeric(multiCA.test(form, data= df)[[1]][3]))){
p <- NA
testlabs[k] <- NA
}
}
### if there is only one level of the covariate do not do any tests
if (nlevels(df[,covs[k]]) < 2){
p <- NA
testlabs[k] <- NA
}
if (sum(!is.na(altp)) > 0) p <- altp[k]
tmp[1,(3+nlevels(df[,by]))] <- sprintf(pvalf, round(p, pval.dec))
if (is.na(p)) {
tmp[1,(3+nlevels(df[,by]))] <- "--"
p <- 99
}
if (tests[k] == "--") {
tmp[1,(3+nlevels(df[,by]))] <- "--"
testlabs[k] <- "--"
p <- 99
}
if (htmlTable){
if (pval.dec == 4 & p < 0.0001) tmp[1,(3+nlevels(df[,by]))] <- "< 0.0001"
if (pval.dec == 3 & p < 0.001 ) tmp[1,(3+nlevels(df[,by]))] <- "< 0.001"
if (pval.dec == 2 & p < 0.01 ) tmp[1,(3+nlevels(df[,by]))] <- "< 0.01"
if (pval.dec == 4 & round(p,4) == 1) tmp[1,(3+nlevels(df[,by]))] <- "> 0.9999"
if (pval.dec == 3 & round(p,3) == 1) tmp[1,(3+nlevels(df[,by]))] <- "> 0.999"
if (pval.dec == 2 & round(p,2) == 1) tmp[1,(3+nlevels(df[,by]))] <- "> 0.99"
}
if (!htmlTable){
if (pval.dec == 4 & p < 0.0001) tmp[1,(3+nlevels(df[,by]))] <- "< 0.0001"
if (pval.dec == 3 & p < 0.001 ) tmp[1,(3+nlevels(df[,by]))] <- "< 0.001"
if (pval.dec == 2 & p < 0.01 ) tmp[1,(3+nlevels(df[,by]))] <- "< 0.01"
if (pval.dec == 4 & round(p,4) == 1) tmp[1,(3+nlevels(df[,by]))] <- "> 0.9999"
if (pval.dec == 3 & round(p,3) == 1) tmp[1,(3+nlevels(df[,by]))] <- "> 0.999"
if (pval.dec == 2 & round(p,2) == 1) tmp[1,(3+nlevels(df[,by]))] <- "> 0.99"
}
tmp[1,(4+nlevels(df[,by]))] <- testlabs[k]
}
if (k %% 2 == 0) rgroup <- c(rgroup, rep("none", nrow(tmp)))
if (k %% 2 != 0) rgroup <- c(rgroup, rep(color, nrow(tmp)))
if (blanks == TRUE){
blank <- rep(NA, ncol(tmp))
sum_table <- rbind(sum_table, blank, tmp)
}
}
if (type[k] == 1){
### create temporary dataset with NA's removed
df.complete <- df[!is.na(df[,covs[k]]),]
### if group size (NA's removed) is less than min, do not report p-values
mingroup_tab <- as.matrix(table(df.complete[,by]))
if (sum(mingroup_tab < mingroup, na.rm=T) > 0) {
pval[k] <- TRUE
tests[k] <- "--"
testlabs[k] <- "--"
}
### create a mini table for this variable alone
### the number of columns is the number of groups + 4
### (1 all column + 1 names column + 1 pvalue column + 1 test name column)
### the number of rows is the number of stats requested
tmp <- matrix(data = NA,
ncol = nlevels(df[,by]) + 4,
nrow = 1 + (length(stats[[k]])))
tmp[1,1] <- paste(labels[k], sep=" ")
for (s in 1:length(stats[[k]])){
### get corresponding function and label
nicefun <- paste("nice", stats[[k]][s], sep="_")
tmp[(1 + s), 1] <- nicelab[nicefun, "label"]
### calculate stats for "All column"
tmp[(1 + s), 2] <- do.call(nicefun, args = list("x" = df[,covs[k]]))
tmp[(1 + s), 2] <- gsub("NA|Inf|-Inf|NaN", "--", tmp[(1 + s), 2])
### calculate stats for subgroups
for (l in 1:nlevels(df[,by])){
tmp[(1 + s), (2 + l)] <- do.call(nicefun, args = list("x" = df[df[,by] == levels(df[,by])[l] ,covs[k]]))
tmp[(1 + s), (2 + l)] <- gsub("NA|Inf|-Inf|NaN", "--", tmp[(1 + s), (2 + l)])
}
}
if (dispN) {
tmp <- rbind(tmp, rep(NA, ncol(tmp)))
tmp[nrow(tmp), 1] <- "* N (non-missing)"
### calculate missing for "All column"
tmp[nrow(tmp), 2] <- sum(!is.na(df[,covs[k]]))
### calculate missing by subgroups
for (l in 1:nlevels(df[,by])) {
tmp[nrow(tmp), (2 + l)] <- sum(!is.na(df[df[,by] == levels(df[,by])[l] ,covs[k]]))
}
}
if (dispmiss) {
tmp <- rbind(tmp, rep(NA, ncol(tmp)))
tmp[nrow(tmp), 1] <- "* Freq Missing"
### calculate missing for "All column"
tmp[nrow(tmp), 2] <- sum(is.na(df[,covs[k]]))
### calculate missing by subgroups
for (l in 1:nlevels(df[,by])) {
tmp[nrow(tmp), (2 + l)] <- sum(is.na(df[df[,by] == levels(df[,by])[l] ,covs[k]]))
}
}
if (pval[k] == TRUE){
if (noby == 1){
if (tests[k] == "ranksum"){
wilcox <- try(wilcox.test(as.numeric(df[,covs[k]])), silent = F)
tryok <- ifelse(class(wilcox) %in% "try-error", FALSE, TRUE)
if (tryok){
if (is.finite(wilcox$p.value)){
p <- wilcox$p.value
testlabs[k] <- "Wilcoxon signed-rank"
}
if (!is.finite(wilcox$p.value)){
p <- NA
testlabs[k] <- NA
}
}
if (!tryok){
p <- NA
testlabs[k] <- NA
}
}
if (tests[k] == "ttest"){
ttest <- try(t.test(as.numeric(df[,covs[k]])), silent = F)
tryok <- ifelse(class(ttest) %in% "try-error", FALSE, TRUE)
if (tryok){
if (is.finite(ttest$p.value)){
p <- ttest$p.value
testlabs[k] <- "T-test"
}
if (!is.finite(ttest$p.value)){
p <- NA
testlabs[k] <- NA
}
}
if (!tryok){
p <- NA
testlabs[k] <- NA
}
}
}
form <- as.formula(paste(covs[k], "~", by))
if (noby == 0 & nlevels(df[,by]) == 2){
lev1 <- levels(df[,by])[1]
lev2 <- levels(df[,by])[2]
v1 <- df[df[,by] == lev1, covs[k]]
v2 <- df[df[,by] == lev2, covs[k]]
if (tests[k] == "ttest" | tests[k] == "anova"){
try_ttest <- try(t.test(form, data = df))
if (length(try_ttest) > 1){
p <- t.test(form, data= df)$p.value
testlabs[k] <- "T-test"
if (paired == TRUE){
p <- t.test(v1,v2, paired = TRUE)$p.value
testlabs[k] <- "Paired t-test"
}
}
if (length(try_ttest) == 1){
p <- NA
testlabs[k] <- NA
}
}
if (tests[k] == "ranksum" | tests[k] == "kw"){
try_wilcox <- try(wilcox.test(form, data = df))
if (length(try_wilcox) > 1 & is.finite(wilcox.test(form, data=df)$p.value)){
p <- wilcox.test(form, data=df)$p.value
testlabs[k] <- "Wilcoxon rank-sum"
if (exact) {
p <- pvalue(wilcox_test(form, data=df, distribution="exact"))
testlabs[k] <- "Wilcoxon rank-sum (Ex)"
}
if (paired == TRUE){
p <- wilcox.test(v1, v2, paired = TRUE)$p.value
testlabs[k] <- "Wilcoxon signed-rank"
if (exact){
p <- pvalue(wilcoxsign_test(v1 ~ v2, distribution="exact"))
testlabs[k] <- "Wilcoxon signed-rank (Ex)"
}
}
}
if (length(try_wilcox) == 1 | !is.finite(wilcox.test(form, data=df)$p.value)){
p <- NA
testlabs[k] <- NA
}
}
}
if (noby == 0 & nlevels(df[,by]) > 2){
if (tests[k] == "ttest" | tests[k] == "anova"){
try_aov <- try(aov(form, data= df))
if (length(try_aov) > 1){
anovamod <- aov(form, data= df)
p <- summary(anovamod)[[1]][[1,"Pr(>F)"]]
testlabs[k] <- "Anova"
}
if (length(try_aov) == 1){
p <- NA
testlabs[k] <- NA
}
}
if (tests[k] == "ranksum" | tests[k] == "kw") {
try_kw <- try(kruskal.test(form, data=df))
if (length(try_kw) > 1 & is.finite(kruskal.test(form, data=df)$p.value)){
p <- kruskal.test(form, data=df)$p.value
testlabs[k] <- "Kruskal-Wallis"
}
if (length(try_kw) == 1 | !is.finite(kruskal.test(form, data=df)$p.value)){
p <- NA
testlabs[k] <- NA
}
}
if (tests[k] == "jt"){
form <- as.formula(paste(covs[k], " ~ ", by, sep=""))
try_jt <- try(jt.test(form, data = df))
if (length(try_jt) > 1 & is.finite(jt.test(form, data= df)[[6]][4])){
p <- jt.test(form, data= df)[[6]][4]
testlabs[k] <- "Jonckheere-Terpstra trend"
}
if (length(try_jt) == 1 | !is.finite(jt.test(form, data= df)[[6]][4])){
p <- NA
testlabs[k] <- NA
}
}
}
if (sum(!is.na(altp)) > 0) p <- altp[k]
tmp[1,(3+nlevels(df[,by]))] <- sprintf(pvalf, round(p, pval.dec))
if (is.na(p)) {
tmp[1,(3+nlevels(df[,by]))] <- "--"
p <- 99
}
if (tests[k] == "--") {
tmp[1,(3+nlevels(df[,by]))] <- "--"
testlabs[k] <- "--"
p <- 99
}
if (htmlTable){
if (pval.dec == 4 & p < 0.0001) tmp[1,(3+nlevels(df[,by]))] <- "< 0.0001"
if (pval.dec == 3 & p < 0.001 ) tmp[1,(3+nlevels(df[,by]))] <- "< 0.001"
if (pval.dec == 2 & p < 0.01 ) tmp[1,(3+nlevels(df[,by]))] <- "< 0.01"
if (pval.dec == 4 & round(p,4) == 1) tmp[1,(3+nlevels(df[,by]))] <- "> 0.9999"
if (pval.dec == 3 & round(p,3) == 1) tmp[1,(3+nlevels(df[,by]))] <- "> 0.999"
if (pval.dec == 2 & round(p,2) == 1) tmp[1,(3+nlevels(df[,by]))] <- "> 0.99"
}
if (!htmlTable){
if (pval.dec == 4 & p < 0.0001) tmp[1,(3+nlevels(df[,by]))] <- "< 0.0001"
if (pval.dec == 3 & p < 0.001 ) tmp[1,(3+nlevels(df[,by]))] <- "< 0.001"
if (pval.dec == 2 & p < 0.01 ) tmp[1,(3+nlevels(df[,by]))] <- "< 0.01"
if (pval.dec == 4 & round(p,4) == 1) tmp[1,(3+nlevels(df[,by]))] <- "> 0.9999"
if (pval.dec == 3 & round(p,3) == 1) tmp[1,(3+nlevels(df[,by]))] <- "> 0.999"
if (pval.dec == 2 & round(p,2) == 1) tmp[1,(3+nlevels(df[,by]))] <- "> 0.99"
}
if (!is.finite(p)) tmp[1,(3+nlevels(df[,by]))] <- "--"
tmp[1,(4+nlevels(df[,by]))] <- testlabs[k]
}
if (k %% 2 == 0) rgroup <- c(rgroup, rep("none", nrow(tmp)))
if (k %% 2 != 0) rgroup <- c(rgroup, rep(color, nrow(tmp)))
if (blanks == TRUE){
blank <- rep(NA, ncol(tmp))
sum_table <- rbind(sum_table, blank, tmp)
}
}
}
if (!is.na(alllab)) all <- alllab
if ( is.na(alllab)) all <- "All"
final_table <- data.frame(sum_table, row.names = make.names(sum_table[,1], unique = T))
## header text
head_txt <- c("",
all,
paste(unlist(lapply(levels(df[,by]), simpleCap))),
"",
"")
## header sample sizes
head_nms <- c("Variable",
paste(" (N = ", nrow(df), ")", sep=""),
paste(" (N = ", table(df[,by]), ")", sep=""),
"p-value",
"Test")
# print(final_table)
# grab only user requested columns
reqcols_n <- 1:ncol(final_table)
pvalcol_n <- which(head_nms == "p-value")
testcol_n <- which(head_nms == "Test")
allcol_n <- min(which(head_txt == all))
varcol_n <- which(head_nms == "Variable")
othcol_n <- reqcols_n[!(reqcols_n %in% c(pvalcol_n, testcol_n, allcol_n, varcol_n))]
if (!pvalcol) reqcols_n <- reqcols_n[!(reqcols_n %in% c(pvalcol_n))]
if (sum(!is.na(tests)) == 0) reqcols_n <- reqcols_n[!(reqcols_n %in% c(pvalcol_n, testcol_n))]
if (!testcol) reqcols_n <- reqcols_n[!(reqcols_n %in% c(testcol_n))]
## stratification but no 'all' column
if (!allcol & noby == 0) reqcols_n <- reqcols_n[!(reqcols_n %in% allcol_n)]
## stratification but no 'reference' column
if (noby == 0 & !byref) reqcols_n <- reqcols_n[!(reqcols_n %in% min(othcol_n))]
## no by-group stratification:
if (noby == 1) reqcols_n <- reqcols_n[reqcols_n %in% c(varcol_n, allcol_n, pvalcol_n, testcol_n)]
head_txt <- head_txt[reqcols_n]
head_nms <- head_nms[reqcols_n]
final_table <- final_table[,reqcols_n, drop=FALSE]
if (htmlTable & kable) htmlTable <- FALSE
if (htmlTable){
final_html <- final_table
names(final_html) <- paste(head_txt, head_nms, sep="")
### stop htmlTable from treating everything as a factor
for (i in 1:ncol(final_html)){
final_html[,i] <- as.character(final_html[,i])
}
### remove blanks
final_html <- final_html[!is.na(final_html[,1]),]
### get header rows
head <- which(is.na(final_html[,2]))
### get non-header rows
nohead <- which(!is.na(final_html[,2]))
### indent non-header rows and remove *
final_html[nohead,"Variable"] <- paste(" ",
substring(final_html[nohead,"Variable"], 3))
### bold header rows
final_html[head,"Variable"] <- paste("<b>",
final_html[head,"Variable"],
"<b/>", sep="")
if (sum(head_nms %in% c("p-value", "Test")) == 0){
cgroup <- c(all, unlist(lapply(levels(df[,by]), simpleCap)))
if (allcol == FALSE) cgroup <- cgroup[2:length(cgroup)]
n.cgroup <- rep(1, length(cgroup))
}
if (sum(head_nms %in% c("p-value", "Test") > 0)){
cgroup <- c(all, unlist(lapply(levels(df[,by]), simpleCap)), "")
if (allcol == FALSE) cgroup <- cgroup[2:length(cgroup)]
n.cgroup <- c(rep(1, length(cgroup)-1), sum(head_nms %in% c("p-value", "Test")))
}
### create htmlTable
htmlver <- htmlTable(x = final_html[,2:ncol(final_html)],
header = head_nms[2:length(head_nms)],
rnames = final_html[,"Variable"],
rowlabel = htmltitle,
caption = htmlcaption,
escape.html = F,
cgroup = cgroup,
n.cgroup = n.cgroup,
css.cell='border-collapse: collapse; padding: 4px;',
col.rgroup=rgroup)
print(htmlver)
return(final_html)
}
if (kable){
final_kable <- final_table
names(final_kable) <- paste(head_txt, head_nms, sep="")
for (i in 1:ncol(final_kable)){
final_kable[,i] <- as.character(final_kable[,i])
}
final_kable <- final_kable[!is.na(final_kable[,1]),]
### get header rows
head <- which(is.na(final_kable[,2]))
### get non-header rows
nohead <- which(!is.na(final_kable[,2]))
### indent non-header rows and remove *
final_kable[nohead,"Variable"] <- paste(" ",
substring(final_kable[nohead,"Variable"], 3))
final_kable[nohead,"Variable"] <- gsub("\n", "", final_kable[nohead,"Variable"],fixed = T)
### bold header rows
final_kable[head,"Variable"] <- paste("<b>",
final_kable[head,"Variable"],
"<b/>", sep="")
tabalign <- rep("c", ncol(final_kable))
tabalign[1] <- "l"
if ("p-value" %in% names(final_kable)) tabalign[which(names(final_kable) == "p-value")] <- "r"
names(final_kable) <- gsub(")\\.1$", ")", names(final_kable))
tabalign <- paste(tabalign, collapse="")
print(kable(x = final_kable
,row.names = FALSE
,caption = htmlcaption
,align = tabalign
,format = "markdown"
)
)
return(final_kable)
}
fillnas <- function(S) {
L <- !is.na(S)
c(S[L][1], S[L])[cumsum(L)+1]
}
if (use_flextable){
final_flex <- final_table
for (i in 1:ncol(final_flex)){
final_flex[,i] <- as.character(final_flex[,i])
}
final_flex <- final_flex[!is.na(final_flex[,1]),]
### reformat grouped data for flextable
final_flex$flexgroup <- NA
final_flex$flexgroup[is.na(final_flex[,2])] <- final_flex[is.na(final_flex[,2]),1]
final_flex$flexgroup <- fillnas(final_flex$flexgroup)
if ("Test" %in% head_nms){
flex_testcol <- which(head_nms == "Test")
final_flex[,flex_testcol] <- fillnas(final_flex[,flex_testcol])
}
if ("p-value" %in% head_nms){
flex_pvalcol <- which(head_nms == "p-value")
final_flex[,flex_pvalcol] <- fillnas(final_flex[,flex_pvalcol])
}
final_flex <- final_flex[!is.na(final_flex[,2]),]
if ("p-value" %in% head_nms) final_flex[duplicated(final_flex$flexgroup, final_flex[,flex_pvalcol]), flex_pvalcol] <- NA
if ("Test" %in% head_nms) final_flex[duplicated(final_flex$flexgroup, final_flex[,flex_testcol]), flex_testcol] <- NA
### indent non-header rows and remove *
final_flex[!is.na(final_flex[,2]),1] <- substring(final_flex[!is.na(final_flex[,2]),1], 3)
final_flex[!is.na(final_flex[,2]),1] <- gsub("\n", "", final_flex[!is.na(final_flex[,2]),1], fixed = T)
final_flex_gr <- as_grouped_data(final_flex, groups = "flexgroup")
head_nms_flex <- c("flexgroup", head_nms)
head_nms_flex[1:2] <- c("", "")
names(head_nms_flex) <- names(final_flex_gr)
head_txt_flex <- c("flexgroup", head_txt)
head_txt_flex[1:2] <- c("", "")
names(head_txt_flex) <- names(final_flex_gr)
if ("Test" %in% head_nms){
flex_testcol <- which(head_nms_flex == "Test")
final_flex_gr[,flex_testcol] <- c(final_flex_gr[,flex_testcol][-1], NA)
}
if ("p-value" %in% head_nms){
flex_pvalcol <- which(head_nms_flex == "p-value")
final_flex_gr[,flex_pvalcol] <- c(final_flex_gr[,flex_pvalcol][-1], NA)
}
ft <- flextable(final_flex_gr)
## format header
ft <- delete_part(ft, part = "header")
ft <- add_header(x = ft
,values = head_nms_flex
,top = TRUE
)
ft <- add_header(x = ft
,values = head_txt_flex
,top = TRUE
)
## text fonts and alignment
ft <- theme_booktabs(ft)
ft <- bold(ft, part = "header")
ft <- bold(ft, j = 1, i = ~ !is.na(flexgroup), bold = TRUE, part = "body" )
ft <- align(ft, align = "center", part = "all")
ft <- align(ft, align = "left", j = c(1,2))
if ("p-value" %in% head_nms) ft <- align(ft, align = "right", j = flex_pvalcol)
if ("Test" %in% head_nms) ft <- align(ft, align = "right", j = flex_testcol)
ft <- merge_h_range(ft, i = ~ !is.na(flexgroup), j1 = 1, j2 = 2)
ft <- merge_h_range(ft, part = "header", j1 = 1, j2 = 2)
knit_print(autofit(ft))
return(list("dat" = final_flex_gr
,"ft" = autofit(ft)
,"head_nms" = head_nms_flex
,"head_txt" = head_txt_flex
)
)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.