summary.factorlist4 <- function(df, dependent, explanatory, cont="mean", p=FALSE, na.include=FALSE,
column=FALSE, total_col=FALSE, orderbytotal=FALSE, glm.id=FALSE,
na.to.missing = TRUE){
s <- Hmisc:::summary.formula(as.formula(paste(dependent, "~", paste(explanatory, collapse="+"))), data = df,
method="reverse", overall=FALSE,
test=TRUE,na.include=na.include)
# Column vs row proportions for factor variables
df.out = plyr::ldply(s$stats, function(x){
if(dim(x)[2] == 13){ #hack to get continuous vs categorical. Wouldn't work for factor with 13 levels
# Continuous variables
# Mean (SD)
if (cont == "mean"){
a = paste0(round(x[1,12], 1), " (", round(x[1,13], 1), ")")
b = paste0(round(x[2,12], 1), " (", round(x[2,13], 1), ")")
c = paste0(round(x[3,12], 1), " (", round(x[3,13], 1), ")")
d = paste0(round(x[4,12], 1), " (", round(x[4,13], 1), ")")
row1_name = dimnames(x)[[2]][12]
row2_name = dimnames(x)[[2]][13]
}
# Median (IQR)
if (cont == "median"){
a = paste0(x[1,6], " (", x[1,8]-x[1,4], ")")
b = paste0(x[2,6], " (", x[2,8]-x[2,4], ")")
c = paste0(x[3,6], " (", x[3,8]-x[3,4], ")")
d = paste0(x[4,6], " (", x[4,8]-x[4,4], ")")
row1_name = "Median"
row2_name = "IQR"}
col1_name = dimnames(x)[[1]][1]
col2_name = dimnames(x)[[1]][2]
col3_name = dimnames(x)[[1]][3]
col4_name = dimnames(x)[[1]][4]
df.out = data.frame(paste0(row1_name, " (", row2_name, ")"), a, b, c, d)
names(df.out) = c("levels", col1_name, col2_name, col3_name,col4_name)
} else {
# Factor variables
row_name = dimnames(x)$w
col1_name = dimnames(x)$g[1]
col2_name = dimnames(x)$g[2]
col3_name = dimnames(x)$g[3]
col4_name = dimnames(x)$g[4]
col1 = x[,1]
col2 = x[,2]
col3 = x[,3]
col4 = x[,4]
total = col1 + col2 + col3 + col4
if (column == FALSE) {
col1_prop = (col1/apply(x, 1, sum))*100 # row margin
col2_prop = (col2/apply(x, 1, sum))*100
col3_prop = (col3/apply(x, 1, sum))*100
col4_prop = (col4/apply(x, 1, sum))*100
} else {
col1_prop = (col1/sum(col1))*100 # column margin
col2_prop = (col2/sum(col2))*100
col3_prop = (col3/sum(col3))*100
col4_prop = (col4/sum(col4))*100
total_prop = (total/sum(total))*100
}
a = paste0(col1, " (", sprintf("%.1f", round(col1_prop, 1)), ")") #sprintf to keep trailing zeros
b = paste0(col2, " (", sprintf("%.1f", round(col2_prop, 1)), ")")
c = paste0(col3, " (", sprintf("%.1f", round(col3_prop, 1)), ")")
d = paste0(col4, " (", sprintf("%.1f", round(col4_prop, 1)), ")")
if (total_col == FALSE){
df.out = data.frame(row_name, a, b, c, d)
names(df.out) = c("levels", col1_name, col2_name, col3_name, col4_name)
} else if (total_col == TRUE & column == FALSE) {
df.out = data.frame(row_name, a, b, c, d, total, total)
names(df.out) = c("levels", col1_name, col2_name, col3_name, col4_name, "Total", "index_total")
} else if (total_col == TRUE & column == TRUE) {
df.out = data.frame(row_name, a, b, c, d, paste0(total, " (", sprintf("%.1f", round(total_prop, 1)), ")"), total)
names(df.out) = c("levels", col1_name, col2_name, col3_name, col4_name, "Total", "index_total")
}
}
return(df.out)
})
# Keep original order
df.out$index = 1:dim(df.out)[1]
if (p == TRUE){
a = plyr::ldply(s$testresults, function(x) sprintf("%.3f",round(x[[1]], 3)))
names(a) = c(".id", "pvalue")
df.out = merge(df.out, a, by=".id")
}
# Add back in actual labels
df.labels = data.frame(".id" = names(s$stats), "label" = s$labels)
df.out.labels = merge(df.out, df.labels, by = ".id")
if (orderbytotal==FALSE){
df.out.labels = df.out.labels[order(df.out.labels$index),] # reorder columns
} else {
df.out.labels = df.out.labels[order(-df.out.labels$index_total),] # reorder columns
}
# Reorder columns and remove unnecessary columns
# Reorder
label_index = which(names(df.out.labels) == "label")
not_label_index = which(names(df.out.labels) != "label")
df.out.labels = df.out.labels[,c(label_index,not_label_index)]
# Remove
id_index = which(names(df.out.labels) == ".id")
index_index = which(names(df.out.labels) == "index")
index_total_index = which(names(df.out.labels) == "index_total")
df.out.labels = df.out.labels[,-c(id_index, index_index, index_total_index)]
# Remove duplicate labels
df.out.labels = rm_duplicate_labels(df.out.labels, na.to.missing = na.to.missing)
return(df.out.labels)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.