#' Descriptive Statistics Table Results
#'
#' Generate Descriptive Statistics Table
#' @param data Input data (data.frame)
#' @param varlist List of variable names
#' @param group Name of group variable (factor)
#' @return Table Results of Descriptive Statistics with P-values
#' @details Frequency numbers with percentage would be calculated for categorical variable.
#' @details Mean and Standard Deviation would be calculated for continuous variables.
#' @details P-values calculated from Chi-squared test and T-test for categorical and continuous variables respectively.
#' @details P-values calculated excluding missing values.
#' @examples
#' @export
#' sumtab function
sumtab <- function(data,varlist,group){
sum1 <- NULL
for (k in 1:length(varlist)){
if (class(data[,varlist[k]]) %in% c('factor')){
n <- cbind(table(data[,varlist[k]]),table(data[,varlist[k]], data[,group]))
pct <- cbind(prop.table(table(data[,varlist[k]])),
prop.table(table(data[,varlist[k]], data[,group]),2))
p <- chisq.test(table(data[,varlist[k]], data[,group]))$p.value
mis <- c(paste0(length(which(is.na(data[,varlist[k]]))),' (',
format(100*length(which(is.na(data[,varlist[k]])))/nrow(data),nsmall=2,digits=2),'%)'),
tapply(data[,varlist[k]], data[,group], function(y){
paste0(length(which(is.na(y))),' (',
format(100*length(which(is.na(y)))/length(y),,nsmall=2,digits=2),'%)')
}))
names(mis) <- rep("",length(mis))
sum <- matrix(NA,dim(n)[1],dim(n)[2])
for (i in 1:dim(n)[1]){
for (j in 1:dim(n)[2]){
sum[i,j] <- paste0(n[i,j]," (",format(pct[i,j]*100,digits=2,nsmall=2),"%)")
}
}
sum <- cbind(rownames(n),sum)
sum <- rbind(c(varlist[k],rep("",dim(n)[2])),sum)
sum <- rbind(sum,c("missing",mis))
sum <- cbind(sum,c(format(p,digits=2,nsmall=2),rep("",dim(n)[1]+1)))
rownames(sum) <- rep("",dim(sum)[1])
colnames(sum) <- rep("",dim(sum)[2])
sum1 <- rbind(sum1,sum)
}
if (class(data[,varlist[k]]) %in% c('numeric')){
n <- c(paste0(length(which(!is.na(data[,varlist[k]]))),' (',
format(100*length(which(!is.na(data[,varlist[k]])))/nrow(data),nsmall=2,digits=2),'%)'),
tapply(data[,varlist[k]], data[,group], function(y){
paste0(length(which(!is.na(y))),' (',
format(100*length(which(!is.na(y)))/length(y),,nsmall=2,digits=2),'%)')
}))
m <- c(mean(data[,varlist[k]], na.rm=T),
tapply(data[,varlist[k]], data[,group],mean, na.rm=T))
sd <- c(sd(data[,varlist[k]], na.rm=T),
tapply(data[,varlist[k]], data[,group],sd, na.rm=T))
mis <- c(paste0(length(which(is.na(data[,varlist[k]]))),' (',
format(100*length(which(is.na(data[,varlist[k]])))/nrow(data),nsmall=2,digits=2),'%)'),
tapply(data[,varlist[k]], data[,group], function(y){
paste0(length(which(is.na(y))),' (',
format(100*length(which(is.na(y)))/length(y),,nsmall=2,digits=2),'%)')
}))
p <- NULL
if (length(levels(data[,group])) > 2){
p <- anova(lm(data[,varlist[k]] ~ data[,group]))$`Pr(>F)`[1]
}
if (length(levels(data[,group])) == 2){
p <- t.test(data[,varlist[k]] ~ data[,group])$p.value
}
sum <- matrix(NA,3,length(n))
for (i in 1:(length(n))){
sum[1,i] <- n[i]
sum[2,i] <- mis[i]
sum[3,i] <- paste0(format(m[i],digits=2,nsmall=2)
," (",format(sd[i],digits=2,nsmall=2),")")
}
sum <- rbind(rep("",dim(sum)[2]),sum)
sum <- cbind(c(varlist[k],"non-missing","missing","Mean (SD)"),sum)
sum <- cbind(sum,c("","","",format(p,nsmall = 2,digits = 2)))
rownames(sum) <- rep("",dim(sum)[1])
colnames(sum) <- rep("",dim(sum)[2])
sum1 <- rbind(sum1,sum)
}
}
sum1 <- rbind(c("variable","all",levels(data[,group]),"p-value"),sum1)
return(sum1)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.