R/sumtab.R

Defines functions sumtab

Documented in sumtab

#' Descriptive Statistics Table Results
#'
#' Generate Descriptive Statistics Table
#' @param data Input data (data.frame)
#' @param varlist List of variable names
#' @param group Name of group variable (factor)
#' @return Table Results of Descriptive Statistics with P-values
#' @details Frequency numbers with percentage would be calculated for categorical variable.
#' @details Mean and Standard Deviation would be calculated for continuous variables.
#' @details P-values calculated from Chi-squared test and T-test for categorical and continuous variables respectively.
#' @details P-values calculated excluding missing values.
#' @examples
#' @export
#' sumtab function


sumtab <- function(data,varlist,group){

  sum1 <- NULL

  for (k in 1:length(varlist)){
    if (class(data[,varlist[k]]) %in% c('factor')){
      n <- cbind(table(data[,varlist[k]]),table(data[,varlist[k]], data[,group]))
      pct <- cbind(prop.table(table(data[,varlist[k]])),
                   prop.table(table(data[,varlist[k]], data[,group]),2))
      p <- chisq.test(table(data[,varlist[k]], data[,group]))$p.value

      mis <- c(paste0(length(which(is.na(data[,varlist[k]]))),' (',
                      format(100*length(which(is.na(data[,varlist[k]])))/nrow(data),nsmall=2,digits=2),'%)'),
               tapply(data[,varlist[k]], data[,group], function(y){
                 paste0(length(which(is.na(y))),' (',
                        format(100*length(which(is.na(y)))/length(y),,nsmall=2,digits=2),'%)')
               }))
      names(mis) <- rep("",length(mis))

      sum <- matrix(NA,dim(n)[1],dim(n)[2])
      for (i in 1:dim(n)[1]){
        for (j in 1:dim(n)[2]){
          sum[i,j] <- paste0(n[i,j]," (",format(pct[i,j]*100,digits=2,nsmall=2),"%)")
        }
      }

      sum <- cbind(rownames(n),sum)
      sum <- rbind(c(varlist[k],rep("",dim(n)[2])),sum)
      sum <- rbind(sum,c("missing",mis))
      sum <- cbind(sum,c(format(p,digits=2,nsmall=2),rep("",dim(n)[1]+1)))
      rownames(sum) <- rep("",dim(sum)[1])
      colnames(sum) <- rep("",dim(sum)[2])

      sum1 <- rbind(sum1,sum)
    }

    if (class(data[,varlist[k]]) %in% c('numeric')){
      n <- c(paste0(length(which(!is.na(data[,varlist[k]]))),' (',
                    format(100*length(which(!is.na(data[,varlist[k]])))/nrow(data),nsmall=2,digits=2),'%)'),
             tapply(data[,varlist[k]], data[,group], function(y){
               paste0(length(which(!is.na(y))),' (',
                      format(100*length(which(!is.na(y)))/length(y),,nsmall=2,digits=2),'%)')
             }))
      m <- c(mean(data[,varlist[k]], na.rm=T),
             tapply(data[,varlist[k]], data[,group],mean, na.rm=T))
      sd <- c(sd(data[,varlist[k]], na.rm=T),
              tapply(data[,varlist[k]], data[,group],sd, na.rm=T))

      mis <- c(paste0(length(which(is.na(data[,varlist[k]]))),' (',
                      format(100*length(which(is.na(data[,varlist[k]])))/nrow(data),nsmall=2,digits=2),'%)'),
               tapply(data[,varlist[k]], data[,group], function(y){
                 paste0(length(which(is.na(y))),' (',
                        format(100*length(which(is.na(y)))/length(y),,nsmall=2,digits=2),'%)')
               }))

      p <- NULL
      if (length(levels(data[,group])) > 2){
        p <- anova(lm(data[,varlist[k]] ~ data[,group]))$`Pr(>F)`[1]
      }
      if (length(levels(data[,group])) == 2){
        p <- t.test(data[,varlist[k]] ~ data[,group])$p.value
      }

      sum <- matrix(NA,3,length(n))
      for (i in 1:(length(n))){
        sum[1,i]  <- n[i]
        sum[2,i]  <- mis[i]
        sum[3,i]  <- paste0(format(m[i],digits=2,nsmall=2)
                            ," (",format(sd[i],digits=2,nsmall=2),")")
      }

      sum <- rbind(rep("",dim(sum)[2]),sum)
      sum <- cbind(c(varlist[k],"non-missing","missing","Mean (SD)"),sum)
      sum <- cbind(sum,c("","","",format(p,nsmall = 2,digits = 2)))
      rownames(sum) <- rep("",dim(sum)[1])
      colnames(sum) <- rep("",dim(sum)[2])

      sum1 <- rbind(sum1,sum)
    }

  }

  sum1 <- rbind(c("variable","all",levels(data[,group]),"p-value"),sum1)

  return(sum1)

}
shenhanjie/statsTBL documentation built on Dec. 23, 2021, 1:22 a.m.