R/descriptives.R

Defines functions report_desc summary_desc descriptives

Documented in descriptives

#' @title Descriptive Statistics on variables within a data frame
#'
#' @description The \code{descriptives} function is used to perform descriptive statistics on a set of variables in a \code{\link{data.frame}}.
#' This can include descriptive statistics by group when including a vector of categorical variables in the groupby= argument.
#'
#' @param data A \code{\link{data.frame}} containing columns of variables.
#' @param vars A \code{\link{vector}} of variable names existing in the dataframe to perform descriptive statistics on.
#' @param groupby An optional \code{\link{vector}} containing factor names within a dataframe used for by group processing.
#' @param conf.level The confidence level for mean and median confidence intervals. The default value is 0.95.
#' @param medianCI A TRUE/FALSE boolean value indicating whether or not to produce bootstrapped confidence intervals on the median (computationally intensive).
#' @param R The number of bootstrap replications to use if medianCI = TRUE. Otherwise has no effect.
#' @param round The number of digits to round the final result.
#'
#' @return A \code{\link{list}} containing descriptive statistics for each variable listed in the \code{vars} statement.
#' @export
#'
#' @examples
#' descriptives(data=mtcars, vars=c("mpg","disp"), groupby=c("vs","am"))
descriptives<-function(data, vars, groupby=NULL, conf.level=0.95, medianCI=FALSE, R=2000, round=NULL){

  #Check to insure data is a dataframe
  if(!is.data.frame(data)){
    stop(paste("The object", data, "is not a dataframe"));
  }

  # check to see if all vars are numeric
  temp <- data %>% select(vars)
  temp <- as.vector(sapply(temp,is.numeric))

  if(all(temp)==FALSE){
    stop(paste("All variables listed in the vars= statement are not numeric"));
  }


  #Start actual function
  out<-list()
  out$meta<-list()
  out$output<-list()

  out[["meta"]][["vars"]]<-vars
  out[["meta"]][["groupby"]]<-groupby
  out[["meta"]][["conf.level"]]<-conf.level
  out[["meta"]][["medianCI"]]<-medianCI
  out[["meta"]][["R"]]<-R
  out[["meta"]][["round"]]<-round

  for(i in 1:length(vars)){

    varname<-vars[i]

    if(!is.null(groupby)){

      dat<-data %>% group_by(.dots=groupby) %>%
        summarise(ntotal = n(),
                  nmiss = sum(is.na(!!sym(vars[i]))),
                  mean = mean(!!sym(vars[i]), na.rm = TRUE),
                  sd = sd(!!sym(vars[i]), na.rm = TRUE),
                  stderr = sd/sqrt(n()),
                  median = median(!!sym(vars[i]), na.rm = TRUE),
                  min = min(!!sym(vars[i]), na.rm = TRUE),
                  max = max(!!sym(vars[i]), na.rm = TRUE),
                  pct25 = quantile(!!sym(vars[i]), probs=0.25, na.rm=TRUE),
                  pct75 = quantile(!!sym(vars[i]), probs=0.75, na.rm=TRUE),
                  IQR = pct75 - pct25)
      #LCL = mean - qt(1 - (0.05 / 2), ntotal - 1) * stderr,
      #UCL = mean + qt(1 - (0.05 / 2), ntotal - 1) * stderr,
      #LCLmed = MedianCI(!!sym(vars[i]), method="boot", na.rm=TRUE)[2],
      #UCLmed = MedianCI(!!sym(vars[i]), method="boot", na.rm=TRUE)[3])

      #CL mean calcs
      clm<-data %>% group_by(.dots=groupby) %>% summarise(list(DescTools::MeanCI(!!sym(vars[i]), method="classic", na.rm=TRUE, conf.level = conf.level)))
      clm<-clm[[length(clm)]]
      clm<-unlist(clm)
      nms<-names(clm)
      clm<-data.frame(names=nms, clm)

      lwrm<-clm %>% filter(names=="lwr.ci")
      uprm<-clm %>% filter(names=="upr.ci")

      quantile

      if(medianCI==TRUE){
        #CL median calcs
        clmed<-data %>% group_by(.dots=groupby) %>% summarise(list(DescTools::MedianCI(!!sym(vars[i]), method="boot", na.rm=TRUE, R=2000, conf.level = conf.level)))
        clmed<-clmed[[length(clmed)]]
        clmed<-unlist(clmed)
        nms<-names(clmed)
        clmed<-data.frame(names=nms, clmed)

        lwrmed<-clmed %>% filter(names=="lwr.ci")
        uprmed<-clmed %>% filter(names=="upr.ci")

        #combine CLmean and CLmed components
        confint<-data.frame(LCLmean=lwrm[,2], UCLmean=uprm[,2], LCLmed=lwrmed[,2], UCLmed=uprmed[,2])
        dat<-cbind(as.data.frame(dat), confint)
      } else {
        confint<-data.frame(LCLmean=lwrm[,2], UCLmean=uprm[,2])
        dat<-cbind(as.data.frame(dat), confint)
      }


    } else {

      dat<-data %>%
        summarise(ntoal = n(),
                  nmiss = sum(is.na(!!sym(vars[i]))),
                  mean = mean(!!sym(vars[i]), na.rm = TRUE),
                  sd = sd(!!sym(vars[i]), na.rm = TRUE),
                  stderr = sd/sqrt(n()),
                  median = median(!!sym(vars[i]), na.rm = TRUE),
                  min = min(!!sym(vars[i]), na.rm = TRUE),
                  max = max(!!sym(vars[i]), na.rm = TRUE),
                  pct25 = quantile(!!sym(vars[i]), probs=0.25, na.rm=TRUE),
                  pct75 = quantile(!!sym(vars[i]), probs=0.75, na.rm=TRUE),
                  IQR = pct75 - pct25)
      #LCL = mean - qt(1 - (0.05 / 2), n - 1) * stderr,
      #UCL = mean + qt(1 - (0.05 / 2), n - 1) * stderr,
      #LCLmed = MedianCI(!!sym(vars[i]), method="exact", na.rm=TRUE)[2],
      #UCLmed = MedianCI(!!sym(vars[i]), method="exact", na.rm=TRUE)[3])

      #CL mean calcs
      clm<-data %>% summarise(list(DescTools::MeanCI(!!sym(vars[i]), method="classic", na.rm=TRUE, conf.level = conf.level)))
      clm<-clm[[length(clm)]]
      clm<-unlist(clm)
      nms<-names(clm)
      clm<-data.frame(names=nms, clm)

      lwrm<-clm %>% filter(names=="lwr.ci")
      uprm<-clm %>% filter(names=="upr.ci")

      if(medianCI==TRUE){

        #CL median calcs
        clmed<-data %>% summarise(list(DescTools::MedianCI(!!sym(vars[i]), method="boot", na.rm=TRUE, R=R, conf.level = conf.level)))
        clmed<-clmed[[length(clmed)]]
        clmed<-unlist(clmed)
        nms<-names(clmed)
        clmed<-data.frame(names=nms, clmed)

        lwrmed<-clmed %>% filter(names=="lwr.ci")
        uprmed<-clmed %>% filter(names=="upr.ci")

        #combine CLmean and CLmed components
        confint<-data.frame(LCLmean=lwrm[,2], UCLmean=uprm[,2], LCLmed=lwrmed[,2], UCLmed=uprmed[,2])
        dat<-cbind(as.data.frame(dat), confint)
      } else {
        confint<-data.frame(LCLmean=lwrm[,2], UCLmean=uprm[,2])
        dat<-cbind(as.data.frame(dat), confint)
      }
    }

    dat<-cbind(variable=varname, as.data.frame(dat))
    row.names(dat)<-NULL

    out$output[[varname]]<-dat
  }

  if(!is.null(round)){
    out$output<-round_list(out$output, round)
  }

  class(out)<-c("fpr", "desc")

  return(out)
}

#====================================== Summary ===================================================

summary_desc<-function(object){

  groupby<-object$meta$groupby

  if(is.null(groupby)){
    print(suppressWarnings(bind_rows(object$output)), row.names=FALSE)
  } else{
    for(i in 1:length(object$output)){
      print(object$output[[i]], row.names=FALSE)
      cat("\n")
    }
  }

  return(invisible(object))
}

#====================================== Reports ===================================================

report_desc<-function(object, style="multiline", split.tables=110, keep.trailing.zeros=TRUE, ...){

  c1<-class(object)[1]
  c2<-class(object)[2]
  groupby<-object$meta$groupby

  if(c1 != "fpr"){
    stop(paste(object, "not an fpr object"))
  }

  cat("\n")
  cat("=======================================================================================================================")
  cat("\n")
  cat("Descriptive Statistics")
  cat("\n")
  cat("=======================================================================================================================")
  cat("\n")

  if(is.null(groupby)){
    out<-suppressWarnings(bind_rows(object$output))
    pander::pandoc.table(out, style=style, split.tables=split.tables, keep.trailing.zeros=keep.trailing.zeros, ...)
  } else {

    for(i in 1:length(object$output)){
      out<-object$output[[i]]
      pander::pandoc.table(out, style=style, split.tables=split.tables, keep.trailing.zeros=keep.trailing.zeros, ...)
    }
  }
}
jprice80/fullprocs documentation built on June 13, 2020, 11:09 p.m.