R/tableSummary.R
In voiceR: Voice Analytics for Social Scientists

Documented in tableSummary

#' Create a summary table
#'
#' Automatically generates HTML table with the main descriptive statistics (mean, standard deviation and number of data points) for each variable of the data set. Descriptive statistics can be conditioned by groups if grouping variables are defined.
#'
#' @param audioData A data.frame generated by the autoExtract() function.
#' @param by A character vector indicating the name of the factor(s).
#' @param measures A character vector indicating the name of the variables to be included in the table.
#' @param nameMeasures Optional character vector indicating the names to be displayed for each of the different measures in the output table. If no value is provided, original variable names are displayed.
#' @param figureNumber Integer indicating figure number, used to create the title for the table. Default corresponds to 1.
#' @return HTML file summarizing the main descriptive statistics for each variable, conditioned by group(s) if one or more factors are provided.
#' @examples
#' tableSummary(testAudioData, by = c("Condition", "Dimension"),
#' measures = c("duration", "voice_breaks_percent", "RMS_env", "mean_loudness",
#' "mean_F0"))
#'
#' @importFrom stats sd as.formula aggregate na.pass
#' @importFrom kableExtra kable_classic add_header_above footnote
#' @importFrom knitr kable
#' @export


tableSummary <- function(audioData, by = c(), measures = c("duration", "voice_breaks_percent", "RMS_env", "mean_loudness", "mean_F0", "sd_F0", "mean_entropy", "mean_HNR"), nameMeasures = c(), figureNumber = 1){
  if(!is.data.frame(audioData)) stop("audioData should be a data.frame produced by autoExtract")
  if(!all(measures %in% colnames(audioData))){
    stop(paste(measures[which(!measures %in% colnames(audioData))], "not found in audioData"))
  }
  if(sum(!by %in% colnames(audioData)) > 0){
    stop(paste(by[which(!by %in% colnames(audioData))], "not found in audioData"))
  }

  if(!(all(apply(audioData[, tolower(colnames(audioData)) %in% tolower(by), drop = FALSE], 2, is.factor)) || all(apply(audioData[, tolower(colnames(audioData)) %in% tolower(by), drop = FALSE], 2, is.character)))){
    stop("Variables selected using by are not factors")
  }
  if(!all(apply(audioData[,tolower(colnames(audioData)) %in% tolower(measures), drop = FALSE], 2, is.numeric))){
    stop("Variables selected using measures are not numeric")
  }



  #If no custom name provided for the measure, use the measure name
  if(length(nameMeasures) == 0){
    nameMeasures <- measures
  }
  audioData <- audioData[,!(names(audioData) %in% "ID")]
  audioData <- audioData[,names(audioData) %in% by | names(audioData) %in% measures]

  #If no factors, just report means and standard deviations of audio measures, plus overall number of observations
  if(length(by) == 0){
    numericColumns <- unlist(lapply(audioData, is.numeric))
    descriptionTable <- apply(audioData[,numericColumns], 2,FUN = function(x) c(N = round(length(x), 0), M = round(mean(x, na.rm = TRUE), 2), SD = round(sd(x, na.rm = TRUE), 2) ))
    descriptionTable <- kable_classic(kable(
      descriptionTable,
      col.names = nameMeasures,
      format = "html",
      booktabs = TRUE,
      caption = paste0("Figure ", figureNumber, ". Means and Standard Deviations of Audio Measures")
    ), full_width = F, html_font = "Cambria")
  }
  #if factors are specified report means, audios and number of observations according to them
  else{
    formula = as.formula(paste(". ~", ifelse(length(by) == 1, by, paste(by[1], "+", by[2]))))
    descriptionTable <- do.call(data.frame,(aggregate(formula, data = audioData, FUN = function(x) c(N = round(length(x), 0), M = round(mean(x, na.rm = TRUE), 2), SD = round(sd(x, na.rm = TRUE), 2) ), na.action = na.pass)))
    headerNames <- c(length(by), rep(3, length(measures)))
    names(headerNames) <- c(" ", nameMeasures)
    descriptionTable <- kable_classic(kable(
      descriptionTable,
      col.names = c(colnames(descriptionTable)[1:length(by)],rep(c("N", "M", "SD"), length(measures))),
      format = "html",
      booktabs = TRUE,
      caption = paste0("Figure ", figureNumber,". Means and Standard Deviations of Audio Measures by ", ifelse(length(by) == 1, by, paste(by[1], "and", by[2])) )
    ), full_width = F, html_font = "Cambria")
    descriptionTable <-  add_header_above(descriptionTable, headerNames, escape = FALSE, line_sep = 0)
  }
  #Convert table into kable extra format
  descriptionTable <- footnote(descriptionTable, general ="N, M and SD are used to represent number of data points, mean and standard deviation, respectively.", threeparttable = TRUE, footnote_as_chunk = TRUE)

  return(descriptionTable)
}