# R/normalityPlots.R
# In voiceR: Voice Analytics for Social Scientists
#
# Documented in normalityPlots

#' Normality Plots
#'
#' Generates one density plot per measure from the data.frame obtained from
#' autoExtract, annotated with the Shapiro-Wilk normality test p value
#' (rounded to four decimals).
#'
#' If \code{audioData} has a \code{Condition} column, the densities are filled
#' by condition and the annotation holds one Shapiro-Wilk line per condition;
#' otherwise a single density and a single p value are shown. Whenever fewer
#' than three non-missing values are available for a test, the annotation
#' reads "Not enough data" instead of a p value.
#'
#' @param audioData A data.frame generated by autoExtract.
#' @param measures An optional character vector indicating the name of the
#'   variables to be plotted. Every name must be a column of \code{audioData}.
#' @return A named list containing the plots that are generated, with one
#'   element per measure, named after that measure.
#' @examples
#' normalityPlots(testAudioData)
#'
#' @importFrom ggplot2 ggplot ggplot_build annotate aes geom_density
#' @importFrom stats shapiro.test
#' @importFrom rlang !! sym
#' @export
normalityPlots <- function(audioData, measures = c("duration", "voice_breaks_percent", "RMS_env", "mean_loudness", "mean_F0", "sd_F0", "mean_entropy", "mean_HNR")){
  if(!is.data.frame(audioData)) stop("audioData should be a data.frame produced by autoExtract")
  if(!all(measures %in% colnames(audioData))) {
    stop("measures should be one of the following measures: duration, voice_breaks_percent, RMS_env, mean_loudness, mean_F0, sd_F0, mean_entropy, or mean_HNR")
  }

  # Text shown for one Shapiro-Wilk test: the rounded p value, or a fallback
  # when there are fewer than 3 non-missing values (shapiro.test would
  # otherwise stop with an error).
  shapiroLabel <- function(values) {
    if (sum(!is.na(values)) < 3) {
      return("Not enough data")
    }
    round(shapiro.test(values)$p.value, 4)
  }

  # Adds the annotation text to a density plot. The plot is built once to read
  # the trained axis ranges, so the text can be placed relative to them:
  # 40% of the way along the x range and at 85% of the upper y limit.
  annotatedDensity <- function(basePlot, label) {
    built <- ggplot_build(basePlot)
    xRange <- built$layout$panel_scales_x[[1]]$range$range
    yRange <- built$layout$panel_scales_y[[1]]$range$range
    basePlot + annotate(
      "text",
      x = xRange[1] + abs(xRange[1] - xRange[2]) * 0.4,
      y = yRange[2] * 0.85,
      label = label
    )
  }

  # Check if audioData contains conditions
  hasCondition <- "Condition" %in% colnames(audioData)

  # Create empty list to save the plots (one entry per measure name)
  plots <- list()

  for (measure in measures) {
    # `[[` always yields a plain vector, including for tibbles, where
    # `audioData[, measure]` would stay a one-column table.
    values <- audioData[[measure]]

    if (hasCondition) {
      # One annotation line per condition, in order of first appearance
      conditionValues <- audioData[["Condition"]]
      label <- ""
      for (condition in unique(conditionValues)) {
        inCondition <- conditionValues == condition
        label <- paste0(
          label,
          "Shapiro-Wilk p (", condition, "): ",
          shapiroLabel(values[inCondition]),
          "\n"
        )
      }
      basePlot <- ggplot(audioData, aes(x = !!sym(measure), fill = !!sym("Condition"))) +
        geom_density(alpha = 0.3)
    } else {
      # No conditions: a single test over the whole column
      label <- paste0("Shapiro-Wilk p: ", shapiroLabel(values), "\n")
      basePlot <- ggplot(audioData, aes(x = !!sym(measure))) +
        geom_density()
    }

    plots[[measure]] <- annotatedDensity(basePlot, label)
  }

  plots
}