# R/normalizeData.R

# Defines functions: normalizeData

# Documented in: normalizeData

#' Normalize audio data using power transformations
#'
#' Checks each requested measure for normality and, when the check fails,
#' transforms the measure in place. When `includeConditions` is `TRUE`, a
#' Box-Cox transformation is fitted on a linear model of the measure
#' (`measure ~ Condition`, plus `Dimension` when `includeDimensions` is also
#' `TRUE`). Otherwise the measure is transformed with
#' `rcompanion::transformTukey()`. Measures whose minimum is zero or negative
#' are first shifted by a constant so that every value is strictly positive.
#'
#' @param audioData A data.frame generated by the autoExtract() function. It
#'   must contain every column named in `measures`, a `Condition` column when
#'   `includeConditions` is `TRUE`, and a `Dimension` column when both
#'   `includeConditions` and `includeDimensions` are `TRUE`.
#' @param measures A vector of strings specifying the measures to be
#'   normalized. Default corresponds to all the measures extracted by
#'   autoExtract().
#' @param includeDimensions A logical value indicating whether or not to
#'   include dimensions in the normalization process. Default corresponds to
#'   FALSE.
#' @param includeConditions A logical value indicating whether or not to
#'   include conditions in the normalization process. Default corresponds to
#'   FALSE.
#'
#' @return A list containing three elements:
#'   \describe{
#'     \item{audioData}{The data frame with the transformed measures.}
#'     \item{avoidNormalCheck}{A logical vector, one entry per measure, that is
#'       `TRUE` when the measure was transformed.}
#'     \item{constantBoxCox}{A numeric vector, one entry per measure, holding
#'       the constant added to the measure before the transformation (0 when
#'       no shift was needed).}
#'   }
#'
#' @examples
#' normalizeData(testAudioData)
#'
#' @importFrom rcompanion transformTukey
#' @importFrom MASS boxcox
#'
#' @export

normalizeData <- function(audioData,
                          measures = c("duration", "voice_breaks_percent", "RMS_env", "mean_loudness", "mean_F0", "sd_F0", "mean_entropy", "mean_HNR"),
                          includeDimensions = FALSE,
                          includeConditions = FALSE) {

  # Check parameters
  stopifnot(is.data.frame(audioData))
  stopifnot(is.character(measures))
  stopifnot(is.logical(includeDimensions))
  stopifnot(is.logical(includeConditions))
  if (nrow(audioData) < 3) stop("Not enough data.")

  # Fail early, with a readable message, when a requested measure is absent.
  missingMeasures <- setdiff(measures, names(audioData))
  if (length(missingMeasures) > 0) {
    stop("Measures not found in audioData: ", paste(missingMeasures, collapse = ", "))
  }

  avoidNormalCheck <- rep(FALSE, length(measures))
  constantBoxCox <- numeric(length(measures))

  for (i in seq_along(measures)) {
    measure <- measures[i]

    # NOTE(review): includeConditions is not forwarded to tableNormality();
    # confirm against its signature whether the per-condition p-values are
    # expected here.
    normalityData <- tableNormality(audioData, measure, includeDimensions = includeDimensions)
    pValues <- normalityData$pValue

    # A missing p-value means normality could not be assessed for some group.
    # The most specific situation is reported first so its message is
    # reachable.
    if (any(is.na(pValues))) {
      if (includeConditions && includeDimensions) {
        stop("Some combinations of conditions and dimensions do not have enough data (N < 3)")
      }
      if (includeConditions) {
        stop("Some conditions do not have enough data (N < 3)")
      }
      stop("Normality could not be assessed for measure '", measure,
           "': some p-values are missing (groups may have N < 3)")
    }

    # Every group already looks normal: leave the measure untouched.
    if (min(pValues) >= 0.05) {
      next
    }

    # Power transformations need strictly positive data, so shift the measure
    # to make its minimum 0.01. NA entries stay NA.
    values <- audioData[[measure]]
    lowest <- min(values, na.rm = TRUE)
    if (lowest <= 0) {
      constantBoxCox[i] <- abs(lowest) + 0.01
      values <- values + constantBoxCox[i]
      audioData[[measure]] <- values
    }

    if (includeConditions) {
      modelFormula <- paste0(measure, " ~ Condition")
      if (includeDimensions) {
        modelFormula <- paste0(modelFormula, " + Dimension")
      }
      # plotit = FALSE: only the likelihood profile is needed, not the plot
      # that boxcox() draws by default.
      profile <- boxcox(stats::as.formula(modelFormula), data = audioData,
                        lambda = seq(-6, 6, 0.1), plotit = FALSE)
      lambda <- profile$x[which.max(profile$y)]
      # Box-Cox is defined as log(x) at lambda = 0; the generic formula would
      # give 0/0 (or pure rounding noise for a lambda that is only
      # numerically zero).
      if (abs(lambda) < 1e-8) {
        audioData[[measure]] <- log(values)
      } else {
        audioData[[measure]] <- (values^lambda - 1) / lambda
      }
    } else {
      audioData[[measure]] <- transformTukey(values, plotit = FALSE, start = -6, end = 6, int = 0.1)
    }
    avoidNormalCheck[i] <- TRUE
  }

  # Return list with results
  list(audioData = audioData, avoidNormalCheck = avoidNormalCheck, constantBoxCox = constantBoxCox)
}

# Try the voiceR package in your browser

# Any scripts or data that you put into this service are public.

# voiceR documentation built on Sept. 13, 2023, 1:07 a.m.