Nothing
#' Create boxplots for extracted audio features
#'
#' Generates boxplots for each exracted feature. Plots can be split by experimental condition.
#'
#' @param audioData A data.frame generated by the autoExtract() function.
#' @param by An optional character vector indicating column(s) from which the comparison groups are to be retrieved.
#' @param measures An optional character vector indicating the name of the variables to be plotted.
#' @param normalSig Set significance level to test for normality assumptions. Default corresponds to "0.05".
#' @param avoidNormalCheck Logical value forcing to avoid checking for normality. When defined as TRUE, it is assumed that the data is normally distributed. Default corresponds to FALSE.
#' @return A list containing the generated boxplots.
#' @examples
#' comparisonPlots(testAudioData, by = "Condition")
#'
#' @importFrom ggplot2 ggplot geom_boxplot aes theme element_blank ggplot_build annotation_custom unit
#' @importFrom gtable gtable_add_grob
#' @importFrom gridExtra tableGrob ttheme_minimal
#' @importFrom grid segmentsGrob gpar
#' @importFrom ggpubr stat_compare_means stat_pvalue_manual
#' @importFrom utils combn
#' @importFrom FSA dunnTest
#' @importFrom stats as.formula kruskal.test TukeyHSD aov
#' @importFrom stringr str_split
#' @importFrom rcompanion scheirerRayHare
#' @importFrom rlang !! sym
#' @export
comparisonPlots <- function(audioData, by = c(), measures = c("duration", "voice_breaks_percent", "RMS_env", "mean_loudness", "mean_F0", "sd_F0", "mean_entropy", "mean_HNR")
, normalSig = 0.05, avoidNormalCheck = FALSE){
if(!is.data.frame(audioData)) stop("audioData should be a data.frame produced by autoExtract")
if(!all(measures %in% c("duration", "voice_breaks_percent", "RMS_env", "mean_loudness", "mean_F0", "sd_F0", "mean_entropy", "mean_HNR"))) {
stop("measures should contain: duration, voice_breaks_percent, RMS_env, mean_loudness, mean_F0, sd_F0, mean_entropy, or mean_HNR")
}
if(!is.numeric(normalSig) & (normalSig < 0 | normalSig > 1)) stop("Error normalSig should be numeric and no smaller than 0 and no bigger than 1")
if(length(avoidNormalCheck) != 1) stop("avoidNormalCheck should have length 1")
#Measures for which we want to generate plots
my_comparisons <- list( c("1", "2") )
#Empty list, in which we will store the plots
plots <- list()
i <- 1
#If we only use one variable to create comparisons, just use that variable for both dimensions
if(length(by) == 1){
dimension1 <- by
dimension2 <- by
}
#Otherwise use a different variable for each dimension
else if(length(by) == 2){
dimension1 <- by[1]
dimension2 <- by[2]
}
else{
warning("No comparison Variables set.")
}
#When there's at least one comparison variable create the plots
if(length(by) >= 1){
numberCategoriesDim1 <- length(unique(audioData[,dimension1]))
audioData[,dimension1] <- as.factor(audioData[,dimension1])
numberCategoriesDim2 <- length(unique(audioData[,dimension2]))
audioData[,dimension2] <- as.factor(audioData[,dimension2])
#Create a plot for each measure
for (measure in measures) {
#If there are many NAs do not plot
if(sum(is.na(audioData[,measure])) >= 0.92 * length(audioData[,measure])){
warning(paste(measure, "contains all NAs. Plot for ", measure, "not generated."))
next
}
#Check if data is normally distributed
normalTable <- tableNormality(audioData, measure = measure, includeDimensions = "Dimension" %in% by)
if(is.character(normalTable)) stop("Sample size not big enough. Please consider increasing your sample.")
if(min(normalTable$N) < 3){
if("Dimension" %in% by){
warning("Only checking condition, as N per Condition and Dimension is lower than 3")
dimension1 <- "Condition"
dimension2 <- "Condition"
}
else{
stop("Not enough data to check comparisons")
}
}
pValue <- min(normalTable$pValue)
#Generate the plot comparing means depending on number of comparison variables and number of factors per comparison variable:
# One factor:
# - Normality and only 2 groups: t-test.
# - Normality and more than 2 groups: ANOVA and Tukey's test.
# - Non-normality and only 2 groups: Wilcoxon's Test.
# - Non-normality and more than 2 groups: Kruskal-Wallis test and Mann-Whitney U test.
# Two factors:
# - Normality: 2-way ANOVA.
# - Non-normality: Scheirer-Ray-Hare test.
plots[[measure]] <- local({
i <- i
if(numberCategoriesDim2 == 2 & length(by) == 1){
if(pValue < normalSig & !avoidNormalCheck){
p1 <- ggplot(audioData, aes(x=!!sym(dimension1), y=!!sym(measure), color = !!sym(dimension2))) +
geom_boxplot() +
stat_compare_means(method = "wilcox.test", label.y = max(audioData[,measure], na.rm = TRUE)*1.1, label.x = 1) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
}
else{
p1 <- ggplot(audioData, aes(x=!!sym(dimension1), y=!!sym(measure), color = !!sym(dimension2))) +
geom_boxplot() +
stat_compare_means(method = "t.test", label.y = max(audioData[,measure], na.rm = TRUE)*1.1, label.x = 1) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
}
}
else{
if(length(by) == 1){
x <- combn(seq(1,numberCategoriesDim1), 2)
my_comparisons <- lapply(seq_len(ncol(x)), function(i) x[,i])
if(pValue < normalSig & !avoidNormalCheck){
dunnData <- dunnTest(as.formula(paste0(measure, "~", dimension1)), data = audioData)
dunnData <- dunnData$res
dunnData$significance <- NA
if(length(dunnData[dunnData$P.adj > 0.1, "significance"]) > 0)
dunnData[dunnData$P.adj > 0.1, "significance"] <- "ns"
if(length(dunnData[dunnData$P.adj < 0.1, "significance"]) > 0)
dunnData[dunnData$P.adj < 0.1, "significance"] <- "+"
if(length(dunnData[dunnData$P.adj < 0.05, "significance"]) > 0)
dunnData[dunnData$P.adj < 0.05, "significance"] <- "*"
if(length(dunnData[dunnData$P.adj < 0.01, "significance"]) > 0)
dunnData[dunnData$P.adj < 0.01, "significance"] <- "**"
if(length(dunnData[dunnData$P.adj < 0.001, "significance"]) > 0)
dunnData[dunnData$P.adj < 0.001, "significance"] <- "***"
dunnData$group1 <- str_split(dunnData$Comparison, "-", simplify = T)[,1]
dunnData$group2 <- str_split(dunnData$Comparison, "-", simplify = T)[,2]
dunnData$group1 <- gsub(" ", "", dunnData$group1, fixed = TRUE)
dunnData$group2 <- gsub(" ", "", dunnData$group2, fixed = TRUE)
dunnData <- dunnData[dunnData$P.adj < 0.1,]
p1 <- ggplot(audioData, aes(x=!!sym(dimension1), y=!!sym(measure), color = !!sym(dimension2))) +
geom_boxplot() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggplotData <- ggplot_build(p1)
multiplier <- (max(audioData[,measure], na.rm = TRUE)-min(audioData[,measure], na.rm = TRUE))*0.08
ypos <- seq((max(ggplotData$data[[1]]$ymax, na.rm = TRUE)+multiplier), max(ggplotData$data[[1]]$ymax, na.rm = TRUE) + multiplier*(nrow(dunnData)+1) , by = multiplier)[1:nrow(dunnData)]
kruskalTestData <- kruskal.test(as.formula(paste0(measure, "~", dimension1)), audioData)
p1 <- p1 +
stat_compare_means(method = "kruskal.test", label.y = ggplotData$layout$panel_scales_y[[1]]$range$range[2] - (ggplotData$layout$panel_scales_y[[1]]$range$range[2] - ggplotData$layout$panel_scales_y[[1]]$range$range[1])*0.15, label.x = 1.3)
if(nrow(dunnData) > 0){
p1 <- p1 + stat_pvalue_manual(dunnData, label = "significance", y.position = ypos, tip.length = 0)
}
}
else{
tukeyData <- TukeyHSD(aov(as.formula(paste0(measure, "~", dimension1)), data = audioData))
tukeyData <- as.data.frame(tukeyData$Condition)
tukeyData$significance <- NA
if(length(tukeyData[tukeyData$`p adj` > 0.1, "significance"]) > 0)
tukeyData[tukeyData$`p adj` > 0.1, "significance"] <- "ns"
if(length(tukeyData[tukeyData$`p adj` < 0.1, "significance"]) > 0)
tukeyData[tukeyData$`p adj` < 0.1, "significance"] <- "+"
if(length(tukeyData[tukeyData$`p adj` < 0.05, "significance"]) > 0)
tukeyData[tukeyData$`p adj` < 0.05, "significance"] <- "*"
if(length(tukeyData[tukeyData$`p adj` < 0.01, "significance"]) > 0)
tukeyData[tukeyData$`p adj` < 0.01, "significance"] <- "**"
if(length(tukeyData[tukeyData$`p adj` < 0.001, "significance"]) > 0)
tukeyData[tukeyData$`p adj` < 0.001, "significance"] <- "***"
tukeyData$group1 <- str_split(rownames(tukeyData), "-", simplify = T)[,1]
tukeyData$group2 <- str_split(rownames(tukeyData), "-", simplify = T)[,2]
tukeyData <- tukeyData[tukeyData$`p adj` < 0.1,]
p1 <- ggplot(audioData, aes(x=!!sym(dimension1), y=!!sym(measure), color = !!sym(dimension2))) +
geom_boxplot() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggplotData <- ggplot_build(p1)
multiplier <- (max(audioData[,measure], na.rm = TRUE)-min(audioData[,measure], na.rm = TRUE))*0.08
ypos <- seq((max(ggplotData$data[[1]]$ymax, na.rm = TRUE)+multiplier), max(ggplotData$data[[1]]$ymax, na.rm = TRUE) + multiplier*(nrow(tukeyData)+1) , by = multiplier)[1:nrow(tukeyData)]
AnovaTestData <- summary(aov(as.formula(paste0(measure, "~", dimension1)), audioData))
p1 <- p1 +
stat_compare_means(method = "anova", label.y = ggplotData$layout$panel_scales_y[[1]]$range$range[2] - (ggplotData$layout$panel_scales_y[[1]]$range$range[2] - ggplotData$layout$panel_scales_y[[1]]$range$range[1])*0.15, label.x = 1.3)
if(nrow(tukeyData) > 0){
p1 <- p1 + stat_pvalue_manual(tukeyData, label = "significance", y.position = ypos, tip.length = 0)
}
}
}
if(length(by) == 2){
if(pValue < normalSig & !avoidNormalCheck){
res.schreirer <- scheirerRayHare(as.formula(paste(measure, "~", dimension1, "*", dimension2)), audioData, verbose = FALSE)
row.names(res.schreirer)[3] <- "Interaction"
annotationTable <- data.frame('Scheirer-Ray-Hare' = rownames(res.schreirer)[1:3], pValue = round(res.schreirer[1:3,"p.value"], 2))
annotationTable$Scheirer.Ray.Hare[3] <- "Interaction"
p1 <- ggplot(audioData, aes(x=!!sym(dimension1), y=!!sym(measure), color = !!sym(dimension2))) +
geom_boxplot() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggplotData <- ggplot_build(p1)
tbl <- tableGrob(annotationTable, theme = ttheme_minimal(), rows=NULL, cols= c("Scheirer-Ray-Hare", "p-Value"))
tbl <- gtable_add_grob( tbl, grobs = segmentsGrob( name = 'segment', y1 = unit( 0, 'npc' ), gp = gpar( lty = 1, lwd = 1 ) ),
t = 1, l = 1, r = ncol( tbl ) )
p1 <- p1 + annotation_custom(tbl, ymin = ggplotData$layout$panel_scales_y[[1]]$range$range[2] - (ggplotData$layout$panel_scales_y[[1]]$range$range[2] - ggplotData$layout$panel_scales_y[[1]]$range$range[1])*0.25, xmin = 1, xmax = 2)
}
else{
res.aov2 <- aov(as.formula(paste(measure, "~", dimension1, "*", dimension2)), data = audioData)
res.aov2 <- summary(res.aov2)[[1]]
annotationTable <- data.frame('2-way ANOVA' = rownames(res.aov2)[1:3], pValue = round(res.aov2[1:3,"Pr(>F)"], 2))
row.names(annotationTable)[3] <- "Interaction"
text <- " Two-way ANOVA p-value\n"
for (i in 1:(nrow(res.aov2)-1)) {
text <- paste(text, trimws(rownames(res.aov2)[i]), paste0(rep(" ", 30 - length(trimws(rownames(res.aov2)[i]))), collapse = ""), round(res.aov2$`Pr(>F)`[i], 2), "\n")
}
p1 <- ggplot(audioData, aes(x=!!sym(dimension1), y=!!sym(measure), color = !!sym(dimension2))) +
geom_boxplot() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggplotData <- ggplot_build(p1)
tbl <- tableGrob(annotationTable, theme = ttheme_minimal(), rows=NULL, cols= c("2-way ANOVA", "p-Value"))
tbl <- gtable_add_grob( tbl, grobs = segmentsGrob( name = 'segment', y1 = unit( 0, 'npc' ), gp = gpar( lty = 1, lwd = 1 ) ),
t = 1, l = 1, r = ncol( tbl ) )
p1 <- p1 + annotation_custom(tbl, ymin = ggplotData$layout$panel_scales_y[[1]]$range$range[2] - (ggplotData$layout$panel_scales_y[[1]]$range$range[2] - ggplotData$layout$panel_scales_y[[1]]$range$range[1])*0.25, xmin = 1, xmax = 2)
}
}
}
p1
})
}
}
else{
for (measure in measures) {
plots[[measure]] <- local({
p1 <- ggplot(audioData, aes(x=factor(0), y=!!sym(measure))) +
geom_boxplot() + theme(axis.title.x= element_blank(),
axis.text.x= element_blank(), axis.ticks.x= element_blank()) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p1
})
}
}
return(plots)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.