R/correlation_plots.R
In nzilbb.vowels: Vowel Covariation Tools

Documented in plot_correlation_counts plot_correlation_magnitudes

#' Plot distribution of correlations from `correlation_test` object
#'
#' This plot type is used in
#' Brand et al. (2021).
#' It presents the magnitudes of the correlations from the real data as a solid
#' red line, and the correlations from each iteration of the permutation test as
#' light blue lines. This gives a visual sense of the distribution of random
#' correlations compared with those in the actual data. If there are significant
#' pairwise correlations in the data, the thick red line should be visually
#' lower and wider across the plot than the thinner blue lines. If there are no
#' significant pairwise correlations, then the thick red line will have the
#' same shape as the blue lines.
#'
#' @param cor_test an object of class `correlation_test` generated by
#'   `correlation_test`.
#' @return `ggplot` object.
#' @importFrom dplyr bind_rows
#' @importFrom ggplot2 ggplot scale_alpha_manual scale_linewidth_manual aes labs
#'   stat_density guides after_stat
#' @importFrom magrittr %>%
#' @importFrom glue glue
#' @importFrom Rdpack reprompt
#' @examples
#'   # Test correlations (use at least n = 100)
#'   cor_test <- correlation_test(onze_intercepts |>
#'     dplyr::select(-speaker), n = 10)
#'   cor_plot <- plot_correlation_magnitudes(cor_test)
#'   cor_plot
#'
#'   # modify plot using `ggplot2` functions, e.g.
#'   cor_plot +
#'     ggplot2::labs(title = NULL) +
#'     ggplot2::theme_bw()
#' @references
#'   Brand, James, Jen Hay, Lynn Clark, Kevin Watson & Márton Sóskuthy (2021):
#'   Systematic co-variation of monophthongs across speakers of New Zealand
#'   English. Journal of Phonetics. Elsevier. 88. 101096.
#'   doi:10.1016/j.wocn.2021.101096


#' @export
plot_correlation_magnitudes <- function(cor_test) {

  cor_data <- bind_rows(
    "Permuted" = cor_test$permuted_correlations,
    "Original" = cor_test$original_correlations,
    .id = "Source"
  )

  # compute low alpha so that the permuted values can be seen even with
  # a low number of permutations.
  low_alpha <- 100/length(unique(cor_data$iteration))

  cor_data %>%
    ggplot(
      aes(
        x = .data$cor,
        y = after_stat(.data$density),
        colour = .data$Source,
        group = .data$iteration,
        alpha = .data$Source,
        linewidth = .data$Source
      )
    ) +
    stat_density(geom = "line", position= "identity") +
    scale_alpha_manual(values = c("Permuted" = low_alpha, "Original" = 1)) +
    scale_linewidth_manual(values = c("Permuted" = 0.1, "Original" = 2)) +
    labs(
      title = glue(
        "Magnitude of Correlations in Original Data and {cor_test$iterations} Permutations"
      ),
      y = "Density",
      x = glue("Magnitude of correlation ({cor_test$cor_method})")
    ) +
    guides(alpha = "none", size = "none")

}

#' Plot of correlation counts from `correlation_test` object
#'
#' Plot the number of statistically significant pairwise correlations in a data
#' set given an alpha value against the distribution of counts of statistically
#' significant pairwise correlations in permuted data. This is an informal test
#' which is useful to convincing yourself that there is structure in your data
#' which PCA might be able to uncover.
#'
#' The resulting plot presents the distribution of _counts_ of statistically
#' significant correlations at a given alpha level in the permuted data and the
#' count of statistically significant correlations in the original data. If the
#' red dot is above the uppermost line inside the blue violin plot, we say the
#' number of statistically significant correlations in the real data is itself
#' statistically significant. Usually this is used as a rough sanity check in
#' the course of a PCA workflow and we want to see the red dot well above the
#' violin (as in the example below).
#'
#' The resulting plot is a `ggplot2` plot and can be modified using functions
#' from that package. For instance, titles can be removed using the [ggplot2::labs()]
#' function (as in the examples below).
#'
#' @param cor_test an object of class `correlation_test` generated by
#'   `correlation_test`.
#' @param alpha significance level for counting correlation as significant.
#' @param half_violin Plot correlation counts using a half violin plot and half
#'   point plot. Quantiles are not currently supported.
#' @return `ggplot` object.
#' @importFrom dplyr bind_rows filter mutate summarise
#' @importFrom ggplot2 ggplot aes labs geom_violin geom_point
#' @importFrom gghalves geom_half_violin geom_half_point
#' @importFrom magrittr %>%
#' @importFrom glue glue
#' @examples
#'   # Test correlations (use at least n = 100)
#'   cor_test <- correlation_test(onze_intercepts |>
#'     dplyr::select(-speaker), n = 10)
#'   cor_plot <- plot_correlation_counts(cor_test)
#'   cor_plot
#'
#'   # make statistical test more strict by reducing the alpha.
#'   cor_plot_strict <- plot_correlation_counts(cor_test, alpha = 0.01)
#'
#'   # modify plot using `ggplot2` functions, e.g.
#'   cor_plot_strict +
#'     ggplot2::labs(title = NULL) +
#'     ggplot2::theme_bw()
#' @export
plot_correlation_counts <- function(
    cor_test, alpha = 0.05, half_violin = FALSE
) {

  if (half_violin) {
    violin_element <- geom_half_violin(
      alpha = 0.8,
      side = "r"
    )
    point_element <- geom_half_point(
      size = 0.5,
      alpha = 0.8,
      side = "l"
    )
  } else {
    violin_element <- geom_violin(
      draw_quantiles = c(0.25, 0.5, 0.75),
      alpha = 0.8
    )
    point_element <- NULL
  }

  cor_test$permuted_correlations %>%
    filter(
      .data$cor_p <= alpha
    ) %>%
    group_by(.data$iteration) %>%
    summarise(
      n_sig = n()
    ) %>%
    mutate(
      source = "Null"
    ) %>%
    ggplot(
      aes(
        x = "",
        y = .data$n_sig,
        colour = .data$source
      )
    ) +
    violin_element +
    point_element +
    geom_point(
      data = cor_test$original_correlations %>%
        filter(
          .data$cor_p <= alpha
        ) %>%
        summarise(
          n_sig = n()
        ) %>%
        mutate(
          source = "Data"
        )
    ) +
    labs(
      title = glue(
        "Count of Signficant Correlations in Original Data and ",
        "{cor_test$iterations} Permutations"
      ),
      y = "Count of significant correlations",
      x = "",
      caption = "Violin plot displays median, 5% and 95% quaniles.",
      colour = "Source"
    )
}