R/plot_procrustes_loadings.R
In nzilbb.vowels: Vowel Covariation Tools

Documented in plot_procrustes_loadings

#' Plot loadings with confidence bands from `procrustes_loadings()`
#'
#' `r lifecycle::badge('experimental')` Plot index loadings or loadings with
#' confidence intervals and null distributions generated by bootstrapping and
#' permutation followed by Procrustes rotation. This approach works when
#' PC loadings are unstable due to multiple PCs explaining similar amounts of
#' variance. This is an alternative to the use of bootstrapping without
#' Procrustes rotation (as in [`pca_test()`]) and avoids the need for the use
#' of the `filter_boots` argument to [`plot_loadings()`].
#'
#'
#' @importFrom dplyr filter group_by if_else mutate summarise
#' @importFrom ggplot2 ggplot aes geom_errorbar geom_text guide_axis labs
#'   scale_colour_manual scale_x_discrete
#' @importFrom forcats fct_reorder
#' @param proc_loadings a tibble, generated by [`procrustes_loadings()`]
#' @param pc_no an integer indicating which PC to plot.
#' @param loadings_confint confidence limits for generated confidence intervals.
#' (default: 0.9 to match `pca_test()`).
#'
#' @returns a `ggplot` object.
#' @export
#'
#' @examples
#'   proc_loadings <- procrustes_loadings(
#'     pca_data = onze_intercepts |> dplyr::select(-speaker),
#'     max_pcs = 3,
#'     index = TRUE,
#'     n = 10, # set this to at least 100 in actual use.
#'     scale = TRUE
#'    )
#'
#'    plot_procrustes_loadings(proc_loadings, pc_no = 2)
#'
plot_procrustes_loadings <-
  function(proc_loadings, pc_no=1, loadings_confint=0.9) {

    # Check if loadings are index loadings or not and set
    # label for y-axis of plot appropriately.
    if ("Null" %in% proc_loadings[['source']]) {
      loading_label = "Index loadings"
    } else {
      loading_label = "Loadings"
    }

    out_plot <- proc_loadings |>
      mutate(
        variable = fct_reorder(
          .data$variable,
          abs(.data[[paste0("PC", pc_no)]])
        ),
      ) |>
      group_by(.data$variable, .data$source) |>
      summarise(
        PC_low = stats::quantile(
          .data[[paste0("PC", pc_no)]],
          (1 - loadings_confint)/2
        ),
        PC_high = stats::quantile(
          .data[[paste0("PC", pc_no)]],
          1 - (1 - loadings_confint)/2
        )
      ) |>
      # If high and low are both neg or both pos, then we take absolute
      # value. If they include 0, then set low bound to 0.
      mutate(
        PC_low = if_else(
          (.data$PC_low > 0 & .data$PC_high > 0) |
            (.data$PC_low < 0 & .data$PC_high < 0),
          abs(.data$PC_low),
          0
        ),
        PC_high = abs(.data$PC_high)
      ) |>
      filter(
        source != "Original"
      ) |>
      ggplot(
        aes(
          x = .data$variable,
          colour = .data$source
        )
      ) +
      geom_errorbar(
        aes(
          ymin = abs(.data$PC_low),
          ymax = abs(.data$PC_high),
        )
      ) +
      geom_text(
        aes(
          label = if_else(.data[[paste0("PC", pc_no)]] > 0, "+", "-"),
          y = abs(.data[[paste0("PC", pc_no)]])
        ),
        data = proc_loadings |> filter(.data$source == "Original"),
        size = 8,
        colour = "black"
      ) +
      scale_x_discrete(guide = guide_axis(angle = 90)) +
      scale_colour_manual(
        values = c("Sampling" = "#F8766D", "Null" = "#00BFC4")
      ) +
      labs(
        y = loading_label,
        x = "Variable",
        colour = "Distribution"
      )

    out_plot
}