R/pca_contrib_plot.R

Defines functions pca_contrib_plot

Documented in pca_contrib_plot

#' PCA contribution plots
#'
#' Plot the contribution of each variable in a data set to a given Principal
#' Component (PC). Variables are arranged by ascending contribution to the PC,
#' where contribution is the squared loading for the variable expressed as a
#' percentage. These plots match those given in supplementary material for
#' Brand et al. (2021).
#'
#' As with the other plotting functions in this package, the result is a
#' `ggplot2` plot. It can be modified using `ggplot2` functions (see, e.g.,
#' [plot_correlation_magnitudes()].
#'
#' @param pca_object a pca object generated by `prcomp` or `princomp`.
#' @param pc_no the PC to be visualised. Default value is 1.
#' @param cutoff the cutoff value for interpretation of the PC. Determines what
#'   total percentage contribution we want from the variables we select for
#'   interpretation. The default of 50 means that we pick the variables with the
#'   highest contribution to the PC until we have accounted for 50% of the total
#'   contributions to the PC. Can be set to `NULL` in which case, no cutoff value
#'   is plotted.
#' @return `ggplot` object.
#' @importFrom dplyr if_else lead rename mutate arrange filter
#' @importFrom forcats fct_reorder
#' @importFrom ggplot2 ggplot geom_text geom_vline scale_alpha_manual
#'     scale_color_manual aes labs theme element_text
#' @importFrom rlang .data
#' @importFrom tibble as_tibble
#' @importFrom Rdpack reprompt
#'
#' @examples
#'   onze_pca <- prcomp(onze_intercepts |> dplyr::select(-speaker), scale = TRUE)
#'
#'   # Plot PC1 with a cutoff value of 60%
#'   pca_contrib_plot(onze_pca, pc_no = 1, cutoff = 60)
#'
#'   # Plot PC2 with no cutoff value.
#'   pca_contrib_plot(onze_pca, pc_no = 2, cutoff = NULL)
#'
#' @references
#'   Brand, James, Jen Hay, Lynn Clark, Kevin Watson & Márton Sóskuthy (2021):
#'   Systematic co-variation of monophthongs across speakers of New Zealand
#'   English. Journal of Phonetics. Elsevier. 88. 101096.
#'   doi:10.1016/j.wocn.2021.101096
#' @export
pca_contrib_plot <- function(pca_object, pc_no=1, cutoff=50) {

  loadings <- as_tibble(pca_object$rotation[,pc_no], rownames="variable") |>
    rename(
      loading = "value"
    ) |>
    mutate(
      contribution = (.data$loading^2) * 100,
      loading_sign = if_else(.data$loading < 0, "-", "+")
    ) |>
    arrange(
      .data$contribution
    ) |>
    mutate(
      variable = fct_reorder(.data$variable, .data$contribution, base::min)
    )

  plot_aesthetic <- aes(
    x = .data$variable,
    y = .data$contribution,
    label = .data$loading_sign,
    alpha = .data$highlight,
    color = .data$loading_sign
  )

  # Handle cut off line.
  if (is.numeric(cutoff)) {

    loadings <- loadings |>
      mutate(
        cumulative_contribution = cumsum(.data$contribution),
        highlight = .data$cumulative_contribution > (100 - cutoff)
      )

    vertical_line_coord <- loadings |>
      mutate(
        change_point = .data$highlight != lead(.data$highlight),
        row_no = 1,
        row_no = cumsum(.data$row_no),
      ) |>
      filter(
        .data$change_point == TRUE
      ) |>
      mutate(
        x_intercept = .data$row_no + 0.5
      )

    cutoff_element <- geom_vline(
      aes(
        xintercept = .data$x_intercept
      ),
      color = "red",
      linetype = "dashed",
      data = vertical_line_coord
    )

  } else if (is.null(cutoff)) {
    # If there is no cutoff required on the plot, then we remove the
    # cutoff element of the plot entirely, and remove the alpha aesthetic
    # which reduces the opacity of values below the cutoff.
    cutoff_element <- NULL
    plot_aesthetic$alpha <- NULL

  } else {

    stop('Cutoff value must be numeric or NULL.')

  }

  loadings |>
    ggplot(
      mapping = plot_aesthetic
    ) +
    geom_text(
      size = 6,
      fontface = "bold",
      show.legend = FALSE
    ) +
    cutoff_element +
    scale_alpha_manual(
      values=c(0.4, 1)
    ) +
    scale_color_manual(
      values = c('red', 'black')
    ) +
    labs(
      title = paste0('PC', pc_no, ' Contribution Plot'),
      x = "Variable",
      y = "Contribution (%)"
    ) +
    theme(
      axis.text.x = element_text(angle = 90, hjust = 1, face = "bold"),
      axis.text.y = element_text(size = 14, face = "bold"),
      axis.title = element_text(size = 14, face = "bold")
    )
}

Try the nzilbb.vowels package in your browser

Any scripts or data that you put into this service are public.

nzilbb.vowels documentation built on June 8, 2025, 12:35 p.m.