R/plot_distances_boxplot.R

Defines functions plot_distance_boxplot

Documented in plot_distance_boxplot

#' Visualize Nearest Neighbor Distances with Boxplot and Mean ± SEM
#'
#' Draws one boxplot per distance column of `distance_result`, overlays the
#' group mean as a red diamond and the standard error of the mean (SEM) as
#' dark-red error bars. Suitable for comparing multiple groups with potentially
#' skewed distributions.
#'
#' @details
#' Every column other than `id_col` is treated as a distance measurement and
#' must be numeric (columns that are entirely `NA` are tolerated and ignored).
#' Missing distances are removed before plotting.
#'
#' With `y_scale = "log10"`, non-positive distances cannot be displayed and are
#' dropped with a warning. Note that ggplot2 applies scale transformations
#' before computing statistics, so on the log10 axis the diamond and error bars
#' are the mean and SEM of the log10-transformed distances, not of the raw
#' distances.
#'
#' A group with a single observation has an undefined SEM, so no error bar is
#' drawn for it.
#'
#' @param distance_result A data.frame generated by
#'        `calculate_nearest_distances()`, containing distance measurements
#'        with columns: id_col + target_types.
#' @param id_col Name of the column containing cell IDs (default: "barcode").
#'        Must be a single string naming a column of `distance_result`.
#' @param show_points Logical, whether to overlay individual data points
#'        (default: FALSE). When `TRUE`, boxplot outliers are hidden so that
#'        they are not drawn twice.
#' @param y_scale Method for y-axis scaling: "original" or "log10"
#'        (default: "original").
#' @param palette Color palette name from `RColorBrewer` (default: "Set2").
#' @return A ggplot2 object. Additional customization can be done using
#'         ggplot2 functions.
#' @export
#' @import ggplot2
#' @import dplyr
#' @importFrom tidyr pivot_longer
#' @importFrom RColorBrewer brewer.pal
#' @importFrom stats sd
#' @examples
#' plot_distance_boxplot(distance_results, id_col = "Newbarcode")

plot_distance_boxplot <- function(distance_result,
                                  id_col = "barcode",
                                  show_points = FALSE,
                                  y_scale = c("original", "log10"),
                                  palette = "Set2") {

  # --- Input validation ---
  y_scale <- match.arg(y_scale)

  if (!is.data.frame(distance_result)) {
    stop("distance_result must be a data.frame")
  }

  if (!is.character(id_col) || length(id_col) != 1L || is.na(id_col)) {
    stop("id_col must be a single column name")
  }

  if (!id_col %in% colnames(distance_result)) {
    stop(sprintf("ID column '%s' not found in distance_result", id_col))
  }

  if (ncol(distance_result) < 2) {
    stop("distance_result must contain at least 1 target type column")
  }

  # Coerce first so that legacy callers passing 1/0 or "TRUE" keep working,
  # then reject anything that is not a single unambiguous flag.
  show_points <- as.logical(show_points)
  if (length(show_points) != 1L || is.na(show_points)) {
    stop("show_points must be TRUE or FALSE")
  }

  # Fail early with a readable message instead of a type-combination error
  # from pivot_longer(). All-NA columns are allowed: they are dropped below.
  target_cols <- setdiff(colnames(distance_result), id_col)
  is_plottable <- vapply(
    distance_result[target_cols],
    function(col) is.numeric(col) || all(is.na(col)),
    logical(1)
  )
  if (!all(is_plottable)) {
    stop(sprintf(
      "Non-numeric target column(s) in distance_result: %s",
      paste(target_cols[!is_plottable], collapse = ", ")
    ))
  }

  # --- Data preparation ---
  # Long format: one row per (group, distance) pair, missing distances removed.
  plot_data <- distance_result %>%
    select(-all_of(id_col)) %>%
    pivot_longer(
      cols = everything(),
      names_to = "Group",
      values_to = "Distance"
    ) %>%
    filter(!is.na(.data$Distance))

  # log10 of zero/negative values is not finite; ggplot2 would drop those rows
  # at render time with one warning per layer, so drop them here with a single
  # explicit warning instead.
  if (y_scale == "log10") {
    non_positive <- plot_data$Distance <= 0
    if (any(non_positive)) {
      warning(
        sprintf(
          "Dropping %d non-positive distance value(s) for the log10 axis",
          sum(non_positive)
        ),
        call. = FALSE
      )
      plot_data <- plot_data[!non_positive, , drop = FALSE]
    }
  }

  if (nrow(plot_data) == 0) {
    warning("No plottable distance values; the plot will be empty",
            call. = FALSE)
  }

  # Mean with SEM limits in the y/ymin/ymax layout stat_summary() expects.
  # sd() is NA for a single observation, so such groups get no error bar.
  mean_sem <- function(x) {
    centre <- mean(x)
    sem <- sd(x) / sqrt(length(x))
    data.frame(y = centre, ymin = centre - sem, ymax = centre + sem)
  }

  # --- Plot construction ---
  p <- ggplot(
    plot_data,
    aes(x = .data$Group, y = .data$Distance, fill = .data$Group)
  ) +
    geom_boxplot(
      alpha = 0.7,
      # Hide boxplot outliers when raw points are drawn, to avoid duplicates
      outlier.shape = if (show_points) NA else 19,
      width = 0.6
    ) +
    # Mean point (red diamond)
    stat_summary(
      fun = mean,
      geom = "point",
      shape = 18,
      size = 3,
      color = "red"
    ) +
    # SEM error bars
    stat_summary(
      fun.data = mean_sem,
      geom = "errorbar",
      width = 0.2,
      color = "darkred"
    ) +
    labs(
      x = "Cell Type",
      y = "Distance to Nearest Neighbor",
      title = "Nearest Neighbor Distance Comparison",
      subtitle = "Boxplot with mean and SEM"
    ) +
    scale_fill_brewer(palette = palette) +
    theme_minimal() +
    theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 45, hjust = 1)
    )

  # Optional data points
  if (show_points) {
    p <- p + geom_jitter(
      width = 0.1,
      alpha = 0.3,
      size = 1,
      color = "gray30"
    )
  }

  # Optional log scale. The scale transformation is applied before the stats
  # run, so the summaries above are computed on log10(Distance).
  if (y_scale == "log10") {
    p <- p + scale_y_log10() +
      ylab("Distance to Nearest Neighbor (log10)")
  }

  p
}

Try the STDistance package in your browser

Any scripts or data that you put into this service are public.

STDistance documentation built on June 18, 2025, 5:08 p.m.