R/ifcb_psd_plot.R

Defines functions ifcb_psd_plot

Documented in ifcb_psd_plot

utils::globalVariables(c("x", "y"))
#' Generate PSD Plot for a Given Sample
#'
#' This function generates a plot for a given sample from Particle Size Distribution (PSD) data and fits from Imaging FlowCytobot (IFCB).
#' The PSD `data` and `fits` can be generated by `ifcb_psd` (Hayashi et al. 2025).
#'
#' @param sample_name The name of the sample to plot in DYYYYMMDDTHHMMSS_IFCBXXX.
#' @param data A data frame containing the PSD data (data output from `ifcb_psd`), where each row represents a sample and each column represents different particle sizes in micrometers.
#' @param fits A data frame containing the fit parameters for the power curve (fits output from `ifcb_psd`), where each row represents a sample and the columns include the parameters `a`, `k`, and `R^2`.
#' @param start_fit The x-value threshold below which data should be excluded from the plot and fit.
#' @param flags Optional data frame or tibble with columns `sample` and `flag`. If `sample_name`
#'   appears in `flags$sample`, the corresponding `flag` text will be displayed on the plot
#'   as a red label in the top-left corner.
#'
#' @return A ggplot object representing the PSD plot for the sample.
#' @export
#'
#' @references
#' Hayashi, K., Enslein, J., Lie, A., Smith, J., Kudela, R.M., 2025. Using particle size distribution (PSD)
#' to automate imaging flow cytobot (IFCB) data quality in coastal California, USA.
#' International Society for the Study of Harmful Algae. https://doi.org/10.15027/0002041270
#'
#' @seealso \code{\link{ifcb_psd}} \url{https://github.com/kudelalab/PSD}
#'
#' @examples
#' \dontrun{
#' # Initialize a python session if not already set up
#' ifcb_py_install()
#'
#' # Analyze PSD
#' psd <- ifcb_psd(
#'   feature_folder = 'path/to/features',
#'   hdr_folder = 'path/to/hdr_data',
#'   save_data = TRUE,
#'   output_file = 'psd/svea_2021',
#'   plot_folder = NULL,
#'   use_marker = FALSE,
#'   start_fit = 13,
#'   r_sqr = 0.5
#' )
#'
#' # Optional flags
#' flags <- tibble::tibble(
#'   sample = "D20230316T101514",
#'   flag   = "Incomplete Run."
#' )
#'
#' # Plot PSD of the first sample
#' plot <- ifcb_psd_plot(
#'   sample_name = "D20230316T101514",
#'   data = psd$data,
#'   fits = psd$fits,
#'   start_fit = 10,
#'   flags = flags
#' )
#'
#' # Inspect plot
#' print(plot)
#' }
ifcb_psd_plot <- function(sample_name, data, fits, start_fit, flags = NULL) {

  # Extract the sample data
  sample_data <- data %>% filter(sample == sample_name)

  if (nrow(sample_data) == 0) {
    stop("No fit parameters found for the specified sample.")
  }

  # parse x (sizes) from colnames and y from the first row of sample_data
  x_values <- as.numeric(gsub("[^0-9]+", "", colnames(sample_data)[4:ncol(sample_data)]))
  y_values <- as.numeric(sample_data[1, 4:ncol(sample_data)])

  plot_data <- data.frame(x = x_values, y = y_values)

  # drop NA y and filter by start_fit
  plot_data <- plot_data %>%
    filter(!is.na(y)) %>%
    filter(x >= start_fit)

  if (nrow(plot_data) == 0) {
    stop("No valid data points remain after filtering by start_fit and removing NA y.")
  }

  # Extract fit parameters
  fit_params <- fits %>% filter(sample == sample_name)

  if (nrow(fit_params) == 0) {
    stop("No fit parameters found for the specified sample.")
  }

  a  <- fit_params$a
  k  <- fit_params$k
  R2 <- fit_params$`R^2`

  # Build equation text
  if (!is.na(R2) && is.finite(R2)) {
    equation_text <- paste0(
      "y = ", format(a, scientific = TRUE, digits = 3),
      " * x^", format(k, digits = 3),
      "\nR\u00B2 = ", format(R2, digits = 3)
    )
  } else {
    equation_text <- "No R\u00B2 value available."
  }

  # Base plot
  p <- ggplot(plot_data, aes(x = x, y = y)) +
    geom_line(na.rm = TRUE) +
    labs(
      title = paste("Sample:", sample_name),
      x = "ESD (\u00B5m)",
      y = "N'(D) [c/L\u207B]"
    ) +
    theme_minimal() +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_rect(color = "black", fill = NA),
      panel.background = element_rect(fill = "white", color = NA),
      plot.background = element_rect(fill = "white", color = NA),
      plot.margin = margin(8, 30, 8, 30) # leave room for annotations
    ) +
    coord_cartesian(clip = "off") # allow annotations near edges

  # Add the fit curve if R2 is valid
  if (!is.na(R2) && is.finite(R2)) {
    p <- p +
      stat_function(fun = function(x) a * x^k, color = "blue") +
      # place equation in top-right using panel coordinates
      annotate(
        "text",
        x = Inf, y = Inf,
        label = equation_text,
        hjust = 1.05, vjust = 1.05,
        size = 4.0
      )
  }

  # Add flag label (top-left) if sample is flagged
  if (!is.null(flags) && "sample" %in% colnames(flags)) {
    flag_row <- flags %>% filter(sample == sample_name)
    if (nrow(flag_row) >= 1) {
      # use the first matching flag (change logic if you want to handle multiple)
      flag_text <- as.character(flag_row$flag[1])
      # wrap long flags so they don't overflow
      flag_text_wrapped <- str_wrap(flag_text, width = 40)

      p <- p +
        annotate(
          "label",
          x = -Inf, y = Inf,
          label = paste0("Flag: ", flag_text_wrapped),
          hjust = -0.05, vjust = 1.05,
          size = 3.8,
          fontface = "bold",
          fill = "white",
          color = "red"
        )
    }
  }

  p
}

Try the iRfcb package in your browser

Any scripts or data that you put into this service are public.

iRfcb documentation built on Jan. 8, 2026, 1:06 a.m.