R/ifcb_correct_annotation.R

Defines functions ifcb_correct_annotation

Documented in ifcb_correct_annotation

# `edit_manual_file` is not defined in R code: it is created at runtime when
# ifcb_correct_annotation() sources the bundled Python script. Declaring it
# here keeps R CMD check from reporting it as an undefined global variable.
utils::globalVariables("edit_manual_file")
#' Correct Annotations in MATLAB Classlist Files
#'
#' This function corrects annotations in MATLAB classlist files located in a specified manual folder,
#' generated by the code in the `ifcb-analysis` repository (Sosik and Olson 2007).
#' It replaces the class ID of specified regions of interest (ROIs) in the classlist files based on
#' a correction file or a character vector.
#'
#' @param manual_folder A character string specifying the path to the folder containing the original MAT classlist files to be updated.
#' @param out_folder A character string specifying the path to the folder where updated MAT classlist files will be saved.
#'   The folder is created (recursively) if it does not already exist.
#' @param correction Either a character string specifying the path to the correction file, or a character vector containing image filenames to be corrected.
#'   If a file is provided, it should have a column named `image_filename`. If a character vector is provided, it will be treated as a direct list of image filenames.
#'   A single string is only treated as a file path if that file exists; otherwise it is treated as an image filename.
#' @param correct_classid An integer specifying the class ID to use for corrections.
#' @param do_compression A logical value indicating whether to compress the .mat file. Default is TRUE.
#' @param correction_file
#'    `r lifecycle::badge("deprecated")`
#'    Use \code{correction} instead.
#'
#' @return `NULL`, invisibly. The function is called for its side effect of writing updated classlist files to `out_folder`.
#'
#' @details
#' Python must be installed to use this function. The required python packages can be installed in a virtual environment using `ifcb_py_install()`.
#'
#' The correction file is expected to contain at least one column: `image_filename`, which includes the filenames of the images (with or without additional trailing information).
#' The function processes each file, corrects the annotations, and saves the updated files in the output folder.
#' An error is raised if the correction file has no `image_filename` column, or if `correction` does not contain any image filenames.
#'
#' If a character vector is provided as `correction`, it will be used directly as a list of filenames for correction.
#'
#' The `correction` is typically generated using a Shiny app that provides an interactive interface for browsing and managing
#' IFCB (Imaging FlowCytobot) image galleries. This Shiny app can be initialized using the function `ifcb_run_image_gallery()`.
#'
#' @references Sosik, H. M. and Olson, R. J. (2007), Automated taxonomic classification of phytoplankton sampled with imaging-in-flow cytometry. Limnol. Oceanogr: Methods 5, 204–216.
#' @seealso \code{\link{ifcb_py_install}} \url{https://github.com/hsosik/ifcb-analysis}
#' @examples
#' \dontrun{
#' # Initialize a python session if not already set up
#' ifcb_py_install()
#'
#' # Correct class ID in .mat classlist files using a correction file
#' ifcb_correct_annotation("input/manual",
#'                         "output/manual",
#'                         "corrections.txt",
#'                         99)
#'
#' # Correct class ID in .mat classlist files using a character vector of filenames
#' ifcb_correct_annotation("input/manual",
#'                         "output/manual",
#'                         c("D20230917T153755_IFCB134_01724.png",
#'                           "D20230917T110059_IFCB134_00380.png"),
#'                         99)
#' }
#'
#' @export
ifcb_correct_annotation <- function(manual_folder, out_folder, correction = NULL, correct_classid, do_compression = TRUE, correction_file = deprecated()) {

  # Warn the user if correction_file is used
  if (lifecycle::is_present(correction_file)) {
    # Signal the deprecation to the user
    deprecate_warn("0.3.12", "iRfcb::ifcb_correct_annotation(correction_file = )", "iRfcb::ifcb_correct_annotation(correction = )")
    # Deal with the deprecated argument for compatibility
    correction <- correction_file
  }

  # `correction` defaults to NULL only so that the deprecated `correction_file`
  # can stand in for it; one of the two must be supplied.
  if (is.null(correction)) {
    stop("argument `correction` is missing, with no default")
  }

  # Initialize python check
  check_python_and_module()

  # Import the Python function; this makes `edit_manual_file()` callable below
  source_python(system.file("python", "edit_manual_file.py", package = "iRfcb"))

  # Check if `correction` is a file path or a character vector
  if (is.character(correction) && length(correction) == 1 && file.exists(correction)) {
    # Read corrections from file
    corrections <- read.table(correction, header = TRUE, row.names = NULL)

    # Fail with a clear message instead of a cryptic data frame assignment
    # error further down when the expected column is absent
    if (!"image_filename" %in% names(corrections)) {
      stop("The correction file must contain a column named `image_filename`.")
    }
  } else if (is.character(correction)) {
    # Use the provided character vector as corrections
    corrections <- data.frame(image_filename = correction, stringsAsFactors = FALSE)
  } else {
    stop("Invalid input: `correction` should be a file path or a character vector.")
  }

  # An empty correction set would otherwise only fail later, inside the
  # aggregation step, with an unrelated error message
  if (nrow(corrections) == 0) {
    stop("`correction` does not contain any image filenames.")
  }

  # Extract sample filenames without the trailing part after the last underscore
  corrections$sample_filename <- sub("^(.*)_[^_]*$", "\\1", corrections$image_filename)

  # Convert the filenames to get the roi values
  corrections$roi <- ifcb_convert_filenames(tools::file_path_sans_ext(corrections$image_filename))$roi

  # Aggregate roi to correct per sample: one row per sample, with that
  # sample's roi numbers collected in a list column
  corrections_aggregated <- stats::aggregate(roi ~ sample_filename, data = corrections, FUN = list)

  # Make sure the destination exists before any file is written to it
  if (!dir.exists(out_folder)) {
    dir.create(out_folder, recursive = TRUE)
  }

  # Loop through all files and apply corrections
  for (i in seq_len(nrow(corrections_aggregated))) {
    # Extract filename and roi values from the current row
    filename <- as.character(corrections_aggregated$sample_filename[i])
    roi_list <- unlist(corrections_aggregated$roi[[i]], use.names = FALSE)

    # Ensure roi_list is a list of integers
    roi_list <- as.list(as.integer(roi_list))

    # Call the Python function with the extracted values
    edit_manual_file(
      file.path(manual_folder, paste0(filename, ".mat")),  # Ensure correct file path
      file.path(out_folder, paste0(filename, ".mat")),  # Ensure correct output file path
      roi_list,
      correct_classid,
      do_compression
    )
  }

  # Called for its side effects only
  invisible(NULL)
}

Try the iRfcb package in your browser

Any scripts or data that you put into this service are public.

iRfcb documentation built on April 16, 2025, 1:09 a.m.