Nothing
# Register the column names that are referenced through non-standard
# evaluation in this file, so that R CMD check does not report them as
# undefined global variables.
utils::globalVariables(
  c(
    "name",
    "manual",
    "roi number"
  )
)
#' Count IFCB Annotations from .mat Files
#'
#' This function processes `.mat` files, generated by the code in the `ifcb-analysis` repository (Sosik and Olson 2007),
#' to count and summarize the annotations for each class based on the class2use information provided in a file.
#'
#' @param manual_files A character vector with the paths to one or more `.mat` files, or a single path to a folder
#'   containing `.mat` files. When a folder is given, files whose names match `D*.mat` are used.
#' @param class2use_file A character string specifying the path to the file containing the class2use variable.
#' @param skip_class A numeric vector of class IDs or a character vector of class names to be excluded from the count. Default is NULL.
#' @param sum_level A character string specifying the level of summarization. Options: "sample", "roi" or "class" (default).
#' @param mat_recursive Logical. If TRUE, the function will search for MATLAB files recursively when `manual_files` is a folder. Default is FALSE.
#' @param use_python Logical. If `TRUE`, attempts to read the `.mat` file using a Python-based method. Default is `FALSE`.
#'
#' @details
#' If `use_python = TRUE`, the function tries to read the `.mat` file using `ifcb_read_mat()`, which relies on `SciPy`.
#' This approach may be faster than the default approach using `R.matlab::readMat()`, especially for large `.mat` files.
#' To enable this functionality, ensure Python is properly configured with the required dependencies.
#' You can initialize the Python environment and install necessary packages using `ifcb_py_install()`.
#'
#' If `use_python = FALSE` or if `SciPy` is not available, the function falls back to using `R.matlab::readMat()`.
#'
#' Files of size zero are skipped with a warning. Paths that do not exist raise an error that lists them.
#' Class IDs that have no entry in class2use are reported using the ID itself (as text) as the class name.
#'
#' @return A data frame with the total count of images per class (`class`, `n`), per sample and class
#'   (`sample`, `class`, `n`), or one row per roi (`sample`, `roi number`, `class`), depending on `sum_level`.
#'   If no annotations could be read, a data frame with these columns and zero rows is returned.
#' @export
#' @references Sosik, H. M. and Olson, R. J. (2007), Automated taxonomic classification of phytoplankton sampled with imaging-in-flow cytometry. Limnol. Oceanogr: Methods 5, 204–216.
#'
#' @examples
#' \dontrun{
#' # Count annotations excluding specific class IDs
#' result <- ifcb_count_mat_annotations("path/to/manual_folder",
#'                                      "path/to/class2use_file",
#'                                      skip_class = c(99, 100))
#' print(result)
#'
#' # Count annotations excluding a specific class name
#' result <- ifcb_count_mat_annotations("path/to/manual_folder",
#'                                      "path/to/class2use_file",
#'                                      skip_class = "unclassified")
#' print(result)
#' }
ifcb_count_mat_annotations <- function(manual_files, class2use_file, skip_class = NULL, sum_level = "class", mat_recursive = FALSE, use_python = FALSE) {
  valid_levels <- c("class", "sample", "roi")
  if (length(sum_level) != 1 || !sum_level %in% valid_levels) {
    stop("sum_level should either be `class`, `roi` or `sample`")
  }
  sum_level <- as.character(sum_level)

  # A single existing directory is expanded to the .mat files it contains.
  # dir.exists() returns FALSE (never NA) for a path that does not exist, so
  # the condition below is always a valid TRUE/FALSE.
  if (length(manual_files) == 1 && dir.exists(manual_files)) {
    manual_files <- list.files(
      manual_files,
      pattern = "D.*\\.mat$",
      full.names = TRUE,
      recursive = mat_recursive
    )
  }

  # Fail early, and with a readable message, when any path is missing.
  missing_files <- manual_files[!file.exists(manual_files)]
  if (length(missing_files) > 0) {
    stop(
      "The following manual file(s) were not found: ",
      paste(missing_files, collapse = ", ")
    )
  }

  # Get the class2use variable from the specified file
  class2use <- ifcb_get_mat_variable(class2use_file)

  # Lookup table mapping the numeric class ID (its position in class2use)
  # to the class name
  lookup_table <- data.frame(
    manual = seq_along(class2use),
    name = class2use,
    stringsAsFactors = FALSE
  )

  # Convert skip_class names to manual IDs if they are character strings
  if (is.character(skip_class)) {
    filtered_skip_class <- lookup_table %>% filter(name %in% skip_class)
    if (nrow(filtered_skip_class) == 0) {
      stop("None of the class names provided in skip_class were found in class2use.")
    }
    skip_class <- filtered_skip_class %>% pull(manual)
  }

  # Decide once which reader to use; the answer does not change per file.
  python_reader <- use_python && scipy_available()

  # Collect one result per file and bind them once after the loop, instead of
  # growing a data frame on every iteration. Skipped files leave a NULL slot,
  # which bind_rows() ignores.
  per_file <- vector("list", length(manual_files))

  for (i in seq_along(manual_files)) {
    path <- manual_files[[i]]

    # Skip empty/corrupt files
    if (isTRUE(file.size(path) == 0)) {
      warning(paste("Empty .mat file:", path, "Skipping."))
      next
    }

    mat_data <- if (python_reader) ifcb_read_mat(path) else read_mat(path)

    taxa_list <- as.data.frame(mat_data$classlist)
    # Assign names to the columns in taxa_list
    names(taxa_list) <- unlist(mat_data$list_titles)

    sample_name <- tools::file_path_sans_ext(basename(path))

    # Drop skipped classes and unannotated rois, then translate the numeric
    # class ID to its name (falling back to the ID when it is not in the
    # lookup table).
    file_annotations <- taxa_list %>%
      filter(!manual %in% skip_class & !is.na(manual)) %>%
      mutate(sample = sample_name) %>%
      left_join(lookup_table, by = "manual") %>%
      mutate(class = as.character(ifelse(is.na(name), as.character(manual), name))) %>%
      select(sample, `roi number`, class)

    if (sum_level %in% c("class", "sample")) {
      # Summarize the number of images by sample and class
      file_annotations <- file_annotations %>%
        group_by(sample, class) %>%
        summarise(n = n(), .groups = "drop")
    }

    per_file[[i]] <- file_annotations
  }

  total_sum <- bind_rows(per_file)

  # Nothing was read (no files, or every file was skipped): return a typed,
  # zero-row data frame rather than failing on the missing columns below.
  if (ncol(total_sum) == 0) {
    empty_result <- switch(
      sum_level,
      class = data.frame(
        class = character(0),
        n = integer(0),
        stringsAsFactors = FALSE
      ),
      sample = data.frame(
        sample = character(0),
        class = character(0),
        n = integer(0),
        stringsAsFactors = FALSE
      ),
      roi = data.frame(
        sample = character(0),
        `roi number` = numeric(0),
        class = character(0),
        check.names = FALSE,
        stringsAsFactors = FALSE
      )
    )
    return(empty_result)
  }

  if (sum_level == "class") {
    # Combine the per-sample counts into one total per class
    total_sum <- total_sum %>%
      group_by(class) %>%
      summarise(n = sum(n, na.rm = TRUE), .groups = "drop")
  }

  total_sum
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.