# Nothing
# Column names that are referenced unquoted inside dplyr/tidyr calls in this
# file. Registering them with utils::globalVariables() keeps R CMD check from
# reporting them as undefined global variables.
utils::globalVariables(
  c(
    "gpsLatitude",
    "gpsLongitude",
    "type",
    "value",
    "counts",
    "ml_analyzed_calc",
    "biovolume_per_liter",
    "timestamp",
    "time",
    "ifcb_number"
  )
)
#' Read and Summarize Classified IFCB Data
#'
#' This function reads a MATLAB `.mat` file containing aggregated and classified IFCB (Imaging FlowCytobot)
#' data generated by the `countcells_allTBnew_user_training` function from the `ifcb-analysis` repository (Sosik and Olson 2007),
#' or a list of classified data generated by `ifcb_summarize_class_counts`.
#' It returns a data frame with species counts and optionally biovolume information based on specified thresholds.
#'
#' @param summary A character string specifying the path to the `.mat` summary file or a list generated by `ifcb_summarize_class_counts`.
#' @param hdr_directory A character string specifying the path to the directory containing header (.hdr) files. Default is NULL.
#'   When supplied, `gpsLatitude` and `gpsLongitude` are read from the header files and added to the output,
#'   and missing `ml_analyzed` values are filled in with the volume calculated from the header files.
#' @param biovolume A logical indicating whether the file contains biovolume data. Default is FALSE.
#' @param threshold A character string specifying the threshold type for counts and biovolume. Options are "opt" (default), "adhoc", and "none".
#' @param use_python Logical. If `TRUE`, attempts to read the `.mat` file using a Python-based method. Default is `FALSE`.
#'   Ignored when `summary` is a list.
#' @return A data frame in long format with one row per sample and species with a non-zero count. It contains the
#'   sample name, date and time information derived from the sample name, `ml_analyzed`, `counts` and
#'   `counts_per_liter`, and, if `biovolume = TRUE`, `biovolume`, `biovolume_per_liter` and `biovolume_mm3`.
#'   GPS coordinates are included when `hdr_directory` is supplied.
#'
#' @details
#' If `use_python = TRUE`, the function tries to read the `.mat` file using `ifcb_read_mat()`, which relies on `SciPy`.
#' This approach may be faster than the default approach using `R.matlab::readMat()`, especially for large `.mat` files.
#' To enable this functionality, ensure Python is properly configured with the required dependencies.
#' You can initialize the Python environment and install necessary packages using `ifcb_py_install()`.
#'
#' If `use_python = FALSE` or if `SciPy` is not available, the function falls back to using `R.matlab::readMat()`.
#'
#' Fields are looked up in the summary by their exact names. A summary that lacks the data for the
#' requested `threshold` results in an error rather than silently returning data for another threshold.
#'
#' @seealso \url{https://github.com/hsosik/ifcb-analysis}
#' @export
#' @references Sosik, H. M. and Olson, R. J. (2007), Automated taxonomic classification of phytoplankton sampled with imaging-in-flow cytometry. Limnol. Oceanogr: Methods 5, 204–216.
#' @examples
#' mat_file <- system.file("exdata/example_summary.mat", package = "iRfcb")
#'
#' summary_data <- ifcb_read_summary(mat_file, biovolume = FALSE, threshold = "opt")
#' print(summary_data)
ifcb_read_summary <- function(summary,
                              hdr_directory = NULL,
                              biovolume = FALSE,
                              threshold = "opt",
                              use_python = FALSE) {
  # Validate the threshold up front so that a typo fails before any file is
  # read. The explicit type check also rejects numeric input, which switch()
  # would otherwise interpret as a positional index.
  if (!is.character(threshold) || length(threshold) != 1L ||
      !threshold %in% c("opt", "adhoc", "none")) {
    stop("Invalid threshold option. Choose from 'opt', 'adhoc', or 'none'.")
  }

  # Remember whether the data really came from the Python reader: the matrix
  # re-orientation further down must only be applied in that case, not when
  # 'summary' is a list or when the function fell back to read_mat().
  read_with_python <- FALSE

  if (is.list(summary)) {
    # Already-loaded summary, e.g. from ifcb_summarize_class_counts
    mat <- summary
  } else if (use_python && scipy_available()) {
    mat <- ifcb_read_mat(summary)
    read_with_python <- TRUE
  } else {
    # Read the contents of the MAT file
    mat <- read_mat(summary)
  }

  # Header files are only consulted when a directory is supplied
  use_hdr <- !is.null(hdr_directory)

  if (use_hdr) {
    # Extract GPS information from header files
    hdr_info <- ifcb_read_hdr_data(file.path(hdr_directory), gps_only = TRUE, verbose = FALSE)

    gps_info <- hdr_info %>%
      dplyr::select(sample, gpsLatitude, gpsLongitude)

    # List all .hdr files in the specified directory
    files <- list.files(hdr_directory, pattern = "\\.hdr$", recursive = TRUE, full.names = TRUE)

    # Volume analyzed per sample; the sample name is the file name without
    # its directory and extension.
    volume_info <- data.frame(
      sample = sub("\\.hdr$", "", basename(files)),
      ml_analyzed_calc = ifcb_volume_analyzed(files)
    )
  }

  # Use [[ ]] rather than $ throughout: $ partially matches list names, so
  # e.g. mat$classcountTB could silently resolve to
  # classcountTB_above_optthresh when the unthresholded field is absent.
  ml_analyzed <- as.vector(mat[["ml_analyzedTB"]])
  filelistTB <- unlist(mat[["filelistTB"]])

  # Select class count based on threshold
  count_field <- switch(threshold,
    "opt" = "classcountTB_above_optthresh",
    "adhoc" = "classcountTB_above_adhocthresh",
    "none" = "classcountTB"
  )
  classcountTB <- mat[[count_field]]

  if (is.null(classcountTB)) {
    stop(paste("Class count data for threshold", threshold, "does not exist in the file."))
  }

  # Extract species names from class2useTB
  class2useTB <- unlist(mat[["class2useTB"]])

  # Keep the dimensions as delivered by the reader so the biovolume matrix can
  # be checked against them below.
  raw_count_dim <- dim(classcountTB)

  if (read_with_python) {
    classcountTB <- as.matrix(t(classcountTB))
  }

  # Assign column names for class counts
  colnames(classcountTB) <- paste("counts", class2useTB, sep = "_")

  # Wide table: one row per sample, one column per class
  summary_wide <- data.frame(
    sample = filelistTB,
    ml_analyzed = ml_analyzed,
    classcountTB,
    check.names = FALSE
  )

  if (biovolume) {
    # Select biovolume based on threshold
    biovol_field <- switch(threshold,
      "opt" = "classbiovolTB_above_optthresh",
      "adhoc" = "classbiovolTB_above_adhocthresh",
      "none" = "classbiovolTB"
    )
    classbiovolTB <- mat[[biovol_field]]

    if (is.null(classbiovolTB)) {
      stop(paste("Biovolume data for threshold", threshold, "does not exist in the file."))
    }

    # Give the biovolume matrix the same orientation fix as the counts, but
    # only if it arrived with the same shape as the raw count matrix.
    # NOTE(review): assumes the Python reader orients both matrices alike -
    # confirm against ifcb_read_mat().
    if (read_with_python && identical(dim(classbiovolTB), raw_count_dim)) {
      classbiovolTB <- as.matrix(t(classbiovolTB))
    }

    # Assign column names for biovolume
    colnames(classbiovolTB) <- paste("biovolume", class2useTB, sep = "_")

    # Combine biovolume data with the wide table
    summary_wide <- dplyr::bind_cols(summary_wide, classbiovolTB)
  }

  # Reshape to one row per sample and species. Column names have the form
  # "<type>_<species>", where type is "counts" or "biovolume"; the split is on
  # the first underscore only, so species names may contain underscores.
  summary_long <- summary_wide %>%
    tidyr::pivot_longer(
      cols = !c("sample", "ml_analyzed"),
      names_pattern = "([^_]+)_(.*)",
      names_to = c("type", "species"),
      values_to = "value"
    ) %>%
    tidyr::pivot_wider(names_from = type, values_from = value) %>%
    dplyr::filter(counts != 0)

  # Fill in missing ml_analyzed values with the volume calculated from the
  # header files; values present in the summary take precedence.
  if (use_hdr) {
    summary_long <- summary_long %>%
      dplyr::left_join(volume_info, by = "sample") %>%
      dplyr::mutate(ml_analyzed = dplyr::coalesce(ml_analyzed, ml_analyzed_calc)) %>%
      dplyr::select(-ml_analyzed_calc)
  }

  # Calculate counts per liter
  summary_long <- summary_long %>%
    dplyr::mutate(counts_per_liter = counts / ml_analyzed * 1000)

  # If biovolume is requested, derive the per-liter and mm3 columns
  if (biovolume) {
    summary_long <- summary_long %>%
      dplyr::mutate(
        biovolume_per_liter = biovolume / ml_analyzed * 1000,
        biovolume_mm3 = biovolume_per_liter / 1000
      )
  }

  # Extract date information from sample names
  date_info <- ifcb_convert_filenames(unique(summary_long$sample))

  # Merge date information and place it right after the sample column
  summary_long <- summary_long %>%
    dplyr::left_join(date_info, by = "sample") %>%
    dplyr::relocate(timestamp, date, year, month, day, time, ifcb_number, .after = sample)

  # Merge GPS information from the header files, if available
  if (use_hdr) {
    summary_long <- summary_long %>%
      dplyr::left_join(gps_info, by = "sample") %>%
      dplyr::relocate(gpsLatitude, gpsLongitude, .after = ifcb_number)
  }

  summary_long
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.