R/ifcb_zip_matlab.R

Defines functions ifcb_zip_matlab

Documented in ifcb_zip_matlab

#' Create a Zip Archive of Manual MATLAB Files
#'
#' This function creates a zip archive containing specified files and directories for manually
#' annotated IFCB images, organized into a structured format suitable for distribution or storage.
#' The MATLAB files are generated by the `ifcb-analysis` repository (Sosik and Olson 2007).
#' The zip archive can be used to submit IFCB data to repositories such as the SMHI IFCB Plankton Image Reference Library (Torstensson et al., 2024).
#'
#' @param manual_folder The directory containing `.mat` files to be included in the zip archive.
#' @param features_folder The directory containing `.csv` files, including subfolders, to be included in the zip archive.
#' @param class2use_file The path to the file (class2use_file) that will be renamed and included in the 'config' directory of the zip archive.
#' @param zip_filename The filename for the zip archive to be created.
#' @param data_folder Optionally, the directory containing additional data files (`.roi`, `.adc`, `.hdr`) to be included in the zip archive.
#' @param readme_file Optionally, the path to a README file that will be updated with metadata and included in the zip archive.
#' @param matlab_readme_file Optionally, the path to a MATLAB README file whose content will be appended to the end of the README file in the zip archive.
#' @param email_address The email address to be included in the README file for contact information.
#' @param version Optionally, the version number to be included in the README file.
#' @param print_progress A logical value indicating whether to print a progress bar. Default is TRUE.
#' @param feature_recursive Logical. If TRUE, the function will search for feature files recursively within the `features_folder`. Default is TRUE.
#' @param manual_recursive Logical. If TRUE, the function will search for MATLAB files recursively within the `manual_folder`. Default is FALSE.
#' @param data_recursive Logical. If TRUE, the function will search for data files recursively within the `data_folder` (if provided). Default is TRUE.
#' @param quiet Logical. If TRUE, suppresses messages about the progress and completion of the zip process. Default is FALSE.
#'
#' @details This function performs the following operations:
#' \itemize{
#'   \item Lists `.mat` files from `manual_folder`.
#'   \item Lists `.csv` files from `features_folder` (including subfolders).
#'   \item Lists `.roi`, `.adc`, `.hdr` files from `data_folder` if provided.
#'   \item Copies listed files to temporary directories (`manual_dir`, `features_dir`, `data_dir`, `config_dir`).
#'   \item Renames and copies `class2use_file` to `config_dir` as `class2use.mat`.
#'   \item Updates `readme_file` with metadata (if provided) and appends PNG image statistics and MATLAB README content.
#'   \item Creates a manifest file (`MANIFEST.txt`) listing all files in the zip archive.
#'   \item Creates a zip archive (`zip_filename`) containing all copied and updated files.
#'   \item Cleans up temporary directories after creating the zip archive.
#' }
#'
#' @return No return value. This function creates a zip archive containing the specified files and directories.
#'
#' @examples
#' \dontrun{
#' ifcb_zip_matlab("path/to/manual_files", "path/to/feature_files",
#'                 "path/to/class2use.mat", "output_zip_archive.zip",
#'                 data_folder = "path/to/data_files",
#'                 readme_file = system.file("exdata/README-template.md", package = "iRfcb"),
#'                 matlab_readme_file = system.file("exdata/MATLAB-template.md",
#'                                                  package = "iRfcb"),
#'                 email_address = "example@email.com",
#'                 version = "1.0")
#' }
#'
#' @export
#'
#' @references
#' Sosik, H. M. and Olson, R. J. (2007), Automated taxonomic classification of phytoplankton sampled with imaging-in-flow cytometry. Limnol. Oceanogr: Methods 5, 204–216.
#' Torstensson, Anders; Skjevik, Ann-Turi; Mohlin, Malin; Karlberg, Maria; Karlson, Bengt (2024). SMHI IFCB Plankton Image Reference Library. SciLifeLab. Dataset. \doi{10.17044/scilifelab.25883455}
#'
#' @seealso \code{\link{ifcb_zip_pngs}} \url{https://github.com/hsosik/ifcb-analysis}
ifcb_zip_matlab <- function(manual_folder, features_folder, class2use_file, zip_filename,
                            data_folder = NULL, readme_file = NULL, matlab_readme_file = NULL,
                            email_address = "", version = "", print_progress = TRUE,
                            feature_recursive = TRUE, manual_recursive = FALSE, data_recursive = TRUE,
                            quiet = FALSE) {
  # Overview: list the input files, copy them into a private staging directory
  # laid out as manual/, features/, data/ and config/, optionally render a
  # README from a template, write a manifest, and zip the staged tree into
  # `zip_filename`. Called for its side effects; returns NULL invisibly.

  # `print_progress` is both a logical argument and the name of a helper
  # function. R skips non-function bindings when it resolves a call, so
  # `print_progress(i, n)` below still reaches the helper. The flag is folded
  # into one scalar here so the two roles stay apart.
  show_progress <- print_progress && !quiet

  # Status messages go through cat() and are dropped entirely when quiet.
  say <- function(...) {
    if (!quiet) cat(...)
  }

  say("Listing all files...\n")

  # Annotation files; subfolders are searched only if `manual_recursive`
  mat_files <- list.files(manual_folder, pattern = "\\.mat$",
                          full.names = TRUE, recursive = manual_recursive)

  # Feature files; subfolders are searched only if `feature_recursive`
  feature_files <- list.files(features_folder, pattern = "\\.csv$",
                              full.names = TRUE, recursive = feature_recursive)

  # Raw data files are optional; NULL means "no data folder requested"
  data_files <- if (!is.null(data_folder)) {
    list.files(data_folder, pattern = "\\.(roi|adc|hdr)$",
               full.names = TRUE, recursive = data_recursive)
  } else {
    NULL
  }

  # Stage everything in a directory unique to this call. Staging directly in
  # tempdir() would let leftovers from an earlier (possibly failed) call leak
  # into this archive. on.exit() removes the staging tree even when a later
  # step throws an error.
  staging_dir <- tempfile("ifcb_zip_matlab_")
  dir.create(staging_dir)
  on.exit(unlink(staging_dir, recursive = TRUE), add = TRUE)

  manual_dir <- file.path(staging_dir, "manual")
  features_dir <- file.path(staging_dir, "features")
  data_dir <- file.path(staging_dir, "data")
  config_dir <- file.path(staging_dir, "config")
  readme_path <- file.path(staging_dir, "README.md")
  manifest_path <- file.path(staging_dir, "MANIFEST.txt")

  dir.create(manual_dir)
  dir.create(features_dir)
  dir.create(config_dir)
  if (!is.null(data_files)) dir.create(data_dir)

  n_mat <- length(mat_files)

  # Copy the .mat files flat into manual/
  say("Copying manual files...\n")
  for (i in seq_len(n_mat)) {
    file.copy(mat_files[[i]], manual_dir, overwrite = TRUE)
    if (show_progress) print_progress(i, n_mat) # Helper function
  }
  if (show_progress) cat("\n") # end the progress-bar line

  # Shared by the feature and data steps: for every .mat file, ask
  # `find_matches` which of `candidates` belong to it and copy those into
  # `dest_root`, keeping each file's sub-path relative to `source_root`.
  copy_matching <- function(find_matches, candidates, source_root, dest_root) {
    for (i in seq_len(n_mat)) {
      if (show_progress) print_progress(i, n_mat) # Helper function

      for (src in find_matches(mat_files[[i]], candidates)) {
        # Drop "<source_root>/" from the front to get the relative path
        relative_path <- substr(src, nchar(source_root) + 2, nchar(src))

        dest_dir <- file.path(dest_root, dirname(relative_path))
        if (!file.exists(dest_dir)) {
          dir.create(dest_dir, recursive = TRUE, showWarnings = FALSE)
        }

        file.copy(src, dest_dir, overwrite = TRUE)
      }
    }
    if (show_progress) cat("\n") # end the progress-bar line
  }

  say("Copying feature files...\n")
  copy_matching(find_matching_features, feature_files, features_folder, features_dir) # Helper function

  if (!is.null(data_files)) {
    say("Copying data files...\n")
    copy_matching(find_matching_data, data_files, data_folder, data_dir) # Helper function
  }

  # The class list is always stored as config/class2use.mat, whatever its
  # original file name was
  say("Copying class2use file...\n")
  class2use_copied <- file.copy(class2use_file, file.path(config_dir, "class2use.mat"),
                                overwrite = TRUE)
  if (!isTRUE(class2use_copied)) {
    warning("Could not copy class2use file: ", class2use_file, call. = FALSE)
  }

  # Render the README from its template, if one was supplied
  if (!is.null(readme_file)) {
    say("Creating README file...\n")

    readme_content <- readLines(readme_file, encoding = "UTF-8")
    if (!is.null(matlab_readme_file)) {
      matlab_content <- readLines(matlab_readme_file, encoding = "UTF-8")
    }

    current_date <- Sys.Date()

    # Number of annotated images per class, largest class first
    files_df <- ifcb_count_mat_annotations(manual_folder, class2use_file,
                                           skip_class = "unclassified",
                                           mat_recursive = manual_recursive)
    files_df <- arrange(files_df, desc(n))

    # Year range covered, taken from the first "D<8 digits>" token in each path
    dates <- str_extract(mat_files, "D\\d{8}")
    years <- as.integer(substr(dates, 2, 5))
    min_year <- min(years, na.rm = TRUE)
    max_year <- max(years, na.rm = TRUE)

    # Archive name without its ".zip" extension and without a trailing
    # "_annotated_images" / "_matlab_files" marker. The pattern is anchored and
    # the dot escaped, so a name such as "gzip_data.zip" gives "gzip_data".
    zip_name <- sub("(_annotated_images|_matlab_files)?\\.zip$", "", basename(zip_filename))

    # Placeholder -> value, applied in this order. Matching is literal
    # (fixed = TRUE), so backslashes in user-supplied values such as the
    # e-mail address are inserted verbatim, not read as back-references.
    substitutions <- list(
      "<DATE>" = current_date,
      "<VERSION>" = version,
      "<E-MAIL>" = email_address,
      "<ZIP_NAME>" = zip_name,
      "<YEAR_START>" = min_year,
      "<YEAR_END>" = max_year,
      "<YEAR>" = year(current_date),
      "<N_IMAGES>" = formatC(sum(files_df$n), format = "d", big.mark = ","),
      "<CLASSES>" = nrow(files_df)
    )

    updated_readme <- readme_content
    for (placeholder in names(substitutions)) {
      updated_readme <- gsub(placeholder, as.character(substitutions[[placeholder]]),
                             updated_readme, fixed = TRUE)
    }

    # Per-class image counts, separated from the template text by a blank line
    class_lines <- paste0("- ", files_df$class, ": ",
                          formatC(files_df$n, format = "d", big.mark = ","))
    updated_readme <- c(updated_readme, "", "## Number of images per class", "", class_lines)

    # Optional MATLAB-specific text goes last, again after a blank line
    if (!is.null(matlab_readme_file)) {
      updated_readme <- c(updated_readme, "", matlab_content)
    }

    writeLines(updated_readme, readme_path, useBytes = TRUE)
  }

  # Top-level entries of the archive. MANIFEST.txt is always generated below,
  # so it is always listed; previously it was left out of the archive unless a
  # README was requested.
  files_to_zip <- c(manual_dir, features_dir, config_dir)
  if (!is.null(data_files)) files_to_zip <- c(files_to_zip, data_dir)
  if (!is.null(readme_file)) files_to_zip <- c(files_to_zip, readme_path)
  files_to_zip <- c(files_to_zip, manifest_path)

  say("Creating MANIFEST.txt...\n")
  create_package_manifest(files_to_zip, manifest_path = manifest_path, staging_dir) # Helper function

  say("Creating zip archive...\n")

  # Create the output directory on demand
  if (!dir.exists(dirname(zip_filename))) {
    dir.create(dirname(zip_filename), recursive = TRUE)
  }

  zipr(zipfile = zip_filename, files = files_to_zip)
  say("Zip archive created successfully:", normalizePath(zip_filename, winslash = "/"), "\n")

  # The staging directory is removed by the on.exit() handler registered above
  invisible(NULL)
}

Try the iRfcb package in your browser

Any scripts or data that you put into this service are public.

iRfcb documentation built on April 16, 2025, 1:09 a.m.