R/process-dir.R
In SerolyzeR: Reading, Quality Control and Preprocessing of MBA (Multiplex Bead Assay) Data

Documented in detect_mba_format find_layout_file get_output_dir is_mba_data_file process_dir

#' @title
#' Find a layout file given plate filepath
#'
#' @import fs
#' @importFrom stringr str_split
#'
#' @keywords internal
#'
find_layout_file <- function(plate_filepath, layout_filepath = NULL) {
  if (!is.null(layout_filepath)) {
    if (fs::file_exists(layout_filepath)) {
      return(layout_filepath)
    }

    stop(
      paste0("The specified common layout file ", layout_filepath, " does not exist.")
    )
  }

  stopifnot(fs::is_absolute_path(plate_filepath))

  file_dir <- fs::path_dir(plate_filepath)
  filename <- fs::path_file(plate_filepath)
  filename_splitted <- stringr::str_split(filename, "\\.")
  filename_basename <- filename_splitted[[1]][1]

  supported_layout_exts <- c("xlsx", "csv")
  layout_file_glob <- paste0(
    filename_basename, "_layout",
    "\\.(", paste(supported_layout_exts, collapse = "|"), ")$"
  )
  possible_files <- list.files(file_dir, pattern = layout_file_glob)
  if (length(possible_files) == 0) {
    warning(
      paste0("Layout file for a file ", plate_filepath, " could not be found. Won't use the layout file for the given plate.")
    )
    return(NULL)
  }

  possible_layout_filename <- possible_files[1]
  possible_layout_path <- fs::path_join(c(file_dir, possible_layout_filename))
  if (fs::file_exists(possible_layout_path)) {
    return(possible_layout_path)
  } else {
    warning(
      paste0("Layout file for a file ", plate_filepath, " could not be found. Won't use the layout file for the given plate.")
    )
    return(NULL)
  }
}

#' @title
#' Identify if a file is a MBA data file
#'
#' @param filepath (`character(1)`) The path to the file.
#' @param check_format (`logical(1)`) If `TRUE`, the function will check if the file name contains a supported format. The default is `TRUE`.
#'
#' @return `TRUE` if the file is a MBA data file, `FALSE` otherwise.
#'
#' @import fs
#' @importFrom stringr str_split
#'
#' @keywords internal
#'
is_mba_data_file <- function(filepath, check_format = TRUE) {
  format_pattern <- SerolyzeR.env$mba_pattern
  extension_pattern <- "\\.([xX][lL][sS][xX]|[cC][sS][vV])$"
  output_pattern <- "RAU|nMFI"
  layout_pattern <- "_layout"

  stopifnot(fs::file_exists(filepath))
  filename <- fs::path_file(filepath)
  filename_splitted <- stringr::str_split(filename, "\\.")
  basename <- filename_splitted[[1]][1]

  # plate filename has to contain supported format
  if (check_format) {
    if (!grepl(format_pattern, filename, ignore.case = TRUE)) {
      return(FALSE)
    }
  }

  # plate filename extensions have to be supported
  if (!grepl(extension_pattern, filename)) {
    return(FALSE)
  }

  # plate filename has not to contain layout pattern
  if (grepl(layout_pattern, basename, fixed = TRUE)) {
    return(FALSE)
  }

  # plate filename has not to contain supported output format
  # as not to mix it up with output files
  if (grepl(output_pattern, basename, ignore.case = TRUE)) {
    return(FALSE)
  }

  return(TRUE)
}

#' @title
#' Try to detect the format of a file
#'
#' @import fs
#' @importFrom stringr str_split
#'
#' @keywords internal
#'
detect_mba_format <- function(filepath, format = NULL) {
  if (!is.null(format)) {
    stopifnot(is_mba_format(format, allow_nullable = FALSE))
    return(format)
  }

  stopifnot(fs::file_exists(filepath))
  filename <- fs::path_file(filepath)
  filename_splitted <- stringr::str_split(filename, "\\.")
  basename <- filename_splitted[[1]][1]

  if (grepl(SerolyzeR.env$xponent_pattern, basename, ignore.case = TRUE)) {
    return("xPONENT")
  } else if (grepl(SerolyzeR.env$intelliflex_pattern, basename, ignore.case = TRUE)) {
    return("INTELLIFLEX")
  } else {
    stop("The format of the file could not be detected.")
  }
}

#' @title
#' Get output directory for a given input file
#'
#' @import fs
#'
#' @keywords internal
#'
get_output_dir <- function(
    input_file,
    input_dir,
    output_dir = NULL,
    flatten_output_dir = FALSE) {
  output_root <- ifelse(is.null(output_dir), input_dir, output_dir)
  if (!fs::dir_exists(output_root)) {
    stop("Output directory does not exist.")
  }
  if (flatten_output_dir) {
    current_output_dir <- output_root
  } else {
    input_file_rel_path <- fs::path_rel(input_file, input_dir)
    current_output_dir <- fs::path_dir(
      fs::path_join(c(output_root, input_file_rel_path))
    )
  }
  return(fs::path(current_output_dir))
}
#' @title
#' Process a Directory of Luminex Data Files
#'
#' @description
#' This function processes all Luminex plate files within a specified directory.
#' Each plate file is processed using [process_file()], and the resulting normalised data is saved.
#' Optionally, quality control reports can be generated, and results from multiple plates can be merged into a single file.
#'
#' ## Workflow
#' 1. Identify all Luminex plate files in the `input_dir`, applying recursive search if `recurse = TRUE`.
#' 2. Detect the format of each file based on the `format` parameter or the filename.
#' 3. Locate the corresponding layout file using the filename or use the common layout passed with the `layout_filepath` parameter.
#' 4. Determine the appropriate output directory using [get_output_dir()].
#' 5. Process each plate file using [process_file()].
#' 6. If `merge_outputs = TRUE`, merge normalised data from multiple plates into a single CSV file.
#'
#' ## Naming Conventions for Input Files
#' - **If `format` is specified:**
#'   - Each plate file should be named as `{plate_name}.csv`.
#'   - The corresponding layout file should be named as `{plate_name}_layout.csv` or `{plate_name}_layout.xlsx`.
#'   - Alternatively, if `layout_filepath` is provided, it serves as a unified layout file for all plates.
#'
#' - **If `format` equals `NULL` (automatic detection):**
#'   - Each plate file should be named as `{plate_name}_{format}.csv`, where `{format}` is either `xPONENT` or `INTELLIFLEX`.
#'   - The corresponding layout file should be named using the same convention as above, i.i. `{plate_name}_{format}_layout.csv` or `{plate_name}_{format}_layout.xlsx`.
#'
#' ## Output File Structure
#' - The `output_dir` parameter specifies where the processed files are saved.
#' - If `output_dir` is `NULL`, output files are saved in the same directory as the input files.
#' - By default, the output directory structure follows the input directory, unless `flatten_output_dir = TRUE`, which saves all outputs directly into `output_dir`.
#' - Output filenames follow the convention used in [process_file()].
#'   - For a plate named `{plate_name}`, the normalised output files are named as:
#'     - `{plate_name}_RAU.csv` for RAU normalisation.
#'     - `{plate_name}_nMFI.csv` for nMFI normalisation.
#'     - `{plate_name}_MFI.csv` for MFI normalisation.
#'   - If `generate_reports = TRUE`, a quality control report for every plate is saved as `{plate_name}_report.pdf`.
#'   - If `merge_outputs = TRUE`, merged normalised files are named as:
#'     - `merged_RAU_{timestamp}.csv`
#'     - `merged_nMFI_{timestamp}.csv`
#'     - `merged_MFI_{timestamp}.csv`
#'   - If `generate_multiplate_reports = TRUE`, a multiplate quality control report is saved as `multiplate_report_{timestamp}.pdf`.
#'
#' @param input_dir (`character(1)`) Path to the directory containing plate files. Can contain subdirectories if `recurse = TRUE`.
#' @param output_dir (`character(1)`, optional) Path to the directory where output files will be saved. Defaults to `NULL` (same as input directory).
#' @param recurse (`logical(1)`, default = `FALSE`)
#'   - If `TRUE`, searches for plate files in subdirectories as well.
#' @param flatten_output_dir (`logical(1)`, default = `FALSE`)
#'   - If `TRUE`, saves output files directly in `output_dir`, ignoring the input directory structure.
#' @param format (`character(1)`, optional) Luminex data format. If `NULL`, it is automatically detected. Options: `'xPONENT'`, `'INTELLIFLEX'`.
#' @param layout_filepath (`character(1)`, optional) Path to a layout file. If `NULL`, the function attempts to detect it automatically.
#' @param normalisation_types (`character()`, default = `c("MFI", "RAU", "nMFI")`)
#'   - The normalisation types to apply. Supported values: `"MFI"`, `"RAU"`, `"nMFI"`.
#' @param generate_reports (`logical(1)`, default = `FALSE`)
#'   - If `TRUE`, generates single plate quality control reports for each processed plate file.
#' @param generate_multiplate_reports (`logical(1)`, default = `FALSE`)
#'  - If `TRUE`, generates a multiplate quality control report for all processed plates.
#' @param merge_outputs (`logical(1)`, default = `FALSE`)
#'   - If `TRUE`, merges all normalised data into a single CSV file per normalisation type.
#'   - The merged file is named `merged_{normalisation_type}_{timestamp}.csv`.
#' @param column_collision_strategy (`character(1)`, default = `'intersection'`)
#'   - Determines how to handle missing or extra columns when merging outputs.
#'   - Options: `'union'` (include all columns), `'intersection'` (include only common columns).
#' @param return_plates (`logical(1)`, default = `FALSE`)
#'   - If `TRUE`, returns a list of processed plates sorted by experiment date.
#' @param dry_run (`logical(1)`, default = `FALSE`)
#'   - If `TRUE`, prints file details without processing them.
#' @param verbose (`logical(1)`, default = `TRUE`)
#'   - If `TRUE`, prints detailed processing information.
#' @param ... Additional arguments passed to [process_file()].
#'
#' @return If `return_plates = TRUE`, returns a sorted list of [`Plate`] objects. Otherwise, returns `NULL`.
#'
#' @examples
#' # Process all plate files in a directory
#' input_dir <- system.file("extdata", "multiplate_lite", package = "SerolyzeR", mustWork = TRUE)
#' output_dir <- tempdir(check = TRUE)
#' plates <- process_dir(input_dir, return_plates = TRUE, output_dir = output_dir)
#'
#' @import fs
#'
#' @export
process_dir <- function(
    input_dir,
    output_dir = NULL,
    recurse = FALSE,
    flatten_output_dir = FALSE,
    layout_filepath = NULL,
    format = NULL,
    normalisation_types = c("MFI", "RAU", "nMFI"),
    generate_reports = FALSE,
    generate_multiplate_reports = FALSE,
    merge_outputs = FALSE,
    column_collision_strategy = "intersection",
    return_plates = FALSE,
    dry_run = FALSE,
    verbose = TRUE,
    ...) {
  if (!fs::dir_exists(input_dir)) {
    stop("Input directory does not exist.")
  }
  if (!is.null(output_dir) && !fs::dir_exists(output_dir)) {
    stop("Output directory is specified, but does not exist.")
  }
  if (!is.null(layout_filepath) && !fs::file_exists(layout_filepath)) {
    stop("Layout file is specified, but does not exist.")
  }
  stopifnot(is_mba_format(format, allow_nullable = TRUE))
  input_dir <- fs::path_abs(input_dir)

  input_files <- c()
  for (input_file in fs::dir_ls(input_dir, recurse = recurse)) {
    if (is_mba_data_file(input_file, check_format = is.null(format))) {
      input_files <- c(input_files, input_file)
    }
  }

  if (dry_run) {
    cat("Dry run mode enabled.\n")
    cat("Input directory: ", input_dir, "\n")
    if (!is.null(format)) {
      cat("MBA format: static (", format, ") \n")
    } else {
      cat("MBA format: dynamic \n")
    }
    if (!is.null(layout_filepath)) {
      cat("Layout file: static (", layout_filepath, ") \n")
    } else {
      cat("Layout file: dynamic \n")
    }
  }

  if (length(input_files) == 0) {
    cat("No files found in the input directory.\n")
    cat("Check if files inside the input directory are named correctly. ")
    cat("If files are not named according to the convention, ")
    cat("one should provide a global MBA format and layout file.\n")
    return(NULL)
  }

  formats <- rep(NA, length(input_files))
  for (i in seq_along(input_files)) {
    formats[i] <- detect_mba_format(input_files[i], format = format)
  }
  stopifnot(all(!is.na(formats)))

  layouts <- rep(NA, length(input_files))
  for (i in seq_along(input_files)) {
    layouts[i] <- find_layout_file(
      input_files[i],
      layout_filepath = layout_filepath
    )
  }
  stopifnot(all(!is.na(layouts)))

  if (dry_run) {
    cat("The following files will be processed:\n")
    for (i in seq_along(input_files)) {
      current_output_dir <- get_output_dir(input_files[i], input_dir,
        output_dir = output_dir, flatten_output_dir = flatten_output_dir
      )
      cat(
        "\n",
        "File: ", input_files[i], "\n",
        "Layout: ", layouts[i], "\n",
        "Format: ", formats[i], "\n",
        "Output:", current_output_dir, "\n"
      )
    }
    return(NULL)
  }

  plates <- list()
  for (i in seq_along(input_files)) {
    current_output_dir <- get_output_dir(input_files[i], input_dir,
      output_dir = output_dir, flatten_output_dir = flatten_output_dir
    )
    plate <- process_file(
      input_files[i],
      layout_filepath = ifelse(is.na(layouts[i]), NULL, layouts[i]),
      output_dir = current_output_dir,
      format = formats[i],
      process_plate = !merge_outputs,
      normalisation_types = normalisation_types,
      generate_report = generate_reports,
      verbose = verbose,
      ...
    )

    plates[[plate$plate_name]] <- plate
  }

  plates <- sort_list_by(
    plates,
    value_f = function(p) p$plate_datetime,
    decreasing = FALSE
  )

  file_ending <- format(now(), "%Y%m%d_%H%M%S")
  if (merge_outputs) {
    for (normalisation_type in normalisation_types) {
      dataframes <- list()
      for (plate in plates) {
        output_df <- process_plate(plate,
          normalisation_type = normalisation_type, write_output = FALSE,
          blank_adjustment = TRUE, verbose = verbose
        )
        df_header_columns <- data.frame(
          plate_name = plate$plate_name,
          sample_name = rownames(output_df)
        )
        rownames(output_df) <- NULL
        modifed_output_df <- cbind(df_header_columns, output_df)
        dataframes[[plate$plate_name]] <- modifed_output_df
      }

      main_output_df <- merge_dataframes(
        dataframes,
        column_collision_strategy = column_collision_strategy,
        fill_value = NA
      )

      file_name <- paste0(
        "merged_", normalisation_type, "_", file_ending, ".csv"
      )
      output_path <- fs::path_join(c(output_dir, file_name))
      write.csv(main_output_df, output_path, row.names = FALSE)
      verbose_cat("Merged output saved to: ", output_path, "\n", verbose = verbose)
    }
  }

  if (generate_multiplate_reports) {
    file_name <- paste0("multiplate_report_", file_ending, ".html")
    report_title <- paste0("Multiplate Report of directory ", basename(input_dir))
    generate_levey_jennings_report(plates, report_title = report_title, output_dir = output_dir, filename = file_name, ...)
    verbose_cat("Multiplate report saved to: ", file.path(output_dir, file_name), "\n", verbose = verbose)
  }


  if (return_plates) {
    return(plates)
  }
}

Any scripts or data that you put into this service are public.

SerolyzeR documentation built on April 12, 2025, 2:11 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

SerolyzeR
Reading, Quality Control and Preprocessing of MBA (Multiplex Bead Assay) Data

R/process-dir.R
In SerolyzeR: Reading, Quality Control and Preprocessing of MBA (Multiplex Bead Assay) Data

Defines functions process_dir get_output_dir detect_mba_format is_mba_data_file find_layout_file

Documented in detect_mba_format find_layout_file get_output_dir is_mba_data_file process_dir

Try the SerolyzeR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SerolyzeR Reading, Quality Control and Preprocessing of MBA (Multiplex Bead Assay) Data

R/process-dir.R In SerolyzeR: Reading, Quality Control and Preprocessing of MBA (Multiplex Bead Assay) Data

Defines functions process_dir get_output_dir detect_mba_format is_mba_data_file find_layout_file

Documented in detect_mba_format find_layout_file get_output_dir is_mba_data_file process_dir

Try the SerolyzeR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SerolyzeR
Reading, Quality Control and Preprocessing of MBA (Multiplex Bead Assay) Data

R/process-dir.R
In SerolyzeR: Reading, Quality Control and Preprocessing of MBA (Multiplex Bead Assay) Data