R/process_spectra.R

Defines functions process_spectra

Documented in process_spectra

# WARNING - Generated by {fusen} from dev/dereplicate-spectra.Rmd: do not edit by hand

#' Process Bruker MALDI Biotyper spectra _à la_ Strejcek et al. (2018)
#'
#' @details
#' Based on the original implementation, the function performs the following tasks:
#'
#' 1. Square-root transformation
#' 2. Mass range trimming to 4-10 kDa as they were deemed most determinant by Strejcek et al. (2018)
#' 3. Signal smoothing using the Savitzky-Golay method and a half window size of 20
#' 4. Baseline correction with the SNIP procedure
#' 5. Normalization by Total Ion Current
#' 6. Peak detection using the SuperSmoother procedure and with a signal-to-noise ratio above 3
#' 7. Peak filtering. This step has been added to discard peaks with a negative signal-to-noise ratio probably due to being on the edge of the mass range.
#'
#'
#' @param spectra_list A list of [MALDIquant::MassSpectrum] objects.
#' @param spectra_names A [tibble::tibble] (or [data.frame]) of sanitized spectra names by default from [get_spectra_names]. If provided manually, the column `sanitized_name` will be used to name the spectra.
#' @param rds_prefix `r lifecycle::badge('deprecated')` Writing to disk as RDS is no longer supported. A character indicating the prefix for the `.RDS` output files to be written in the `processed` directory. By default, no prefix are given and thus no files are written.
#'
#' @return A named list of three objects:
#' * `spectra`: a list the length of the spectra list of [MALDIquant::MassSpectrum] objects.
#' * `peaks`: a list the length of the spectra list of [MALDIquant::MassPeaks] objects.
#' * `metadata`: a tibble indicating the median signal-to-noise ratio (`SNR`) and peaks number for all spectra list (`peaks`), with spectra names in the `name` column.
#'
#' @seealso [import_biotyper_spectra] and [check_spectra] for the inputs and [merge_processed_spectra] for further analysis.
#' @export
#'
#' @references Strejcek M, Smrhova T, Junkova P & Uhlik O (2018). “Whole-Cell MALDI-TOF MS versus 16S rRNA Gene Analysis for Identification and Dereplication of Recurrent Bacterial Isolates.” *Frontiers in Microbiology* 9 <doi:10.3389/fmicb.2018.01294>.
#'
#' @note The original R code on which this function is based is accessible at: <https://github.com/strejcem/MALDIvs16S>
#' @examples
#' # Get an example directory of six Bruker MALDI Biotyper spectra
#' directory_biotyper_spectra <- system.file(
#'   "toy-species-spectra",
#'   package = "maldipickr"
#' )
#' # Import the six spectra
#' spectra_list <- import_biotyper_spectra(directory_biotyper_spectra)
#' # Transform the spectra signals according to Strejcek et al. (2018)
#' processed <- process_spectra(spectra_list)
#' # Overview of the list architecture that is returned
#' #  with the list of processed spectra, peaks identified and the
#' #  metadata table
#' str(processed, max.level = 2)
#' # A detailed view of the metadata with the median signal-to-noise
#' #  ratio (SNR) and the number of peaks
#' processed$metadata
process_spectra <- function(spectra_list,
                            spectra_names = get_spectra_names(spectra_list),
                            rds_prefix = deprecated()) {
  # It returns the list and write it for future processing as an RDS file.

  if (lifecycle::is_present(rds_prefix)) {
    lifecycle::deprecate_warn(
      when = "1.2.9000",
      what = "process_spectra(rds_prefix)",
      details = "Ability to write processed spectra to disk as RDS files will be dropped in next release."
    )
  }
  # 1. SQRT transformation
  # 2. Mass range trimming
  # 3. Signal smoothing
  # 4. Baseline Correction
  # 5. Normalization
  spectra <- spectra_list %>%
    MALDIquant::transformIntensity("sqrt") %>%
    MALDIquant::trim(range = c(4000, 10000)) %>%
    MALDIquant::smoothIntensity("SavitzkyGolay", halfWindowSize = 20) %>%
    MALDIquant::removeBaseline("SNIP", iterations = 50) %>%
    MALDIquant::calibrateIntensity(method = "TIC")

  # 6. Peak detection
  peaks <- spectra %>%
    MALDIquant::detectPeaks(
      method = "SuperSmoother",
      halfWindowSize = 20,
      SNR = 3
    )
  # Added extra step to remove the peaks with
  # negative SNR due to being on the edge of the masses trimmed
  peaks <- lapply(peaks, function(pk) {
    pk[MALDIquant::snr(pk) >= 3]
  })

  # Spectrum metadata table
  # Extract signal-to-noise ratios and peaks number
  snr_list <- lapply(peaks, MALDIquant::snr)
  metadata <- data.frame(
    "SNR" = vapply(snr_list, stats::median, FUN.VALUE = numeric(1)),
    "peaks" = lengths(snr_list)
  )

  # Add the spectra identifiers to all objects
  if (!"sanitized_name" %in% colnames(spectra_names)) {
    stop(
      "Missing 'sanitized_name' column in the provided 'spectra_names' tibble!",
      "\n\nTip: Use the `get_spectra_names()` for default and compliant names."
    )
  }
  rownames(metadata) <- names(spectra) <- names(peaks) <- spectra_names[["sanitized_name"]]

  # Aggregate the objects to a list
  processed_list <- list(
    "spectra" = spectra,
    "peaks" = peaks,
    # convert the data.frame to a tibble
    "metadata" = tibble::as_tibble(metadata, rownames = "name")
  )

  return(processed_list)
}

Try the maldipickr package in your browser

Any scripts or data that you put into this service are public.

maldipickr documentation built on Sept. 13, 2024, 1:12 a.m.