Nothing
# WARNING - Generated by {fusen} from dev/dereplicate-spectra.Rmd: do not edit by hand
#' Process Bruker MALDI Biotyper spectra _à la_ Strejcek et al. (2018)
#'
#' @details
#' Based on the original implementation, the function performs the following tasks:
#'
#' 1. Square-root transformation
#' 2. Mass range trimming to 4-10 kDa as they were deemed most determinant by Strejcek et al. (2018)
#' 3. Signal smoothing using the Savitzky-Golay method and a half window size of 20
#' 4. Baseline correction with the SNIP procedure
#' 5. Normalization by Total Ion Current
#' 6. Peak detection using the SuperSmoother procedure and with a signal-to-noise ratio above 3
#' 7. Peak filtering. This step has been added to discard peaks with a negative signal-to-noise ratio probably due to being on the edge of the mass range.
#'
#'
#' @param spectra_list A list of [MALDIquant::MassSpectrum] objects.
#' @param spectra_names A [tibble::tibble] (or [data.frame]) of sanitized spectra names by default from [get_spectra_names]. If provided manually, the column `sanitized_name` will be used to name the spectra.
#' @param rds_prefix `r lifecycle::badge('deprecated')` Writing to disk as RDS is no longer supported. A character indicating the prefix for the `.RDS` output files to be written in the `processed` directory. By default, no prefix are given and thus no files are written.
#'
#' @return A named list of three objects:
#' * `spectra`: a list the length of the spectra list of [MALDIquant::MassSpectrum] objects.
#' * `peaks`: a list the length of the spectra list of [MALDIquant::MassPeaks] objects.
#' * `metadata`: a tibble indicating the median signal-to-noise ratio (`SNR`) and peaks number for all spectra list (`peaks`), with spectra names in the `name` column.
#'
#' @seealso [import_biotyper_spectra] and [check_spectra] for the inputs and [merge_processed_spectra] for further analysis.
#' @export
#'
#' @references Strejcek M, Smrhova T, Junkova P & Uhlik O (2018). “Whole-Cell MALDI-TOF MS versus 16S rRNA Gene Analysis for Identification and Dereplication of Recurrent Bacterial Isolates.” *Frontiers in Microbiology* 9 <doi:10.3389/fmicb.2018.01294>.
#'
#' @note The original R code on which this function is based is accessible at: <https://github.com/strejcem/MALDIvs16S>
#' @examples
#' # Get an example directory of six Bruker MALDI Biotyper spectra
#' directory_biotyper_spectra <- system.file(
#' "toy-species-spectra",
#' package = "maldipickr"
#' )
#' # Import the six spectra
#' spectra_list <- import_biotyper_spectra(directory_biotyper_spectra)
#' # Transform the spectra signals according to Strejcek et al. (2018)
#' processed <- process_spectra(spectra_list)
#' # Overview of the list architecture that is returned
#' # with the list of processed spectra, peaks identified and the
#' # metadata table
#' str(processed, max.level = 2)
#' # A detailed view of the metadata with the median signal-to-noise
#' # ratio (SNR) and the number of peaks
#' processed$metadata
process_spectra <- function(spectra_list,
spectra_names = get_spectra_names(spectra_list),
rds_prefix = deprecated()) {
# It returns the list and write it for future processing as an RDS file.
if (lifecycle::is_present(rds_prefix)) {
lifecycle::deprecate_warn(
when = "1.2.9000",
what = "process_spectra(rds_prefix)",
details = "Ability to write processed spectra to disk as RDS files will be dropped in next release."
)
}
# 1. SQRT transformation
# 2. Mass range trimming
# 3. Signal smoothing
# 4. Baseline Correction
# 5. Normalization
spectra <- spectra_list %>%
MALDIquant::transformIntensity("sqrt") %>%
MALDIquant::trim(range = c(4000, 10000)) %>%
MALDIquant::smoothIntensity("SavitzkyGolay", halfWindowSize = 20) %>%
MALDIquant::removeBaseline("SNIP", iterations = 50) %>%
MALDIquant::calibrateIntensity(method = "TIC")
# 6. Peak detection
peaks <- spectra %>%
MALDIquant::detectPeaks(
method = "SuperSmoother",
halfWindowSize = 20,
SNR = 3
)
# Added extra step to remove the peaks with
# negative SNR due to being on the edge of the masses trimmed
peaks <- lapply(peaks, function(pk) {
pk[MALDIquant::snr(pk) >= 3]
})
# Spectrum metadata table
# Extract signal-to-noise ratios and peaks number
snr_list <- lapply(peaks, MALDIquant::snr)
metadata <- data.frame(
"SNR" = vapply(snr_list, stats::median, FUN.VALUE = numeric(1)),
"peaks" = lengths(snr_list)
)
# Add the spectra identifiers to all objects
if (!"sanitized_name" %in% colnames(spectra_names)) {
stop(
"Missing 'sanitized_name' column in the provided 'spectra_names' tibble!",
"\n\nTip: Use the `get_spectra_names()` for default and compliant names."
)
}
rownames(metadata) <- names(spectra) <- names(peaks) <- spectra_names[["sanitized_name"]]
# Aggregate the objects to a list
processed_list <- list(
"spectra" = spectra,
"peaks" = peaks,
# convert the data.frame to a tibble
"metadata" = tibble::as_tibble(metadata, rownames = "name")
)
return(processed_list)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.