Nothing
# WARNING - Generated by {fusen} from dev/dereplicate-spectra.Rmd: do not edit by hand
#' Merge multiple processed spectra and peaks
#'
#' Aggregate multiple processed spectra, their associated peaks and metadata into a feature matrix and a concatenated metadata table.
#'
#' @param processed_spectra A [list] of the processed spectra and associated peaks and metadata in two possible formats:
#' * A list of **in-memory objects** (named `spectra`, `peaks`, `metadata`) produced by [process_spectra]. Named lists will have names dropped, see Note.
#' * `r lifecycle::badge('deprecated')` A list of **paths** to RDS files produced by [process_spectra] when using the `rds_prefix` option.
#' @param remove_peakless_spectra A logical indicating whether to discard the spectra without detected peaks.
#' @param interpolate_missing A logical indicating if intensity values for missing peaks should be interpolated from the processed spectra signal or left NA which would then be converted to 0.
#'
#' @return A *n*×*p* matrix, with *n* spectra as rows and *p* features as columns that are the peaks found in all the processed spectra.
#'
#' @note When aggregating multiple runs of processed spectra, if a named list is
#' provided, note that the names will be dropped, to prevent further downstream
#' issues when these names were being appended to the rownames of the matrix
#' thus preventing downstream metadata merge.
#'
#' @seealso [process_spectra], the "Value" section in [`MALDIquant::intensityMatrix`](https://rdrr.io/cran/MALDIquant/man/intensityMatrix-functions.html)
#' @export
#' @examples
#' # Get an example directory of six Bruker MALDI Biotyper spectra
#' directory_biotyper_spectra <- system.file(
#' "toy-species-spectra",
#' package = "maldipickr"
#' )
#' # Import the six spectra
#' spectra_list <- import_biotyper_spectra(directory_biotyper_spectra)
#' # Transform the spectra signals according to Strejcek et al. (2018)
#' processed <- process_spectra(spectra_list)
#' # Merge the spectra to produce the feature matrix
#' fm <- merge_processed_spectra(list(processed))
#' # The feature matrix has 6 spectra as rows and
#' # 35 peaks as columns
#' dim(fm)
#' # Notice the difference when the interpolation is turned off
#' fm_no_interpolation <- merge_processed_spectra(
#' list(processed),
#' interpolate_missing = FALSE
#' )
#' sum(fm == 0) # 0
#' sum(fm_no_interpolation == 0) # 68
#'
#' # Multiple runs can be aggregated using list()
#' # Merge the spectra to produce the feature matrix
#' fm_all <- merge_processed_spectra(list(processed, processed, processed))
#' # The feature matrix has 3×6=18 spectra as rows and
#' # 35 peaks as columns
#' dim(fm_all)
#'
#' # If using a list, names will be dropped and are not propagated to the matrix.
#' \dontrun{
#' fm_all <- merge_processed_spectra(
#' list("A" = processed, "B" = processed, "C" = processed))
#' any(grepl("A|B|C", rownames(fm_all))) # FALSE
#' }
#'
merge_processed_spectra <- function(processed_spectra, remove_peakless_spectra = TRUE, interpolate_missing = TRUE) {
if (any(
is.null(processed_spectra),
!is.list(processed_spectra),
length(processed_spectra) == 0
)
) {
stop(
"Either 'processed_spectra' is not a list or it is an empty list."
)
}
# Determine the type of input with a sneak-peek at the first element
if (typeof(processed_spectra[[1]]) == "character") {
if (is_a_rds_list(processed_spectra)) {
lifecycle::deprecate_warn(
when = "1.2.9000",
what = "merge_processed_spectra(processed_spectra = 'must be an in-memory object')",
details = "Ability to read processed spectra from RDS files will be dropped in next release."
)
processed <- lapply(processed_spectra, readRDS)
}
} else {
processed <- processed_spectra
}
# Names at the upper level causes problems when aggregating multiple runs by
# being appended to the rownames of matrix thus preventing downstream metadata
# merge.
if(!is.null(names(processed))){
processed <- unname(processed)
}
stopifnot(is_a_processed_spectra_list(processed))
peakless <- list()
# List the spectra with no peaks detected and remove them
# Initially using rownames of dataframe, now using dplyr functions for consistency
if (remove_peakless_spectra) {
peakless <- lapply( # Extract the metadata object from all element of the list
processed, `[[`, "metadata"
) %>%
dplyr::bind_rows(.id = "list_identifier") %>%
dplyr::filter(.data$peaks == 0) %>%
dplyr::pull(.data$name)
if (length(peakless) > 0) {
warning(
"No peaks were detected in the following spectra, so they will be removed\n",
paste(peakless, collapse = "\n"), "\n"
)
}
}
# 7. Bin peaks
peaks <- lapply( # Extract the peaks object from all element of the list
processed, `[[`, "peaks"
) %>% unlist()
names_spectra <- names(peaks)
if (remove_peakless_spectra & length(peakless) > 0) {
peaks <- peaks[-which(names(peaks) %in% peakless)]
}
peaks <- MALDIquant::binPeaks(peaks, tolerance = .002, method = "strict")
# 8. Feature matrix construction (peaks as columns and spectra as rows)
if (interpolate_missing) {
# This is the default in the Strejcek et al. (2018) procedure
spectra_list <- lapply( # Extract the spectrum object from all element of the list
processed, `[[`, "spectra"
) %>% unlist()
if (remove_peakless_spectra & length(peakless) > 0) {
spectra_list <- spectra_list[-which(names(spectra_list) %in% peakless)]
}
featureMatrix <- MALDIquant::intensityMatrix(peaks, spectra_list)
} else {
featureMatrix <- MALDIquant::intensityMatrix(peaks)
featureMatrix[is.na(featureMatrix)] <- 0
}
# Adding the correct rownames fto the matrix
if (remove_peakless_spectra & length(peakless) > 0) {
rownames(featureMatrix) <- names_spectra[-which(names_spectra %in% peakless)]
} else {
rownames(featureMatrix) <- names_spectra
}
return(featureMatrix)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.