#' Extracts peak data from a raw or mzML file
#'
#' @description Extract the M/Z and Intensity data for a specific scan number from
#' a raw or mzML file. Use "make_peak_data" to create a "peak_data" object.
#'
#' @param ScanMetadata Object of the scan_metadata class from get_scan_metadata. Required.
#' @param ScanNumber Integer indicating which scan number to pull the peak data. Required.
#' @param MinAbundance Filter out peaks with an abundance below threshold. Ranges from 0-100. Default is 0.
#' @param MinIntensity Filter out peaks with an intensity below the threshold. Default is 0.
#' If MinAbundance is 0, then the minimum intensity filter will be applied.
#'
#' @details
#' The data.table outputted by this function contains both the M/Z and Intensity vectors of the spectra.
#'
#' Information regarding user input and calculated metrics are stored in the attributes
#' \tabular{ll}{
#' ScanNumber \tab The scan number of the spectra, as given by the user in the get_peak_data parameters \cr
#' \tab \cr
#' TotalNumberPeaks \tab The total number of peaks stored in the MS file. \cr
#' \tab \cr
#' NumberPeaksPostFilter \tab The number of peaks remaining post filter. \cr
#' \tab \cr
#' PercentagePeaksRemain \tab The percentage or proportion of peaks remaining post-filter. This is NumberPeaksPostFilter/TotalNumberPeaks. \cr
#' \tab \cr
#' MSPath \tab The path to the MS file \cr
#' }
#'
#' @examples
#' \dontrun{
#'
#' # Pull peak data from various examples
#' peak1 <- get_peak_data(ScanMetadata = BU_ScanMetadata, ScanNumber = 32000)
#' peak2 <- get_peak_data(ScanMetadata = TD_ScanMetadata, ScanNumber = 5700, MinAbundance = 0)
#' peak3 <- get_peak_data(ScanMetadata = TD_ScanMetadata, ScanNumber = 5700, MinAbundance = 2)
#' peak4 <- get_peak_data(ScanMetadata = RAW_ScanMetadata, ScanNumber = 136, MinAbundance = 0.1)
#' peak5 <- get_peak_data(ScanMetadata = RAW_ScanMetadata, ScanNumber = 136, MinAbundance = 10)
#'
#' }
#'
#' @export
get_peak_data <- function(ScanMetadata,
ScanNumber,
MinAbundance = 0,
MinIntensity = 0) {
##################
## CHECK INPUTS ##
##################
# Assert that ScanMetadata is a ScanMetadata object.
if ("scan_metadata" %in% class(ScanMetadata) == FALSE) {
stop("ScanMetadata must be a scan_metadata object generated by get_scan_metadata.")
}
# Assert that scan number is an integer
if (is.numeric(ScanNumber) == F) {
stop("Scan Number must be an integer.")
}
# Round scan number if the input is not a whole number
ScanNumber <- round(ScanNumber)
# Assert that the scan number is within the range of scan numbers in the scan metadata object.
if (ScanNumber < min(ScanMetadata$`Scan Number`) ||
ScanNumber > max(ScanMetadata$`Scan Number`)) {
stop(paste("Scan Number must be within the range of", min(ScanMetadata$`Scan Number`),
"&", max(ScanMetadata$`Scan Number`)))
}
# Assert that the Abundance Minimum is a numeric value
if (!is.numeric(MinAbundance)) {
stop("MinAbundance needs to be a numeric value.")
}
if (MinAbundance < 0 | MinAbundance > 100) {
stop("MinAbundance needs to range between 0 and 100.")
}
# Assert that the Intensity Minimum is a numeric value
if (!is.numeric(MinIntensity)) {
stop("MinIntensity should be a number.")
}
####################
## PULL PEAK DATA ##
####################
# Use mzR and mzRpwiz object to pull peaks if the file is XML
if (attr(ScanMetadata, "pspecter")$MSFileType == "XML") {
# The mzR package reads entry number, and not scan number.
uniqueScanNumbers <- ScanMetadata$`Scan Number` %>% unique()
orderScanNumbers <- uniqueScanNumbers[order(uniqueScanNumbers)]
entryNum <- match(ScanNumber, orderScanNumbers)
# Use mzR to read the file. Because this function is buggy, it was removed from
# the attributes of the scan_metadata object
mzRpwiz <- mzR::openMSfile(attr(ScanMetadata, "pspecter")$MSPath, backend = "pwiz")
# Pull peaks
Peaks <- mzR::peaks(attr(ScanMetadata, "pspecter")$mzRpwiz, entryNum) %>% data.table::data.table()
colnames(Peaks) <- c("M/Z", "Intensity")
# Get Number of Peaks
TotalNumberPeaks <- nrow(Peaks)
# Use rawrr if the file type is raw
} else if (attr(ScanMetadata, "pspecter")$MSFileType == "RAW") {
# Read peak data from an individual scan number. File path must be full path
Peaks <- rawrr::readSpectrum(attr(ScanMetadata, "pspecter")$MSPath, scan = ScanNumber)
Peaks <- data.table::data.table("M/Z" = Peaks[[1]]$mZ, "Intensity" = Peaks[[1]]$intensity)
Peaks <- Peaks[Peaks$Intensity != 0,]
TotalNumberPeaks <- nrow(Peaks)
}
###################
## APPLY FILTERS ##
###################
# Add abundance data
Peaks$Abundance <- round(Peaks$Intensity / max(Peaks$Intensity) * 100, 4)
# Remove peaks that do no meet the minimum intensity value
NumberPeaksPostFilter <- TotalNumberPeaks
if (MinAbundance != 0) {
Peaks <- subset(Peaks, Abundance >= MinAbundance)
NumberPeaksPostFilter <- nrow(Peaks)
} else {
Peaks <- subset(Peaks, Intensity >= MinIntensity)
NumberPeaksPostFilter <- nrow(Peaks)
}
##################
## BUILD OBJECT ##
##################
# Keep track of the inputs in the attributes
attr(Peaks, "pspecter") <- list(
"MSPath" = attr(ScanMetadata, "pspecter")$MSPath,
"ScanNumber" = ScanNumber,
"MinimumAbundance" = MinAbundance,
"TotalNumberPeaks" = TotalNumberPeaks,
"NumberPeaksPostFilter" = NumberPeaksPostFilter,
"PercentagePeaksRemain" = paste0(round(NumberPeaksPostFilter / TotalNumberPeaks * 100, digits = 2), "%")
)
# Assign the peak data class
class(Peaks) <- c(class(Peaks), "peak_data")
# Return Results
return(Peaks)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.