R/get_peak_data.R

Defines functions get_peak_data

Documented in get_peak_data

#' Extracts peak data from a raw or mzML file
#'
#' @description Extract the M/Z and Intensity data for a specific scan number from
#'    a raw or mzML file. Use "make_peak_data" to create a "peak_data" object.
#'
#' @param ScanMetadata Object of the scan_metadata class from get_scan_metadata. Required.
#' @param ScanNumber Integer indicating which scan number to pull the peak data. Required.
#' @param MinAbundance Filter out peaks with an abundance below threshold. Ranges from 0-100. Default is 0.
#' @param MinIntensity Filter out peaks with an intensity below the threshold. Default is 0. 
#'     If MinAbundance is 0, then the minimum intensity filter will be applied. 
#'
#' @details
#' The data.table outputted by this function contains both the M/Z and Intensity vectors of the spectra.
#'
#' Information regarding user input and calculated metrics are stored in the attributes
#' \tabular{ll}{
#' ScanNumber \tab The scan number of the spectra, as given by the user in the get_peak_data parameters \cr
#' \tab \cr
#' TotalNumberPeaks \tab The total number of peaks stored in the MS file. \cr
#' \tab \cr
#' NumberPeaksPostFilter \tab The number of peaks remaining post filter. \cr
#' \tab \cr
#' PercentagePeaksRemain \tab The percentage or proportion of peaks remaining post-filter. This is NumberPeaksPostFilter/TotalNumberPeaks. \cr
#' \tab \cr
#' MSPath \tab The path to the MS file \cr
#' }
#'
#' @examples
#' \dontrun{
#'
#' # Pull peak data from various examples
#' peak1 <- get_peak_data(ScanMetadata = BU_ScanMetadata, ScanNumber = 32000)
#' peak2 <- get_peak_data(ScanMetadata = TD_ScanMetadata, ScanNumber = 5700, MinAbundance = 0)
#' peak3 <- get_peak_data(ScanMetadata = TD_ScanMetadata, ScanNumber = 5700, MinAbundance = 2)
#' peak4 <- get_peak_data(ScanMetadata = RAW_ScanMetadata, ScanNumber = 136, MinAbundance = 0.1)
#' peak5 <- get_peak_data(ScanMetadata = RAW_ScanMetadata, ScanNumber = 136, MinAbundance = 10)
#'
#' }
#'
#' @export
get_peak_data <- function(ScanMetadata,
                          ScanNumber,
                          MinAbundance = 0,
                          MinIntensity = 0) {

  ##################
  ## CHECK INPUTS ##
  ##################

  # Assert that ScanMetadata is a ScanMetadata object.
  if ("scan_metadata" %in% class(ScanMetadata) == FALSE) {
    stop("ScanMetadata must be a scan_metadata object generated by get_scan_metadata.")
  }

  # Assert that scan number is an integer
  if (is.numeric(ScanNumber) == F) {
    stop("Scan Number must be an integer.")
  }

  # Round scan number if the input is not a whole number
  ScanNumber <- round(ScanNumber)

  # Assert that the scan number is within the range of scan numbers in the scan metadata object.
  if (ScanNumber < min(ScanMetadata$`Scan Number`) ||
      ScanNumber > max(ScanMetadata$`Scan Number`)) {

    stop(paste("Scan Number must be within the range of", min(ScanMetadata$`Scan Number`),
               "&", max(ScanMetadata$`Scan Number`)))

  }

  # Assert that the Abundance Minimum is a numeric value
  if (!is.numeric(MinAbundance)) {
    stop("MinAbundance needs to be a numeric value.")
  }
  if (MinAbundance < 0 | MinAbundance > 100) {
    stop("MinAbundance needs to range between 0 and 100.")
  }
  
  # Assert that the Intensity Minimum is a numeric value
  if (!is.numeric(MinIntensity)) {
    stop("MinIntensity should be a number.")
  }

  ####################
  ## PULL PEAK DATA ##
  ####################

  # Use mzR and mzRpwiz object to pull peaks if the file is XML
  if (attr(ScanMetadata, "pspecter")$MSFileType == "XML") {

    # The mzR package reads entry number, and not scan number.
    uniqueScanNumbers <- ScanMetadata$`Scan Number` %>% unique()
    orderScanNumbers <- uniqueScanNumbers[order(uniqueScanNumbers)]
    entryNum <- match(ScanNumber, orderScanNumbers)

    # Use mzR to read the file. Because this function is buggy, it was removed from
    # the attributes of the scan_metadata object
    mzRpwiz <- mzR::openMSfile(attr(ScanMetadata, "pspecter")$MSPath, backend = "pwiz")

    # Pull peaks
    Peaks <- mzR::peaks(attr(ScanMetadata, "pspecter")$mzRpwiz, entryNum) %>% data.table::data.table()
    colnames(Peaks) <- c("M/Z", "Intensity")

    # Get Number of Peaks
    TotalNumberPeaks <- nrow(Peaks)

    # Use rawrr if the file type is raw
  } else if (attr(ScanMetadata, "pspecter")$MSFileType == "RAW") {

    # Read peak data from an individual scan number. File path must be full path
    Peaks <- rawrr::readSpectrum(attr(ScanMetadata, "pspecter")$MSPath, scan = ScanNumber)
    Peaks <- data.table::data.table("M/Z" = Peaks[[1]]$mZ, "Intensity" = Peaks[[1]]$intensity)
    Peaks <- Peaks[Peaks$Intensity != 0,]

    TotalNumberPeaks <- nrow(Peaks)

  }

  ###################
  ## APPLY FILTERS ##
  ###################

  # Add abundance data
  Peaks$Abundance <- round(Peaks$Intensity / max(Peaks$Intensity) * 100, 4)

  # Remove peaks that do no meet the minimum intensity value
  NumberPeaksPostFilter <- TotalNumberPeaks
  
  if (MinAbundance != 0) {
    Peaks <- subset(Peaks, Abundance >= MinAbundance)
    NumberPeaksPostFilter <- nrow(Peaks)
  } else {
    Peaks <- subset(Peaks, Intensity >= MinIntensity)
    NumberPeaksPostFilter <- nrow(Peaks)
  }

  ##################
  ## BUILD OBJECT ##
  ##################

  # Keep track of the inputs in the attributes
  attr(Peaks, "pspecter") <- list(
    "MSPath" = attr(ScanMetadata, "pspecter")$MSPath,
    "ScanNumber" = ScanNumber,
    "MinimumAbundance" = MinAbundance,
    "TotalNumberPeaks" = TotalNumberPeaks,
    "NumberPeaksPostFilter" = NumberPeaksPostFilter,
    "PercentagePeaksRemain" = paste0(round(NumberPeaksPostFilter / TotalNumberPeaks * 100, digits = 2), "%")
  )

  # Assign the peak data class
  class(Peaks) <- c(class(Peaks), "peak_data")

  # Return Results
  return(Peaks)

}
EMSL-Computing/pspecterlib documentation built on Jan. 28, 2024, 8:13 p.m.