R/integrateFIR.R

Defines functions integrateFIR

Documented in integrateFIR

#' Integrate fallback integration regions
#'
#' Integrate region defined in FIR if a feature is not found
#'
#' @param rawSpec an \code{\link[MSnbase]{OnDiskMSnExp-class}}
#' @param FIR (data.frame) Fallback Integration Regions (FIR) to integrate when a feature is not found. Compounds as row are identical to the targeted features, columns are \code{rtMin} (float in seconds), \code{rtMax} (float in seconds), \code{mzMin} (float), \code{mzMax} (float)
#' @param foundPeakTable a \code{data.frame} as generated by \code{\link{findTargetFeatures}}, with features as rows and peak properties as columns. The following columns are mandatory: \code{found}, \code{is_filled}, \code{mz}, \code{mzMin}, \code{mzMax}, \code{rt}, \code{rtMin}, \code{rtMax}, \code{peakArea}, \code{maxIntMeasured}, \code{maxIntPredicted}.
#' @param verbose (bool) if TRUE message progress
#'
#' @return an updated foundPeakTable with FIR integration values
integrateFIR <- function(rawSpec, FIR, foundPeakTable, verbose=TRUE) {

  ## Check input
  if (dim(FIR)[1] != dim(foundPeakTable)[1]) {
    stop('Check input, FIR must have the same number of rows as foundPeakTable')
  }

  ## init
  stime             <- Sys.time()
  needsFilling      <- !(foundPeakTable$found)
  needsFilling_idx  <- seq(dim(foundPeakTable)[1])[needsFilling]
  outTable          <- foundPeakTable

  ## only run where replacement is needed
  if (sum(needsFilling) != 0) {

    if (verbose) { message(sum(needsFilling), " features to integrate with FIR") }

    # store results
    tmpResult   <- data.frame(matrix(vector(), sum(needsFilling), 9, dimnames=list(c(), c("mzMin", "mz", "mzMax", "rtMin", "rt", "rtMax", "peakArea", "maxIntMeasured", "maxIntPredicted"))))

    # extract data for all fallback windows from raw (list of windows)
    all_peakData    <- extractSignalRawData(rawSpec, mz = data.frame(mzMin=FIR$mzMin[needsFilling_idx], mzMax=FIR$mzMax[needsFilling_idx]), rt = data.frame(rtMin=FIR$rtMin[needsFilling_idx], rtMax=FIR$rtMax[needsFilling_idx]), verbose=verbose)
    
    ## iterate over features to integrate
    for (cnt in 1:length(needsFilling_idx)) {
      peakData  <- all_peakData[[cnt]]
      i         <- needsFilling_idx[cnt]
      
      # Only continue if a scan is found in the box
      if (dim(peakData)[1] != 0) {

        # scanDist is the mean distance in sec between scans (used for integral)
        rtRange   <- unique(peakData$rt)
        scanDist  <- diff(c(min(rtRange), max(rtRange))) / length(rtRange)

        # mzMin, mzMax, rtMin, rtMax (values taken from input)
        tmpResult[i, c("mzMin", "mzMax", "rtMin", "rtMax")]   <- FIR[i, c("mzMin", "mzMax", "rtMin", "rtMax")]

        # rt (rt of max intensity)
        tmpResult[i, "rt"]    <- peakData$rt[which.max(peakData$i)]

        # mz (weighted average of total intensity across all rt for each mz)
        # total intensity across rt for each mz
        mzRange               <- unique(peakData$mz)
        mzTotalIntensity      <- sapply(mzRange, function(x) {sum(peakData$i[peakData$mz == x])})
        # mz (is weighted average)
        tmpResult[i, "mz"]    <- stats::weighted.mean(mzRange, mzTotalIntensity)

        # maxIntMeasured (max intensity)
        tmpResult[i, "maxIntMeasured"]  <- max(peakData$i)
        # maxIntPredicted is NA (we don't have a fit)
        tmpResult[i, "maxIntPredicted"] <- as.numeric(NA)
        
        # into
        # max intensity across mz for each rt
        rtMaxIntensity  <- sapply(rtRange, function(x) {max(peakData$i[peakData$rt == x])})
        # peakArea is the max intensities summed over (discrete) rt, multiplied by the mean distance in sec between scans
        tmpResult[i, "peakArea"]  <- sum(rtMaxIntensity) * scanDist

      ## If no scan found in that region, return default values
      } else {

        if (verbose) { message("No scan present in the FIR # ", i, ": rt and mz are set as the middle of the FIR box; peakArea, maxIntMeasured and maxIntPredicted are set to 0") }

        tmpResult[i, c("mzMin", "mzMax", "rtMin", "rtMax")]   <- FIR[i, c("mzMin", "mzMax", "rtMin", "rtMax")]
        tmpResult[i, "rt"]              <- mean(c(FIR$rtMin[i], FIR$rtMax[i]))
        tmpResult[i, "mz"]              <- mean(c(FIR$mzMin[i], FIR$mzMax[i]))
        tmpResult[i, "peakArea"]        <- 0
        tmpResult[i, "maxIntMeasured"]  <- 0
        tmpResult[i, "maxIntPredicted"] <- 0
      }
    }

    # Replace results with FIR integration
    outTable[needsFilling_idx, c("mzMin", "mz", "mzMax", "rtMin", "rt", "rtMax", "peakArea", "maxIntMeasured", "maxIntPredicted")] <- tmpResult[needsFilling_idx, c("mzMin", "mz", "mzMax", "rtMin", "rt", "rtMax", "peakArea", "maxIntMeasured", "maxIntPredicted")]
    outTable$is_filled[needsFilling_idx]  <- TRUE
    outTable$found[needsFilling_idx]      <- TRUE
  }

  ## Output
  etime <- Sys.time()
  if (verbose) { message('FIR integrated in: ', round(as.double(difftime(etime,stime)),2),' ',units( difftime(etime,stime))) }

  return(outTable)
}

Try the peakPantheR package in your browser

Any scripts or data that you put into this service are public.

peakPantheR documentation built on May 1, 2019, 10:53 p.m.