R/integrateFIR.R
In peakPantheR: Peak Picking and Annotation of High Resolution Experiments

Documented in integrateFIR

#' @title Integrate fallback integration regions
#'
#' @description Integrate region defined in FIR if a feature is not found
#'
#' @param rawSpec an \code{\link[MSnbase]{OnDiskMSnExp-class}}
#' @param FIR (data.frame) Fallback Integration Regions (FIR) to integrate when
#' a feature is not found. Compounds as row are identical to the targeted
#' features, columns are \code{rtMin} (float in seconds), \code{rtMax} (float in
#' seconds), \code{mzMin} (float), \code{mzMax} (float)
#' @param foundPeakTable a \code{data.frame} as generated by
#' \link{findTargetFeatures}, with features as rows and peak properties as
#' columns. The following columns are mandatory: \code{found}, \code{is_filled},
#' \code{mz}, \code{mzMin}, \code{mzMax}, \code{rt}, \code{rtMin}, \code{rtMax},
#' \code{peakArea}, \code{maxIntMeasured}, \code{maxIntPredicted}.
#' @param verbose (bool) if TRUE message progress
#'
#' @return an updated foundPeakTable with FIR integration values
integrateFIR <- function(rawSpec, FIR, foundPeakTable, verbose = TRUE) {
    # Check input
    if (dim(FIR)[1] != dim(foundPeakTable)[1]) {
        stop("Check input, FIR must have the same number of rows as ",
            "foundPeakTable") }
    # init
    stime               <- Sys.time()
    needsFilling        <- !(foundPeakTable$found)
    needsFilling_idx    <- seq(dim(foundPeakTable)[1])[needsFilling]
    outTable            <- foundPeakTable

    # only run where replacement is needed
    if (sum(needsFilling) != 0) {
        if (verbose) {
            message(sum(needsFilling), " features to integrate with FIR") }
        # store results
        tmpResult <- data.frame(matrix(vector(), sum(needsFilling), 9,
                                dimnames = list(c(),
                                                c("mzMin", "mz", "mzMax",
                                                "rtMin", "rt", "rtMax",
                                                "peakArea", "maxIntMeasured",
                                                "maxIntPredicted"))))
        # extract data for all fallback windows from raw (list of windows)
        all_peakData <- extractSignalRawData(rawSpec,
            mz = data.frame(mzMin = FIR$mzMin[needsFilling_idx],
                mzMax = FIR$mzMax[needsFilling_idx]),
            rt = data.frame(rtMin = FIR$rtMin[needsFilling_idx],
                rtMax = FIR$rtMax[needsFilling_idx]),
            verbose = verbose)
        # iterate over features to integrate
        tmpResult <- integrateFIR_features(needsFilling_idx, all_peakData, FIR,
                                            tmpResult, verbose)
        # Replace results with FIR integration
        outTable[needsFilling_idx,
            c("mzMin", "mz", "mzMax", "rtMin", "rt", "rtMax", "peakArea",
            "maxIntMeasured", "maxIntPredicted")] <- tmpResult[needsFilling_idx,
            c("mzMin", "mz", "mzMax", "rtMin", "rt", "rtMax", "peakArea",
            "maxIntMeasured",  "maxIntPredicted")]
        outTable$is_filled[needsFilling_idx] <- TRUE
        outTable$found[needsFilling_idx] <- TRUE
    }

    # Output
    etime <- Sys.time()
    if (verbose) {
        message("FIR integrated in: ",round(as.double(difftime(etime,stime)),2),
                " ", units(difftime(etime, stime)))
    }
    return(outTable)
}


# -----------------------------------------------------------------------------
# integrateFIR helper functions

# iterate over features to integrate
integrateFIR_features <- function(needsFilling_idx, all_peakData, FIR,
                                    tmpResult, verbose){
    for (cnt in seq_len(length(needsFilling_idx))) {
        peakData    <- all_peakData[[cnt]]
        i           <- needsFilling_idx[cnt]

        if (dim(peakData)[1] != 0){ #Only continue if a scan is found in the box
            # scanDist is the mean distance in sec between scans(used for integ)
            rtRange <- unique(peakData$rt)
            scanDist <- diff(c(min(rtRange), max(rtRange)))/length(rtRange)
            # mzMin, mzMax, rtMin, rtMax (values taken from input)
            tmpResult[i, c("mzMin", "mzMax", "rtMin", "rtMax")] <- FIR[i,
                c("mzMin", "mzMax", "rtMin", "rtMax")]
            # rt (rt of max intensity)
            tmpResult[i, "rt"] <- peakData$rt[which.max(peakData$i)]
            # mz (weighted average of total intensity across all rt for each mz)
            # total intensity across rt for each mz
            mzRange <- unique(peakData$mz)
            mzTotalIntensity <- vapply(mzRange, function(x) {
                sum(peakData$i[peakData$mz == x])}, FUN.VALUE = numeric(1))
            # mz (is weighted average)
            tmpResult[i, "mz"] <- stats::weighted.mean(mzRange,mzTotalIntensity)
            # maxIntMeasured (max intensity)
            tmpResult[i, "maxIntMeasured"] <- max(peakData$i)
            # maxIntPredicted is NA (we don't have a fit)
            tmpResult[i, "maxIntPredicted"] <- as.numeric(NA)
            # into max intensity across mz for each rt
            rtMaxIntensity <- vapply(rtRange, function(x) {
                max(peakData$i[peakData$rt == x])}, FUN.VALUE = numeric(1))
            # peakArea is the max intensities summed over (discrete) rt,
            # multiplied by the mean distance in sec between scans
            tmpResult[i, "peakArea"] <- sum(rtMaxIntensity) * scanDist

        } else { # If no scan found in that region, return default values
            if (verbose) { message('No scan present in the FIR # ', i,
                            ': rt and mz are set as the middle of the FIR box;',
                            ' peakArea, maxIntMeasured and maxIntPredicted are',
                            ' set to 0') }

            tmpResult[i, c("mzMin", "mzMax", "rtMin", "rtMax")] <- FIR[i,
                c("mzMin", "mzMax", "rtMin", "rtMax")]
            tmpResult[i, "rt"] <- mean(c(FIR$rtMin[i], FIR$rtMax[i]))
            tmpResult[i, "mz"] <- mean(c(FIR$mzMin[i], FIR$mzMax[i]))
            tmpResult[i, "peakArea"] <- 0
            tmpResult[i, "maxIntMeasured"] <- 0
            tmpResult[i, "maxIntPredicted"] <- 0
        }
    }
    return(tmpResult)
}