R/integrateFIR.R

Defines functions integrateFIR_features integrateFIR

Documented in integrateFIR

#' @title Integrate fallback integration regions
#'
#' @description Integrate region defined in FIR if a feature is not found
#'
#' @param rawSpec an \code{\link[MSnbase]{OnDiskMSnExp-class}}
#' @param FIR (data.frame) Fallback Integration Regions (FIR) to integrate when
#' a feature is not found. Compounds as row are identical to the targeted
#' features, columns are \code{rtMin} (float in seconds), \code{rtMax} (float in
#' seconds), \code{mzMin} (float), \code{mzMax} (float)
#' @param foundPeakTable a \code{data.frame} as generated by
#' \link{findTargetFeatures}, with features as rows and peak properties as
#' columns. The following columns are mandatory: \code{found}, \code{is_filled},
#' \code{mz}, \code{mzMin}, \code{mzMax}, \code{rt}, \code{rtMin}, \code{rtMax},
#' \code{peakArea}, \code{peakAreaRaw}, \code{maxIntMeasured},
#' \code{maxIntPredicted}.
#' @param verbose (bool) if TRUE message progress
#'
#' @return an updated foundPeakTable with FIR integration values
integrateFIR <- function(rawSpec, FIR, foundPeakTable, verbose = TRUE) {
    # Check input
    if (dim(FIR)[1] != dim(foundPeakTable)[1]) {
        stop("Check input, FIR must have the same number of rows as ",
            "foundPeakTable") }
    # init
    stime               <- Sys.time()
    needsFilling        <- !(foundPeakTable$found)
    needsFilling_idx    <- seq(dim(foundPeakTable)[1])[needsFilling]
    outTable            <- foundPeakTable

    # only run where replacement is needed
    if (sum(needsFilling) != 0) { if (verbose) {
            message(sum(needsFilling), " features to integrate with FIR") }
        # store results
        tmpResult <- data.frame(matrix(vector(), sum(needsFilling), 10,
                                dimnames = list(c(),
                                                c("mzMin", "mz", "mzMax",
                                                "rtMin", "rt", "rtMax",
                                                "peakArea", "peakAreaRaw",
                                                "maxIntMeasured",
                                                "maxIntPredicted"))))
        # extract data for all fallback windows from raw (list of windows)
        all_peakData <- extractSignalRawData(rawSpec,
            mz = data.frame(mzMin = FIR$mzMin[needsFilling_idx],
                mzMax = FIR$mzMax[needsFilling_idx]),
            rt = data.frame(rtMin = FIR$rtMin[needsFilling_idx],
                rtMax = FIR$rtMax[needsFilling_idx]),
            verbose = verbose)
        # iterate over features to integrate
        tmpResult <- integrateFIR_features(needsFilling_idx, all_peakData, FIR,
                                            tmpResult, verbose)
        # Replace results with FIR integration
        outTable[needsFilling_idx,
            c("mzMin", "mz", "mzMax", "rtMin", "rt", "rtMax", "peakArea",
            "peakAreaRaw", "maxIntMeasured", "maxIntPredicted")] <-
            tmpResult[needsFilling_idx, c("mzMin", "mz", "mzMax", "rtMin", "rt",
            "rtMax", "peakArea", "peakAreaRaw", "maxIntMeasured",
            "maxIntPredicted")]
        outTable$is_filled[needsFilling_idx] <- TRUE
        outTable$found[needsFilling_idx] <- TRUE }

    # Output
    etime <- Sys.time()
    if (verbose) {
        message("FIR integrated in: ",round(as.double(difftime(etime,stime)),2),
                " ", units(difftime(etime, stime)))
    }
    return(outTable)
}

# -----------------------------------------------------------------------------
# integrateFIR helper functions

# iterate over features to integrate
integrateFIR_features <- function(needsFilling_idx, all_peakData, FIR,
                                    tmpResult, verbose){
    for (cnt in seq_len(length(needsFilling_idx))) {
        peakData    <- all_peakData[[cnt]]
        i           <- needsFilling_idx[cnt]

        if (dim(peakData)[1] != 0){ #Only continue if a scan is found in the box
            rtRange <- unique(peakData$rt)
            # mzMin, mzMax, rtMin, rtMax (values taken from input)
            tmpResult[i, c("mzMin", "mzMax", "rtMin", "rtMax")] <- FIR[i,
                c("mzMin", "mzMax", "rtMin", "rtMax")]
            # rt (rt of max intensity)
            tmpResult[i, "rt"] <- peakData$rt[which.max(peakData$i)]
            # mz (weighted average of total intensity across all rt for each mz)
            # total intensity across rt for each mz
            mzRange <- unique(peakData$mz)
            mzTotalIntensity <- vapply(mzRange, function(x) {
            sum(peakData$i[peakData$mz == x])}, FUN.VALUE = numeric(1))
            # mz (is weighted average)
            tmpResult[i, "mz"] <- stats::weighted.mean(mzRange,mzTotalIntensity)
            # maxIntMeasured (max intensity)
            tmpResult[i, "maxIntMeasured"] <- max(peakData$i)
            # maxIntPredicted is NA (we don't have a fit)
            tmpResult[i, "maxIntPredicted"] <- as.numeric(NA)
            # into max intensity across mz for each rt
            rtMaxIntensity <- vapply(rtRange, function(x) {
            max(peakData$i[peakData$rt == x])}, FUN.VALUE = numeric(1))
            # peakArea/peakAreaRaw calculated trapezoid rule
            peakArea <- pracma::trapz(x=rtRange, y=rtMaxIntensity)
            tmpResult[i, "peakArea"] <- peakArea
            tmpResult[i, "peakAreaRaw"] <- peakArea

        } else { # If no scan found in that region, return default values
            if (verbose) { message('No scan present in the FIR # ', i,
                            ': rt and mz are set as the middle of the FIR box;',
                            ' peakArea, maxIntMeasured and maxIntPredicted are',
                            ' set to 0') }
            tmpResult[i, c("mzMin", "mzMax", "rtMin", "rtMax")] <- FIR[i,
                c("mzMin", "mzMax", "rtMin", "rtMax")]
            tmpResult[i, "rt"] <- mean(c(FIR$rtMin[i], FIR$rtMax[i]))
            tmpResult[i, "mz"] <- mean(c(FIR$mzMin[i], FIR$mzMax[i]))
            tmpResult[i, "peakArea"] <- 0
            tmpResult[i, "peakAreaRaw"] <- 0
            tmpResult[i, "maxIntMeasured"] <- 0
            tmpResult[i, "maxIntPredicted"] <- 0  }
    }
    return(tmpResult)
}
phenomecentre/peakPantheR documentation built on Feb. 29, 2024, 9:07 p.m.