#' @title Calculate chromatographic peak properties
#'
#' @description Calculate the ppm error, retention time deviation, tailing
#' factor and asymmetry factor for each measured feature.
#'
#' @param fittedCurves (list) A list (one entry per ROI window) of
#' \code{peakPantheR_curveFit} or NA
#' @param targetFeatTable a \code{\link{data.frame}} of compounds to target as
#' rows. Columns: \code{cpdID} (str), \code{cpdName} (str), \code{rtMin} (float
#' in seconds), \code{rt} (float in seconds, or \emph{NA}), \code{rtMax} (float
#' in seconds), \code{mzMin} (float), \code{mz} (float or \emph{NA}),
#' \code{mzMax} (float).
#' @param foundPeakTable a \code{data.frame} as generated by
#' \code{\link{findTargetFeatures}}, with features as rows and peak properties
#' as columns. The following columns are mandatory: \code{mzMin}, \code{mz},
#' \code{mzMax}, \code{rtMin}, \code{rt}, \code{rtMax}
#' @param verbose (bool) if TRUE message when NA scans are removed
#'
#' @return A \code{data.frame} with measured compounds as rows and measurements
#' and properties as columns (see Details).
#'
#' \subsection{Details:}{
#' The returned \code{data.frame} is structured as follow:
#' \tabular{ll}{
#' found \tab was the peak found\cr
#' rt \tab retention time of peak apex (sec)\cr
#' rtMin \tab leading edge of peak retention time (sec) determined at 0.5\% of
#' apex intensity\cr
#' rtMax \tab trailing edge of peak retention time (sec) determined at 0.5\% of
#' apex intensity\cr
#' mz \tab weighted (by intensity) mean of peak m/z across scans\cr
#' mzMin \tab m/z peak minimum (between rtMin, rtMax)\cr
#' mzMax \tab m/z peak maximum (between rtMin, rtMax)\cr
#' peakArea \tab integrated peak area\cr
#' peakAreaRaw \tab integrated peak area from raw data points\cr
#' maxIntMeasured \tab maximum peak intensity in raw data\cr
#' maxIntPredicted \tab maximum peak intensity based on curve fit\cr
#' ppm_error \tab difference in ppm between the expected and measured m/z\cr
#' rt_dev_sec \tab difference in seconds between the expected and measured rt\cr
#' tailingFactor \tab the tailing factor is a measure of peak tailing. It is
#' defined as the distance from the front slope of the peak to the back slope
#' divided by twice the distance from the center line of the peak to the front
#' slope, with all measurements made at 5\% of the maximum peak height. The
#' tailing factor of a peak will typically be similar to the asymmetry factor
#' for the same peak, but the two values cannot be directly converted\cr
#' asymmetryFactor \tab the asymmetry factor is a measure of peak tailing. It is
#' defined as the distance from the center line of the peak to the back slope
#' divided by the distance from the center line of the peak to the front slope,
#' with all measurements made at 10\% of the maximum peak height. The asymmetry
#' factor of a peak will typically be similar to the tailing factor for the same
#' peak, but the two values cannot be directly converted\cr
#' }
#' }
#'
#' @details
#' ## Examples cannot be computed as the function is not exported:
#' # fittedCurve
#' cFit1 <- list(amplitude=162404.8057918259, center=3341.888,
#' sigma=0.078786133031045896, gamma=0.0018336101984172684,
#' fitStatus=2, curveModel='skewedGaussian')
#' class(cFit1) <- 'peakPantheR_curveFit'
#' cFit2 <- list(amplitude=199249.10572753669, center=3382.577,
#' sigma=0.074904415304607966, gamma=0.0011471899372353885,
#' fitStatus=2, curveModel='skewedGaussian')
#' class(cFit2) <- 'peakPantheR_curveFit'
#' input_fitCurves <- list(cFit1, cFit2)
#'
#' # ROI
#' input_ROI <- data.frame(matrix(vector(), 2, 8, dimnames=list(c(), c('cpdID',
#' 'cpdName', 'rtMin', 'rt', 'rtMax', 'mzMin', 'mz', 'mzMax'))),
#' stringsAsFactors=FALSE)
#' input_ROI[1,] <- c('ID-1', 'testCpd 1', 3310., 3344.88, 3390., 522.19, 522.2,
#' 522.21)
#' input_ROI[2,] <- c('ID-2', 'testCpd 2', 3280., 3385.58, 3440., 496.19, 496.2,
#' 496.21)
#' input_ROI[,3:8] <- vapply(input_ROI[,3:8], as.numeric, FUN.VALUE=numeric(2))
#'
#' # foundPeakTable
#' input_foundPeakTable <- data.frame(matrix(vector(), 2, 10, dimnames=list(c(),
#' c('found', 'rtMin', 'rt', 'rtMax', 'mzMin', 'mz',
#' 'mzMax','peakArea','maxIntMeasured','maxIntPredicted'))),
#' stringsAsFactors=FALSE)
#' input_foundPeakTable[1,] <- c(TRUE, 3309.758, 3346.827, 3385.410, 522.19,
#' 522.2, 522.21, 26133726, 889280, 901015)
#' input_foundPeakTable[2,] <- c(TRUE, 3345.376, 3386.529, 3428.279, 496.19,
#' 496.2, 496.21, 35472141, 1128960, 1113576)
#' input_foundPeakTable[,1] <- vapply(input_foundPeakTable[,c(1)], as.logical,
#' FUN.VALUE=logical(1))
#'
#' # Run peak statistics
#' peakStatistics <- getTargetFeatureStatistic(input_fitCurves, input_ROI,
#' input_foundPeakTable)
#' peakStatistics
#' # found rtMin rt rtMax mzMin mz mzMax peakArea
#' # 1 TRUE 3309.758 3346.827 3385.410 522.19 522.2 522.21 26133726
#' # 2 TRUE 3345.376 3386.529 3428.279 496.19 496.2 496.21 35472141
#' # maxIntMeasured maxIntPredicted ppm_error rt_dev_sec tailingFactor
#' # 1 889280 901015 0 1.947 1.015385
#' # 2 1128960 1113576 0 0.949 1.005372
#' # asymmetryFactor
#' # 1 1.026886
#' # 2 1.009304
getTargetFeatureStatistic <- function(fittedCurves, targetFeatTable,
foundPeakTable, verbose = FALSE) {
stime <- Sys.time()
# Check input
if (dim(targetFeatTable)[1] != dim(foundPeakTable)[1]) {
stop("Number of features in \"targetFeatTable\" (",
dim(targetFeatTable)[1], ") and \"foundPeakTable\" (",
dim(foundPeakTable)[1], ") do not match!") }
if (length(fittedCurves) != dim(foundPeakTable)[1]) {
stop("Number of fitted curves in \"fittedCurves\" (",
length(fittedCurves),
") and number of features in \"foundPeakTable\" (",
dim(foundPeakTable)[1], ") do not match!") }
# Calculate the statistics
peakStat <- data.frame(matrix(vector(), dim(targetFeatTable)[1], 4,
dimnames = list(c(), c("ppm_error", "rt_dev_sec",
"tailingFactor",
"asymmetryFactor"))),
stringsAsFactors = FALSE)
# only work with found features
for (i in seq_len(dim(targetFeatTable)[1])) {
if (foundPeakTable$found[i]) {
# ppm_error
if (!is.na(targetFeatTable$mz[i])) {
peakStat$ppm_error[i] <- (abs(foundPeakTable$mz[i] -
targetFeatTable$mz[i])/targetFeatTable$mz[i]) * 1e+06 }
# rt_dev_sec
if (!is.na(targetFeatTable$rt[i])) {
peakStat$rt_dev_sec[i] <- foundPeakTable$rt[i] -
targetFeatTable$rt[i] }
# Tailing Factor
peakStat$tailingFactor[i] <- peak_shape_stat(fittedCurves[[i]],
foundPeakTable$rt[i],
foundPeakTable$rtMin[i],
foundPeakTable$rtMax[i],
statistic = "tailingFactor")
# Asymmetry Factor
peakStat$asymmetryFactor[i] <- peak_shape_stat(fittedCurves[[i]],
foundPeakTable$rt[i],
foundPeakTable$rtMin[i],
foundPeakTable$rtMax[i],
statistic = "asymmetryFactor")}}
# group the results and return
finalTable <- cbind.data.frame(foundPeakTable, peakStat)
etime <- Sys.time()
if (verbose) { message("Peak statistics done in: ",
round(as.double(difftime(etime, stime)), 2),
" ", units(difftime(etime, stime))) }
return(finalTable)
}
# -----------------------------------------------------------------------------
# getTargetFeatureStatistic helper functions
## Define the peak_shape_stat()
#
# Calculate tailing factor and asymmetry factor
#
# Tailing Factor and Asymmetry Factor following the equations from
# http://www.chromforum.org/viewtopic.php?t=20079
#
# @param fittedCurve A \code{peakPantheR_curveFit} for the feature of
# interest or NA
# @param apexRT (float) retention time in seconds of the measured peak apex
# @param rtMin (float) leading edge retention time in seconds (at 0.5%)
# @param rtMax (float) tailing edge retention time in seconds (at 0.5%)
# @param statistic (str) either \code{tailingFactor} or
# \code{asymmetryFactor}
# @param sampling (int) Number of points to employ when subsampling the
# fittedCurve between rtMin and rtMax @return Tailing factor or Asymmetry
# factor value
peak_shape_stat <- function(fittedCurve, apexRT, rtMin, rtMax,
statistic = "tailingFactor", sampling = 250) {
# Check input
if (is.na(apexRT)) { return(as.numeric(NA)) }
if (is.na(rtMin)) { return(as.numeric(NA)) }
if (is.na(rtMax)) { return(as.numeric(NA)) }
if (all(is.na(fittedCurve))) { return(as.numeric(NA)) }
# A B and C are retention time in seconds. C is RT of the apex, A is
# left side at x%, B is right side at x%.
# x% (cutoff) depends of the statistic employed.
## Find position of A and B
cutoff <- switch(statistic, tailingFactor = 0.05, asymmetryFactor = 0.1)
cutoff_int <- cutoff * predictCurve(fittedCurve, x = apexRT)
# intensity at cutoff pt of interest
## A side
# reverse order for up slope
A_grid_rt <- seq(from = apexRT, to = rtMin,
by = ((rtMin - apexRT)/(sampling - 1)))
A_slope_int <- predictCurve(fittedCurve, x = A_grid_rt)
# pos of 1st point past cutoff
A_cutoff_pt <- match(-1, sign(A_slope_int - cutoff_int))
# should not be NA as rtMin Max is at 0.5% while cutoff is 5 or 10%
if (is.na(A_cutoff_pt)) { return(NA) }
# points left and right from pt of interest
A_key_pt <- c(A_cutoff_pt - 1, A_cutoff_pt)
# linear interpolation of exact rt
A <- stats::approx(x = A_slope_int[A_key_pt],
y = A_grid_rt[A_key_pt],
xout = cutoff_int)$y
## B side
B_grid_rt <- seq(from = apexRT, to = rtMax,
by = ((rtMax - apexRT)/(sampling - 1)))
B_slope_int <- predictCurve(fittedCurve, x = B_grid_rt)
# pos of 1st point past cutoff
B_cutoff_pt <- match(-1, sign(B_slope_int - cutoff_int))
# should not be NA as rtMin Max is at 0.5% while cutoff is 5 or 10%
if (is.na(B_cutoff_pt)) { return(NA) }
# points left and right from pt of interest
B_key_pt <- c(B_cutoff_pt - 1, B_cutoff_pt)
# linear interpolation of exact rt
B <- stats::approx(x = B_slope_int[B_key_pt],
y = B_grid_rt[B_key_pt],
xout = cutoff_int)$y
## C is apexRT
C <- apexRT
## Peak statistic
if (statistic == "tailingFactor") { return((B - A)/(2 * (C - A))) }
if (statistic == "asymmetryFactor") { return((B - C)/(C - A)) }
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.