Nothing
#' Calculate chromatographic peak properties
#'
#' Calculate the ppm error, retention time deviation, tailing factor and asymmetry factor for each measured feature.
#'
#' @param fittedCurves (list) A list (one entry per ROI window) of \code{peakPantheR_curveFit} or NA
#' @param targetFeatTable a \code{\link{data.frame}} of compounds to target as rows. Columns: \code{cpdID} (str), \code{cpdName} (str), \code{rtMin} (float in seconds), \code{rt} (float in seconds, or \emph{NA}), \code{rtMax} (float in seconds), \code{mzMin} (float), \code{mz} (float or \emph{NA}), \code{mzMax} (float).
#' @param foundPeakTable a \code{data.frame} as generated by \code{\link{findTargetFeatures}}, with features as rows and peak properties as columns. The following columns are mandatory: \code{mzMin}, \code{mz}, \code{mzMax}, \code{rtMin}, \code{rt}, \code{rtMax}
#' @param verbose (bool) if TRUE message when NA scans are removed
#'
#' @return A \code{data.frame} with measured compounds as rows and measurements and properties as columns (see Details).
#'
#' \subsection{Details:}{
#' The returned \code{data.frame} is structured as follow:
#' \tabular{ll}{
#' found \tab was the peak found\cr
#' rt \tab retention time of peak apex (sec)\cr
#' rtMin \tab leading edge of peak retention time (sec) determined at 0.5\% of apex intensity\cr
#' rtMax \tab trailing edge of peak retention time (sec) determined at 0.5\% of apex intensity\cr
#' mz \tab weighted (by intensity) mean of peak m/z across scans\cr
#' mzMin \tab m/z peak minimum (between rtMin, rtMax)\cr
#' mzMax \tab m/z peak maximum (between rtMin, rtMax)\cr
#' peakArea \tab integrated peak area\cr
#' maxIntMeasured \tab maximum peak intensity in raw data\cr
#' maxIntPredicted \tab maximum peak intensity based on curve fit\cr
#' ppm_error \tab difference in ppm between the expected and measured m/z\cr
#' rt_dev_sec \tab difference in seconds between the expected and measured rt\cr
#' tailingFactor \tab the tailing factor is a measure of peak tailing. It is defined as the distance from the front slope of the peak to the back slope divided by twice the distance from the center line of the peak to the front slope, with all measurements made at 5\% of the maximum peak height. The tailing factor of a peak will typically be similar to the asymmetry factor for the same peak, but the two values cannot be directly converted\cr
#' asymmetryFactor \tab the asymmetry factor is a measure of peak tailing. It is defined as the distance from the center line of the peak to the back slope divided by the distance from the center line of the peak to the front slope, with all measurements made at 10\% of the maximum peak height. The asymmetry factor of a peak will typically be similar to the tailing factor for the same peak, but the two values cannot be directly converted\cr
#' }
#' }
#'
#' @examples
#' \dontrun{
#' # fittedCurve
#' cFit1 <- list(amplitude=162404.8057918259, center=3341.888,
#' sigma=0.078786133031045896, gamma=0.0018336101984172684,
#' fitStatus=2, curveModel="skewedGaussian")
#' class(cFit1) <- 'peakPantheR_curveFit'
#' cFit2 <- list(amplitude=199249.10572753669, center=3382.577,
#' sigma=0.074904415304607966, gamma=0.0011471899372353885,
#' fitStatus=2, curveModel="skewedGaussian")
#' class(cFit2) <- 'peakPantheR_curveFit'
#' input_fitCurves <- list(cFit1, cFit2)
#'
# # ROI
#' input_ROI <- data.frame(matrix(vector(), 2, 8, dimnames=list(c(), c("cpdID",
#' "cpdName", "rtMin", "rt", "rtMax", "mzMin", "mz", "mzMax"))),
#' stringsAsFactors=F)
#' input_ROI[1,] <- c("ID-1", "testCpd 1", 3310., 3344.88, 3390., 522.19, 522.2, 522.21)
#' input_ROI[2,] <- c("ID-2", "testCpd 2", 3280., 3385.58, 3440., 496.19, 496.2, 496.21)
#' input_ROI[,3:8] <- sapply(input_ROI[,3:8], as.numeric)
#'
#' # foundPeakTable
#' input_foundPeakTable <- data.frame(matrix(vector(), 2, 10, dimnames=list(c(),
#' c("found", "rtMin", "rt", "rtMax", "mzMin", "mz", "mzMax",
#' "peakArea", "maxIntMeasured", "maxIntPredicted"))),
#' stringsAsFactors=F)
#' input_foundPeakTable[1,] <- c(TRUE, 3309.758, 3346.827, 3385.410, 522.19, 522.2, 522.21,
#' 26133726, 889280, 901015)
#' input_foundPeakTable[2,] <- c(TRUE, 3345.376, 3386.529, 3428.279, 496.19, 496.2, 496.21,
#' 35472141, 1128960, 1113576)
#' input_foundPeakTable[,1] <- sapply(input_foundPeakTable[,c(1)], as.logical)
#'
#' # Run peak statistics
#' peakStatistics <- getTargetFeatureStatistic(input_fitCurves, input_ROI, input_foundPeakTable)
#' peakStatistics
#' # found rtMin rt rtMax mzMin mz mzMax peakArea maxIntMeasured maxIntPredicted
#' # 1 TRUE 3309.758 3346.827 3385.410 522.19 522.2 522.21 26133726 889280 901015
#' # 2 TRUE 3345.376 3386.529 3428.279 496.19 496.2 496.21 35472141 1128960 1113576
#' # ppm_error rt_dev_sec tailingFactor asymmetryFactor
#' # 1 0 1.947 1.015385 1.026886
#' # 2 0 0.949 1.005372 1.009304
#' }
getTargetFeatureStatistic <- function(fittedCurves, targetFeatTable, foundPeakTable, verbose=FALSE) {
## Define the peak_shape_stat() -----------------------
#
# Calculate tailing factor and asymmetry factor
#
# Tailing Factor and Asymmetry Factor following the equations from http://www.chromforum.org/viewtopic.php?t=20079
#
# @param fittedCurve A \code{peakPantheR_curveFit} for the feature of interest or NA
# @param apexRT (float) retention time in seconds of the measured peak apex
# @param rtMin (float) leading edge retention time in seconds (at 0.5%)
# @param rtMax (float) tailing edge retention time in seconds (at 0.5%)
# @param statistic (str) either \code{tailingFactor} or \code{asymmetryFactor}
# @param sampling (int) Number of points to employ when subsampling the fittedCurve between rtMin and rtMax
#
# @return Tailing factor or Asymmetry factor value
#
peak_shape_stat <- function(fittedCurve, apexRT, rtMin, rtMax, statistic="tailingFactor", sampling=250){
# Check input
if (is.na(apexRT)) { return(as.numeric(NA)) }
if (is.na(rtMin)) { return(as.numeric(NA)) }
if (is.na(rtMax)) { return(as.numeric(NA)) }
if (all(is.na(fittedCurve))) { return(as.numeric(NA)) }
# A B and C are retention time in seconds. C is RT of the apex, A is left side as x%, B is right side at x%. x% (cutoff) depends of the statistic employed.
## Find position of A and B
cutoff <- switch(statistic, tailingFactor=0.05, asymmetryFactor=0.1)
cutoff_int <- cutoff * predictCurve(fittedCurve, x=apexRT) # intensity at cutoff pt of interest
## A side
# reverse order for up slope
A_grid_rt <- seq(from=apexRT, to=rtMin, by=((rtMin-apexRT)/(sampling-1)))
A_slope_int <- predictCurve(fittedCurve, x=A_grid_rt)
# pos of 1st point past cutoff
A_cutoff_pt <- match(-1,sign(A_slope_int - cutoff_int))
# should not be NA as rtMin Max is at 0.5% while cutoff is 5 or 10%
if (is.na(A_cutoff_pt)) { return(NA) }
# points left and right from pt of interest
A_key_pt <- c(A_cutoff_pt-1, A_cutoff_pt)
# linear interpolation of exact rt
A <- stats::approx(x=A_slope_int[A_key_pt], y=A_grid_rt[A_key_pt], xout=cutoff_int)$y
## B side
B_grid_rt <- seq(from=apexRT, to=rtMax, by=((rtMax-apexRT)/(sampling-1)))
B_slope_int <- predictCurve(fittedCurve, x=B_grid_rt)
# pos of 1st point past cutoff
B_cutoff_pt <- match(-1,sign(B_slope_int - cutoff_int))
# should not be NA as rtMin Max is at 0.5% while cutoff is 5 or 10%
if (is.na(B_cutoff_pt)) { return(NA) }
# points left and right from pt of interest
B_key_pt <- c(B_cutoff_pt-1, B_cutoff_pt)
# linear interpolation of exact rt
B <- stats::approx(x=B_slope_int[B_key_pt], y=B_grid_rt[B_key_pt], xout=cutoff_int)$y
## C is apexRT
C <- apexRT
## Peak statistic
if(statistic=="tailingFactor") {
return( (B-A)/(2*(C-A)) )
}
if(statistic=="asymmetryFactor") {
return( (B-C)/(C-A) )
}
}
## ----------------------------------------------------
stime <- Sys.time()
## Check input
if (dim(targetFeatTable)[1] != dim(foundPeakTable)[1]) {
stop('Number of features in "targetFeatTable" (', dim(targetFeatTable)[1],') and "foundPeakTable" (', dim(foundPeakTable)[1], ') do not match!')
}
if (length(fittedCurves) != dim(foundPeakTable)[1]) {
stop('Number of fitted curves in "fittedCurves" (', length(fittedCurves),') and number of features in "foundPeakTable" (', dim(foundPeakTable)[1], ') do not match!')
}
## Calculate the statistics
peakStat <- data.frame(matrix(vector(), dim(targetFeatTable)[1], 4, dimnames=list(c(), c("ppm_error", "rt_dev_sec", "tailingFactor", "asymmetryFactor"))), stringsAsFactors=F)
for (i in 1:dim(targetFeatTable)[1]) {
# If the feature wasn't found we cannot work with it
if (foundPeakTable$found[i]) {
# ppm_error
if (!is.na(targetFeatTable$mz[i])) {
peakStat$ppm_error[i] <- (abs(foundPeakTable$mz[i] - targetFeatTable$mz[i])/targetFeatTable$mz[i])*1E6
}
# rt_dev_sec
if (!is.na(targetFeatTable$rt[i])) {
peakStat$rt_dev_sec[i] <- abs(foundPeakTable$rt[i] - targetFeatTable$rt[i])
}
# Tailing Factor
peakStat$tailingFactor[i] <- peak_shape_stat(fittedCurves[[i]], foundPeakTable$rt[i], foundPeakTable$rtMin[i], foundPeakTable$rtMax[i], statistic = "tailingFactor")
# Asymmetry Factor
peakStat$asymmetryFactor[i] <- peak_shape_stat(fittedCurves[[i]], foundPeakTable$rt[i], foundPeakTable$rtMin[i], foundPeakTable$rtMax[i], statistic = "asymmetryFactor")
}
}
## group the results and return
finalTable <- cbind.data.frame(foundPeakTable, peakStat)
etime <- Sys.time()
if (verbose) {
message('Peak statistics done in: ', round(as.double(difftime(etime,stime)),2),' ',units( difftime(etime,stime)))
}
return( finalTable )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.