#' potentially problematic peak identification
#' @param object an 'AdductQuantif' class object
#' @param nTimesMad numeric number of median absolute
#' deviations to identify potential
#' problem peaks.
#' @param metrics character string column names of metrics with
#' which to identify
#' potential problem peaks or a list with individual nTimesMad arguments
#' and with
#' list element names corresponding to column names of metrics.
#' @param ... further arguments to \code{\link{mad}}
#' @usage probPeaks(object = NULL, nTimesMad = 3,
#' metrics = c("nMadDotProdDistN",
#' "nMadSkewness", "nMadKurtosis", "nMadRtGroupDev",
#' "nMadPeakArea", "duplicates"))
#' @return 'AdductQuantif' class object
probPeaks <- function(object = NULL,
nTimesMad = 3,
metrics = c(
"nMadDotProdDistN",
"nMadSkewness",
"nMadKurtosis",
"nMadRtGroupDev",
"nMadPeakArea",
"duplicates"
)) {
# error handling
if (is.null(object)) {
stop("argument object is missing with no default.")
} else if (!is(object, 'AdductQuantif')) {
stop("argument object is not AdductQuantif class object.")
}
allMetricColNames <- c(
"nMadDotProdDistN",
"nMadSkewness",
"nMadKurtosis",
"nMadRtGroupDev",
"nMadPeakArea",
"duplicates"
)
if (!is.list(metrics)) {
if (!all(metrics %in% allMetricColNames)) {
stop(
"metrics argument name(s) must be one
or a combination of:\n1. nMadDotProdDistN\n2. nMadSkewness\n3.
nMadKurtosis\n4. nMadRtGroupDev\n5. nMadPeakArea\n"
)
}
} else {
if (!all(names(metrics) %in% allMetricColNames)) {
stop(
"metrics list name(s) must be one or a combination of:\n1.
nMadDotProdDistN\n2.
nMadSkewness\n3. nMadKurtosis\n4. nMadRtGroupDev\n5.
nMadPeakArea\n6. duplicates\n"
)
}
nTimesMad <- as.numeric(unlist(metrics))
metrics <- names(metrics)
}
indxTmp <- as.numeric(peakQuantTable(object)[, "peakArea"]) != 0
resultsTmp <- matrix(0, ncol = 11, nrow = length(indxTmp))
colnames(resultsTmp) <- c(
"dotProdDistN",
"skewness",
"kurtosis",
"nMadRtGroupDev",
"nMadPeakArea",
"nMadDotProdDistN",
"nMadSkewness",
"nMadKurtosis",
"nMadDuplicates",
"possOutPeak",
"peakOutMetrics"
)
quantTableTmp <- peakQuantTable(object)
# remove previous results if necessary
quantTableTmp <-
quantTableTmp[, setdiff(colnames(quantTableTmp),
colnames(resultsTmp))]
# median absolute deviation of the rt deviation
madRtDevTmp <-
tapply(abs(as.numeric(quantTableTmp[indxTmp, "rtDev"])),
quantTableTmp[indxTmp,
"featureName"], function(feat)
mad(feat))
matchIndx <-
match(quantTableTmp[, "featureName"], names(madRtDevTmp))
# calculate median peak area for the group
peakAreaMed <-
tapply(as.numeric(quantTableTmp[indxTmp, "peakArea"]),
quantTableTmp[indxTmp,
"featureName"], median)
peakAreaMed <- peakAreaMed[matchIndx]
# deviation of peak area from the group median
peakAreaDev <-
as.numeric(quantTableTmp[, "peakArea"]) - peakAreaMed
madPeakAreaTmp <-
tapply(as.numeric(quantTableTmp[indxTmp, "peakArea"]),
quantTableTmp[indxTmp,
"featureName"], function(feat)
mad(feat))
madRtDevTmp <- madRtDevTmp[matchIndx]
madPeakAreaTmp <- madPeakAreaTmp[matchIndx]
for (i in which(indxTmp)) {
# setTxtProgressBar(pb, i)
peakRangeTmp <- peakIdData(object)[[i]]$peakRange
peakIntIndx <-
which(peakRangeTmp$troughs == "intPeakTrough")
if (length(peakIntIndx) == 0) {
next
}
peakIntTmp <-
peakRangeTmp[min(peakIntIndx):max(peakIntIndx), ,
drop = FALSE]
# calculate guassian
apexTmp <- which(peakIntTmp$peaks == "intPeak")
seqTmp <- seq(-4, 4, length = nrow(peakIntTmp))
normGaussian <- dnorm(seqTmp, sd = 2)
# calc inner product autoscaled normal and integrated peak
normGaussian <- normGaussian / max(normGaussian) * 100
scaledPeakIntTmp <-
peakIntTmp$intensity / max(peakIntTmp$intensity) * 100
# inner prod
resultsTmp[i, "dotProdDistN"] <-
as.vector((scaledPeakIntTmp %*%
normGaussian) /
(sqrt(sum(
scaledPeakIntTmp ^ 2
)) *
sqrt(sum(
normGaussian ^ 2
))))
# skewness
n <- length(scaledPeakIntTmp)
resultsTmp[i, "skewness"] <- (sum((
scaledPeakIntTmp -
mean(scaledPeakIntTmp)
) ^ 3) / n) /
(sum((
scaledPeakIntTmp -
mean(scaledPeakIntTmp)
) ^ 2) / n) ^ (3 / 2)
# kurtosis
resultsTmp[i, "kurtosis"] <- n * sum((scaledPeakIntTmp -
mean(scaledPeakIntTmp)) ^ 4) /
(sum((scaledPeakIntTmp - mean(scaledPeakIntTmp)
) ^ 2) ^ 2)
# number of median absolute deviations from the Rt deviation
resultsTmp[i, "nMadRtGroupDev"] <-
abs(as.numeric(quantTableTmp[i,
"rtDev"])) / madRtDevTmp[i]
# number of median absolute deviations from the peak area
resultsTmp[i, "nMadPeakArea"] <-
abs(peakAreaDev[i]) / madPeakAreaTmp[i]
}
# madness skewness
madSkewness <- mad(resultsTmp[indxTmp, "skewness"])
devMedianSkewness <- ifelse(resultsTmp[, "skewness"] == 0, 0,
abs(abs(resultsTmp[,
"skewness"]) - median(
resultsTmp[
indxTmp, "skewness"])))
resultsTmp[, "nMadSkewness"] <- devMedianSkewness / madSkewness
# mad kurtosis
madKurtosis <- mad(resultsTmp[indxTmp, "kurtosis"])
devMedianKurtosis <- ifelse(resultsTmp[, "kurtosis"] == 0, 0,
abs(resultsTmp[,
"kurtosis"] - median(
resultsTmp[
indxTmp, "kurtosis"])))
resultsTmp[, "nMadKurtosis"] <- devMedianKurtosis / madKurtosis
# mad dotprod
madDotProd <- mad(resultsTmp[indxTmp, "dotProdDistN"])
devMedianDotProd <-
ifelse(resultsTmp[, "dotProdDistN"] == 0, 0,
abs(resultsTmp[,
"dotProdDistN"] - median(resultsTmp[
indxTmp, "dotProdDistN"])))
resultsTmp[, "nMadDotProdDistN"] <- devMedianDotProd / madDotProd
# duplicate ratios
nPeakGrs <- length(unique(quantTableTmp[, "featureName"]))
seqTmp <- seq_len(nrow(quantTableTmp))
nSamps <- length(seqTmp) / nPeakGrs
if (nSamps %% 2 != 0) {
stop(
"Cannot compute duplicate injection peak area ratios
as there are an odd number of samples (n=",
nSamps,
").\n"
)
}
splSeq <- rep(c(rep(1, nPeakGrs), rep(2, nPeakGrs)), nSamps)
inj1 <- seqTmp[splSeq == 1]
inj2 <- seqTmp[splSeq == 2]
peakRatios <- as.numeric(quantTableTmp[inj1, "peakArea"]) /
as.numeric(quantTableTmp[inj2,
"peakArea"])
# possible problem peaks logical
possOutPeaksTmp <-
apply(resultsTmp[, allMetricColNames, drop = FALSE],
2, function(colTmp)
colTmp >=
nTimesMad)
resultsTmp[, "possOutPeak"] <-
(rowSums(possOutPeaksTmp[, metrics,
drop = FALSE]) >=
1) * 1
message(
"Summary nPeaks > ",
nTimesMad,
" * median absolute deviation (mad):\n\n",
paste0(metrics, " ",
colSums(possOutPeaksTmp[, metrics, drop = FALSE]), "\n")
)
par(mfrow = c(3, 2))
for (j in seq_len(5)) {
colNameTmp <- colnames(resultsTmp)[j]
yTmp <- resultsTmp[indxTmp, colNameTmp]
orderTmp <- order(yTmp)
possProbIndx <- possOutPeaksTmp[indxTmp, j] + 1
plot(
yTmp[orderTmp],
ylab = colNameTmp,
col = possProbIndx[orderTmp],
main = paste0(
ifelse(colnames(possOutPeaksTmp)[j] %in%
metrics, "***", ""),
colNameTmp,
" (median = ",
round(median(yTmp), 2),
")"
),
col.main = ifelse(
colnames(possOutPeaksTmp)[j] %in% metrics,
"blue",
"black"
)
)
}
plot(
c(0, 1),
c(0, 1),
ann = FALSE,
bty = "n",
type = "n",
xaxt = "n",
yaxt = "n"
)
text(
x = 0.5,
y = 0.5,
paste("*** selected metrics used
to identify possible problem peaks"),
cex = 1.3,
col = "blue"
)
text(
x = 0.5,
y = 0.3,
paste("peaks > ", nTimesMad,
" * MAD of each metric"),
cex = 1.3,
col = "red"
)
message(
"based on a maximum number of median
absolute deviation(s) of ",
nTimesMad,
":\n\n",
sum(resultsTmp[, "possOutPeak"]),
" peaks ",
"(",
round(sum(resultsTmp[,
"possOutPeak"]) / sum(indxTmp) * 100, 1),
"%)",
" of a total of ",
sum(indxTmp),
" peaks quantified were potentially non-gaussian,
long tailed, or deviant from the peak group retention
time or peak area.\n"
)
nMadColNames <-
colnames(resultsTmp)[grep("nMad", colnames(resultsTmp))]
resultsTmp[, "peakOutMetrics"] <-
apply(resultsTmp[, nMadColNames], 1,
function(rowTmp) {
paste0(nMadColNames[rowTmp >= nTimesMad], collapse = "; ")
})
peakQuantTable(object) <-
cbind(resultsTmp[, c("possOutPeak",
"peakOutMetrics")], quantTableTmp,
resultsTmp[, c(
"dotProdDistN",
"skewness",
"kurtosis",
"nMadRtGroupDev",
"nMadPeakArea",
"nMadDotProdDistN",
"nMadSkewness",
"nMadKurtosis"
)])
return(object)
} # end function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.