Nothing
#' @title EvaluatePairsFromXCMSSet.
#'
#' @description
#' \code{EvaluatePairsFromXCMSSet} will analyze an xcmsSet result for mass pairs (mz1, mz2) with changes due to any 13C incorporation.
#'
#' @details
#' Using 'APCI' as method assumes that (i) you analyze TMS-derivatized compounds and (ii) your MS resolution does not allow to seperate Si and C isotopes
#' but reportes an intermediate mass as m/z. In this case you will find carbon isotopes below there expected masses, i.e. M+1 would be 1.001mDa apart from M+0 instead of 1.003.
#' The effect is increased with isotope number, i.e. M+6 will be ~20mDa below the expected value. Hence, selecting method 'APCI' will combine your selected dmz
#' with a allowed deviation due to Si-isotope caused mass shifts. Use 'ESI' if you are not sure if this effect takes place in your settings.
#'
#' @param xg xcmsSet object with group information.
#' @param tp Timepoint information for all samples (obviously required, internally converted to factor).
#' @param gr Group information for all samples, e.g. different genotypes or concentrations (optional, factor).
#' @param dmz Allowed mass deviation in Da.
#' @param drt Allowed rt deviation in time units of xcmsSet (usually seconds) to test for candidates.
#' @param mz_iso Mass defect of the isotope under investigation.
#' @param n Number of maximal incorporated carbons to test.
#' @param method Currently APCI or ESI. If APCI, dmz will be modified depending on n (see details).
#' @param specific_row A single row from groupval(xg) to process.
#' @param testing Stop in function using browser() if specific_row is specified; can be a isotope number, i.e. 3 will stop at third isotope.
#' @param silent Suppress warnings and console output if TRUE.
#'
#' @return
#' A dataframe with all observable pairs within the provided xcmsSet peak list including mean group intensities and P values.
#'
#' @examples
#' # Please use examples from previous versions as xcms (and xcms objects) are
#' # no longer supported during CRAN checks leading to package rejection
#' # if included (and I do not know a work around).
#' \dontrun{
#' load(xcms_cand)
#' head(xcms_cand[order(xcms_cand$P), ])
#' }
#'
#' @importFrom plyr ldply
#'
#' @export
#'
EvaluatePairsFromXCMSSet <- function(xg = NULL, tp = NULL, gr = NULL, drt = 1, dmz = 0.025, mz_iso = 1.00335, n = 6, method = c("APCI", "ESI")[1], specific_row = NULL, testing = FALSE, silent = FALSE) {
# check if grouped xcmsSet provided and extract data from object
# [20190618 JL needed to remove class information regarding xcms objects for CRAN]
# stopifnot(class(xg)=="xcmsSet")
# [20190514 JL xcms needs to be put to suggest due to mzR on request by CRAN --> replaced xcms::groupval by quick and dirty own function]
gv <- groupval(xg = xg)
# if (requireNamespace("xcms", quietly = TRUE)) {
# gv <- xcms::groupval(xg, value = "maxo", method="maxint")
# } else {
# warning("Package xcms not available.")
# }
xg <- xg@groups
utils::data("mz_shift_corrector", package = "HiResTEC")
mz_shift_corrector <- mz_shift_corrector[[method]][1:(n + 1)]
# check if time (and group) information provided and prepare
if (is.numeric(tp)) tp <- factor(tp)
stopifnot(!is.null(tp))
stopifnot(length(levels(tp)) >= 2)
if (is.null(gr)) gr <- gl(1, ncol(gv)) else gr <- factor(gr)
inter <- interaction(tp, gr, drop = TRUE, sep = "_")
tp <- as.numeric(as.character(tp))
# check if processing is limited to specific row
if (is.null(specific_row)) specific_row <- 1:nrow(xg)
stopifnot(all(specific_row %in% 1:nrow(xg)))
# find all pairs
out <- plyr::ldply(specific_row, function(i) {
# filter for coeluting peaks
rtok <- abs(xg[i, "rtmed"] - xg[, "rtmed"]) < drt # table(rtok)
# specify potential mz2 candidate masss which may pair with mz1
mz_cand <- xg[i, "mzmed"] + c(1:n * mz_iso)
# check each candidate for being present
plyr::ldply(mz_cand, function(x) {
# filter for fitting masses within coeluting peaks
# mzok <- which(abs(x-xg[which(rtok),"mzmed"]) < dmz)
# browser()
ni <- round(x - xg[i, "mzmed"])
md <- xg[which(rtok), "mzmed"] - x
# upper boud defined by dmz; lower bound defined by dmz+correction for Si-Isotopes
mzok <- which(md < dmz & md > -(mz_shift_corrector[ni] + dmz))
if (length(mzok) >= 1) {
if (length(mzok) >= 2) {
if (!silent) warning(paste("\nFound more than one fitting mz2 for line", i, "and mz2", x, " and keep closest by dmz."))
mzok <- mzok[which.min(abs(x - xg[which(rtok)[mzok], "mzmed"]))]
}
j <- which(rtok)[mzok]
# get peaks and ratios
pks <- gv[c(i, j), ]
# ratios <- pks[2,]/pks[1,]
# 'ratios' are PseudoEnrichments now!!!
ratios <- apply(pks, 2, CalcEnrichment)
# evaluate intensity group means
int_mz1 <- data.frame(lapply(split(pks[1, ], inter), function(x) {
ifelse(any(is.finite(x)), mean(x, na.rm = T), NA)
}), check.names = FALSE)
colnames(int_mz1) <- paste0("I1_", colnames(int_mz1))
int_mz2 <- data.frame(lapply(split(pks[2, ], inter), function(x) {
ifelse(any(is.finite(x)), mean(x, na.rm = T), NA)
}), check.names = FALSE)
colnames(int_mz2) <- paste0("I2_", colnames(int_mz2))
# set a limit for peak ratio at t=0 (to avoid wrong candidates due to [M+] vs [M+H] ratios)
if (any(tp == 0)) {
test_int_iso <- int_mz1 / int_mz2
flt <- grep("I1_0", names(test_int_iso))
test_int_iso <- any(test_int_iso[flt] > 0.7, na.rm = TRUE) | all(is.na(test_int_iso[flt]))
# old version
# test_int_iso <- any((int_mz1/int_mz2)>0.7, na.rm=TRUE)
} else {
test_int_iso <- TRUE
}
# compute peak distance (dRT)
dRT <- abs(xg[i, "rtmed"] - xg[j, "rtmed"])
# stop process here if requested by user via option 'testing'
if (is.numeric(testing) && testing == which(mz_cand == x)) testing <- TRUE
browser(expr = !is.null(specific_row) && is.logical(testing) && testing)
# compute ANOVA model for 'ratios ~ time * group'
# if (test_int_iso & sum(sapply(split(ratios, inter), function(y) { any(is.finite(y)) }))>=0.5*length(levels(inter))) {
if (test_int_iso & sum(sapply(split(ratios, inter), function(y) {
all(is.finite(y))
})) >= floor(0.5 * length(levels(inter)))) {
# check if peak ratios change over time using a linear model with interaction
if (length(levels(gr)) > 1) {
# if groups are specified use:
ratios_lm <- try(lm(ratios ~ tp * gr), silent = TRUE)
} else {
# if only timepoints are specified use:
ratios_lm <- try(lm(ratios ~ tp), silent = TRUE)
}
# browser()
if (!attr(ratios_lm, "class") == "lm") {
if (!silent) cat(paste0("\ni=", i, " x=", x, " lm failed -> set P=1 and dR=0"))
P <- 1
dR <- 0
} else {
alm <- anova(ratios_lm)
alm_tp <- grep("tp", rownames(alm))
if (length(alm_tp) >= 1) {
P <- min(alm[alm_tp, "Pr(>F)"], na.rm = T)
# [ToDo] think about best option to calculate dR
# dR <- 100*diff(range(sapply(split(ratios, inter), median,na.rm=T)))
# dR <- coef(ratios_lm)["tp"]*diff(range(tp))*100
# browser()
# taking only the extreme time points avoids partially FalsePositives
# filt <- tp %in% range(tp)
# dR <- diff(sapply(split(ratios[filt], tp[filt]), median, na.rm=T))
dR <- round(median(ratios[tp == max(tp)], na.rm = T) - median(ratios[tp == min(tp)], na.rm = T), 2)
} else {
# no time dependent P-value could be found using a linear model
# a reason could be that mz2 was n.d. at t=0 and mz1 n.d. at t=1
P <- 1
dR <- 0
}
}
return(data.frame("peak_idx" = i, "rt" = xg[i, "rtmed"], "mz1" = xg[i, "mzmed"], "mz2" = xg[j, "mzmed"], "P" = P, "dR" = dR, "dRT" = dRT, int_mz1, int_mz2))
} else {
return(data.frame("peak_idx" = i, "rt" = xg[i, "rtmed"], "mz1" = xg[i, "mzmed"], "mz2" = xg[j, "mzmed"], "P" = NA, "dR" = NA, "dRT" = dRT, int_mz1, int_mz2))
}
} else {
dummy <- data.frame(as.list(rep(NA, 2 * length(levels(inter)))))
colnames(dummy) <- paste0(rep(c("I1_", "I2_"), each = length(levels(inter))), levels(inter))
return(data.frame("peak_idx" = i, "rt" = xg[i, "rtmed"], "mz1" = xg[i, "mzmed"], "mz2" = NA, "P" = NA, "dR" = NA, "dRT" = NA, dummy)[-1, , drop = F])
}
})
}, .progress = ifelse(silent, "none", "text"))
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.