Nothing
#' @title Interpreting High-Res-MS spectra.
#'
#' @description \code{InterpretMSSpectrum} will read, evaluate and plot a deconvoluted
#' mass spectrum (mass*intensity pairs) from either TMS-derivatized GC-APCI-MS data
#' or ESI+/- data. The main purpose is to identify the causal metabolite or more
#' precisely the sum formula of the molecular peak by annotating and interpreting
#' all visible fragments and isotopes.
#'
#' @details For further details refer to and if using please cite
#' Jaeger et al. (2016, <doi:10.1021/acs.analchem.6b02743>) in case of GC-APCI and
#' Jaeger et al. (2017, <doi:10.1002/rcm.7905>) for ESI data. The Interpretation is
#' extremely speed up if 'formula_db' (a predetermined database of potential sum
#' formulas) is provided within the function call. Within the package you may use
#' \link{GenerateMetaboliteSQLiteDB} to prepare one for yourself or request
#' a download link from \email{jan.lisec@bam.de} as de-novo calculation for a wide
#' mass range may take several days.
#'
#' @param spec A 2-column matrix of mz/int pairs. If spec=NULL then \code{InterpretMSSpectrum}
#' tries to read data from clipboard (i.e. two columns copied from an Excel spreadsheet).
#' @param precursor The ion (m/z) from spec closest to this mass will be considered as
#' precursor (can be nominal, i.e. if precursor=364 then 364.1234 would be selected from
#' spectrum if it is closest).
#' @param correct_peak For testing purposes. A character in the form of "name, formula, mz"
#' to evaluate spectra against. Note! Separating character is ', '.
#' @param met_db A metabolite DB (e.g. GMD or internal) can be provided to search for
#' candidates comparing M+H ions (cf. Examples).
#' @param typical_losses_definition A file name (e.g. D:/BuildingBlocks_GCAPCI.txt) from
#' where to load relevant neutral losses (cf. Details). Alternatively an data frame with
#' columns 'Name', 'Formula' and 'Mass'.
#' @param silent Logical. If TRUE no plot is generated and no output except final candidate
#' list is returned.
#' @param dppm Specifies ppm error for Rdisop formula calculation.
#' @param param Either a list of parameters or a character shortcut indicating a predefined
#' set. Currently 'APCIpos', 'ESIpos' and 'ESIneg' are supported as shortcuts. If a list
#' is provided, list elements will overwrite similar named entries from the list that
#' can be acessed using utils::data(param.default, package = "InterpretMSSpectrum").
#' @param formula_db A pre calculated database of sum formulas and their isotopic fine
#' structures can be used to extremely speed up the function.
#'
#' @return An annotated plot of the mass spectrum and detailed information within the
#' console. Main result, list of final candidate formulas and their putative fragments,
#' will be returned invisibly.
#'
#' @examples
#' # load test data
#' utils::data(apci_spectrum)
#'
#' # provide information of a correct peak (if you know) as a character containing
#' # name, formula and ion mass -- separated by ', ' as shown below
#' cp <- "Glutamic acid (3TMS), C14H33NO4Si3, 364.1790"
#'
#' # provide database of known peaks and correct peak
#' mdb <- data.frame(
#' "Name" = c("Glutamic acid (3TMS)", "other peak with same sum formula"),
#' "Formula" = c("C14H33NO4Si3", "C14H33NO4Si3"),
#' "M+H" = c(364.179, 364.179), stringsAsFactors = FALSE, check.names = FALSE
#' )
#'
#' # provide a database of precalculated formulas to speed up the process
#' fdb <- system.file("extdata", "APCI_min.db", package = "InterpretMSSpectrum")
#'
#' # apply function providing above arguments (dppm is set to 0.5 to reduce run time)
#' InterpretMSSpectrum(spec=apci_spectrum, correct_peak=cp, met_db=mdb, formula_db=fdb)
#'
#' @export
#'
#' @importFrom enviPat check_chemform mergeform
InterpretMSSpectrum <- function(
spec=NULL,
precursor=NULL,
correct_peak=NULL,
met_db=NULL,
typical_losses_definition=NULL,
silent=FALSE,
dppm=3,
param="APCIpos",
formula_db=NULL
) {
# check for Rdisop to be able to keep it in suggested packages
if (!requireNamespace("Rdisop", quietly = TRUE)) {
message("\nThe use of this function requires package 'Rdisop'.\nPlease install with 'BiocManager::install(\"Rdisop\")\'\n")
return(NULL)
}
# POTENTIAL PARAMETERS that could be allowed for the user to modify
# !!! carefull limiting (to maximum of 5 peaks) is experimental
max_isomain_peaks <- 5
stopifnot(is.list(param) | param %in% c("APCIpos","ESIpos","ESIneg","default"))
# param.default <- list("ionization"="ESI",
# "ionmode"="positive",
# "allowed_elements"=c("C","H","N","O","P","S","Cl","Na","K"), # for Rdisop formula generation
# "maxElements"="P4S4Na2K2", # for Rdisop formula generation
# "minElements"="C1",
# "substitutions"=data.frame("s1"=c("H1","H1","Na1","Na1","K1"),"s2"=c("Na1","K1","K1","H1","H1")),
# "quick_isos"=TRUE,
# "score_cutoff"=0.5,
# "neutral_loss_cutoff"=2,
# "ruleset"="none")
# save(param.default, file = "Data/param.default.rda", compress = "gzip")
# load default parameters
# using a parameter set is an attempt to summarize a number of parameters useful for either GC-APCI or LC-ESI
# however, to be more flexible this parameterset can be provided directly as a list
param.default <- NULL
utils::data(param.default, envir=environment(), package = "InterpretMSSpectrum")
# now modify values of the default parameter set according to the provided option in 'param'
if (is.list(param)) {
for (n in names(param)) param.default[[n]] <- param[[n]]
} else {
if (param=="APCIpos") {
param.default$"ionization" <- "APCI"
param.default$"allowed_elements" <- c("C","H","N","O","P","S","Si")
param.default$"maxElements" <- "P2S2"
param.default["substitutions"] <- list(NULL)
param.default$"neutral_loss_cutoff" <- 0.5
param.default$"quick_isos" <- FALSE
param.default$"ruleset" <- "APCI"
}
if (substr(param,1,3)=="ESI") {
param.default$"allowed_elements" <- c("C","H","N","O","P","S","Cl")
param.default$"maxElements" <- "P4S4"
param.default$"substitutions" <- data.frame("s1"=c("H1","H1","Na1"),"s2"=c("Na1","K1","K1"))
param.default$"ruleset" <- "ESI"
}
if (substr(param,nchar(param)-2,nchar(param))=="pos") {
param.default$"ionmode" <- "positive"
} else {
param.default$"ionmode" <- "negative"
}
}
param <- param.default
param$"isotopes" <- as.matrix(param$"allowed_elements",ncol=1)
param$"iso_mass" <- ifelse(param$"ionization"=="APCI",1.0015,1.003355)
# Input CHECKS
# correct_peak
if (!is.null(correct_peak)) {
test <- FALSE
#find_sep <- gregexpr(", ", correct_peak)[[1]]
tmp <- strsplit(correct_peak,", ")[[1]]
if (length(tmp)<3) test <- TRUE; msg <- "cant split string into 3 components"
if (length(tmp)>3) tmp <- c(paste(tmp[1:(length(tmp)-2)],collapse=","),tmp[-(1:(length(tmp)-2))])
if (!test && is.na(as.numeric(tmp[3]))) test <- TRUE; msg <- "cant convert 3rd component to numeric"
#if (!test && enviPat::check_chemform(isotopes=data.frame("element"=allowed_elements,"isotope"="X","mass"=0,"abundance"=0), tmp[2])$warning) test <- TRUE
if (!test && enviPat::check_chemform(isotopes=param$isotopes, tmp[2])$warning) {
test <- TRUE
msg <- "formula check of 2nd component failed"
} else {
tmp[2] <- enviPat::check_chemform(isotopes=param$isotopes, tmp[2])$new_formula
# store formula for later use seperately
fml <- enviPat::mergeform(tmp[2], "H1")
correct_peak <- paste(tmp, collapse=", ")
}
if (test) {
correct_peak <- NULL
if (!silent) warning("InterpretMSSpectrum: correct_peak set to NULL as it didn't pass QC. (", msg, ")")
}
}
# met_db
if (!is.null(met_db)) {
test <- FALSE
colN <- grep("[Nn]ame", colnames(met_db))[1]; if (length(colN)==1) colnames(met_db)[colN] <- "Name"
colF <- grep("[Ff]ormula", colnames(met_db))[1]; if (length(colF)==1) colnames(met_db)[colF] <- "Formula"
colM <- which(colnames(met_db) %in% c("M+H","mz"))[1]; if (length(colM)==1) colnames(met_db)[colM] <- "M+H"
if (!all(c("Name","Formula","M+H") %in% colnames(met_db))) {
test <- TRUE
} else {
met_db <- met_db[,c("Name","Formula","M+H"),drop=FALSE]
met_db[,"Name"] <- as.character(met_db[,"Name"])
met_db[,"Formula"] <- as.character(met_db[,"Formula"])
met_db[,"M+H"] <- as.numeric(met_db[,"M+H"])
met_db <- met_db[!is.na(met_db[,"M+H"]),,drop=FALSE]
if (nrow(met_db)==0) test <- TRUE else met_db <- met_db[!(is.na(met_db[,"Formula"]) | met_db[,"Formula"]==""),,drop=FALSE]
if (nrow(met_db)==0) test <- TRUE else met_db <- met_db[!enviPat::check_chemform(isotopes=param$isotopes, met_db[,"Formula"])$warning,,drop=FALSE]
if (nrow(met_db)==0) test <- TRUE
}
if (test) {
met_db <- NULL
if (!silent) warning("InterpretMSSpectrum: met_db set to NULL as it didn't pass QC.")
}
}
# data_base
sf_db <- FALSE
if (!is.null(formula_db)) {
if (is.character(formula_db) && length(formula_db)==1 && file.exists(formula_db)) {
check_pkg <- sapply(c("RSQLite", "DBI"), requireNamespace, quietly = TRUE)
if (!all(check_pkg)) {
msg <- paste0(
"The use a predefined Database requires package", ifelse(sum(!check_pkg)>1, "s", ""),
paste(names(check_pkg)[!check_pkg], collapse=", "),
". Please install. Running with 'formula_db'=NULL."
)
formula_db <- NULL
} else {
db_con <- DBI::dbConnect(RSQLite::SQLite(), formula_db)
sf_db <- ifelse(class(db_con)=="SQLiteConnection", TRUE, FALSE)
}
}
# if (prod(dim(formula_db))>1 && all(c("Formula","Mass","m0","a0") %in% colnames(formula_db))) {
# # allow data.frame in parallel to SQLite DB ???
# }
}
# load neutral loss table
if (is.null(typical_losses_definition)) {
if (param$ionization=="APCI") {
neutral_losses_APCI <- NULL
utils::data(neutral_losses_APCI, envir=environment(), package = "InterpretMSSpectrum")
neutral_losses <- neutral_losses_APCI
}
if (param$ionization=="ESI") {
neutral_losses_ESI <- NULL
utils::data(neutral_losses_ESI, envir=environment(), package = "InterpretMSSpectrum")
neutral_losses <- neutral_losses_ESI
}
} else {
if (length(typical_losses_definition)==1 && is.character(typical_losses_definition) && file.exists(typical_losses_definition)) {
neutral_losses <- utils::read.table(typical_losses_definition, sep="\t", header=T, dec=",", as.is=T)
} else {
neutral_losses <- typical_losses_definition
}
}
# ensure proper formula in neutral_loss table (for later add/sub-molecule functions)
neutral_losses[,"Formula"] <- enviPat::check_chemform(isotopes=param$isotopes, neutral_losses[,"Formula"])[,"new_formula"]
# internal functions
ReadSpecClipboard <- function() {
# source could be Excel (German/English) or DA directly
spec <- readLines("clipboard")
spec <- gsub("\t"," ",spec) # replace Tabs
if (length(grep("[^[:digit:],/. ]", spec[1]))==1) spec <- spec[-1] # strip header if present
spec <- gsub(",",".",spec) # replace Colons
spec <- gsub(" +$","",spec) # trim white space end
spec <- gsub("^ +","",spec) # trim white space start
spec <- t(as.matrix(sapply(spec, function(x) { as.numeric(strsplit(x," ")[[1]]) }))) # convert to numeric matrix
if (ncol(spec)>=3) spec <- spec[,-1]
return(spec)
}
GetFragmentData <- function(M0=NULL, spec=NULL, n=2, iso_mass=1.003355) {
# try to get reasonable isotope peaks for M0 from spectrum
p <- sapply(0:n, function(dmz) {
tmp <- which(abs((M0+dmz*iso_mass)-spec[,1])<0.01)
if (length(tmp)>1) {
tmp <- which.min(abs((M0+dmz*iso_mass)-spec[,1]))
}
return(ifelse(length(tmp)==1,tmp,NA))
})
if (any(is.na(p))) p <- p[1:(min(which(is.na(p)))-1)]
frag <- rbind(spec[p,1], spec[p,2]/sum(spec[p,2], na.rm=T))
attr(frag, "M0") <- M0
return(frag)
}
EvaluateFragment <- function(frag=NULL, dppm=3, param = NULL, silent=FALSE, sf_db=NULL) {
# we need to correct our observed fragment data with the mass of an electron (charge) being +/- 0.00055 depending on positive/negative mode
em <- 0.00055
frag[1,] <- frag[1,]+ifelse(param$ionmode=="positive",1,-1)*em
#if (round(frag[1,1])==364) browser()
if (sf_db) {
# data base approach --> much faster compared to default solution below
dmz <- 0.0005+frag[1,1]*dppm/10^6
dbq <- DBI::dbSendQuery(db_con, "SELECT * FROM sfdb WHERE Mass > (:x) AND Mass < (:y);", data.frame(x=frag[1,1]-dmz, y=frag[1,1]+dmz))
out <- DBI::dbFetch(dbq, -1)
DBI::dbClearResult(dbq)
if (nrow(out)>=1) {
# adaptive algorithm to compute isotopic pattern on the fly and fill sf_db over time...
miss_iso <- is.na(out[,"m0"])
if (any(miss_iso)) {
#browser()
out[miss_iso,4:11] <- plyr::ldply(out[miss_iso,"Formula"], function(fml) {
matrix(t(GetIsotopeDistribution(fml=fml, res=30000, n=3, ele_vec = param$allowed_elements)), nrow=1)
})
#sf_db[mcand[miss_iso],4:11] <<- out[miss_iso,4:11]
}
# we predetermined 3 isotopes so we can compare at max 3 even if we find more within our spectrum...
max_iso <- min(c(ncol(frag),3))
out[,"Score"] <- sapply(1:nrow(out), function(j) {
the <- matrix(unlist(c(out[j,4:(4+max_iso-1)],out[j,8:(8+max_iso-1)])),nrow=2,byrow=TRUE)
the[2,] <- the[2,]/sum(the[2,])
mScore(obs=frag[,1:max_iso,drop=FALSE], the=the, dppm=dppm)
})
out <- out[order(out[,"Score"], decreasing = TRUE),c("Formula","Score","Valid","Mass"),drop=F]
} else {
out <- data.frame("Formula"=I(character(0)),"Score"=numeric(0),"Valid"=character(0),"Mass"=numeric(0))
}
attr(out, "M0") <- attr(frag, "M0")
} else {
# de novo calculation
if (length(frag[1,])>=2) {
molecules <- Rdisop::decomposeIsotopes(frag[1,], frag[2,], mzabs=0.0005, ppm=dppm, z=1, maxisotopes=1+length(frag[1,]), elements=Rdisop::initializeElements(param$allowed_elements), minElements=param$minElements, maxElements=param$maxElements)
} else {
molecules <- Rdisop::decomposeMass(frag[1,], mzabs=0.0005, ppm=dppm, z=1, maxisotopes=1, elements=Rdisop::initializeElements(param$allowed_elements), minElements=param$minElements, maxElements=param$maxElements)
}
if (is.null(molecules)) {
out <- data.frame("Formula"=I(character(0)),"Score"=numeric(0),"Valid"=character(0),"Mass"=numeric(0))
attr(out, "M0") <- attr(frag, "M0")
} else {
out <- data.frame("Formula"=I(molecules$formula), "Score"=molecules$score, "Valid"=molecules$valid, "Mass"=molecules$exactmass)
attr(out, "M0") <- attr(frag, "M0")
# extract isotope fine structure
if (param$ionization=="APCI" & !param$quick_isos) {
# for APCI data the Rdisop isotope distribution is less correct and can be better estimated using enviPat
isos <- lapply(1:nrow(out), function(j) {
x <- GetIsotopeDistribution(fml=out[j,"Formula"], res=30000, n=2, ele_vec=param$allowed_elements)
# fall back sollution for no C suggestions
if (ncol(x)<=2) {
x <- molecules[["isotopes"]][[j]]
}
if (diff(range(x[1,]))<=(ncol(x)-1.5)) {
x <- molecules[["isotopes"]][[j]]
}
return(x)
})
}
if (param$ionization=="ESI" | param$quick_isos) {
isos <- lapply(molecules[["isotopes"]], function(x) {
x[1,] <- x[1,]
x[2,] <- x[2,]/sum(x[2,])
return(x)
})
}
# the whole following part tries to determined an optimal and fair integrative score for each suggestion, including mass precision and intensity distribution for up to 2 isotopes
# but it is slow and for some fraction of the spectra error prone...
out[,"Score"] <- sapply(1:nrow(out), function(j) {
max_iso <- min(c(ncol(frag),ncol(isos[[j]])))
mScore(obs=frag[,1:max_iso,drop=FALSE], the=isos[[j]][,1:max_iso,drop=FALSE], dppm=dppm)
})
# ensure with enviPat valid chemical formulas (necessary for neutral loss detection and scoring later)
out[,"Formula"] <- enviPat::check_chemform(isotopes=param$isotopes, out[,"Formula"])[,"new_formula"]
}
}
out <- out[order(out[,"Score"], decreasing = TRUE),,drop=F]
return(out)
}
RemoveEmptyFragments <- function(rdisop_res, silent=TRUE, step="") {
# remove fragments without suggestions
flt <- sapply(rdisop_res,nrow)==0
if (any(flt)) {
if (all(flt)) {
warning(paste0("[RemoveEmptyFragments] No Fragments left after step ", step))
# keep empty list of length=1
rdisop_res <- rdisop_res[1]
} else {
if (!silent) paste0("[RemoveEmptyFragments] Fragments", paste(which(flt),collapse=" & "), "removed at step ", step)
flt <- rev(which(sapply(rdisop_res,nrow)==0))
for (k in flt) rdisop_res[[k]] <- NULL
}
}
return(rdisop_res)
}
GetRdisopResult <- function(spec=NULL, isomain=NULL, silent=TRUE, dppm=3, param = NULL, sf_db=sf_db) {
if (!is.null(attr(isomain,"adducthyp"))) {
rdisop_res <- lapply(isomain, function(M0) {
frag <- GetFragmentData(M0=M0, spec=spec, n=3, iso_mass = param$iso_mass)
if (M0==isomain[length(isomain)]) {
#to calculate de nove sum formulas we need to correct the MID for the (de)protonation
frag[1,] <- frag[1,]-attr(isomain,"adducthyp")+ifelse(param$ionmode=="positive",1,-1)*1.0073
}
rdisop_res <- EvaluateFragment(frag=frag, dppm=2*dppm, silent=silent, param = param, sf_db=sf_db)
invisible(rdisop_res)
})
} else {
rdisop_res <- lapply(isomain, function(M0) {
frag <- GetFragmentData(M0=M0, spec=spec, n=3, iso_mass = param$iso_mass)
rdisop_res <- EvaluateFragment(frag=frag, dppm=2*dppm, silent=silent, param = param, sf_db=sf_db)
invisible(rdisop_res)
})
}
RemoveEmptyFragments(rdisop_res, silent=silent, step="GetRdisopResult")
}
RemoveByScore <- function(rdisop_res, score_cutoff=0, silent=TRUE) {
rdisop_res <- lapply(rdisop_res, function(x) { x[x[,"Score"]>=(score_cutoff*max(x[,"Score"])),] })
rdisop_res <- RemoveEmptyFragments(rdisop_res, silent=silent, step="RemoveByScore")
}
GenerateMainOutput <- function(rdisop_res_list, stats, met_db, isomain) {
cat(paste("\n\nTotal number of formulas per fragment before and after filtering...\n"))
print(stats)
rdisop_res_best <- rdisop_res_list[[1]]
rdisop_res_best <- rdisop_res_best[!is.na(rdisop_res_best[,1]),]
cat(paste("\n\nDetails of best candidate...\n"))
print(rdisop_res_best)
MH <- ifelse(nrow(rdisop_res_best)==1, 1, ifelse(abs(rdisop_res_best[nrow(rdisop_res_best),"Mass"]-rdisop_res_best[nrow(rdisop_res_best)-1,"Mass"]-72.0395)<0.0005, nrow(rdisop_res_best)-1, nrow(rdisop_res_best)))
# check resulting candidate list against a DB for best matche
if (!is.null(met_db)) {
met_db[,"Formula"] <- enviPat::check_chemform(isotopes=param$isotopes, met_db[,"Formula"])[,"new_formula"]
M0 <- enviPat::subform(rdisop_res_best[MH,"Formula"], "H1")
if (any(met_db$Formula %in% M0)) {
if (!silent) print(met_db[met_db$Formula %in% M0,])
best_cand <- paste(met_db[met_db$Formula %in% M0, "Name"], collapse="; ")
best_cand_col <- 3
} else {
if (any(abs(met_db$"M+H"-rdisop_res_best[MH,"Mass"])<0.002)) {
if (!silent) print(met_db[abs(met_db$"M+H"-rdisop_res_best[MH,"Mass"])<0.002,-1])
best_cand <- paste(met_db[abs(met_db$"M+H"-rdisop_res_best[MH,"Mass"])<0.002,"Name"],collapse="; ")
best_cand_col <- 6
} else {
best_cand <- ""
best_cand_col <- grDevices::grey(0.9)
}
}
} else {
best_cand <- ""
best_cand_col <- 1
}
# outcommented to allow combination of spectrum plot with other plots
# opar <- par(no.readonly=TRUE)
# set colors for spectra plot (isomain peaks get a red color)
tmp.col <- rep(1,nrow(spec))
tmp.col[which(spec[,1] %in% isomain)] <- 2
PlotSpec(x=spec, cols=tmp.col, txt=data.frame("mz"=rdisop_res_best[,"Mass"],"Formula"=rdisop_res_best[,"Formula"]), neutral_losses=neutral_losses, neutral_loss_cutoff=param$neutral_loss_cutoff, substitutions=param$substitutions, ionization=param$ionization)
graphics::mtext(paste("Remaining combinations:",length(rdisop_res_list)), line=-1.2, adj=0, side=3, col=grDevices::grey(0.5))
graphics::mtext(best_cand, line=-2.4, adj=0, side=3, col=best_cand_col)
if (!is.null(correct_peak)) {
graphics::mtext(correct_peak, line=-3.6, adj=0, side=3, col=grDevices::grey(0.5))
fcor <- strsplit(correct_peak,", ")[[1]]
fcor <- fcor[-1][grep("^C[[:digit:]]",fcor[-1])]
if (length(fcor)==1) {
fcor <- enviPat::check_chemform(isotopes=param$isotopes, chemforms=fcor)
if (fcor[,1]) {
warning("[InterpretMSSpectrum] Probably a wrong specification of 'correct_peak'", call. = FALSE)
} else {
fcor <- enviPat::mergeform(fcor[,2], "H1")
}
# [Modification:if correct peak is specified but smaller than the 'observed' M0 the now uncommented line leads to trouble]
# fcor <- which(sapply(rdisop_res_list, function(x) {fcor %in% x[,"Formula"]}))
fcor <- which(sapply(rdisop_res_list, function(x) {fcor == x[nrow(x),"Formula"]}))
cat(paste("\n\nRank of specified M0 =", ifelse(length(fcor)==1,fcor,"NA"), "\n"))
graphics::mtext(paste0("Rank = ", fcor), line=-4.6, adj=0, side=3, col=grDevices::grey(0.5))
}
}
# restore parameters
# par(opar)
}
# read data (mz,int -table) from clipboard if not provided explicitly and sort by mz (should be standard but you never know...)
if (is.null(spec)) spec <- ReadSpecClipboard()
spec <- spec[order(spec[,1]),,drop=FALSE]
# check if spectrum is okay
if (!nrow(spec)>=1) {
#[ToDo] Further checks may be useful
warning("[InterpretMSSpectrum] Spectra does not contain any information (nrow=0).", call. = FALSE)
invisible(NULL)
} else {
# keep global error message for testing purposes (if correct_peak is known/provided)
if (!is.null(correct_peak)) {
#global_err <<- NULL # global assignment removed to pass RCheck without NOTEs
global_err <- NULL
local_check <- 0
}
#browser()
# evaluate main peaks
#browser()
isomain <- DetermineIsomainPeaks(spec=spec, int_cutoff=0.03, precursor=precursor, ionization=param$ionization, ionmode=param$ionmode, limit=max_isomain_peaks)
# # modify spectra if no good [M+H] is found but alternative adduct hypothesis instead
# if (!is.null(attr(isomain,"adducthyp"))) {
#
# }
stats <- data.frame("mz"=round(isomain,4), "initial"=NA, "score_cutoff"=NA, "PlausibleFormula"=NA, "TypicalLosses"=NA)
# start timing for testing purposes
time_elapse <- Sys.time()
# use Rdisop to get potential formulas (using up to n=2 isotopic peaks found)
rdisop_res <- GetRdisopResult(spec=spec, isomain=isomain, silent=silent, dppm=dppm, param = param, sf_db=sf_db)
stats[stats[,"mz"] %in% round(sapply(rdisop_res,attr,"M0"),4),"initial"] <- sapply(rdisop_res,nrow)
if (!is.null(correct_peak)) {
if (local_check==0 && length(grep(fml, rdisop_res[[length(rdisop_res)]][,1]))!=1) local_check <- 1
}
time_elapse <- c(time_elapse, Sys.time())
# remove according to individual score based on mz deviation and isotopic fit
rdisop_res <- RemoveByScore(rdisop_res, score_cutoff=param$score_cutoff, silent=silent)
stats[stats[,"mz"] %in% round(sapply(rdisop_res,attr,"M0"),4),3] <- sapply(rdisop_res,nrow)
if (!is.null(correct_peak) && local_check==0 && length(grep(fml, rdisop_res[[length(rdisop_res)]][,1]))!=1) local_check <- 2
time_elapse <- c(time_elapse, Sys.time())
# restrict to plausible formulas
if (!sf_db) {
# if a predetermined database is used there is no need to check formula plausibility again
rdisop_res <- lapply(rdisop_res, function(x) { x[sapply(x[,"Formula"], PlausibleFormula, ruleset=param$ruleset),] })
rdisop_res <- RemoveEmptyFragments(rdisop_res, silent=silent, step="PlausibleFormula")
stats[stats[,"mz"] %in% round(sapply(rdisop_res,attr,"M0"),4),4] <- sapply(rdisop_res,nrow)
} else {
#stats[stats[,"mz"] %in% round(sapply(rdisop_res,attr,"M0"),4),4] <- sapply(rdisop_res,nrow)
}
if (!is.null(correct_peak) && local_check==0 && length(grep(fml, rdisop_res[[length(rdisop_res)]][,1]))!=1) local_check <- 3
time_elapse <- c(time_elapse, Sys.time())
#rdisop_res <<- rdisop_res
# restrict based on neutral losses (only if more than 1 fragment is left in spectrum)
# test of all losses are potentially helpfull
nl_vec <- sapply(neutral_losses[,"Formula"], function(x) { neutral_losses[neutral_losses[,"Formula"]==x,"Mass"] })
if (length(rdisop_res)>=2) {
for (k in 1:length(nl_vec)) {
rdisop_res <- RestrictByTypicalLosses(rdisop_res=rdisop_res, tl=nl_vec[k], neutral_loss_cutoff=param$neutral_loss_cutoff, punish=0.5, substitutions=param$substitutions)
if (!is.null(correct_peak) && local_check==0 && length(grep(fml, rdisop_res[[length(rdisop_res)]][,1]))!=1) {
local_check <- 4
#print(paste(names(nl_vec)[k], "caused a problem."))
}
}
}
stats[stats[,"mz"] %in% round(sapply(rdisop_res,attr,"M0"),4),5] <- sapply(rdisop_res,nrow)
time_elapse <- c(time_elapse, Sys.time())
# close SQLite connection
if (sf_db) DBI::dbDisconnect(db_con)
if (sum(sapply(rdisop_res,nrow))>0) {
# obtain most likely combination
#rdisop_res <<- rdisop_res
#browser()
rdisop_res_list <- ScoreFormulaCombination(rdisop_res, nl_vec=nl_vec, punish_invalid=0.5, punish_S=0.2, punish_nonplausible=0.5, return_rank=NA, neutral_loss_cutoff=param$neutral_loss_cutoff, substitutions=param$substitutions, silent=silent)
time_elapse <- c(time_elapse, Sys.time())
# plot annotated spectrum
if (!silent) GenerateMainOutput(rdisop_res_list, stats, met_db, isomain)
time_elapse <- c(time_elapse, Sys.time())
# write potential error source to global
if (!is.null(correct_peak) && local_check>0) {
global_err <- paste(correct_peak, "not present after", c("initial generation","score filter","plausibility filter","neutral loss filter")[local_check])
#assign("global_err", global_err, envir=.GlobalEnv)
print(global_err)
}
time_elapse <- round(diff(time_elapse),4); names(time_elapse) <- c("FormulaGen","ScoreFilt","Plausible","NeutralLoss","PathEval","Plot")
if (!silent) {
cat("\n\nTime elapsed during individual processing steps...\n")
print(time_elapse)
}
# return final result list
attr(rdisop_res_list,"stats") <- stats
invisible(rdisop_res_list)
} else {
invisible(NULL)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.