#' @include main.R
#' @include utils-bruker.R
NULL
# NOTE: No coverage calculation for Bruker tools as they cannot be run on CI
# nocov start
simplifyDAFormula <- function(formula)
{
# Remove spaces
formula <- gsub(" ", "", formula)
# Removes any element counts of 1: match all "1"'s after a character but not before another number
return(gsub("(?<=[[:alpha:]])1{1}(?![[:digit:]])", "", formula, perl = TRUE))
}
getEmptyDAFragInfo <- function() data.table(mz = numeric(), ion_formula = character(), error = numeric(),
mSigma = numeric(), dbe = numeric(), score = numeric())
#' Generate formula with Bruker DataAnalysis
#'
#' Uses Bruker DataAnalysis to generate chemical formulae.
#'
#' @templateVar algo Bruker DataAnalysis
#' @templateVar do generate formula candidates
#' @templateVar generic generateFormulas
#' @templateVar algoParam bruker
#' @template algo_generator
#'
#' @details This method supports scoring based on overlap between measured and theoretical isotopic patterns (both MS
#' and MS/MS data) and the presence of 'fitting' MS/MS fragments. The method will iterate through all features (or
#' "Compounds" in DataAnalysis terms) and call \command{SmartFormula} (and \command{SmartFormula3D} if MS/MS data is
#' available) to generate all formulae. Parameters affecting formula calculation have to be set in advance within the
#' DataAnalysis method for each analysis (\emph{e.g.} by \code{\link{setDAMethod}}).
#'
#' This method requires that features were obtained with \code{\link{findFeaturesBruker}}. It is recommended, but not
#' mandatory, that the \code{\link{MSPeakLists}} are also generated by DataAnalysis.
#'
#' Calculation of formulae with DataAnalysis always occurs with the 'feature approach' (see \verb{Candidate
#' assignment} in \code{\link{generateFormulas}}).
#'
#' @param precursorMzSearchWindow Search window for \emph{m/z} values (+/- the feature \emph{m/z}) used to find back
#' feature data of precursor/parent ions from MS/MS spectra (this data is not readily available from
#' \command{SmartFormula3D} results).
#' @param MSMode Whether formulae should be generated only from MS data (\code{"ms"}), MS/MS data (\code{"msms"}) or
#' both (\code{"both"}). Selecting "both" will calculate formulae from MS data and MS/MS data and combines the results
#' (duplicated formulae are removed). This is useful when poor MS/MS data would exclude proper candidates.
#'
#' @template adduct-arg
#' @templateVar algo bruker
#' @template form_algo-args
#'
#' @inheritParams generateFormulas
#'
#' @inherit generateFormulas return
#'
#' @template dasaveclose-args
#'
#' @template DA-restart-note
#'
#' @templateVar what generateFormulasDA
#' @template main-rd-method
#' @export
setMethod("generateFormulasDA", "featureGroups", function(fGroups, MSPeakLists, precursorMzSearchWindow = 0.002,
MSMode = "both", adduct = NULL, featThreshold = 0,
featThresholdAnn = 0.75, absAlignMzDev = 0.002, save = TRUE,
close = save)
{
ac <- checkmate::makeAssertCollection()
checkmate::assertClass(MSPeakLists, "MSPeakLists", add = ac)
checkmate::assertNumber(precursorMzSearchWindow, lower = 0, finite = TRUE, add = ac)
checkmate::assertChoice(MSMode, c("ms", "msms", "both"), add = ac)
aapply(checkmate::assertNumber, . ~ featThreshold + featThresholdAnn + absAlignMzDev, lower = 0, upper = 1,
fixed = list(add = ac))
assertDACloseSaveArgs(close, save, add = ac)
checkmate::reportAssertions(ac)
adduct <- checkAndToAdduct(adduct, fGroups)
fgAdd <- getFGroupAdducts(names(fGroups), annotations(fGroups), adduct, "generic")
DA <- getDAApplication()
anaInfo <- analysisInfo(fGroups)
ftind <- groupFeatIndex(fGroups)
gTable <- groupTable(fGroups)
gCount <- length(fGroups)
gNames <- names(fGroups)
hideDAInScope()
fts <- featureTable(fGroups)
fTable <- list()
cacheDB <- openCacheDBScope() # open manually so caching code doesn't need to on each R/W access
setHash <- makeHash(fGroups, precursorMzSearchWindow, MSMode)
cachedSet <- loadCacheSet("formulasBruker", setHash, cacheDB)
formHashes <- vector("character", nrow(anaInfo) * gCount)
formHashCount <- 0
setFormColOrder <- function(dt)
{
setcolorder(dt, c("neutral_formula", "ion_formula", "neutralMass", "ion_formula_mz", "error", "mSigma", "dbe",
"score", "explainedPeaks", "fragInfo"))
}
for (anai in seq_len(nrow(anaInfo)))
{
ana <- anaInfo$analysis[anai]
DAAnaInd <- getDAFileIndex(DA, ana, anaInfo$path[anai])
checkDAFMFCompounds(DA, fts[[ana]], DAAnaInd, TRUE)
printf("Loading all formulas from analysis '%s'...\n", ana)
baseHash <- makeHash(fGroups, ana, precursorMzSearchWindow, MSMode)
cmpds <- DA[["Analyses"]][[DAAnaInd]][["Compounds"]]
prog <- openProgBar(0, gCount)
for (grpi in seq_len(gCount))
{
grp <- gNames[grpi]
fti <- ftind[[grp]][anai]
if (fti == 0)
next
hash <- makeHash(baseHash, grp)
formHashCount <- formHashCount + 1
formHashes[formHashCount] <- hash
flist <- NULL
if (!is.null(cachedSet))
flist <- cachedSet[[hash]]
if (is.null(flist))
flist <- loadCacheData("formulasBruker", hash, cacheDB)
if (is.null(flist))
{
cmpd <- cmpds[[fts[[ana]]$ID[fti]]]
ftable <- vector("list", gCount)
ftableCount <- 0
if (MSMode != "msms")
{
cmpd$SmartFormula()
SMFResults <- cmpd[[1]][["SmartFormulaResults"]]
SMFResultsCount <- SMFResults[["Count"]]
if (SMFResultsCount > 0)
{
for (k in 1:SMFResultsCount)
{
SMFResult <- SMFResults[[k]]
SMFResultCount <- SMFResult[["Count"]]
if (SMFResultCount < 1 || all.equal(SMFResult[["m_over_z"]], fts[[ana]]$mz[fti]) != TRUE)
next
dt <- rbindlist(lapply(seq_len(SMFResultCount), function(resi)
{
SMFResultItem <- SMFResult[[resi]]
return(list(
neutral_formula = simplifyDAFormula(SMFResultItem[["NeutralFormula"]]),
ion_formula = simplifyDAFormula(SMFResultItem[["SumFormula"]]),
ion_formula_mz = SMFResultItem[["m_over_z"]],
error = SMFResultItem[["Error"]],
mSigma = SMFResultItem[["Sigma"]] * 1000,
dbe = SMFResultItem[["RingsAndDoubleBonds"]],
score = SMFResultItem[["Score"]],
fragInfo = list(getEmptyDAFragInfo())
))
}))
dt <- addMiscFormulaInfo(dt, adduct)
dt <- setFormColOrder(dt)
ftableCount <- ftableCount + 1
ftable[[ftableCount]] <- dt
break # Shouldn't be any more results
}
}
}
ftable3D <- vector("list", gCount)
ftable3DCount <- 0
if (MSMode != "ms" && cmpd[["Count"]] > 1)
{
cmpd$SmartFormula3D()
SMF3DResults <- cmpd[[2]][["SmartFormula3DResults"]]
SMF3DResultsCount <- SMF3DResults[["Count"]]
if (SMF3DResultsCount > 0)
{
ftable3DFrag <- vector("list", SMF3DResultsCount)
ftable3DFragCount <- 0
for (k in 1:SMF3DResultsCount)
{
SMF3DResult <- SMF3DResults[[k]]
SMF3DResultCount <- SMF3DResult[["Count"]]
parent <- SMF3DResult[["ParentResults"]]
if (SMF3DResultCount < 1 || abs(parent[["m_over_z"]] - fts[[ana]]$mz[fti]) > precursorMzSearchWindow)
next
form <- simplifyDAFormula(parent[["SumFormula"]])
nform <- calculateNeutralFormula(form, fgAdd$grpAdducts[[grp]])
dt <- data.table(neutral_formula = nform,
ion_formula = form,
ion_formula_mz = parent[["m_over_z"]],
error = parent[["Error"]],
mSigma = parent[["Sigma"]] * 1000,
dbe = parent[["RingsAndDoubleBonds"]],
score = parent[["Score"]])
fi <- rbindlist(lapply(seq_len(SMF3DResultCount), function(resi)
{
frag <- SMF3DResult[[resi]]
fform <- simplifyDAFormula(frag[["SumFormula"]])
return(data.table(
mz = frag[["m_over_z"]],
ion_formula = fform,
error = frag[["Error"]],
mSigma = frag[["Sigma"]] * 1000,
dbe = frag[["RingsAndDoubleBonds"]],
score = frag[["Score"]]))
}))
dt[, fragInfo := list(fi)]
dt <- addMiscFormulaInfo(dt, adduct)
dt <- setFormColOrder(dt)
ftable3DFragCount <- ftable3DFragCount + 1
ftable3DFrag[[ftable3DFragCount]] <- dt
}
if (ftable3DFragCount > 0)
{
ftable3DCount <- ftable3DCount + 1
ftable3D[[ftable3DCount]] <- rbindlist(ftable3DFrag[1:ftable3DFragCount])
}
}
}
flist <- list()
if (ftableCount > 0)
flist <- rbindlist(ftable[1:ftableCount])
if (ftable3DCount > 0)
{
MSMSFlist <- rbindlist(ftable3D[1:ftable3DCount])
if (ftableCount > 0)
flist <- flist[!neutral_formula %in% MSMSFlist[["neutral_formula"]]]
flist <- rbind(flist, MSMSFlist, fill = TRUE)
}
saveCacheData("formulasBruker", flist, hash, cacheDB)
}
if (!is.null(flist) && length(flist) > 0)
{
if (is.null(fTable[[ana]]))
fTable[[ana]] <- list()
fTable[[ana]][[grp]] <- flist
}
if ((grpi %% 5) == 0)
setTxtProgressBar(prog, grpi)
}
setTxtProgressBar(prog, gCount)
close(prog)
closeSaveDAFile(DA, DAAnaInd, close, save)
ngrp <- length(fTable[[ana]])
printf("Loaded %d formulas for %d features (%.2f%%).\n", countUniqueFormulas(fTable[[ana]]),
ngrp, if (gCount == 0) 0 else ngrp * 100 / gCount)
}
if (is.null(cachedSet))
saveCacheSet("formulasBruker", formHashes[1:formHashCount], setHash, cacheDB)
fTable <- pruneList(sapply(fTable, function(ft) ft[sapply(ft, nrow) > 0], simplify = FALSE), TRUE)
if (length(fTable) > 0)
{
groupFormulas <- generateGroupFormulasByConsensus(fTable, lapply(ftind, function(x) sum(x > 0)),
featThreshold, featThresholdAnn, gNames)
groupFormulas <- setFormulaPLID(groupFormulas, MSPeakLists, absAlignMzDev)
}
else
groupFormulas <- list()
return(formulas(groupAnnotations = groupFormulas, featureFormulas = fTable, algorithm = "bruker"))
})
#' @template featAnnSets-gen_args
#' @rdname generateFormulasDA
#' @export
setMethod("generateFormulasDA", "featureGroupsSet", function(fGroups, MSPeakLists, precursorMzSearchWindow = 0.002,
MSMode = "both", adduct = NULL, ..., setThreshold = 0,
setThresholdAnn = 0, setAvgSpecificScores = FALSE)
{
generateFormulasSet(fGroups, MSPeakLists, adduct, generateFormulasDA,
precursorMzSearchWindow = precursorMzSearchWindow, MSMode = MSMode, ...,
setThreshold = setThreshold, setThresholdAnn = setThresholdAnn,
setAvgSpecificScores = setAvgSpecificScores)
})
# nocov end
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.