#' @include main.R
#' @include mspeaklists.R
#' @include formulas.R
#' @include utils-sirius.R
NULL
#' Formulas class for SIRIUS results.
#'
#' This class is derived from \code{\link{formulas}} and contains additional specific \command{SIRIUS} data.
#'
#' Objects from this class are generated by \code{\link{generateFormulasSIRIUS}}
#'
#' @slot fingerprints A \code{list} with for each feature group result a \code{data.table} containing fingerprints
#' obtained with \command{CSI:FingerID}. Will be empty unless the \code{getFingerprints} argument to
#' \code{\link{generateFormulasSIRIUS}} was set to \code{TRUE}.
#' @slot MS2QuantMeta Metadata from \pkg{MS2Quant} filled in by \code{predictRespFactors}.
#'
#' @templateVar class formulasSIRIUS
#' @template class-hierarchy
#'
#' @seealso \code{\link{formulas}} and \code{\link{generateFormulasSIRIUS}}
#'
#' @inherit generateFormulasSIRIUS references
#'
#' @export
formulasSIRIUS <- setClass("formulasSIRIUS", slots = c(fingerprints = "list", MS2QuantMeta = "list"),
contains = "formulas")
# callback for executeMultiProcess()
processSIRIUSFormulas <- function(msFName, outPath, adduct, ...)
{
noResult <- forms <- data.table::data.table(neutral_formula = character(), ion_formula = character(),
neutralMass = numeric(), ion_formula_mz = numeric(),
adduct = character(), score = numeric(), MSMSScore = numeric(),
isoScore = numeric(), explainedPeaks = integer(),
explainedIntensity = numeric(), fragInfo = list())
fingerprints <- data.table()
resultPath <- patRoon:::getSiriusResultPath(outPath, msFName)
summary <- file.path(resultPath, "formula_candidates.tsv")
if (length(summary) == 0 || !file.exists(summary))
forms <- noResult
else
{
forms <- fread(summary)
fragFiles <- patRoon:::getAndPrepareSIRIUSResFiles(resultPath, "spectra", "tsv")
if (nrow(forms) == 0 || length(fragFiles) == 0)
forms <- noResult
else
{
data.table::setnames(forms,
c("molecularFormula", "precursorFormula", "SiriusScore",
"TreeScore", "IsotopeScore", "numExplainedPeaks", "medianMassErrorFragmentPeaks(ppm)",
"medianAbsoluteMassErrorFragmentPeaks(ppm)", "massErrorPrecursor(ppm)"),
c("neutral_formula", "neutral_adduct_formula", "score", "MSMSScore", "isoScore",
"explainedPeaks", "error_frag_median", "error_frag_median_abs", "error"))
ionImpAdductsCached <- patRoon:::makeEmptyListNamed(list())
fragInfoList <- lapply(fragFiles, function(ff)
{
fragInfo <- data.table::fread(ff)
data.table::setnames(fragInfo,
c("exactmass", "formula"),
c("ion_formula_mz", "ion_formula_SIR"))
fragInfo[, rel.intensity := NULL]
fragInfo[, ionization := gsub(" ", "", ionization, fixed = TRUE)]
# sirius neutralizes fragments, make them ion again
if (!is.null(fragInfo[["implicitAdduct"]]))
ionImpAdducts <- ifelse(nzchar(fragInfo$implicitAdduct),
mapply(paste0("+", fragInfo$implicitAdduct, "]"), fragInfo$ionization,
FUN = sub, MoreArgs = list(pattern = "\\]")),
fragInfo$ionization)
else
ionImpAdducts <- fragInfo$ionization
notCached <- setdiff(ionImpAdducts, names(ionImpAdductsCached))
if (length(notCached) > 0)
ionImpAdductsCached <<- c(ionImpAdductsCached, sapply(notCached, patRoon:::as.adduct,
format = "sirius", simplify = FALSE))
ionImpAdducts <- ionImpAdductsCached[ionImpAdducts]
fragInfo[, ion_formula := mapply(ion_formula_SIR, ionImpAdducts, FUN = patRoon:::calculateIonFormula)]
if (!is.null(fragInfo[["implicitAdduct"]]))
{
# 'correct' formula masses: SIRIUS subtract implicit adduct from it
fragInfo[nzchar(implicitAdduct), ion_formula_mz := ion_formula_mz +
sapply(implicitAdduct, patRoon:::getFormulaMass)]
}
return(fragInfo)
})
names(fragInfoList) <- sapply(fragFiles, patRoon:::getFormulaFromSIRIUSResFile, "tsv")
# initialize all with empty fragInfos
if (length(fragInfoList) > 0)
forms[match(names(fragInfoList), neutral_adduct_formula), fragInfo := fragInfoList]
else
forms[, fragInfo := list()]
forms <- patRoon:::addMiscFormulaInfo(forms, adduct)
forms[, rank := NULL]
forms[, fragInfo := Map(ion_formula, fragInfo, f = function(form, fi)
{
# Precursor is always present in MS/MS spectrum: it's added by SIRIUS if necessary (with zero intensity).
# --> Remove it
fi <- fi[intensity != 0]
fi[, intensity := NULL]
return(fi)
})]
forms[, explainedPeaks := sapply(fragInfo, nrow)] # update
# set nice column order
data.table::setcolorder(forms, c("neutral_formula", "ion_formula", "neutral_adduct_formula", "neutralMass",
"ion_formula_mz", "error", "error_frag_median", "error_frag_median_abs",
"adduct", "score", "MSMSScore", "isoScore", "explainedPeaks",
"explainedIntensity"))
forms <- patRoon:::rankFormulaTable(forms)
if (nrow(forms) > 0)
fingerprints <- loadSIRIUSFingerprints(resultPath, forms$neutral_formula, adduct)
}
}
return(list(formtab = forms, fingerprints = fingerprints))
}
#' @rdname formulas-class
#' @export
setMethod("delete", "formulasSIRIUS", function(obj, i = NULL, j = NULL, ...)
{
obj <- callNextMethod()
return(syncSIRFPs(obj))
})
#' @rdname pred-quant
#' @export
setMethod("predictRespFactors", "formulasSIRIUS", function(obj, fGroups, calibrants, eluent, organicModifier, pHAq,
concUnit = "ugL", calibConcUnit = concUnit)
{
checkPackage("MS2Quant", "kruvelab/MS2Quant")
ac <- checkmate::makeAssertCollection()
checkmate::assertClass(fGroups, "featureGroups", add = ac)
assertQuantEluent(eluent, fGroups, add = ac)
checkmate::assertChoice(organicModifier, c("MeOH", "MeCN"), add = ac)
checkmate::assertNumber(pHAq, finite = TRUE, add = ac)
aapply(assertConcUnit, . ~ concUnit + calibConcUnit, fixed = list(add = ac))
checkmate::reportAssertions(ac)
calibrants <- assertAndPrepareQuantCalib(calibrants, calibConcUnit)
if (length(obj) == 0)
return(obj)
printf("Predicting response factors from fingerprints with MS2Quant for %d candidates...\n", length(obj))
res <- predictRespFactorsSIRFPs(obj, groupInfo(fGroups), calibrants, eluent, organicModifier, pHAq, concUnit)
obj@MS2QuantMeta <- res$MD
obj@groupAnnotations <- Map(groupNames(obj), annotations(obj), f = function(grp, ann)
{
ann <- copy(ann)
if (nrow(res$RFs) == 0)
{
ann[, RF_SIRFP := NA_real_]
return(ann)
}
if (!is.null(ann[["RF_SIRFP"]]))
ann[, RF_SIRFP := NULL] # clearout for merge below
return(merge(ann, res$RFs[group == grp, c("neutral_formula", "RF_SIRFP"), with = FALSE], by = "neutral_formula",
sort = FALSE, all.x = TRUE))
})
# UNDONE: do something equivalent for formula rankings?
# obj <- addCompoundScore(obj, "RF_SIRFP", FALSE, 1)
return(obj)
})
#' @rdname pred-tox
#' @export
setMethod("predictTox", "formulasSIRIUS", function(obj, LC50Mode = "static", concUnit = "ugL")
{
checkPackage("MS2Tox", "kruvelab/MS2Tox")
checkmate::assertChoice(LC50Mode, c("static", "flow"))
assertConcUnit(concUnit)
if (length(obj) == 0)
return(obj)
printf("Predicting LC50 values from fingerprints with MS2Tox for %d candidates...\n", length(obj))
LC50Tab <- predictLC50SIRFPs(obj, LC50Mode, concUnit)
obj@groupAnnotations <- Map(groupNames(obj), annotations(obj), f = function(grp, ann)
{
ann <- copy(ann)
if (nrow(LC50Tab) == 0)
{
ann[, LC50_SIRFP := NA_real_]
return(ann)
}
if (!is.null(ann[["LC50_SIRFP"]]))
ann[, LC50_SIRFP := NULL] # clearout for merge below
return(merge(ann, LC50Tab[group == grp, c("neutral_formula", "LC50_SIRFP"), with = FALSE],
by = "neutral_formula", sort = FALSE, all.x = TRUE))
})
# UNDONE: do something equivalent for formula rankings?
# obj <- addCompoundScore(obj, "LC50_SIRFP", FALSE, 1)
return(obj)
})
#' Generate formula with SIRIUS
#'
#' Uses \href{https://bio.informatik.uni-jena.de/software/sirius/}{SIRIUS} to generate chemical formulae candidates.
#'
#' @templateVar algo SIRIUS
#' @templateVar do generate formula candidates
#' @templateVar generic generateFormulas
#' @templateVar algoParam sirius
#' @template algo_generator
#'
#' @details Similarity of measured and theoretical isotopic patterns will be used for scoring candidates. Note that
#' \command{SIRIUS} requires availability of MS/MS data.
#'
#' @param getFingerprints Set to \code{TRUE} to load \command{SIRIUS-CSI:FingerID} MS/MS fingerprints for the formula
#' candidates. This is currently only supported with \code{calculateFeatures=FALSE} to avoid heavy server traffic. The
#' fingerprints are stored in the \code{fingerprints} slot of the returned \code{\link{formulasSIRIUS}} object, and
#' are used by the \code{\link{predictTox}} and \code{\link{predictRespFactors}} methods.
#' @param verbose If \code{TRUE} then more output is shown in the terminal.
#'
#' @template sirius_form-args
#' @templateVar ident FALSE
#' @template sirius-args
#' @template adduct-arg
#' @templateVar algo sirius
#' @template form_algo-args
#'
#' @inheritParams generateFormulas
#'
#' @return A \code{\link{formulasSIRIUS}} object.
#'
#' @templateVar what \code{generateFormulasSIRIUS}
#' @template uses-multiProc
#'
#' @templateVar what generateFormulasSIRIUS
#' @template main-rd-method
#' @export
setMethod("generateFormulasSIRIUS", "featureGroups", function(fGroups, MSPeakLists, relMzDev = 5,
adduct = NULL, projectPath = NULL, elements = "CHNOP",
profile = "qtof", database = NULL, noise = NULL,
cores = NULL, getFingerprints = FALSE, topMost = 100,
token = NULL, extraOptsGeneral = NULL,
extraOptsFormula = NULL, calculateFeatures = TRUE,
featThreshold = 0, featThresholdAnn = 0.75,
absAlignMzDev = 0.002, verbose = TRUE,
splitBatches = FALSE, dryRun = FALSE)
{
ac <- checkmate::makeAssertCollection()
checkmate::assertClass(MSPeakLists, "MSPeakLists", add = ac)
checkmate::assertNumber(relMzDev, lower = 0, finite = TRUE, add = ac)
checkmate::assertString(projectPath, null.ok = TRUE, add = ac)
aapply(checkmate::assertString, . ~ elements + profile, fixed = list(add = ac))
checkmate::assertString(database, null.ok = TRUE, add = ac)
checkmate::assertNumber(noise, lower = 0, finite = TRUE, null.ok = TRUE, add = ac)
checkmate::assertCount(cores, positive = TRUE, null.ok = TRUE, add = ac)
checkmate::assertCount(topMost, positive = TRUE, add = ac)
checkmate::assertString(token, null.ok = TRUE, add = ac)
aapply(checkmate::assertCharacter, . ~ extraOptsGeneral + extraOptsFormula, null.ok = TRUE, fixed = list(add = ac))
checkmate::assertFlag(getFingerprints, add = ac)
checkmate::assertFlag(calculateFeatures, add = ac)
aapply(checkmate::assertNumber, . ~ featThreshold + featThresholdAnn + absAlignMzDev, lower = 0, upper = 1,
fixed = list(add = ac))
checkmate::assertFlag(verbose, add = ac)
checkmate::assertFlag(splitBatches, add = ac)
checkmate::assertFlag(dryRun, add = ac)
checkmate::reportAssertions(ac)
if (getFingerprints && calculateFeatures)
stop("Fingerprints can currently only be loaded if calculateFeatures is set to FALSE", call. = FALSE)
if (length(fGroups) == 0)
return(formulas(algorithm = "sirius"))
if (!is.null(projectPath))
projectPath <- normalizePath(projectPath, mustWork = FALSE)
adduct <- checkAndToAdduct(adduct, fGroups)
gNames <- names(fGroups)
gCount <- length(fGroups)
printf("Processing %d feature groups with SIRIUS...\n---\n", gCount)
formTable <- doSIRIUS(fGroups, MSPeakLists, calculateFeatures, profile, adduct, relMzDev, elements,
database, noise, cores, if (getFingerprints) "fingerprint" else "none", NULL, topMost,
projectPath, token, extraOptsGeneral, extraOptsFormula, verbose, "formulasSIRIUS",
patRoon:::processSIRIUSFormulas, NULL, splitBatches, dryRun)
groupFormulas <- fingerprints <- list()
if (length(formTable) > 0)
{
# omit empty result tables and extract formulae/fingerprints
hasResults <- function(r) !is.null(r[["formtab"]]) && nrow(r$formtab) > 0
if (calculateFeatures)
{
formTable <- lapply(formTable, function(fta)
{
fta <- fta[sapply(fta, hasResults)]
return(lapply(fta, "[[", "formtab"))
})
groupFormulas <- generateGroupFormulasByConsensus(formTable,
lapply(groupFeatIndex(fGroups), function(x) sum(x > 0)),
featThreshold, featThresholdAnn, gNames)
}
else
{
formTable <- formTable[sapply(formTable, hasResults)]
fingerprints <- pruneList(lapply(formTable, "[[", "fingerprints"), checkZeroRows = TRUE)
groupFormulas <- lapply(formTable, "[[", "formtab")
formTable <- list()
}
}
groupFormulas <- setFormulaPLID(groupFormulas, MSPeakLists, absAlignMzDev)
if (verbose)
{
printf("-------\n")
if (calculateFeatures && length(formTable) > 0)
{
fCounts <- sapply(formTable, countUniqueFormulas)
fTotCount <- sum(fCounts)
printf("Formula statistics:\n")
printf("%s: %d (%.1f%%)\n", names(formTable), fCounts, if (fTotCount == 0) 0 else fCounts * 100 / fTotCount)
printf("Total: %d\n", fTotCount)
}
ngrp <- length(groupFormulas)
printf("Assigned %d unique formulas to %d feature groups (%.2f%% coverage).\n", countUniqueFormulas(groupFormulas),
ngrp, if (gCount == 0) 0 else ngrp * 100 / gCount)
}
return(formulasSIRIUS(groupAnnotations = groupFormulas, featureFormulas = formTable, fingerprints = fingerprints,
algorithm = "sirius"))
})
#' @template featAnnSets-gen_args
#' @rdname generateFormulasSIRIUS
#' @export
setMethod("generateFormulasSIRIUS", "featureGroupsSet", function(fGroups, MSPeakLists, relMzDev = 5, adduct = NULL,
projectPath = NULL, ..., setThreshold = 0,
setThresholdAnn = 0, setAvgSpecificScores = FALSE)
{
checkmate::assertCharacter(projectPath, len = length(sets(fGroups)), null.ok = TRUE)
sa <- if (!is.null(projectPath)) lapply(projectPath, function(p) list(projectPath = p)) else list()
generateFormulasSet(fGroups, MSPeakLists, adduct, generateFormulasSIRIUS, relMzDev = relMzDev, ...,
setThreshold = setThreshold, setThresholdAnn = setThresholdAnn,
setAvgSpecificScores = setAvgSpecificScores, setArgs = sa)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.