#' @include main.R
#' @include compounds.R
#' @include utils-sirius.R
#' @include feature_groups-set.R
NULL
#' Compounds class for SIRIUS results.
#'
#' This class is derived from \code{\link{compounds}} and contains additional specific \command{SIRIUS} data.
#'
#' Objects from this class are generated by \code{\link{generateCompoundsSIRIUS}}
#'
#' @slot fingerprints A \code{list} with for each feature group result a \code{data.table} containing fingerprints
#' obtained with \command{CSI:FingerID}.
#'
#' @templateVar class compoundsSIRIUS
#' @template class-hierarchy
#'
#' @seealso \code{\link{compounds}} and \code{\link{generateCompoundsSIRIUS}}
#'
#' @inherit generateCompoundsSIRIUS references
#'
#' @export
compoundsSIRIUS <- setClass("compoundsSIRIUS", slots = c(fingerprints = "list"),
contains = "compounds")
processSIRIUSCompounds <- function(msFName, outPath, MSMS, database, adduct, topMost)
{
resultPath <- getSiriusResultPath(outPath, msFName)
results <- scRanges <- fingerprints <- data.table()
if (length(resultPath) != 0) # zero for no results
{
summary <- file.path(resultPath, "structure_candidates.tsv")
summaryForms <- file.path(resultPath, "formula_candidates.tsv")
if (file.exists(summary) && file.exists(summaryForms))
{
results <- fread(summary)
results <- unifySirNames(results)
resultsForm <- fread(summaryForms)
# NOTE: so far SIRIUS only has one score
if (nrow(results) > 0)
scRanges <- list(score = range(results$score))
# manually sort results: SIRIUS seems to sometimes randomly order results with equal scores
# NOTE: do this before topMost filter step below to ensure proper filtering
setorderv(results, c("score", "identifier"), order = c(-1, 1))
if (!is.null(topMost))
{
if (nrow(results) > topMost)
results <- results[seq_len(topMost)] # results should already be sorted on score
}
# identifiers to characters, as they sometimes are and sometimes aren't depending on if multiple are present
results[, identifier := as.character(identifier)]
# NOTE: fragment info is based on SIRIUS results, ie from formula
# prediction and not by compounds! Hence, results are the same for
# candidates with the same formula.
fragFiles <- patRoon:::getAndPrepareSIRIUSResFiles(resultPath, "spectra", "tsv")
for (ff in fragFiles)
{
fragInfo <- fread(ff)
fragInfo[, ionization := gsub(" ", "", ionization)]
fragInfo[, PLID := sapply(mz, function(omz) MSMS[which.min(abs(omz - mz))]$ID)]
# it is quite a journey to see find the relevant fragment files, especially with non-standard adducts...
# 1. figure out, from the file name, to which formula candidate it belongs to
# 2. but, the formula that is part of the file name is some strange half-neutralized form
# 3. so dig in the formula summary file to which ionized formula this should belong to by looking at the 'precursorFormula' column
fragFileFormName <- getFormulaFromSIRIUSResFile(ff, "tsv")
fragFileFormNameNeutral <- resultsForm[match(fragFileFormName, precursorFormula)]$molecularFormula
wh <- which(results$neutral_formula %in% fragFileFormNameNeutral)
# Remove zero intensity peaks that aren't actually present in the peak list. These seem to be precursor
# peaks, taking into account the various adduct forms of SIRIUS...
fragInfo <- fragInfo[intensity != 0]
fragInfo[, c("rel.intensity", "exactmass", "intensity") := NULL]
if (length(wh) > 0)
{
# sirius neutralizes fragments, make them ion again
if (!is.null(fragInfo[["implicitAdduct"]]))
ionImpAdducts <- ifelse(nzchar(fragInfo$implicitAdduct),
mapply(paste0("+", fragInfo$implicitAdduct, "]"), fragInfo$ionization,
FUN = sub, MoreArgs = list(pattern = "\\]")),
fragInfo$ionization)
else
ionImpAdducts <- fragInfo$ionization
setnames(fragInfo, "formula", "formula_SIR")
fragInfo[, ion_formula := mapply(formula_SIR, ionImpAdducts, FUN = calculateIonFormula)]
ionform <- calculateIonFormula(results$neutral_formula[wh[1]], adduct)
fragInfo[, neutral_loss := sapply(ion_formula, subtractFormula, formula1 = ionform)]
set(results, wh, "fragInfo", list(list(fragInfo)))
set(results, wh, "explainedPeaks", nrow(fragInfo))
}
}
if (is.null(results[["fragInfo"]]))
{
# warning(sprintf("no fragment info for %s", cmd$gName))
results[, fragInfo := list(rep(list(data.table(mz = numeric(0), ion_formula = character(0),
neutral_loss = character(0), score = numeric(0),
PLID = numeric(0))),
nrow(results)))]
results[, explainedPeaks := 0]
}
results[, database := database][]
}
if (nrow(results) > 0)
fingerprints <- loadSIRIUSFingerprints(resultPath, results$neutral_formula, adduct)
}
ret <- list(comptab = results, scRanges = scRanges, fingerprints = fingerprints)
return(ret)
}
#' @rdname compounds-class
#' @export
setMethod("delete", "compoundsSIRIUS", function(obj, i = NULL, j = NULL, ...)
{
obj <- callNextMethod()
return(syncSIRFPs(obj))
})
#' @rdname pred-quant
#' @export
setMethod("predictRespFactors", "compoundsSIRIUS", function(obj, fGroups, calibrants, eluent, organicModifier, pHAq,
concUnit = "ugL", calibConcUnit = concUnit, type = "FP")
{
checkPackage("MS2Quant", "kruvelab/MS2Quant")
ac <- checkmate::makeAssertCollection()
checkmate::assertClass(fGroups, "featureGroups", add = ac)
assertQuantEluent(eluent, fGroups, add = ac)
checkmate::assertChoice(organicModifier, c("MeOH", "MeCN"), add = ac)
checkmate::assertNumber(pHAq, finite = TRUE, add = ac)
aapply(assertConcUnit, . ~ concUnit + calibConcUnit, fixed = list(add = ac))
checkmate::assertChoice(type, c("FP", "SMILES", "both"), add = ac)
checkmate::reportAssertions(ac)
if (type == "SMILES" || type == "both")
obj <- callNextMethod(obj, fGroups = fGroups, calibrants = calibrants, eluent = eluent,
organicModifier = organicModifier, pHAq = pHAq, concUnit = concUnit,
calibConcUnit = calibConcUnit, updateScore = FALSE, scoreWeight = 1)
calibrants <- assertAndPrepareQuantCalib(calibrants, calibConcUnit)
if (length(obj) == 0)
return(obj)
if (type == "FP" || type == "both")
{
printf("Predicting response factors from fingerprints with MS2Quant for %d candidates...\n", length(obj))
res <- predictRespFactorsSIRFPs(obj, groupInfo(fGroups), calibrants, eluent, organicModifier, pHAq, concUnit)
obj@MS2QuantMeta <- res$MD
obj@groupAnnotations <- Map(groupNames(obj), annotations(obj), f = function(grp, ann)
{
ann <- copy(ann)
if (nrow(res$RFs) == 0)
{
ann[, RF_SIRFP := NA_real_]
return(ann)
}
if (!is.null(ann[["RF_SIRFP"]]))
ann[, RF_SIRFP := NULL] # clearout for merge below
return(merge(ann, res$RFs[group == grp, c("neutral_formula", "RF_SIRFP"), with = FALSE],
by = "neutral_formula", sort = FALSE, all.x = TRUE))
})
obj <- addCompoundScore(obj, "RF_SIRFP", FALSE, 1)
}
return(obj)
})
#' @rdname pred-tox
#' @export
setMethod("predictTox", "compoundsSIRIUS", function(obj, type = "FP", LC50Mode = "static", concUnit = "ugL")
{
checkPackage("MS2Tox", "kruvelab/MS2Tox")
ac <- checkmate::makeAssertCollection()
checkmate::assertChoice(type, c("FP", "SMILES", "both"), add = ac)
checkmate::assertChoice(LC50Mode, c("static", "flow"), add = ac)
assertConcUnit(concUnit, add = ac)
checkmate::reportAssertions(ac)
if (type == "SMILES" || type == "both")
obj <- callNextMethod(obj, LC50Mode = LC50Mode, concUnit = concUnit, updateScore = FALSE, scoreWeight = 1)
if (length(obj) == 0)
return(obj)
if (type == "FP" || type == "both")
{
printf("Predicting LC50 values from fingerprints with MS2Tox for %d candidates...\n", length(obj))
LC50Tab <- predictLC50SIRFPs(obj, LC50Mode, concUnit = concUnit)
obj@groupAnnotations <- Map(groupNames(obj), annotations(obj), f = function(grp, ann)
{
ann <- copy(ann)
if (nrow(LC50Tab) == 0)
{
ann[, LC50_SIRFP := NA_real_]
return(ann)
}
if (!is.null(ann[["LC50_SIRFP"]]))
ann[, LC50_SIRFP := NULL] # clearout for merge below
return(merge(ann, LC50Tab[group == grp, c("neutral_formula", "LC50_SIRFP"), with = FALSE],
by = "neutral_formula", sort = FALSE, all.x = TRUE))
})
obj <- addCompoundScore(obj, "LC50_SIRFP", FALSE, 1)
}
return(obj)
})
#' Compound annotation with SIRIUS
#'
#' Uses \href{https://bio.informatik.uni-jena.de/software/sirius/}{SIRIUS} in combination with
#' \href{https://www.csi-fingerid.uni-jena.de/}{CSI:FingerID} for compound annotation.
#'
#' @templateVar algo SIRIUS
#' @templateVar do generate compound candidates
#' @templateVar generic generateCompounds
#' @templateVar algoParam sirius
#' @template algo_generator
#'
#' @details Similar to \code{\link{generateFormulasSIRIUS}}, candidate formulae are generated with SIRIUS. These results
#' are then feed to CSI:FingerID to acquire candidate structures. This method requires the availability of MS/MS data,
#' and feature groups without it will be ignored.
#'
#' @param fingerIDDatabase Database specifically used for \command{CSI:FingerID}. If \code{NULL}, the value of the
#' \code{formulaDatabase} parameter will be used or \code{"pubchem"} when that is also \code{NULL}. Sets the
#' \option{--fingerid-db} option.
#' @param topMostFormulas Do not return more than this number of candidate formulae. Note that only compounds for these
#' formulae will be searched. Sets the \option{--candidates} commandline option.
#' @param verbose If \code{TRUE} then more output is shown in the terminal.
#'
#' @templateVar ident TRUE
#' @template sirius-args
#' @template sirius_form-args
#' @template adduct-arg
#' @template comp_algo-args
#'
#' @inheritParams generateCompounds
#'
#' @return A \code{\link{compoundsSIRIUS}} object.
#'
#' @templateVar what \code{generateCompoundsSIRIUS}
#' @template uses-multiProc
#'
#' @templateVar what generateCompoundsSIRIUS
#' @template main-rd-method
#' @export
setMethod("generateCompoundsSIRIUS", "featureGroups", function(fGroups, MSPeakLists, relMzDev = 5, adduct = NULL,
projectPath = NULL, elements = "CHNOP",
profile = "qtof", formulaDatabase = NULL,
fingerIDDatabase = "pubchem", noise = NULL,
cores = NULL, topMost = 100, topMostFormulas = 5,
token = NULL, extraOptsGeneral = NULL,
extraOptsFormula = NULL, verbose = TRUE,
splitBatches = FALSE, dryRun = FALSE)
{
ac <- checkmate::makeAssertCollection()
checkmate::assertClass(fGroups, "featureGroups", add = ac)
checkmate::assertClass(MSPeakLists, "MSPeakLists", add = ac)
checkmate::assertNumber(relMzDev, lower = 0, finite = TRUE, add = ac)
checkmate::assertString(projectPath, null.ok = TRUE, add = ac)
aapply(checkmate::assertString, . ~ elements + profile + fingerIDDatabase, fixed = list(add = ac))
aapply(checkmate::assertString, . ~ formulaDatabase + fingerIDDatabase, null.ok = TRUE, fixed = list(add = ac))
checkmate::assertNumber(noise, lower = 0, finite = TRUE, null.ok = TRUE, add = ac)
checkmate::assertCount(cores, positive = TRUE, null.ok = TRUE, add = ac)
aapply(checkmate::assertCount, . ~ topMost + topMostFormulas, positive = TRUE, fixed = list(add = ac))
checkmate::assertString(token, null.ok = TRUE, add = ac)
aapply(checkmate::assertCharacter, . ~ extraOptsGeneral + extraOptsFormula, null.ok = TRUE, fixed = list(add = ac))
checkmate::assertFlag(verbose, add = ac)
checkmate::assertFlag(splitBatches, add = ac)
checkmate::assertFlag(dryRun, add = ac)
checkmate::reportAssertions(ac)
if (length(fGroups) == 0)
return(compoundsSIRIUS(algorithm = "sirius"))
if (!is.null(projectPath))
projectPath <- normalizePath(projectPath, mustWork = FALSE)
adduct <- checkAndToAdduct(adduct, fGroups)
if (is.null(fingerIDDatabase))
fingerIDDatabase <- if (!is.null(formulaDatabase)) formulaDatabase else "pubchem"
gCount <- length(fGroups)
printf("Processing %d feature groups with SIRIUS+CSI:FingerID...\n", gCount)
results <- doSIRIUS(fGroups, MSPeakLists, FALSE, profile, adduct, relMzDev, elements,
formulaDatabase, noise, cores, "structure", fingerIDDatabase, topMostFormulas, projectPath,
token, extraOptsGeneral, extraOptsFormula, verbose, "compoundsSIRIUS",
patRoon:::processSIRIUSCompounds, list(database = fingerIDDatabase, topMost = topMost),
splitBatches, dryRun)
# prune empty/NULL results
if (length(results) > 0)
results <- results[sapply(results, function(r) !is.null(r$comptab) && nrow(r$comptab) > 0, USE.NAMES = FALSE)]
if (verbose)
{
printf("-------\n")
ngrp <- length(results)
printf("Assigned %d compounds to %d feature groups (%.2f%%).\n", sum(unlist(lapply(results, function(r) nrow(r$comptab)))),
ngrp, if (gCount == 0) 0 else ngrp * 100 / gCount)
}
return(compoundsSIRIUS(groupAnnotations = lapply(results, "[[", "comptab"), scoreTypes = "score",
scoreRanges = lapply(results, "[[", "scRanges"),
fingerprints = pruneList(lapply(results, "[[", "fingerprints"), checkZeroRows = TRUE),
algorithm = "sirius"))
})
#' @template featAnnSets-gen_args
#' @rdname generateCompoundsSIRIUS
#' @export
setMethod("generateCompoundsSIRIUS", "featureGroupsSet", function(fGroups, MSPeakLists, relMzDev = 5, adduct = NULL,
projectPath = NULL, ..., setThreshold = 0,
setThresholdAnn = 0, setAvgSpecificScores = FALSE)
{
checkmate::assertCharacter(projectPath, len = length(sets(fGroups)), null.ok = TRUE)
sa <- if (!is.null(projectPath)) lapply(projectPath, function(p) list(projectPath = p)) else list()
generateCompoundsSet(fGroups, MSPeakLists, adduct, generateCompoundsSIRIUS, relMzDev = relMzDev, ...,
setThreshold = setThreshold, setThresholdAnn = setThresholdAnn,
setAvgSpecificScores = setAvgSpecificScores, setArgs = sa)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.