#' @include main.R
#' @include feature_annotations.R
NULL
#' Formula annotations class
#'
#' Contains data of generated chemical formulae for given feature groups.
#'
#' \code{formulas} objects are obtained with \code{\link{generateFormulas}}. This class is derived from the
#' \code{\link{featureAnnotations}} class, please see its documentation for more methods and other details.
#'
#' @slot featureFormulas A \code{list} with all generated formulae for each analysis/feature group. Use the
#' \code{annotations} method for access.
#'
#' @param obj,x,object The \code{formulas} object.
#' @param \dots For \code{plotSpectrum}: Further arguments passed to \code{\link[graphics]{plot}}.
#'
#' For \code{delete}: passed to the function specified as \code{j}.
#'
#' For \code{consensus}: Any further (and unique) \code{formulas} objects.
#'
#' \setsPassedArgs1{formulas}
#' @param index The candidate index (row). For \code{plotSpectrum} two indices can be specified to compare spectra. In
#' this case \code{groupName} and \code{analysis} (if not \code{NULL}) should specify values for the spectra to
#' compare.
#' @param i,j For \code{[[}: If both \code{i} and \code{j} are specified then \code{i} specifies the analysis and
#' \code{j} the feature group of the feature for which annotations should be returned. Otherwise \code{i} specifies
#' the feature group for which group annotations should be returned. \code{i}/\code{j} can be specified as
#' \code{integer} index or as a \code{character} name.
#'
#' Otherwise passed to the \code{\link[=filter,featureAnnotations-method]{featureAnnotations}} method.
#'
#' @templateVar normParam normalizeScores
#' @templateVar excludeParam excludeNormScores
#' @template norm-args
#'
#' @seealso The \code{\link{featureAnnotations}} base class for more relevant methods and
#' \code{\link{generateFormulas}}.
#'
#' @templateVar class formulas
#' @template class-hierarchy
#'
#' @export
formulas <- setClass("formulas", slots = c(featureFormulas = "list"),
contains = "featureAnnotations")
setMethod("initialize", "formulas", function(.Object, ...)
{
.Object <- callNextMethod(.Object, ...)
# NOTE/UNDONE: change this whenever we want formula scoring normalization
# based on _all_ results generated by the algorithm (ie like compounds)
if (length(.Object@groupAnnotations) > 0)
{
.Object@scoreRanges <- sapply(.Object@groupAnnotations, calculateFormScoreRanges, mergedConsensusNames(.Object),
simplify = FALSE)
.Object@scoreTypes <- unique(unlist(lapply(.Object@scoreRanges, names)))
}
.Object@scoreRanges <- makeEmptyListNamed(.Object@scoreRanges)
.Object@scoreTypes <- makeEmptyListNamed(.Object@scoreTypes)
.Object@featureFormulas <- makeEmptyListNamed(.Object@featureFormulas)
.Object@groupAnnotations <- lapply(.Object@groupAnnotations, function(ann) ann[, UID := neutral_formula])
return(.Object)
})
#' @rdname formulas-class
formulasConsensus <- setClass("formulasConsensus",
slots = c(mergedConsensusNames = "character"),
contains = "formulas")
setMethod("mergedConsensusNames", "formulasConsensus", function(obj) obj@mergedConsensusNames)
#' @describeIn formulas Accessor method to obtain generated formulae.
#'
#' @param features If \code{TRUE} returns formula data for features, otherwise
#' for feature groups.
#'
#' @return \code{annotations} returns a \code{list} containing for each feature
#' group (or feature if \code{features=TRUE}) a \code{\link{data.table}}
#' with an overview of all generated formulae and other data such as candidate
#' scoring and MS/MS fragments.
#'
#' @export
setMethod("annotations", "formulas", function(obj, features = FALSE) if (features) obj@featureFormulas else callNextMethod())
#' @templateVar class formulas
#' @templateVar what analyses
#' @template strmethod
#' @export
setMethod("analyses", "formulas", function(obj) names(obj@featureFormulas))
#' @describeIn formulas Returns default scorings that are excluded from normalization.
#' @export
setMethod("defaultExclNormScores", "formulas", function(obj) character())
#' @describeIn formulas Show summary information for this object.
#' @export
setMethod("show", "formulas", function(object)
{
callNextMethod()
ft <- annotations(object, TRUE)
hasFeatForms <- length(ft) > 0
ftcounts <- if (hasFeatForms) recursiveApplyDT(ft, function(x) length(unique(x$neutral_formula)), sapply) else 0
ma <- mean(sapply(ftcounts, sum))
mft <- mean(sapply(ftcounts, mean))
printf("Formulas assigned to features:\n")
printf(" - Total formula count: %d\n", sum(unlist(ftcounts)))
printf(" - Average formulas per analysis: %.1f\n", ma)
printf(" - Average formulas per feature: %.1f\n", mft)
gft <- annotations(object)
mfg <- if (length(gft) > 0) sapply(gft, function(ft) length(unique(ft$neutral_formula))) else 0
printf("Formulas assigned to feature groups:\n")
printf(" - Total formula count: %d\n", sum(mfg))
printf(" - Average formulas per feature group: %.1f\n", mean(mfg))
})
#' @describeIn formulas Extracts a formula table, either for a feature group or for features in an analysis.
#' @export
setMethod("[[", c("formulas", "ANY", "ANY"), function(x, i, j)
{
assertExtractArg(i)
if (!missing(j))
assertExtractArg(j)
if (!missing(j))
{
# both arguments specified, return feature formula table
if (length(x@featureFormulas) == 0)
stop("This object does not contain formulas for features.")
if (!is.character(i))
i <- analyses(x)[i]
if (!is.character(j))
j <- groupNames(x)[j]
return(x@featureFormulas[[c(i, j)]])
}
return(callNextMethod())
})
#' @rdname formulas-class
#' @export
setMethod("delete", "formulas", function(obj, i = NULL, j = NULL, ...)
{
old <- obj
obj <- callNextMethod()
# sync featureFormulas
# NOTE: these may contain results for other groups than the groupAnnotations --> just remove the removed groups
rmGroups <- setdiff(groupNames(old), groupNames(obj))
if (length(rmGroups) > 0)
{
obj@featureFormulas <- lapply(obj@featureFormulas, function(a) pruneList(a[!names(a) %chin% rmGroups]))
obj@featureFormulas <- pruneList(obj@featureFormulas, TRUE)
}
return(obj)
})
#' @describeIn formulas Generates a table with all candidate formulae for each feature group and other information such
#' as element counts.
#'
#' @param average If set to \code{TRUE} an 'average formula' is generated for each feature group by combining all
#' elements from all candidates and averaging their amounts. This obviously leads to non-existing formulae, however,
#' this data may be useful to deal with multiple candidate formulae per feature group when performing elemental
#' characterization. Setting this to \code{TRUE} disables reporting of most other data.
#' @param fGroups,fragments,countElements,countFragElements,OM Passed to the
#' \code{\link[=as.data.table,featureAnnotations-method]{featureAnnotations}} method.
#'
#' @export
setMethod("as.data.table", "formulas", function(x, fGroups = NULL, fragments = FALSE, countElements = NULL,
countFragElements = NULL, OM = FALSE, normalizeScores = "none",
excludeNormScores = defaultExclNormScores(x), average = FALSE)
{
checkmate::assertFlag(average)
ret <- callNextMethod(x, fGroups = fGroups, fragments = fragments, countElements = countElements,
countFragElements = countFragElements, OM = OM, normalizeScores = normalizeScores,
excludeNormScores = excludeNormScores)
if (average)
{
ret[, formula_avg_count := .N, by = "group"]
avgCols <- getAllMergedConsCols(c("ion_formula", "neutral_formula"), names(ret), mergedConsensusNames(x))
ret[, (avgCols) := lapply(.SD, function(f)
{
f <- f[!is.na(f)]
if (length(f) == 0)
return(NA_character_)
return(averageFormulas(unique(f)))
}), .SDcols = avgCols, by = "group"]
# just keep the essential columns as the rest doesn't make too much sense anymore
keepCols <- c("group", avgCols)
ret <- ret[, keepCols, with = FALSE]
ret <- unique(ret, by = c("group", "neutral_formula"))
# update if needed
ret <- addElementInfoToAnnTable(ret, countElements, countFragElements, OM, TRUE)
}
return(ret[])
})
#' @describeIn formulas Returns an MS/MS peak list annotated with data from a
#' given candidate formula.
#'
#' @param onlyAnnotated Set to \code{TRUE} to filter out any peaks that could
#' not be annotated.
#'
#' @export
setMethod("annotatedPeakList", "formulas", function(obj, index, groupName, analysis = NULL, MSPeakLists,
onlyAnnotated = FALSE)
{
# NOTE: keep args in sync with sets method
ac <- checkmate::makeAssertCollection()
checkmate::assertCount(index, positive = TRUE, add = ac)
assertChoiceSilent(groupName, groupNames(obj), add = ac)
checkmate::assertString(analysis, min.chars = 1, null.ok = TRUE, add = ac)
checkmate::assertClass(MSPeakLists, "MSPeakLists", add = ac)
checkmate::assertFlag(onlyAnnotated, add = ac)
checkmate::reportAssertions(ac)
if (!is.null(analysis))
return(doAnnotatePeakList(MSPeakLists[[analysis, groupName]][["MSMS"]],
annotations(obj, TRUE)[[analysis, groupName]], index, onlyAnnotated))
return(doAnnotatePeakList(MSPeakLists[[groupName]][["MSMS"]], annotations(obj)[[groupName]], index,
onlyAnnotated))
})
#' @describeIn formulas Plots an annotated spectrum for a given candidate formula of a feature or feature group. Two
#' spectra can be compared by specifying a two-sized vector for the \code{index}, \code{groupName} and (if desired)
#' \code{analysis} arguments.
#'
#' @param analysis A \code{character} specifying the analysis (or analyses when comparing spectra) for which the
#' annotated spectrum should be plotted. If \code{NULL} then annotation results for the complete feature group will be
#' plotted.
#' @param title The title of the plot. Set to \code{NULL} for an automatically generated title.
#'
#' @template plotSpec-args
#'
#' @template specSimParams-arg
#'
#' @template plot-lim
#'
#' @template fsubscript_source
#'
#' @export
setMethod("plotSpectrum", "formulas", function(obj, index, groupName, analysis = NULL, MSPeakLists,
title = NULL, specSimParams = getDefSpecSimParams(),
mincex = 0.9, xlim = NULL, ylim = NULL, ...)
{
ac <- checkmate::makeAssertCollection()
checkmate::assertIntegerish(index, lower = 1, min.len = 1, max.len = 2, any.missing = FALSE, add = ac)
checkmate::assertCharacter(groupName, min.len = 1, max.len = 2, min.chars = 1, add = ac)
checkmate::assertCharacter(analysis, min.len = 1, max.len = 2, min.chars = 1, null.ok = TRUE, add = ac)
if (length(index) != length(groupName))
stop("Lengths of index and groupName should be equal.")
if (!is.null(analysis) && length(analysis) != length(groupName))
stop("Lengths of analysis and groupName should be equal.")
assertSpecSimParams(specSimParams, add = ac)
checkmate::assertClass(MSPeakLists, "MSPeakLists", add = ac)
checkmate::assertString(title, null.ok = TRUE, add = ac)
checkmate::assertNumber(mincex, lower = 0, finite = TRUE, add = ac)
assertXYLim(xlim, ylim, add = ac)
checkmate::reportAssertions(ac)
if (length(groupName) == 1)
{
spec <- annotatedPeakList(obj, index, groupName, analysis, MSPeakLists)
if (is.null(spec))
return(NULL)
if (index > nrow(obj[[groupName]]))
stop(sprintf("Specified candidate index out of range %d/%d", index, nrow(obj[[groupName]])), call. = FALSE)
if (is.null(title))
title <- subscriptFormula(obj[[groupName]]$neutral_formula[index])
makeMSPlot(getMSPlotData(spec, 2), mincex, xlim, ylim, ..., main = title)
}
else
{
for (i in seq_len(2))
{
if (is.null(obj[[groupName[i]]]))
stop(paste("No data for specified feature group:", groupName[i]), call. = FALSE)
if (index[i] > nrow(obj[[groupName[i]]]))
stop(sprintf("Specified candidate index out of range %d/%d", index[i], nrow(obj[[groupName[i]]])),
call. = FALSE)
}
if (is.null(title))
title <- subscriptFormula(obj[[groupName[1]]]$neutral_formula[index[1]],
formulas2 = obj[[groupName[2]]]$neutral_formula[index])
binnedPLs <- getBinnedPLPair(MSPeakLists, groupName, analysis, 2, specSimParams, "unique", mustExist = TRUE)
topSpec <- mergeBinnedAndAnnPL(binnedPLs[[1]], annotatedPeakList(obj, index[1], groupName[1], analysis[1],
MSPeakLists), 1)
bottomSpec <- mergeBinnedAndAnnPL(binnedPLs[[2]], annotatedPeakList(obj, index[2], groupName[2],
analysis[2], MSPeakLists), 2)
plotData <- getMSPlotDataOverlay(list(topSpec, bottomSpec), TRUE, FALSE, 2, "overlap")
makeMSPlotOverlay(plotData, title, mincex, xlim, ylim, ...)
}
})
setMethod("plotSpectrumHash", "formulas", function(obj, index, groupName, analysis = NULL, MSPeakLists,
title = NULL, specSimParams = getDefSpecSimParams(),
mincex = 0.9, xlim = NULL, ylim = NULL, ...)
{
if (length(groupName) > 1)
{
# recursive call for both candidates
args <- list(obj = obj, MSPeakLists = MSPeakLists, title = title, specSimParams = specSimParams,
mincex = mincex, xlim = xlim, ylim = ylim, ...)
return(makeHash(do.call(plotSpectrumHash, c(args, list(index = index[1], groupName = groupName[1],
analysis = analysis[1]))),
do.call(plotSpectrumHash, c(args, list(index = index[2], groupName = groupName[2],
analysis = analysis[2])))))
}
formTable <- annotations(obj)[[groupName]]
fRow <- if (is.null(formTable) || nrow(formTable) == 0) NULL else formTable[index]
return(makeHash(fRow, getSpec(MSPeakLists, groupName, 2, NULL), title, mincex, xlim, ylim, ...))
})
#' @describeIn formulas Plots a barplot with scoring of a candidate formula.
#'
#' @export
setMethod("plotScores", "formulas", function(obj, index, groupName, analysis = NULL, normalizeScores = "max",
excludeNormScores = defaultExclNormScores(obj))
{
ac <- checkmate::makeAssertCollection()
checkmate::assertCount(index, positive = TRUE, add = ac)
checkmate::assertString(groupName, min.chars = 1, add = ac)
checkmate::assertString(analysis, min.chars = 1, null.ok = TRUE, add = ac)
checkmate::assertChoice(normalizeScores, c("none", "max", "minmax"))
checkmate::assertCharacter(excludeNormScores, min.chars = 1, null.ok = TRUE, add = ac)
checkmate::reportAssertions(ac)
if (is.null(analysis))
annTable <- annotations(obj)[[groupName]]
else
annTable <- annotations(obj, TRUE)[[analysis, groupName]]
if (is.null(annTable) || nrow(annTable) == 0 || index > nrow(annTable))
return(NULL)
mcn <- mergedConsensusNames(obj, FALSE)
mcnAll <- mergedConsensusNames(obj, TRUE)
if (length(mcn) == 0)
mcn <- mcnAll # HACK: only split set scores if not consensus
if (normalizeScores != "none")
annTable <- normalizeAnnScores(annTable, annScoreNames(obj, TRUE), obj@scoreRanges[[groupName]], mcnAll,
normalizeScores == "minmax", excludeNormScores)
scoreCols <- getAllMergedConsCols(annScoreNames(obj, FALSE), names(annTable), mcnAll)
makeScoresPlot(annTable[index, scoreCols, with = FALSE], mcn)
})
setMethod("plotScoresHash", "formulas", function(obj, index, groupName, analysis = NULL, normalizeScores = "max",
excludeNormScores = defaultExclNormScores(obj))
{
if (is.null(analysis))
annTable <- annotations(obj)[[groupName]]
else
annTable <- annotations(obj, TRUE)[[analysis, groupName]]
if (is.null(annTable) || nrow(annTable) == 0 || index > nrow(annTable))
annTable <- NULL
else if (normalizeScores == "none")
annTable <- annTable[index]
return(makeHash(index, annTable, normalizeScores, excludeNormScores))
})
#' @templateVar what formulas
#' @template consensus-form_comp
#'
#' @templateVar what formulas
#' @template consensus-common-args
#'
#' @param labels A \code{character} with names to use for labelling. If \code{NULL} labels are automatically generated.
#'
#' @return \code{consensus} returns a \code{formulas} object that is produced by
#' merging results from multiple \code{formulas} objects.
#'
#' @export
setMethod("consensus", "formulas", function(obj, ..., absMinAbundance = NULL, relMinAbundance = NULL,
uniqueFrom = NULL, uniqueOuter = FALSE, rankWeights = 1, labels = NULL)
{
# NOTE: keep args in sync with formulasSet method
allFormulas <- c(list(obj), list(...))
ac <- checkmate::makeAssertCollection()
checkmate::assertList(allFormulas, types = "formulas", min.len = 2, any.missing = FALSE,
unique = TRUE, .var.name = "...", add = ac)
checkmate::assertNumber(rankWeights, lower = 0, finite = TRUE, add = ac)
checkmate::assertCharacter(labels, min.chars = 1, len = length(allFormulas), null.ok = TRUE, add = ac)
checkmate::reportAssertions(ac)
labels <- prepareConsensusLabels(obj, ..., labels = labels)
assertConsCommonArgs(absMinAbundance, relMinAbundance, uniqueFrom, uniqueOuter, labels)
cons <- doFeatAnnConsensus(obj, ..., rankWeights = rankWeights, annNames = labels,
uniqueCols = c("neutral_formula", "error", "error_median", "dbe", "neutralMass",
"RF_SIRFP", "LC50_SIRFP"))
ret <- formulasConsensus(groupAnnotations = cons, featureFormulas = list(),
algorithm = paste0(unique(sapply(allFormulas, algorithm)), collapse = ","),
mergedConsensusNames = labels)
ret <- filterFeatAnnConsensus(ret, absMinAbundance, relMinAbundance, uniqueFrom, uniqueOuter, FALSE)
return(ret)
})
#' Automatic chemical formula generation
#'
#' Automatically calculate chemical formulae for all feature groups.
#'
#' Several algorithms are provided to automatically generate formulae for given feature groups. All algorithms use the
#' accurate mass of a feature to back-calculate candidate formulae. Depending on the algorithm and data availability,
#' other data such as isotopic pattern and MS/MS fragments may be used to further improve formula assignment and
#' ranking.
#'
#' @templateVar func generateFormulas
#' @templateVar what generate formulae
#' @templateVar ex1 generateFormulasDA
#' @templateVar ex2 generateFormulasGenForm
#' @templateVar algos bruker,genform,sirius
#' @templateVar algosSuffix DA,GenForm,SIRIUS
#' @templateVar ret formulas
#' @template generic-algo
#'
#' @param fGroups \code{\link{featureGroups}} object for which formulae should be generated. This should be the same or
#' a subset of the object that was used to create the specified \code{MSPeakLists}. In the case of a subset only the
#' remaining feature groups in the subset are considered.
#' @param MSPeakLists An \code{\link{MSPeakLists}} object that was generated for the supplied \code{fGroups}.
#' @param \dots Any parameters to be passed to the selected formula generation algorithm.
#'
#' @section Candidate assignment: Formula candidate assignment occurs in one of the following ways: \itemize{
#'
#' \item Candidates are first generated for each feature and then pooled to form consensus candidates for the feature
#' group.
#'
#' \item Candidates are directly generated for each feature group by group averaged MS peak list data.
#'
#' }
#'
#' With approach (1), scorings and mass errors are averaged and outliers are removed (controlled by
#' \code{featThreshold} and \code{featThresholdAnn} arguments). Other candidate properties that cannot be averaged are
#' from the feature from the analysis as specified in the \code{"analysis"} column of the results. The second approach only generates candidate formulae once for every feature group, and is therefore generally much
#' faster. However, this inherently prevents removal of outliers.
#'
#' Note that with either approach subsequent workflow steps that use formula data (\emph{e.g.}
#' \code{\link{addFormulaScoring}} and \link{reporting} functions) only use formula data that was eventually assigned
#' to feature groups.
#'
#' @section Scorings: Each algorithm implements their own scoring system. Their names have been harmonized where
#' possible. An overview is obtained with the \code{\link{formulaScorings}} function:
#' \Sexpr[results=rd,echo=FALSE,stage=build]{patRoon:::tabularRD(patRoon::formulaScorings())}
#'
#' @templateVar UID neutral formula
#' @template featAnnSets-gen
#'
#' @return A \code{\link{formulas}} object containing all generated formulae.
#'
#' @seealso The \href{https://www.researchgate.net/publication/307964728_MOLGEN-MSMS_Software_User_Manual}{GenForm
#' manual} (also known as MOLGEN-MSMS).
#'
#' @templateVar what generateFormulas
#' @template main-rd-method
#' @export
setMethod("generateFormulas", "featureGroups", function(fGroups, MSPeakLists, algorithm, ...)
{
ac <- checkmate::makeAssertCollection()
checkmate::assertClass(MSPeakLists, "MSPeakLists", add = ac)
checkmate::assertChoice(algorithm, c("bruker", "genform", "sirius"))
checkmate::reportAssertions(ac)
f <- switch(algorithm,
bruker = generateFormulasDA,
genform = generateFormulasGenForm,
sirius = generateFormulasSIRIUS)
f(fGroups, MSPeakLists, ...)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.