R/formulas.R

#' @include main.R
#' @include feature_annotations.R
NULL

#' Formula annotations class
#'
#' Contains data of generated chemical formulae for given feature groups.
#'
#' \code{formulas} objects are obtained with \code{\link{generateFormulas}}. This class is derived from the
#' \code{\link{featureAnnotations}} class, please see its documentation for more methods and other details.
#'
#' @slot featureFormulas A \code{list} with all generated formulae for each analysis/feature group. Use the
#'   \code{annotations} method for access.
#'
#' @param obj,x,object The \code{formulas} object.
#' @param \dots For \code{plotSpectrum}: Further arguments passed to \code{\link[graphics]{plot}}.
#'
#'   For \code{delete}: passed to the function specified as \code{j}.
#'
#'   For \code{consensus}: Any further (and unique) \code{formulas} objects.
#'
#'   \setsPassedArgs1{formulas}
#' @param index The candidate index (row). For \code{plotSpectrum} two indices can be specified to compare spectra. In
#'   this case \code{groupName} and \code{analysis} (if not \code{NULL}) should specify values for the spectra to
#'   compare.
#' @param i,j For \code{[[}: If both \code{i} and \code{j} are specified then \code{i} specifies the analysis and
#'   \code{j} the feature group of the feature for which annotations should be returned. Otherwise \code{i} specifies
#'   the feature group for which group annotations should be returned. \code{i}/\code{j} can be specified as
#'   \code{integer} index or as a \code{character} name.
#'
#'   Otherwise passed to the \code{\link[=filter,featureAnnotations-method]{featureAnnotations}} method.
#'
#' @templateVar normParam normalizeScores
#' @templateVar excludeParam excludeNormScores
#' @template norm-args
#'
#' @seealso The \code{\link{featureAnnotations}} base class for more relevant methods and
#'   \code{\link{generateFormulas}}.
#'
#' @templateVar class formulas
#' @template class-hierarchy
#'
#' @export
formulas <- setClass("formulas", slots = c(featureFormulas = "list"),
                     contains = "featureAnnotations")

setMethod("initialize", "formulas", function(.Object, ...)
{
    .Object <- callNextMethod(.Object, ...)

    # NOTE/UNDONE: change this whenever we want formula scoring normalization
    # based on _all_ results generated by the algorithm (ie like compounds)
    if (length(.Object@groupAnnotations) > 0)
    {
        .Object@scoreRanges <- sapply(.Object@groupAnnotations, calculateFormScoreRanges, mergedConsensusNames(.Object),
                                      simplify = FALSE)
        .Object@scoreTypes <- unique(unlist(lapply(.Object@scoreRanges, names)))
    }
    
    .Object@scoreRanges <- makeEmptyListNamed(.Object@scoreRanges)
    .Object@scoreTypes <- makeEmptyListNamed(.Object@scoreTypes)
    
    .Object@featureFormulas <- makeEmptyListNamed(.Object@featureFormulas)
    
    .Object@groupAnnotations <- lapply(.Object@groupAnnotations, function(ann) ann[, UID := neutral_formula])
    
    return(.Object)
})

#' @rdname formulas-class
formulasConsensus <- setClass("formulasConsensus",
                              slots = c(mergedConsensusNames = "character"),
                              contains = "formulas")
setMethod("mergedConsensusNames", "formulasConsensus", function(obj) obj@mergedConsensusNames)


#' @describeIn formulas Accessor method to obtain generated formulae.
#'
#' @param features If \code{TRUE} returns formula data for features, otherwise
#'   for feature groups.
#'
#' @return \code{annotations} returns a \code{list} containing for each feature
#'   group (or feature if \code{features=TRUE}) a \code{\link{data.table}}
#'   with an overview of all generated formulae and other data such as candidate
#'   scoring and MS/MS fragments.
#'
#' @export
setMethod("annotations", "formulas", function(obj, features = FALSE) if (features) obj@featureFormulas else callNextMethod())

#' @templateVar class formulas
#' @templateVar what analyses
#' @template strmethod
#' @export
setMethod("analyses", "formulas", function(obj) names(obj@featureFormulas))

#' @describeIn formulas Returns default scorings that are excluded from normalization.
#' @export
setMethod("defaultExclNormScores", "formulas", function(obj) character())

#' @describeIn formulas Show summary information for this object.
#' @export
setMethod("show", "formulas", function(object)
{
    callNextMethod()
    
    ft <- annotations(object, TRUE)
    hasFeatForms <- length(ft) > 0
    ftcounts <- if (hasFeatForms) recursiveApplyDT(ft, function(x) length(unique(x$neutral_formula)), sapply) else 0
    ma <- mean(sapply(ftcounts, sum))
    mft <- mean(sapply(ftcounts, mean))
    printf("Formulas assigned to features:\n")
    printf("  - Total formula count: %d\n", sum(unlist(ftcounts)))
    printf("  - Average formulas per analysis: %.1f\n", ma)
    printf("  - Average formulas per feature: %.1f\n", mft)
    
    gft <- annotations(object)
    mfg <- if (length(gft) > 0) sapply(gft, function(ft) length(unique(ft$neutral_formula))) else 0
    printf("Formulas assigned to feature groups:\n")
    printf("  - Total formula count: %d\n", sum(mfg))
    printf("  - Average formulas per feature group: %.1f\n", mean(mfg))
})

#' @describeIn formulas Extracts a formula table, either for a feature group or for features in an analysis.
#' @export
setMethod("[[", c("formulas", "ANY", "ANY"), function(x, i, j)
{
    assertExtractArg(i)
    if (!missing(j))
        assertExtractArg(j)
    
    if (!missing(j))
    {
        # both arguments specified, return feature formula table
        
        if (length(x@featureFormulas) == 0)
            stop("This object does not contain formulas for features.")
        
        if (!is.character(i))
            i <- analyses(x)[i]
        
        if (!is.character(j))
            j <- groupNames(x)[j]
        
        return(x@featureFormulas[[c(i, j)]])
    }
    
    return(callNextMethod())
})

#' @rdname formulas-class
#' @export
setMethod("delete", "formulas", function(obj, i = NULL, j = NULL, ...)
{
    old <- obj
    obj <- callNextMethod()
    
    # sync featureFormulas
    # NOTE: these may contain results for other groups than the groupAnnotations --> just remove the removed groups
    rmGroups <- setdiff(groupNames(old), groupNames(obj))
    if (length(rmGroups) > 0)
    {
        obj@featureFormulas <- lapply(obj@featureFormulas, function(a) pruneList(a[!names(a) %chin% rmGroups]))
        obj@featureFormulas <- pruneList(obj@featureFormulas, TRUE)
    }
    
    return(obj)
})

#' @describeIn formulas Generates a table with all candidate formulae for each feature group and other information such
#'   as element counts.
#'
#' @param average If set to \code{TRUE} an 'average formula' is generated for each feature group by combining all
#'   elements from all candidates and averaging their amounts. This obviously leads to non-existing formulae, however,
#'   this data may be useful to deal with multiple candidate formulae per feature group when performing elemental
#'   characterization. Setting this to \code{TRUE} disables reporting of most other data.
#' @param fGroups,fragments,countElements,countFragElements,OM Passed to the
#'   \code{\link[=as.data.table,featureAnnotations-method]{featureAnnotations}} method.
#'
#' @export
setMethod("as.data.table", "formulas", function(x, fGroups = NULL, fragments = FALSE, countElements = NULL,
                                                countFragElements = NULL, OM = FALSE, normalizeScores = "none",
                                                excludeNormScores = defaultExclNormScores(x), average = FALSE)
{
    checkmate::assertFlag(average)
    
    ret <- callNextMethod(x, fGroups = fGroups, fragments = fragments, countElements = countElements,
                          countFragElements = countFragElements, OM = OM, normalizeScores = normalizeScores,
                          excludeNormScores = excludeNormScores)
    
    if (average)
    {
        ret[, formula_avg_count := .N, by = "group"]
        
        avgCols <- getAllMergedConsCols(c("ion_formula", "neutral_formula"), names(ret), mergedConsensusNames(x))
        ret[, (avgCols) := lapply(.SD, function(f)
        {
            f <- f[!is.na(f)]
            if (length(f) == 0)
                return(NA_character_)
            return(averageFormulas(unique(f)))
        }), .SDcols = avgCols, by = "group"]
        
        # just keep the essential columns as the rest doesn't make too much sense anymore
        keepCols <- c("group", avgCols)
        ret <- ret[, keepCols, with = FALSE]
        ret <- unique(ret, by = c("group", "neutral_formula"))
        
        # update if needed
        ret <- addElementInfoToAnnTable(ret, countElements, countFragElements, OM, TRUE)
    }
    
    return(ret[])
})

#' @describeIn formulas Returns an MS/MS peak list annotated with data from a
#'   given candidate formula.
#'
#' @param onlyAnnotated Set to \code{TRUE} to filter out any peaks that could
#'   not be annotated.
#'
#' @export
setMethod("annotatedPeakList", "formulas", function(obj, index, groupName, analysis = NULL, MSPeakLists,
                                                    onlyAnnotated = FALSE)
{
    # NOTE: keep args in sync with sets method
    
    ac <- checkmate::makeAssertCollection()
    checkmate::assertCount(index, positive = TRUE, add = ac)
    assertChoiceSilent(groupName, groupNames(obj), add = ac)
    checkmate::assertString(analysis, min.chars = 1, null.ok = TRUE, add = ac)
    checkmate::assertClass(MSPeakLists, "MSPeakLists", add = ac)
    checkmate::assertFlag(onlyAnnotated, add = ac)
    checkmate::reportAssertions(ac)
    
    if (!is.null(analysis))
        return(doAnnotatePeakList(MSPeakLists[[analysis, groupName]][["MSMS"]],
                                  annotations(obj, TRUE)[[analysis, groupName]], index, onlyAnnotated))
    
    return(doAnnotatePeakList(MSPeakLists[[groupName]][["MSMS"]], annotations(obj)[[groupName]], index,
                              onlyAnnotated))
})

#' @describeIn formulas Plots an annotated spectrum for a given candidate formula of a feature or feature group. Two
#'   spectra can be compared by specifying a two-sized vector for the \code{index}, \code{groupName} and (if desired)
#'   \code{analysis} arguments.
#'
#' @param analysis A \code{character} specifying the analysis (or analyses when comparing spectra) for which the
#'   annotated spectrum should be plotted. If \code{NULL} then annotation results for the complete feature group will be
#'   plotted.
#' @param title The title of the plot. Set to \code{NULL} for an automatically generated title.
#'
#' @template plotSpec-args
#'
#' @template specSimParams-arg
#'
#' @template plot-lim
#'
#' @template fsubscript_source
#'
#' @export
setMethod("plotSpectrum", "formulas", function(obj, index, groupName, analysis = NULL, MSPeakLists,
                                               title = NULL, specSimParams = getDefSpecSimParams(),
                                               mincex = 0.9, xlim = NULL, ylim = NULL, ...)
{
    ac <- checkmate::makeAssertCollection()
    checkmate::assertIntegerish(index, lower = 1, min.len = 1, max.len = 2, any.missing = FALSE, add = ac)
    checkmate::assertCharacter(groupName, min.len = 1, max.len = 2, min.chars = 1, add = ac)
    checkmate::assertCharacter(analysis, min.len = 1, max.len = 2, min.chars = 1, null.ok = TRUE, add = ac)
    if (length(index) != length(groupName))
        stop("Lengths of index and groupName should be equal.")
    if (!is.null(analysis) && length(analysis) != length(groupName))
        stop("Lengths of analysis and groupName should be equal.")
    assertSpecSimParams(specSimParams, add = ac)
    checkmate::assertClass(MSPeakLists, "MSPeakLists", add = ac)
    checkmate::assertString(title, null.ok = TRUE, add = ac)
    checkmate::assertNumber(mincex, lower = 0, finite = TRUE, add = ac)
    assertXYLim(xlim, ylim, add = ac)
    checkmate::reportAssertions(ac)
    
    if (length(groupName) == 1)
    {
        spec <- annotatedPeakList(obj, index, groupName, analysis, MSPeakLists)
        if (is.null(spec))
            return(NULL)
        
        if (index > nrow(obj[[groupName]]))
            stop(sprintf("Specified candidate index out of range %d/%d", index, nrow(obj[[groupName]])), call. = FALSE)

        if (is.null(title))
            title <- subscriptFormula(obj[[groupName]]$neutral_formula[index])
        
        makeMSPlot(getMSPlotData(spec, 2), mincex, xlim, ylim, ..., main = title)
    }
    else
    {
        for (i in seq_len(2))
        {
            if (is.null(obj[[groupName[i]]]))
                stop(paste("No data for specified feature group:", groupName[i]), call. = FALSE)
            if (index[i] > nrow(obj[[groupName[i]]]))
                stop(sprintf("Specified candidate index out of range %d/%d", index[i], nrow(obj[[groupName[i]]])),
                     call. = FALSE)
        }
        
        if (is.null(title))
            title <- subscriptFormula(obj[[groupName[1]]]$neutral_formula[index[1]],
                                      formulas2 = obj[[groupName[2]]]$neutral_formula[index])
        
        binnedPLs <- getBinnedPLPair(MSPeakLists, groupName, analysis, 2, specSimParams, "unique", mustExist = TRUE)
        
        topSpec <- mergeBinnedAndAnnPL(binnedPLs[[1]], annotatedPeakList(obj, index[1], groupName[1], analysis[1],
                                                                         MSPeakLists), 1)
        
        bottomSpec <- mergeBinnedAndAnnPL(binnedPLs[[2]], annotatedPeakList(obj, index[2], groupName[2],
                                                                            analysis[2], MSPeakLists), 2)
        plotData <- getMSPlotDataOverlay(list(topSpec, bottomSpec), TRUE, FALSE, 2, "overlap")
        makeMSPlotOverlay(plotData, title, mincex, xlim, ylim, ...)
    }
})

setMethod("plotSpectrumHash", "formulas", function(obj, index, groupName, analysis = NULL, MSPeakLists,
                                                     title = NULL, specSimParams = getDefSpecSimParams(),
                                                     mincex = 0.9, xlim = NULL, ylim = NULL, ...)
{
    if (length(groupName) > 1)
    {
        # recursive call for both candidates
        args <- list(obj = obj, MSPeakLists = MSPeakLists, title = title, specSimParams = specSimParams,
                     mincex = mincex, xlim = xlim, ylim = ylim, ...)
        return(makeHash(do.call(plotSpectrumHash, c(args, list(index = index[1], groupName = groupName[1],
                                                               analysis = analysis[1]))),
                        do.call(plotSpectrumHash, c(args, list(index = index[2], groupName = groupName[2],
                                                               analysis = analysis[2])))))
    }
    
    formTable <- annotations(obj)[[groupName]]
    fRow <- if (is.null(formTable) || nrow(formTable) == 0) NULL else formTable[index]
    
    return(makeHash(fRow, getSpec(MSPeakLists, groupName, 2, NULL), title, mincex, xlim, ylim, ...))
})

#' @describeIn formulas Plots a barplot with scoring of a candidate formula.
#'
#' @export
setMethod("plotScores", "formulas", function(obj, index, groupName, analysis = NULL, normalizeScores = "max",
                                             excludeNormScores = defaultExclNormScores(obj))
{
    ac <- checkmate::makeAssertCollection()
    checkmate::assertCount(index, positive = TRUE, add = ac)
    checkmate::assertString(groupName, min.chars = 1, add = ac)
    checkmate::assertString(analysis, min.chars = 1, null.ok = TRUE, add = ac)
    checkmate::assertChoice(normalizeScores, c("none", "max", "minmax"))
    checkmate::assertCharacter(excludeNormScores, min.chars = 1, null.ok = TRUE, add = ac)
    checkmate::reportAssertions(ac)
    
    if (is.null(analysis))
        annTable <- annotations(obj)[[groupName]]
    else
        annTable <- annotations(obj, TRUE)[[analysis, groupName]]
    
    if (is.null(annTable) || nrow(annTable) == 0 || index > nrow(annTable))
        return(NULL)
    
    mcn <- mergedConsensusNames(obj, FALSE)
    mcnAll <- mergedConsensusNames(obj, TRUE)
    if (length(mcn) == 0)
        mcn <- mcnAll # HACK: only split set scores if not consensus
    
    if (normalizeScores != "none")
        annTable <- normalizeAnnScores(annTable, annScoreNames(obj, TRUE), obj@scoreRanges[[groupName]], mcnAll,
                                       normalizeScores == "minmax", excludeNormScores)
    
    scoreCols <- getAllMergedConsCols(annScoreNames(obj, FALSE), names(annTable), mcnAll)

    makeScoresPlot(annTable[index, scoreCols, with = FALSE], mcn)
})

setMethod("plotScoresHash", "formulas", function(obj, index, groupName, analysis = NULL, normalizeScores = "max",
                                                 excludeNormScores = defaultExclNormScores(obj))
{
    if (is.null(analysis))
        annTable <- annotations(obj)[[groupName]]
    else
        annTable <- annotations(obj, TRUE)[[analysis, groupName]]
    if (is.null(annTable) || nrow(annTable) == 0 || index > nrow(annTable))
        annTable <- NULL
    else if (normalizeScores == "none")
        annTable <- annTable[index]
    
    return(makeHash(index, annTable, normalizeScores, excludeNormScores))
})

#' @templateVar what formulas
#' @template consensus-form_comp
#'
#' @templateVar what formulas
#' @template consensus-common-args
#'
#' @param labels A \code{character} with names to use for labelling. If \code{NULL} labels are automatically generated.
#'
#' @return \code{consensus} returns a \code{formulas} object that is produced by
#'   merging results from multiple \code{formulas} objects.
#'
#' @export
setMethod("consensus", "formulas", function(obj, ..., absMinAbundance = NULL, relMinAbundance = NULL,
                                            uniqueFrom = NULL, uniqueOuter = FALSE, rankWeights = 1, labels = NULL)
{
    # NOTE: keep args in sync with formulasSet method
    
    allFormulas <- c(list(obj), list(...))
    
    ac <- checkmate::makeAssertCollection()
    checkmate::assertList(allFormulas, types = "formulas", min.len = 2, any.missing = FALSE,
                          unique = TRUE, .var.name = "...", add = ac)
    checkmate::assertNumber(rankWeights, lower = 0, finite = TRUE, add = ac)
    checkmate::assertCharacter(labels, min.chars = 1, len = length(allFormulas), null.ok = TRUE, add = ac)
    checkmate::reportAssertions(ac)
    
    labels <- prepareConsensusLabels(obj, ..., labels = labels)

    assertConsCommonArgs(absMinAbundance, relMinAbundance, uniqueFrom, uniqueOuter, labels)
    
    cons <- doFeatAnnConsensus(obj, ..., rankWeights = rankWeights, annNames = labels,
                               uniqueCols = c("neutral_formula", "error", "error_median", "dbe", "neutralMass",
                                              "RF_SIRFP", "LC50_SIRFP"))
    
    ret <- formulasConsensus(groupAnnotations = cons, featureFormulas = list(),
                             algorithm = paste0(unique(sapply(allFormulas, algorithm)), collapse = ","),
                             mergedConsensusNames = labels)
    
    ret <- filterFeatAnnConsensus(ret, absMinAbundance, relMinAbundance, uniqueFrom, uniqueOuter, FALSE)
    
    return(ret)
})

#' Automatic chemical formula generation
#'
#' Automatically calculate chemical formulae for all feature groups.
#'
#' Several algorithms are provided to automatically generate formulae for given feature groups. All algorithms use the
#' accurate mass of a feature to back-calculate candidate formulae. Depending on the algorithm and data availability,
#' other data such as isotopic pattern and MS/MS fragments may be used to further improve formula assignment and
#' ranking.
#'
#' @templateVar func generateFormulas
#' @templateVar what generate formulae
#' @templateVar ex1 generateFormulasDA
#' @templateVar ex2 generateFormulasGenForm
#' @templateVar algos bruker,genform,sirius
#' @templateVar algosSuffix DA,GenForm,SIRIUS
#' @templateVar ret formulas
#' @template generic-algo
#'
#' @param fGroups \code{\link{featureGroups}} object for which formulae should be generated. This should be the same or
#'   a subset of the object that was used to create the specified \code{MSPeakLists}. In the case of a subset only the
#'   remaining feature groups in the subset are considered.
#' @param MSPeakLists An \code{\link{MSPeakLists}} object that was generated for the supplied \code{fGroups}.
#' @param \dots Any parameters to be passed to the selected formula generation algorithm.
#'
#' @section Candidate assignment: Formula candidate assignment occurs in one of the following ways: \itemize{
#'
#'   \item Candidates are first generated for each feature and then pooled to form consensus candidates for the feature
#'   group.
#'
#'   \item Candidates are directly generated for each feature group by group averaged MS peak list data.
#'
#'   }
#'
#'   With approach (1), scorings and mass errors are averaged and outliers are removed (controlled by
#'   \code{featThreshold} and \code{featThresholdAnn} arguments). Other candidate properties that cannot be averaged are
#'   from the feature from the analysis as specified in the \code{"analysis"} column of the results. The second approach only generates candidate formulae once for every feature group, and is therefore generally much
#'   faster. However, this inherently prevents removal of outliers.
#'
#'   Note that with either approach subsequent workflow steps that use formula data (\emph{e.g.}
#'   \code{\link{addFormulaScoring}} and \link{reporting} functions) only use formula data that was eventually assigned
#'   to feature groups.
#'   
#' @section Scorings: Each algorithm implements their own scoring system. Their names have been harmonized where
#'   possible. An overview is obtained with the \code{\link{formulaScorings}} function:
#'   \Sexpr[results=rd,echo=FALSE,stage=build]{patRoon:::tabularRD(patRoon::formulaScorings())}
#'
#' @templateVar UID neutral formula
#' @template featAnnSets-gen
#'
#' @return A \code{\link{formulas}} object containing all generated formulae.
#'
#' @seealso The \href{https://www.researchgate.net/publication/307964728_MOLGEN-MSMS_Software_User_Manual}{GenForm
#'   manual} (also known as MOLGEN-MSMS).
#'
#' @templateVar what generateFormulas
#' @template main-rd-method
#' @export
setMethod("generateFormulas", "featureGroups", function(fGroups, MSPeakLists, algorithm, ...)
{
    ac <- checkmate::makeAssertCollection()
    checkmate::assertClass(MSPeakLists, "MSPeakLists", add = ac)
    checkmate::assertChoice(algorithm, c("bruker", "genform", "sirius"))
    checkmate::reportAssertions(ac)
    
    f <- switch(algorithm,
                bruker = generateFormulasDA,
                genform = generateFormulasGenForm,
                sirius = generateFormulasSIRIUS)
    
    f(fGroups, MSPeakLists, ...)
})
rickhelmus/patRoon documentation built on April 25, 2024, 8:15 a.m.