R/formulas-bruker.R

Defines functions getEmptyDAFragInfo simplifyDAFormula

#' @include main.R
#' @include utils-bruker.R
NULL

# NOTE: No coverage calculation for Bruker tools as they cannot be run on CI
# nocov start

simplifyDAFormula <- function(formula)
{
    # Remove spaces
    formula <- gsub(" ", "", formula)

    # Removes any element counts of 1: match all "1"'s after a character but not before another number
    return(gsub("(?<=[[:alpha:]])1{1}(?![[:digit:]])", "", formula, perl = TRUE))
}

getEmptyDAFragInfo <- function() data.table(mz = numeric(), ion_formula = character(), error = numeric(),
                                            mSigma = numeric(), dbe = numeric(), score = numeric())


#' Generate formula with Bruker DataAnalysis
#'
#' Uses Bruker DataAnalysis to generate chemical formulae.
#'
#' @templateVar algo Bruker DataAnalysis
#' @templateVar do generate formula candidates
#' @templateVar generic generateFormulas
#' @templateVar algoParam bruker
#' @template algo_generator
#'
#' @details This method supports scoring based on overlap between measured and theoretical isotopic patterns (both MS
#'   and MS/MS data) and the presence of 'fitting' MS/MS fragments. The method will iterate through all features (or
#'   "Compounds" in DataAnalysis terms) and call \command{SmartFormula} (and \command{SmartFormula3D} if MS/MS data is
#'   available) to generate all formulae. Parameters affecting formula calculation have to be set in advance within the
#'   DataAnalysis method for each analysis (\emph{e.g.} by \code{\link{setDAMethod}}).
#'
#'   This method requires that features were obtained with \code{\link{findFeaturesBruker}}. It is recommended, but not
#'   mandatory, that the \code{\link{MSPeakLists}} are also generated by DataAnalysis.
#'
#'   Calculation of formulae with DataAnalysis always occurs with the 'feature approach' (see \verb{Candidate
#'   assignment} in \code{\link{generateFormulas}}).
#'
#' @param precursorMzSearchWindow Search window for \emph{m/z} values (+/- the feature \emph{m/z}) used to find back
#'   feature data of precursor/parent ions from MS/MS spectra (this data is not readily available from
#'   \command{SmartFormula3D} results).
#' @param MSMode Whether formulae should be generated only from MS data (\code{"ms"}), MS/MS data (\code{"msms"}) or
#'   both (\code{"both"}). Selecting "both" will calculate formulae from MS data and MS/MS data and combines the results
#'   (duplicated formulae are removed). This is useful when poor MS/MS data would exclude proper candidates.
#'
#' @template adduct-arg
#' @templateVar algo bruker
#' @template form_algo-args
#'
#' @inheritParams generateFormulas
#'
#' @inherit generateFormulas return
#'
#' @template dasaveclose-args
#'
#' @template DA-restart-note
#'
#' @templateVar what generateFormulasDA
#' @template main-rd-method
#' @export
setMethod("generateFormulasDA", "featureGroups", function(fGroups, MSPeakLists, precursorMzSearchWindow = 0.002,
                                                          MSMode = "both", adduct = NULL, featThreshold = 0,
                                                          featThresholdAnn = 0.75, absAlignMzDev = 0.002, save = TRUE,
                                                          close = save)
{
    ac <- checkmate::makeAssertCollection()
    checkmate::assertClass(MSPeakLists, "MSPeakLists", add = ac)
    checkmate::assertNumber(precursorMzSearchWindow, lower = 0, finite = TRUE, add = ac)
    checkmate::assertChoice(MSMode, c("ms", "msms", "both"), add = ac)
    aapply(checkmate::assertNumber, . ~ featThreshold + featThresholdAnn + absAlignMzDev, lower = 0, upper = 1,
           fixed = list(add = ac))
    assertDACloseSaveArgs(close, save, add = ac)
    checkmate::reportAssertions(ac)

    adduct <- checkAndToAdduct(adduct, fGroups)
    fgAdd <- getFGroupAdducts(names(fGroups), annotations(fGroups), adduct, "generic")
    
    DA <- getDAApplication()
    anaInfo <- analysisInfo(fGroups)
    ftind <- groupFeatIndex(fGroups)
    gTable <- groupTable(fGroups)
    gCount <- length(fGroups)
    gNames <- names(fGroups)

    hideDAInScope()

    fts <- featureTable(fGroups)
    fTable <- list()

    cacheDB <- openCacheDBScope() # open manually so caching code doesn't need to on each R/W access
    setHash <- makeHash(fGroups, precursorMzSearchWindow, MSMode)
    cachedSet <- loadCacheSet("formulasBruker", setHash, cacheDB)
    formHashes <- vector("character", nrow(anaInfo) * gCount)
    formHashCount <- 0
    
    setFormColOrder <- function(dt)
    {
        setcolorder(dt, c("neutral_formula", "ion_formula", "neutralMass", "ion_formula_mz", "error", "mSigma", "dbe",
                          "score", "explainedPeaks", "fragInfo"))
    }

    for (anai in seq_len(nrow(anaInfo)))
    {
        ana <- anaInfo$analysis[anai]
        DAAnaInd <- getDAFileIndex(DA, ana, anaInfo$path[anai])
        checkDAFMFCompounds(DA, fts[[ana]], DAAnaInd, TRUE)

        printf("Loading all formulas from analysis '%s'...\n", ana)

        baseHash <- makeHash(fGroups, ana, precursorMzSearchWindow, MSMode)

        cmpds <- DA[["Analyses"]][[DAAnaInd]][["Compounds"]]

        prog <- openProgBar(0, gCount)

        for (grpi in seq_len(gCount))
        {
            grp <- gNames[grpi]
            fti <- ftind[[grp]][anai]

            if (fti == 0)
                next

            hash <- makeHash(baseHash, grp)
            formHashCount <- formHashCount + 1
            formHashes[formHashCount] <- hash

            flist <- NULL
            if (!is.null(cachedSet))
                flist <- cachedSet[[hash]]
            if (is.null(flist))
                flist <- loadCacheData("formulasBruker", hash, cacheDB)

            if (is.null(flist))
            {
                cmpd <- cmpds[[fts[[ana]]$ID[fti]]]

                ftable <- vector("list", gCount)
                ftableCount <- 0

                if (MSMode != "msms")
                {
                    cmpd$SmartFormula()

                    SMFResults <- cmpd[[1]][["SmartFormulaResults"]]
                    SMFResultsCount <- SMFResults[["Count"]]

                    if (SMFResultsCount > 0)
                    {
                        for (k in 1:SMFResultsCount)
                        {
                            SMFResult <- SMFResults[[k]]
                            SMFResultCount <- SMFResult[["Count"]]

                            if (SMFResultCount < 1 || all.equal(SMFResult[["m_over_z"]], fts[[ana]]$mz[fti]) != TRUE)
                                next

                            dt <- rbindlist(lapply(seq_len(SMFResultCount), function(resi)
                            {
                                SMFResultItem <- SMFResult[[resi]]
                                return(list(
                                    neutral_formula = simplifyDAFormula(SMFResultItem[["NeutralFormula"]]),
                                    ion_formula = simplifyDAFormula(SMFResultItem[["SumFormula"]]),
                                    ion_formula_mz = SMFResultItem[["m_over_z"]],
                                    error = SMFResultItem[["Error"]],
                                    mSigma = SMFResultItem[["Sigma"]] * 1000,
                                    dbe = SMFResultItem[["RingsAndDoubleBonds"]],
                                    score = SMFResultItem[["Score"]],
                                    fragInfo = list(getEmptyDAFragInfo())
                                ))
                            }))

                            dt <- addMiscFormulaInfo(dt, adduct)
                            dt <- setFormColOrder(dt)
                            
                            ftableCount <- ftableCount + 1
                            ftable[[ftableCount]] <- dt
                            break # Shouldn't be any more results
                        }
                    }
                }

                ftable3D <- vector("list", gCount)
                ftable3DCount <- 0

                if (MSMode != "ms" && cmpd[["Count"]] > 1)
                {
                    cmpd$SmartFormula3D()

                    SMF3DResults <- cmpd[[2]][["SmartFormula3DResults"]]
                    SMF3DResultsCount <- SMF3DResults[["Count"]]

                    if (SMF3DResultsCount > 0)
                    {
                        ftable3DFrag <- vector("list", SMF3DResultsCount)
                        ftable3DFragCount <- 0

                        for (k in 1:SMF3DResultsCount)
                        {
                            SMF3DResult <- SMF3DResults[[k]]
                            SMF3DResultCount <- SMF3DResult[["Count"]]
                            parent <- SMF3DResult[["ParentResults"]]

                            if (SMF3DResultCount < 1 || abs(parent[["m_over_z"]] - fts[[ana]]$mz[fti]) > precursorMzSearchWindow)
                                next
                            
                            form <- simplifyDAFormula(parent[["SumFormula"]])
                            nform <- calculateNeutralFormula(form, fgAdd$grpAdducts[[grp]])

                            dt <- data.table(neutral_formula = nform,
                                             ion_formula = form,
                                             ion_formula_mz = parent[["m_over_z"]],
                                             error = parent[["Error"]],
                                             mSigma = parent[["Sigma"]] * 1000,
                                             dbe = parent[["RingsAndDoubleBonds"]],
                                             score = parent[["Score"]])
                            
                            fi <- rbindlist(lapply(seq_len(SMF3DResultCount), function(resi)
                            {
                                frag <- SMF3DResult[[resi]]
                                fform <- simplifyDAFormula(frag[["SumFormula"]])
                                return(data.table(
                                    mz = frag[["m_over_z"]],
                                    ion_formula = fform,
                                    error = frag[["Error"]],
                                    mSigma = frag[["Sigma"]] * 1000,
                                    dbe = frag[["RingsAndDoubleBonds"]],
                                    score = frag[["Score"]]))
                            }))
                            dt[, fragInfo := list(fi)]

                            dt <- addMiscFormulaInfo(dt, adduct)
                            dt <- setFormColOrder(dt)

                            ftable3DFragCount <- ftable3DFragCount + 1
                            ftable3DFrag[[ftable3DFragCount]] <- dt
                        }

                        if (ftable3DFragCount > 0)
                        {
                            ftable3DCount <- ftable3DCount + 1
                            ftable3D[[ftable3DCount]] <- rbindlist(ftable3DFrag[1:ftable3DFragCount])
                        }
                    }
                }

                flist <- list()

                if (ftableCount > 0)
                    flist <- rbindlist(ftable[1:ftableCount])

                if (ftable3DCount > 0)
                {
                    MSMSFlist <- rbindlist(ftable3D[1:ftable3DCount])
                    if (ftableCount > 0)
                        flist <- flist[!neutral_formula %in% MSMSFlist[["neutral_formula"]]]
                    flist <- rbind(flist, MSMSFlist, fill = TRUE)
                }

                saveCacheData("formulasBruker", flist, hash, cacheDB)
            }

            if (!is.null(flist) && length(flist) > 0)
            {
                if (is.null(fTable[[ana]]))
                    fTable[[ana]] <- list()

                fTable[[ana]][[grp]] <- flist
            }

            if ((grpi %% 5) == 0)
                setTxtProgressBar(prog, grpi)
        }

        setTxtProgressBar(prog, gCount)
        close(prog)

        closeSaveDAFile(DA, DAAnaInd, close, save)

        ngrp <- length(fTable[[ana]])
        printf("Loaded %d formulas for %d features (%.2f%%).\n", countUniqueFormulas(fTable[[ana]]),
               ngrp, if (gCount == 0) 0 else ngrp * 100 / gCount)
    }

    if (is.null(cachedSet))
        saveCacheSet("formulasBruker", formHashes[1:formHashCount], setHash, cacheDB)

    fTable <- pruneList(sapply(fTable, function(ft) ft[sapply(ft, nrow) > 0], simplify = FALSE), TRUE)

    if (length(fTable) > 0)
    {
        groupFormulas <- generateGroupFormulasByConsensus(fTable, lapply(ftind, function(x) sum(x > 0)),
                                                          featThreshold, featThresholdAnn, gNames)
        groupFormulas <- setFormulaPLID(groupFormulas, MSPeakLists, absAlignMzDev)
    }
    else
        groupFormulas <- list()

    return(formulas(groupAnnotations = groupFormulas, featureFormulas = fTable, algorithm = "bruker"))
})

#' @template featAnnSets-gen_args
#' @rdname generateFormulasDA
#' @export
setMethod("generateFormulasDA", "featureGroupsSet", function(fGroups, MSPeakLists, precursorMzSearchWindow = 0.002,
                                                             MSMode = "both", adduct = NULL, ..., setThreshold = 0,
                                                             setThresholdAnn = 0, setAvgSpecificScores = FALSE)
{
    generateFormulasSet(fGroups, MSPeakLists, adduct, generateFormulasDA,
                        precursorMzSearchWindow = precursorMzSearchWindow, MSMode = MSMode, ...,
                        setThreshold = setThreshold, setThresholdAnn = setThresholdAnn,
                        setAvgSpecificScores = setAvgSpecificScores)
})

# nocov end
rickhelmus/patRoon documentation built on April 25, 2024, 8:15 a.m.