# R/feature_groups-bruker.R

#' @include feature_groups.R
NULL

#' @rdname featureGroups-class
#' @export
featureGroupsBruker <- setClass("featureGroupsBruker", contains = "featureGroups")

setMethod("initialize", "featureGroupsBruker",
          function(.Object, ...) callNextMethod(.Object, algorithm = "bruker_profile", ...))


#' Imports feature groups from Bruker ProfileAnalysis
#'
#' Imports a 'bucket table' produced by Bruker ProfileAnalysis (PA).
#'
#' @templateVar algo Bruker ProfileAnalysis
#' @templateVar generic importFeatureGroups
#' @templateVar algoParam brukerpa
#' @template algo_importer
#'
#' @details The 'bucket table' should be exported as a \file{.txt} file. Note that this function only supports
#'   features generated by \code{\link{findFeaturesBruker}}, and it is \strong{crucial} that the DataAnalysis files
#'   remain unchanged between collecting the features and generating the bucket table. Furthermore, PA does not
#'   retain information on which features gave rise to a generated bucket. For this reason, this function tries to
#'   trace the original features back, and care must be taken to correctly specify the search parameters
#'   (\code{rtWindow}, \code{mzWindow}, \code{intWindow}).
#'
#' @param path The file path to an exported 'bucket table' \file{.txt} file from PA.
#' @param feat The \code{\link{features}} object obtained with \code{\link{findFeaturesBruker}}.
#' @param rtWindow,mzWindow,intWindow Search window values for retention time (seconds), \emph{m/z} (Da) and intensity
#'   used to match features back to the feature groups imported from PA (+/- the retention time/\emph{m/z}/intensity
#'   value of a feature).
#' @param warn Warn about missing or duplicate features when matching them back to grouped features.
#'
#' @inherit importFeatureGroups return
#'
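#' @examples
#' \dontrun{
#' # Minimal usage sketch; the analysis directory and bucket table file name
#' # below are illustrative and not shipped with the package.
#' anaInfo <- generateAnalysisInfo("bruker_analyses")
#' feat <- findFeaturesBruker(anaInfo)
#' fGroups <- importFeatureGroupsBrukerPA("PA_bucket_table.txt", feat,
#'                                        rtWindow = 12, mzWindow = 0.005)
#' }
#'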
#' @export
importFeatureGroupsBrukerPA <- function(path, feat, rtWindow = 12, mzWindow = 0.005, intWindow = 5, warn = TRUE)
{
    ac <- checkmate::makeAssertCollection()
    checkmate::assertFileExists(path, "r", add = ac)
    checkmate::assertClass(feat, "featuresBruker", add = ac)
    aapply(checkmate::assertNumber, . ~ rtWindow + mzWindow + intWindow, lower = 0, finite = TRUE, fixed = list(add = ac))
    checkmate::assertFlag(warn, add = ac)
    checkmate::reportAssertions(ac)

    # return a cached result if this exact import was done before
    hash <- makeHash(makeFileHash(path), feat, rtWindow, mzWindow, intWindow)
    cachefg <- loadCacheData("featureGroupsPA", hash)
    if (!is.null(cachefg))
        return(cachefg)

    cat("Importing ProfileAnalysis file...")

    anaInfo <- analysisInfo(feat)

    # disable check.names: keep original names as much as possible
    f <- fread(path, check.names = FALSE, sep = "\t", stringsAsFactors = FALSE, header = TRUE)
    setnames(f, make.unique(colnames(f))) # still make them unique

    # assume first column contains filenames
    setnames(f, 1, "file") # name it to make processing easier

    # filter & align samples with samples in features and remove file column
    featgroups <- f[match(anaInfo$analysis, simplifyAnalysisNames(file), nomatch = 0)][, -"file"]

    gInfo <- extractPAGroupInfo(featgroups)

    # rename columns: do this after getting group info as that needs the original column names
    gNames <- makeFGroupName(seq_len(nrow(gInfo)), gInfo$rts, gInfo$mzs)
    setnames(featgroups, gNames)
    rownames(gInfo) <- gNames

    ret <- featureGroupsBruker(groups = featgroups, analysisInfo = anaInfo, groupInfo = gInfo,
                               features = feat)
    ret@ftindex <- getFeatIndicesFromPA(ret, rtWindow, mzWindow, intWindow, warn)

    saveCacheData("featureGroupsPA", ret, hash)

    cat("Done!\n")
    return(ret)
}

extractPAGroupInfo <- function(groups)
{
    # Bucket format is: "<retention time>s : <m/z>m/z"
    # Extract data: regex from http://stackoverflow.com/a/19253799 (made middle decimal point mandatory)

    groupnames <- colnames(groups)
    spltbuckets <- as.numeric(unlist(regmatches(groupnames, gregexpr("[[:digit:]]+\\.+[[:digit:]]*", groupnames))))
    rts <- spltbuckets[c(TRUE, FALSE)] # odd elements: retention times
    mzs <- spltbuckets[c(FALSE, TRUE)] # even elements: m/z values
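
    # Illustration (hypothetical values): bucket names c("123.4s : 456.789m/z",
    # "200.1s : 300.5m/z") yield the interleaved vector
    # c(123.4, 456.789, 200.1, 300.5), from which the alternating indices above
    # recover rts = c(123.4, 200.1) and mzs = c(456.789, 300.5).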

    return(data.frame(rts = rts, mzs = mzs))
}

# PA doesn't export the original feature IDs used for bucketing, so try to recover
# them by matching each bucket's retention time, m/z and intensity against the
# features of each analysis (within the given search windows)
getFeatIndicesFromPA <- function(fGroups, rtWindow, mzWindow, intWindow, warn)
{
    gTable <- groupTable(fGroups)
    gCount <- ncol(gTable)
    gInfo <- groupInfo(fGroups)
    fTable <- featureTable(fGroups@features)

    # start from a zeroed copy of the group table; each cell will hold the row
    # index of the matched feature within that analysis' feature table
    ret <- copy(gTable)
    ret[, (colnames(ret)) := 0]

    for (datafile in names(fTable))
    {
        dfind <- match(datafile, analysisInfo(fGroups)$analysis)
        if (is.na(dfind))
            next # this analysis is not (anymore) present in the feature groups object

        fts <- fTable[[datafile]]

        for (grp in seq_len(gCount))
        {
            if (gTable[[dfind, grp]] > 0) # check if this file should have this feature
            {
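                # note: in the data.table subsets below, ret, mz and intensity
                # refer to columns of fts, not to the outer ret result table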
                # Filter retention time
                subf <- fts[numLTE(abs(ret - gInfo$rts[grp]), rtWindow)]

                # Filter m/z
                subf <- subf[numLTE(abs(mz - gInfo$mzs[grp]), mzWindow)]

                # Filter intensity
                subf <- subf[numLTE(abs(intensity - gTable[[dfind, grp]]), intWindow)]

                if (nrow(subf) > 0)
                {
                    feature <- unique(subf$ID)
                    if (length(feature) > 1)
                    {
                        if (warn)
                        {
                            printf("WARNING: Found multiple features within group %s:\n", colnames(gTable)[grp])
                            print(subf)
                        }

                        feature <- feature[1] # just take first
                    }

                    # store the row index (within fts) of the matched feature
                    ret[dfind, grp] <- fts[, .I[ID == feature]]
                }
                else if (warn)
                    printf("WARNING: Couldn't find any feature for feature group %s in file %s!\n", colnames(gTable)[grp], datafile)
            }
        }
    }

    return(ret)
}