#' @include feature_groups.R
NULL
#' @rdname featureGroups-class
#' @export
featureGroupsBruker <- setClass("featureGroupsBruker", contains = "featureGroups")
setMethod("initialize", "featureGroupsBruker",
function(.Object, ...) callNextMethod(.Object, algorithm = "bruker_profile", ...))
#' Imports feature groups from Bruker ProfileAnalysis
#'
#' Imports a 'bucket table' produced by Bruker ProfileAnalysis (PA)
#'
#' @templateVar algo Bruker ProfileAnalysis
#' @templateVar generic importFeatureGroups
#' @templateVar algoParam brukerpa
#' @template algo_importer
#'
#' @details The 'bucket table' should be exported as \file{.txt} file. Please note that this function only supports
#' features generated by \code{\link{findFeaturesBruker}} and it is \strong{crucial} that DataAnalysis files remain
#' unchanged when features are collected and the bucket table is generated. Furthermore, please note that PA does not
#' retain information about originating features for generated buckets. For this reason, this function tries to find
#' back the original features and care must be taken to correctly specify search parameters (\code{rtWindow},
#' \code{mzWindow}, \code{intWindow}).
#'
#' @param path The file path to a exported 'bucket table' \file{.txt} file from PA.
#' @param feat The \code{\link{features}} object obtained with \code{\link{findFeaturesBruker}}.
#' @param rtWindow,mzWindow,intWindow Search window values for retention time (seconds), \emph{m/z} (Da) and intensity
#' used to find back features within feature groups from PA (+/- the retention/mass/intensity value of a feature).
#' @param warn Warn about missing or duplicate features when relating them back from grouped features.
#'
#' @inherit importFeatureGroups return
#'
#' @export
importFeatureGroupsBrukerPA <- function(path, feat, rtWindow = 12, mzWindow = 0.005, intWindow = 5, warn = TRUE)
{
ac <- checkmate::makeAssertCollection()
checkmate::assertFileExists(path, "r", add = ac)
checkmate::assertClass(feat, "featuresBruker", add = ac)
aapply(checkmate::assertNumber, . ~ rtWindow + mzWindow + intWindow, lower = 0, finite = TRUE, fixed = list(add = ac))
checkmate::assertFlag(warn, add = ac)
checkmate::reportAssertions(ac)
hash <- makeHash(makeFileHash(path), feat, rtWindow, mzWindow, intWindow)
cachefg <- loadCacheData("featureGroupsPA", hash)
if (!is.null(cachefg))
return(cachefg)
cat("Importing ProfileAnalysis file...")
anaInfo <- analysisInfo(feat)
# disable check.names: keep original names as much as possible
f <- fread(path, check.names = F, sep = "\t", stringsAsFactors = FALSE, header = TRUE)
setnames(f, make.unique(colnames(f))) # still make them unique
# assume first column contains filenames
setnames(f, 1, "file") # name it to make processing easier
# filter & align samples with samples in features and remove file column
featgroups <- f[match(anaInfo$analysis, simplifyAnalysisNames(file), nomatch = 0)][, -"file"]
gInfo <- extractPAGroupInfo(featgroups)
# rename columns: do this after getting group info as that needs the original column names
gNames <- makeFGroupName(seq_len(nrow(gInfo)), gInfo$rts, gInfo$mzs)
setnames(featgroups, gNames)
rownames(gInfo) <- gNames
ret <- featureGroupsBruker(groups=featgroups, analysisInfo = anaInfo, groupInfo = gInfo,
features = feat)
ret@ftindex <- getFeatIndicesFromPA(ret, rtWindow, mzWindow, intWindow, warn)
saveCacheData("featureGroupsPA", ret, hash)
cat("Done!\n")
return(ret)
}
extractPAGroupInfo <- function(groups)
{
# Bucket format is: "<retention time>s : <m/z>m/z"
# Extract data: regex from http://stackoverflow.com/a/19253799 (made middle decimal point mandatory)
groupnames <- colnames(groups)
spltbuckets <- as.numeric(unlist(regmatches(groupnames, gregexpr("[[:digit:]]+\\.+[[:digit:]]*", groupnames))))
rts <- spltbuckets[c(T, F)]
mzs <- spltbuckets[c(F, T)]
return(data.frame(rts = rts, mzs = mzs))
}
# PA doesn't export original feature IDs used for bucketing, try to find them back
getFeatIndicesFromPA <- function(fGroups, rtWindow, mzWindow, intWindow, warn)
{
gTable <- groupTable(fGroups)
gCount <- ncol(gTable)
gInfo <- groupInfo(fGroups)
fTable <- featureTable(fGroups@features)
ret <- copy(gTable)
ret[, (colnames(ret)) := 0]
for (datafile in names(fTable))
{
dfind <- match(datafile, analysisInfo(fGroups)$analysis)
fts <- fTable[[datafile]]
if (is.na(dfind))
next # datafile not (anymore) in this feature group
for (grp in seq_len(gCount))
{
if (gTable[[dfind, grp]] > 0) # check if this file should have this feature
{
# Filter retention time
subf <- fts[numLTE(abs(ret - gInfo$rts[grp]), rtWindow)]
# Filter m/z
subf <- subf[numLTE(abs(mz - gInfo$mzs[grp]), mzWindow)]
# Filter intensity
subf <- subf[numLTE(abs(intensity - gTable[[dfind, grp]]), intWindow)]
if (nrow(subf) > 0)
{
feature <- unique(subf$ID)
if (length(feature) > 1)
{
if (warn)
{
printf("WARNING: Found multiple features within group %s:\n", colnames(gTable)[grp])
print(subf)
}
feature <- feature[1] # just take first
}
ret[dfind, grp] <- fts[, .I[ID == feature]]
}
else if (warn)
printf("WARNING: Couldn't find any feature for feature group %s in file %s!\n", colnames(gTable)[grp], datafile)
}
}
}
return(ret)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.