#' @include main.R
#' @include components.R
#' @include components-set.R
#' @include feature_groups-set.R
NULL
#' Components class for homologous series.
#'
#' This class is derived from \code{\link{components}} and is used to store
#' results from unsupervised homolog detection with the \pkg{nontarget}
#' package.
#'
#' Objects from this class are generated by
#' \code{\link{generateComponentsNontarget}}
#'
#' @slot homol A \code{list} with \code{homol} objects for each replicate group
#' as returned by \code{\link[nontarget]{homol.search}}
#'
#' @references \insertRef{Loos2017}{patRoon} \cr\cr \addCitations{enviPat}{1}
#'
#' @seealso \code{\link{components}} and \code{\link{generateComponents}}
#'
#' @export
componentsNT <- setClass("componentsNT", slots = c(homol = "list"), contains = "components")
setMethod("collapseComponents", "componentsNT", function(obj)
{
obj@components <- lapply(obj@components, function(cmp)
{
keep <- c("rt", "mz", "group", "rGroup")
cmp <- cmp[, keep, with = FALSE]
cmp[, rGroup := paste0(unique(rGroup), collapse = ","), by = "group"]
return(unique(cmp, by = "group"))
})
return(obj)
})
#' @describeIn componentsNT Plots an interactive network graph for linked
#' homologous series (\emph{i.e.} series with (partial) overlap which could
#' not be merged). The resulting graph can be browsed interactively and allows
#' quick inspection of series which may be related. The graph is constructed
#' with the \pkg{\link{igraph}} package and rendered with
#' \pkg{\link{visNetwork}}.
#'
#' @param obj The \code{componentsNT} object to plot.
#' @param onlyLinked If \code{TRUE} then only components with links are plotted.
#'
#' @template plotGraph
#'
#' @references \addCitations{igraph}{2}
#'
#' @export
setMethod("plotGraph", "componentsNT", function(obj, onlyLinked = TRUE, width = NULL, height = NULL)
{
checkmate::assertFlag(onlyLinked)
if (length(obj) == 0)
stop("No components to plot", call. = FALSE)
cInfo <- componentInfo(obj)
cTable <- componentTable(obj)
hsFGroups <- sapply(cTable, function(cmp) paste0(cmp$group, collapse = ", "))
hsRGroups <- sapply(cTable, function(cmp) paste0(unique(cmp$rGroup), collapse = ", "))
titles <- sprintf("<b>%s</b> (RT: %.2f; m/z: %.4f; #%d)<br>fGroups: <i>%s</i><br>rGroups: <i>%s</i>",
names(obj), cInfo$ret_increment, cInfo$mz_increment, cInfo$size, hsFGroups, hsRGroups)
makeGraph(obj, onlyLinked, titles, width, height)
})
#' Componentization of homologous series with nontarget
#'
#' Uses \href{https://cran.r-project.org/web/packages/nontarget/index.html}{the nontarget R package} to generate
#' components by unsupervised detection of homologous series.
#'
#' @templateVar algo nontarget
#' @templateVar do generate components
#' @templateVar generic generateComponents
#' @templateVar algoParam nontarget
#' @template algo_generator
#'
#' @details In the first step the \code{\link[nontarget]{homol.search}} function is used to detect all homologous series
#' within each replicate group (analyses within each replicate group are averaged prior to detection). Then,
#' homologous series across replicate groups are merged in case of full overlap or when merging of partial overlapping
#' series causes no conflicts.
#'
#' @param rtRange A numeric vector containing the minimum and maximum retention time (in seconds) between homologues.
#' Series are always considered from low to high \emph{m/z}, thus, a negative minimum retention time allows detection
#' of homologous series with increasing \emph{m/z} and decreasing retention times. These values set the \code{minrt}
#' and \code{maxrt} arguments of \code{\link[nontarget]{homol.search}}.
#' @param mzRange A numeric vector specifying the minimum and maximum \emph{m/z} increment of a homologous series. Sets
#' the \code{minmz} and \code{maxmz} arguments of \code{\link[nontarget]{homol.search}}.
#' @param elements A character vector with elements to be considered for detection of repeating units. Sets the
#' \code{elements} argument of \code{\link[nontarget]{homol.search}} function.
#' @param rtDev Maximum retention time deviation. Sets the \code{rttol} to \code{\link[nontarget]{homol.search}}.
#' @param absMzDevLink Maximum absolute \emph{m/z} deviation when linking series. This should usually be a bit higher
#' than \code{absMzDev} to ensure proper linkage.
#' @param traceHack Currently \code{\link[nontarget]{homol.search}} does not work with \R \samp{>3.3.3}. This flag, which is
#' enabled by default on these R versions, implements a (messy) workaround
#' (\href{https://github.com/blosloos/nontarget/issues/6}{more details here}).
#'
#' @templateVar ion TRUE
#' @templateVar absMzDev \code{mztol} argument to \code{\link[nontarget]{homol.search}}
#' @templateVar myDots Any further arguments passed to \code{\link[nontarget]{homol.search}}.
#' @template compon_algo-args
#'
#' @inheritParams generateComponents
#'
#' @return The generated comnponents are returned as an object from the \code{\link{componentsNT}} class.
#'
#' @templateVar class componentsNTSet
#' @template compon_gen-sets-merged
#'
#' @section Sets workflows: The output class supports additional methods such as \code{plotGraph}.
#'
#' @references \insertRef{Loos2017}{patRoon} \cr\cr \addCitations{enviPat}{1}
#'
#' @templateVar what generateComponentsNontarget
#' @template main-rd-method
#' @export
setMethod("generateComponentsNontarget", "featureGroups", function(fGroups, ionization = NULL, rtRange = c(-120, 120),
mzRange = c(5, 120), elements = c("C", "H", "O"),
rtDev = 30, absMzDev = 0.002,
absMzDevLink = absMzDev * 2,
traceHack = all(R.Version()[c("major", "minor")] >= c(3, 4)),
...)
{
checkPackage("nontarget", "https://github.com/blosloos/nontarget")
ac <- checkmate::makeAssertCollection()
checkmate::assertClass(fGroups, "featureGroups", add = ac)
ionization <- checkAndGetIonization(ionization, fGroups, add = ac)
checkmate::assertNumeric(rtRange, finite = TRUE, any.missing = FALSE, len = 2, add = ac)
checkmate::assertNumeric(mzRange, lower = 0, finite = TRUE, any.missing = FALSE, len = 2, add = ac)
checkmate::assertCharacter(elements, min.chars = 1, any.missing = FALSE, min.len = 1, add = ac)
aapply(checkmate::assertNumber, . ~ rtDev + absMzDev + absMzDevLink, lower = 0, finite = TRUE, fixed = list(add = ac))
checkmate::assertFlag(traceHack, add = ac)
checkmate::reportAssertions(ac)
if (length(fGroups) == 0)
return(componentsNT(homol = list(), componentInfo = data.table(), components = list(),
algorithm = "nontarget"))
extraArgs <- list(...)
hash <- makeHash(fGroups, ionization, rtRange, mzRange, elements, rtDev, absMzDev, absMzDevLink, extraArgs)
cd <- loadCacheData("componentsNontarget", hash)
if (!is.null(cd))
return(cd)
gTable <- groupTable(fGroups)
gInfo <- groupInfo(fGroups)
anaInfo <- analysisInfo(fGroups)
utils::data(isotopes, package = "enviPat")
# adduct.search() and homol.search() call stop() when nothing is found (arg!) --> use tryCatch
# For now just stick with homologues as there is no easy way to do adduct/isotopes over multiple analyses
# find homologous series for each replicate group
rGroups <- unique(anaInfo$group)
groupTablesRG <- sapply(rGroups, function(rg)
{
fGrpRep <- replicateGroupFilter(fGroups, rg, verbose = FALSE)
return(if (length(fGrpRep) == 0) NULL else as.data.table(fGrpRep, average = TRUE))
}, simplify = FALSE)
homArgs <- c(list(isotopes = isotopes, elements = elements, minmz = mzRange[1],
maxmz = mzRange[2], minrt = rtRange[1], maxrt = rtRange[2], ppm = FALSE,
mztol = absMzDev, rttol = rtDev),
extraArgs)
homList <- sapply(rGroups, function(rg)
{
gt <- groupTablesRG[[rg]][, c("mz", rg, "ret"), with = FALSE] # convert to nontarget peaklist format
if (is.null(gt))
return(NULL) # rep group doesn't have feature groups
if (traceHack)
suppressMessages(trace(nontarget::homol.search, function() {})) # put in dummy trace
# UNDONE: find some way to differentiate between actual errors?
hom <- tryCatch(do.call(nontarget::homol.search, c(list(gt), homArgs)),
error = function(e) FALSE)
if (traceHack)
suppressMessages(untrace(nontarget::homol.search))
if (is.logical(hom))
return(NULL) # no results
return(hom)
}, simplify = FALSE)
homList <- homList[!sapply(homList, is.null)]
compTab <- rbindlist(mapply(homList, names(homList), SIMPLIFY = FALSE, FUN = function(hom, rg)
{
stopifnot(identical(hom[[3]][["HS IDs"]], hom[[3]][["HS cluster"]])) # what is the difference!?
homTab <- as.data.table(hom[[3]])
gNames <- groupTablesRG[[rg]]$group
# peak IDs are now numeric indices of a subset of the original feature
# groups. Replace them by group names to allow easy comparison.
homTab[, groups := list(sapply(`peak IDs`, function(pids)
{
ginds <- as.integer(unlist(strsplit(pids, ",")))
ginds <- ginds[order(gInfo[gNames[ginds], "mzs"])] # ensure they are from low to high m/z
return(list(gNames[ginds]))
}, simplify = FALSE))]
homTab[, c("HS IDs", "peak IDs") := NULL]
homTab[, (rg) := list(groups)]
return(homTab)
}), fill = TRUE)
if (nrow(compTab) == 0)
return(componentsNT(componentInfo = data.table(), components = list(), algorithm = "nontarget"))
# check which series should be linked
compTab[, links := list(list(integer()))]
for (r in seq_len(nrow(compTab)))
{
series <- compTab[["groups"]][[r]][[1]]
links <- integer(0)
for (ro in seq_len(nrow(compTab)))
{
if (r == ro)
next
if (abs(compTab[["m/z increment"]][r] - compTab[["m/z increment"]][ro]) > absMzDev ||
abs(compTab[["RT increment"]][r] - compTab[["RT increment"]][ro]) > rtDev)
next # different series
otherSeries <- compTab[["groups"]][[ro]][[1]]
if (sum(series %in% otherSeries) < 1) # UNDONE: minimum overlap?
next
# check for conflicts: groups that are not present in both but with
# m/z values close or equal to those groups that are present
missingG <- setdiff(series, otherSeries)
missingOtherG <- setdiff(otherSeries, series)
mzSame <- function(mz1, mz2) numLTE(abs(mz1 - mz2), absMzDevLink)
if (any(sapply(missingG, function(mg) any(mzSame(gInfo[mg, "mzs"], gInfo[otherSeries, "mzs"])))) ||
any(sapply(missingOtherG, function(mg) any(mzSame(gInfo[mg, "mzs"], gInfo[series, "mzs"])))))
next
links <- c(links, ro)
}
set(compTab, r, "links", list(list(links)))
}
presentRGroups <- rGroups[sapply(rGroups, function(rg) !is.null(compTab[[rg]]))]
# merge any pairs of series that are just pointing to each other: they are
# either exactly the same or otherwise subsets of each other that can be
# merged without conflicts
compTab[, keep := TRUE]
for (r in seq_len(nrow(compTab)))
{
if (compTab[["keep"]][r] == FALSE)
next # already merged
links <- compTab[["links"]][[r]]
for (other in links)
{
otherLinks <- compTab[["links"]][[other]]
# linkage is equal?
if (length(links) == length(otherLinks) && all(otherLinks == r | otherLinks %in% links))
{
# merge groups
mGroups <- union(compTab[["groups"]][[r]][[1]], compTab[["groups"]][[other]][[1]])
mGroups <- mGroups[order(gInfo[mGroups, "mzs"])] # make sure order stays correct
set(compTab, r, "groups", list(list(list(mGroups))))
# mark presence
lc <- compTab[other, presentRGroups, with = FALSE]
for (rg in presentRGroups)
{
if (!is.null(lc[[rg]][[1]]))
set(compTab, r, rg, list(list(lc[[rg]][[1]]))) # need to rewrap it in a list?
}
# update statistics
indBoth <- c(r, other)
set(compTab, r, "m/z increment", mean(compTab[["m/z increment"]][indBoth]))
set(compTab, r, "RT increment", mean(compTab[["RT increment"]][indBoth]))
set(compTab, r, "min. RT in series", min(compTab[["min. RT in series"]][indBoth]))
set(compTab, r, "max. RT in series", max(compTab[["max. RT in series"]][indBoth]))
set(compTab, r, "max.-min. RT", compTab[["max. RT in series"]][r] - compTab[["min. RT in series"]][r])
set(compTab, other, "keep", FALSE) # remove other
set(compTab, r, "links", list(list(setdiff(links, other)))) # unlink
}
}
}
# clearout merged series
compTab[, IDs := seq_len(.N)] # store current row order
compTab <- compTab[keep == TRUE]
# update links
for (r in seq_len(nrow(compTab)))
{
links <- compTab[["links"]][[r]]
if (length(links) > 0)
{
# no sapply: this may return integer(0) for removed links and therefore make it a list
# --> use lapply and convert from list afterwards
newLinks <- lapply(links, function(l) compTab[IDs == l, which = TRUE])
newLinks <- newLinks[lengths(newLinks) > 0] # length will be zero if linked series was removed
newLinks <- unlist(newLinks)
if (is.null(newLinks)) # no links anymore?
newLinks <- integer()
set(compTab, r, "links", list(list(newLinks)))
}
}
compTab[, c("keep", "IDs") := NULL]
# split all rows in list with tables containing groups per row
comps <- lapply(seq_len(nrow(compTab)), function(cmpi)
{
allGroups <- compTab[["groups"]][[cmpi]][[1]]
homSeries <- seq_along(allGroups)
ret <- rbindlist(lapply(presentRGroups, function(rg)
{
grp <- compTab[[rg]][[cmpi]][[1]]
if (!is.null(grp))
return(data.table(rt = gInfo[grp, "rts"], mz = gInfo[grp, "mzs"], group = grp,
hsnr = match(grp, allGroups), rGroup = rg,
intensity = groupTablesRG[[rg]][group %in% grp, get(rg)]))
return(NULL)
}), fill = TRUE)
setorderv(ret, c("hsnr"))
})
cNames <- paste0("CMP", seq_along(comps))
names(comps) <- cNames
cInfo <- copy(compTab)
cInfo[, c("groups", "HS cluster") := NULL]
setnames(cInfo,
c("m/z increment", "RT increment", "min. RT in series", "max. RT in series", "max.-min. RT"),
c("mz_increment", "ret_increment", "ret_min", "ret_max", "ret_range"))
cInfo[, name := names(comps)]
cInfo[, size := sapply(comps, nrow)]
# convert from fgroup lists to logical presence
for (rg in presentRGroups)
set(cInfo, j = rg, value = !sapply(cInfo[[rg]], is.null))
# Convert numeric links to component names. This also keeps links alive
# after subsetting
cInfo[, links := lapply(links, function(l) cNames[l])]
ret <- componentsNT(homol = homList, componentInfo = cInfo, components = comps,
algorithm = "nontarget")
saveCacheData("componentsNontarget", ret, hash)
return(ret)
})
#' @rdname generateComponentsNontarget
#' @export
setMethod("generateComponentsNontarget", "featureGroupsSet", function(fGroups, ionization = NULL, ...)
{
return(generateComponentsSet(fGroups, ionization, generateComponentsNontarget, setIonization = TRUE, ...,
classGenerator = componentsNTSet))
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.