#' @include main.R
#' @include workflow-step.R
NULL
#' Base feature annotations class
#'
#' Holds information for all feature group annotations.
#'
#' This class stores annotation data for feature groups, such as molecular formulae, SMILES identifiers, compound names
#' etc. The class of objects that are generated by formula and compound annotation (\code{\link{generateFormulas}} and
#' \code{\link{generateCompounds}}) are based on this class.
#'
#' @param obj,x \code{featureAnnotations} object to be accessed
#' @param \dots For the \code{"["} operator: ignored.
#'
#' For \code{delete}: passed to the function specified as \code{j}.
#'
#' Others: Any further (and unique) \code{featureAnnotations} objects.
#' @param OM For \code{as.data.table}: if set to \code{TRUE} several columns with information relevant for organic
#' matter (OM) characterization will be added (e.g. elemental ratios, classification). This will also make sure that
#' \code{countElements} contains at least C, H, N, O, P and S.
#'
#' For \code{filter}: If \code{TRUE} then several filters are applied to exclude unlikely formula candidates present
#' in organic matter (OM). See Source section for details.
#' @param labels A \code{character} with names to use for labelling. If \code{NULL} labels are automatically generated.
#'
#' @seealso \code{\link{formulas-class}} and \code{\link{compounds-class}}
#'
#' @slot groupAnnotations A \code{list} with for each annotated feature group a \code{data.table} with annotation data.
#' Use the \code{annotations} method for access.
#' @slot scoreTypes A \code{character} with all the score types present in this object.
#' @slot scoreRanges The minimum and maximum score values of all candidates for each feature group. Used for
#' normalization.
#'
#' @templateVar seli feature groups
#' @templateVar selOrderi groupNames()
#' @templateVar del TRUE
#' @templateVar deli feature groups
#' @templateVar delj candidates
#' @templateVar deljtype numeric indices (rows)
#' @templateVar delfwhat feature group
#' @templateVar delfa the annotation table (a \code{data.table}), the feature group name
#' @templateVar delfr the candidate indices (rows) to be removed (specified as an \code{integer} or \code{logical} vector)
#' @templateVar dollarOpName feature group
#' @template sub_sel_del-args
#'
#' @templateVar normParam normalizeScores
#' @templateVar excludeParam excludeNormScores
#' @template norm-args
#'
#' @section Source: Calculation of the aromaticity index (AI) and related double bond equivalents (DBE_AI) is performed
#' as described in Koch 2015. Formula classification is performed by the rules described in Abdulla 2013. Filtering of
#' OM related molecules is performed as described in Koch 2006 and Kujawinski 2006. (see references).
#'
#' @references \insertRef{Koch2015}{patRoon} \cr\cr \insertRef{Abdulla2013}{patRoon} \cr\cr
#' \insertRef{Koch2006}{patRoon} \cr\cr \insertRef{Kujawinski2006}{patRoon}
#'
#' @seealso The derived \code{\link{formulas}} and \code{\link{compounds}} classes.
#'
#' @templateVar class featureAnnotations
#' @template class-hierarchy
#'
#' @export
featureAnnotations <- setClass("featureAnnotations",
slots = c(groupAnnotations = "list", scoreTypes = "character", scoreRanges = "list"),
contains = c("workflowStep", "VIRTUAL"))
setMethod("initialize", "featureAnnotations", function(.Object, ...)
{
.Object <- callNextMethod(.Object, ...)
.Object@groupAnnotations <- makeEmptyListNamed(.Object@groupAnnotations)
return(.Object)
})
setMethod("groupNamesResults", "featureAnnotations", function(obj) groupNames(obj))
#' @describeIn featureAnnotations Accessor for the \code{groupAnnotations} slot.
#' @export
setMethod("annotations", "featureAnnotations", function(obj) obj@groupAnnotations)
#' @templateVar class featureAnnotations
#' @templateVar what feature groups
#' @template strmethod
#' @export
setMethod("groupNames", "featureAnnotations", function(obj) names(obj@groupAnnotations))
#' @describeIn featureAnnotations Obtain total number of candidates.
#' @export
setMethod("length", "featureAnnotations", function(x) if (length(x@groupAnnotations) > 0) sum(sapply(x@groupAnnotations, nrow)) else 0)
#' @describeIn featureAnnotations Subset on feature groups.
#' @export
setMethod("[", c("featureAnnotations", "ANY", "missing", "missing"), function(x, i, ...)
{
if (!missing(i))
{
i <- assertSubsetArgAndToChr(i, groupNames(x))
x <- delete(x, setdiff(groupNames(x), i))
}
return(x)
})
#' @describeIn featureAnnotations Extracts annotation data for a feature group.
#' @export
setMethod("[[", c("featureAnnotations", "ANY", "missing"), function(x, i, j)
{
assertExtractArg(i)
return(x@groupAnnotations[[i]])
})
#' @describeIn featureAnnotations Extracts annotation data for a feature group.
#' @export
setMethod("$", "featureAnnotations", function(x, name)
{
eval(substitute(x@groupAnnotations$NAME_ARG, list(NAME_ARG = name)))
})
#' @describeIn featureAnnotations Generates a table with all annotation data for each feature group and other
#' information such as element counts.
#'
#' @param fragments If \code{TRUE} then information on annotated fragments will be included. Automatically set to
#' \code{TRUE} if \code{countFragElements} is set.
#' @param countElements,countFragElements A \code{character} vector with elements that should be counted for each
#' candidate's formula. For instance, \code{c("C", "H")} adds columns for both carbon and hydrogen amounts of each
#' formula. Note that the neutral formula (\code{neutral_formula} column) is used to count elements of non-fragmented
#' formulae, whereas the charged formula of fragments (\code{ion_formula} column in \code{fragInfo} data) is used for
#' fragments. Set to \code{NULL} to not count any elements.
#'
#' @template as_data_table-args
#'
#' @return \code{as.data.table} returns a \code{\link{data.table}}.
#'
#' @export
setMethod("as.data.table", "featureAnnotations", function(x, fGroups = NULL, fragments = FALSE, countElements = NULL,
countFragElements = NULL, OM = FALSE,
normalizeScores = "none",
excludeNormScores = defaultExclNormScores(x))
{
# NOTE: keep args in sync with formulas/compounds methods
ac <- checkmate::makeAssertCollection()
checkmate::assertClass(fGroups, "featureGroups", null.ok = TRUE, add = ac)
checkmate::assertCharacter(countElements, min.chars = 1, any.missing = FALSE, null.ok = TRUE, add = ac)
checkmate::assertCharacter(countFragElements, min.chars = 1, any.missing = FALSE, null.ok = TRUE, add = ac)
checkmate::assertFlag(OM, add = ac)
checkmate::assertFlag(fragments, add = ac)
assertNormalizationMethod(normalizeScores, add = ac)
checkmate::assertCharacter(excludeNormScores, min.chars = 1, null.ok = TRUE, add = ac)
checkmate::reportAssertions(ac)
if (!is.null(countFragElements))
fragments <- TRUE
mcn <- mergedConsensusNames(x)
annTable <- annotations(x)
if (normalizeScores != "none")
{
annTable <- Map(annTable, x@scoreRanges, f = normalizeAnnScores,
MoreArgs = list(scoreCols = annScoreNames(x, TRUE), mConsNames = mcn, normalizeScores == "minmax",
exclude = excludeNormScores))
}
if (fragments)
{
ret <- rbindlist(lapply(annTable, function(ct)
{
ct <- copy(ct)
ct[, row := seq_len(nrow(ct))]
fragTab <- rbindlist(ct$fragInfo, idcol = "row", fill = TRUE)
fragTab[, PLID := NULL]
cnames <- setdiff(names(fragTab), "row")
setnames(fragTab, cnames, paste0("frag_", cnames))
return(merge(ct, fragTab, by = "row", all.x = TRUE)[, -"row"])
}), idcol = "group", fill = TRUE)
}
else
ret <- rbindlist(annTable, idcol = "group", fill = TRUE)
if (!is.null(fGroups))
{
ret[, c("ret", "group_mz") := groupInfo(fGroups)[group, c("rts", "mzs")]]
setcolorder(ret, c("group", "ret", "group_mz"))
}
ret <- addElementInfoToAnnTable(ret, countElements, countFragElements, OM, TRUE)
if (!is.null(ret[["fragInfo"]]))
return(ret[, -"fragInfo"]) # not there if empty results
return(ret)
})
#' @templateVar where featureAnnotations
#' @templateVar what annotations
#' @template delete
#' @export
setMethod("delete", "featureAnnotations", function(obj, i = NULL, j = NULL, ...)
{
# NOTE: this is ~ a c/p from the features method
ac <- checkmate::makeAssertCollection()
i <- assertDeleteArgAndToChr(i, groupNames(obj), add = ac)
checkmate::assert(
checkmate::checkIntegerish(j, any.missing = FALSE, null.ok = TRUE),
checkmate::checkFunction(j, null.ok = TRUE),
.var.name = "j"
)
checkmate::reportAssertions(ac)
if (length(i) == 0 || (!is.null(j) && length(j) == 0))
return(obj) # nothing to remove...
# UNDONE: NULL for i and j will remove all?
# i = NULL; j = vector: remove from all groups
# i = vector; j = NULL: remove specified groups
# j = function: remove specific results from given groups (or all if i=NULL)
if (!is.function(j))
{
if (is.null(j))
obj@groupAnnotations <- obj@groupAnnotations[setdiff(groupNames(obj), i)]
else
{
obj@groupAnnotations[i] <- lapply(obj@groupAnnotations[i], function(at)
{
inds <- j[j <= nrow(at)]
return(if (length(inds) > 0) at[-inds] else at)
})
}
}
else
{
obj@groupAnnotations[i] <- Map(obj@groupAnnotations[i], i, f = function(at, grp)
{
rm <- j(at, grp, ...)
if (is.logical(rm))
return(at[!rm])
return(at[setdiff(seq_len(nrow(at)), rm)])
})
}
obj@groupAnnotations <- pruneList(obj@groupAnnotations, checkZeroRows = TRUE)
obj@scoreRanges <- obj@scoreRanges[names(obj@groupAnnotations)]
return(obj)
})
#' @describeIn featureAnnotations Provides rule based filtering for feature group annotations. Useful to eliminate
#' unlikely candidates and speed up further processing.
#'
#' @param minExplainedPeaks Minimum number of explained peaks. Set to \code{NULL} to ignore.
#' @param scoreLimits Filter results by their scores. Should be a named \code{list} that contains two-sized numeric
#' vectors with the minimum/maximum value of a score (use \code{-Inf}/\code{Inf} for no limits). The names of each
#' element should follow the name column of the table returned by \code{\link{formulaScorings}$name} and
#' \code{\link{compoundScorings}()$name}. For instance, \code{scoreLimits=list(numberPatents=c(10, Inf))} specifies
#' that \code{numberPatents} should be at least \samp{10}. Note that a result without a specified scoring is never
#' removed. If a score term exists multiple times, \emph{i.e.} due to a consensus, then a candidate is kept if at
#' least one of the terms falls within the range. Set to \code{NULL} to skip this filter.
#' @param topMost Only keep a maximum of \code{topMost} candidates with highest score (or least highest if
#' \code{negate=TRUE}). Set to \code{NULL} to ignore.
#' @param negate If \code{TRUE} then filters are applied in opposite manner.
#'
#' @template element-args
#'
#' @return \code{filter} returns a filtered \code{featureAnnotations} object.
#'
#' @export
setMethod("filter", "featureAnnotations", function(obj, minExplainedPeaks = NULL, scoreLimits = NULL, elements = NULL,
fragElements = NULL, lossElements = NULL, topMost = NULL, OM = FALSE,
negate = FALSE)
{
ac <- checkmate::makeAssertCollection()
aapply(checkmate::assertCount, . ~ minExplainedPeaks + topMost, positive = c(FALSE, TRUE),
null.ok = TRUE, fixed = list(add = ac))
assertScoreRange(scoreLimits, NULL, add = ac)
aapply(checkmate::assertCharacter, . ~ elements + fragElements + lossElements,
min.chars = 1, min.len = 1, null.ok = TRUE, fixed = list(add = ac))
aapply(checkmate::assertFlag, . ~ OM + negate, fixed = list(add = ac))
checkmate::reportAssertions(ac)
cat("Filtering annotations... ")
mConsNames <- mergedConsensusNames(obj)
if (!is.null(minExplainedPeaks))
scoreLimits <- modifyList(if (is.null(scoreLimits)) list() else scoreLimits,
list(explainedPeaks = c(minExplainedPeaks, Inf)))
mark <- if (negate) function(x) !x else function(x) x
oldn <- length(obj)
obj <- delete(obj, j = function(annTable, ...)
{
annTable <- copy(annTable)
annTable[, keep := TRUE]
if (!is.null(scoreLimits))
{
for (sc in names(scoreLimits))
{
cols <- getAllMergedConsCols(sc, names(annTable), mConsNames)
if (length(cols) == 0)
next
annTable[keep == TRUE, keep :=
mark(do.call(pmax, c(.SD, list(na.rm = TRUE))) >= scoreLimits[[sc]][1] &
do.call(pmin, c(.SD, list(na.rm = TRUE))) <= scoreLimits[[sc]][2]),
.SDcols = cols]
}
}
if (!is.null(elements))
annTable[keep == TRUE, keep := sapply(neutral_formula, checkFormula, elements, negate)]
if (!is.null(fragElements) || !is.null(lossElements))
{
annTable[keep == TRUE, keep := sapply(fragInfo, function(fi)
{
if (nrow(fi) == 0)
return(FALSE)
if (!is.null(fragElements) && !any(sapply(fi$ion_formula, checkFormula, fragElements, negate)))
return(FALSE)
if (!is.null(lossElements) && !any(sapply(fi$neutral_loss, checkFormula, lossElements, negate)))
return(FALSE)
return(TRUE)
})]
}
if (OM)
{
annTable <- addElementInfoToAnnTable(annTable, NULL, NULL, OM = TRUE, classify = FALSE)
annTable[keep == TRUE, keep := mark(
# rules from Kujawinski & Behn, 2006 (10.1021/ac0600306)
H >= 1/3 * C &
H <= ((2 * C) + N + 2) &
(H + N) %% 2 == 0 &
N <= C &
O <= C &
P <= 2 &
S <= 2 &
# rules from Koch & dittmar 2006 (10.1002/rcm.2386)
sapply(DBE_AI, checkmate::checkInt) &
HC <= 2.2 &
OC <= 1.2 &
NC <= 0.5)]
}
return(!annTable$keep)
})
if (!is.null(topMost))
{
if (negate)
obj <- delete(obj, j = function(at, ...) seq_len(nrow(at)) <= (nrow(at) - topMost))
else
obj <- delete(obj, j = function(at, ...) seq_len(nrow(at)) > topMost)
}
newn <- length(obj)
printf("Done! Filtered %d (%.2f%%) annotations. Remaining: %d\n", oldn - newn, if (oldn == 0) 0 else (1-(newn/oldn))*100, newn)
return(obj)
})
#' @describeIn featureAnnotations plots a Venn diagram (using \pkg{\link{VennDiagram}}) outlining unique and shared
#' candidates of up to five different \code{featureAnnotations} objects.
#'
#' @param vennArgs A \code{list} with further arguments passed to \pkg{VennDiagram} plotting functions. Set to
#' \code{NULL} to ignore.
#'
#' @template plotvenn-ret
#'
#' @export
setMethod("plotVenn", "featureAnnotations", function(obj, ..., labels = NULL, vennArgs = NULL)
{
allFeatAnnotations <- c(list(obj), list(...))
ac <- checkmate::makeAssertCollection()
checkmate::assertList(allFeatAnnotations, types = "featureAnnotations", min.len = 2, any.missing = FALSE,
unique = TRUE, .var.name = "...", add = ac)
checkmate::assertCharacter(labels, min.chars = 1, len = length(allFeatAnnotations), null.ok = TRUE, add = ac)
checkmate::assertList(vennArgs, names = "unique", null.ok = TRUE, add = ac)
checkmate::reportAssertions(ac)
if (is.null(labels))
labels <- make.unique(sapply(allFeatAnnotations, algorithm))
if (is.null(vennArgs))
vennArgs <- list()
allAnnTabs <- lapply(allFeatAnnotations, as.data.table)
do.call(makeVennPlot, c(list(allAnnTabs, labels, lengths(allFeatAnnotations), function(obj1, obj2)
{
if (length(obj1) == 0 || length(obj2) == 0)
return(data.table())
fintersect(obj1[, c("group", "UID")], obj2[, c("group", "UID")])
}, nrow), vennArgs))
})
#' @describeIn featureAnnotations plots an UpSet diagram (using the \code{\link[UpSetR]{upset}} function) outlining
#' unique and shared candidates between different \code{featureAnnotations} objects.
#' @templateVar withArgs TRUE
#' @template plotUpSet
#' @export
setMethod("plotUpSet", "featureAnnotations", function(obj, ..., labels = NULL, nsets = length(list(...)) + 1,
nintersects = NA, upsetArgs = NULL)
{
allFeatAnnotations <- c(list(obj), list(...))
ac <- checkmate::makeAssertCollection()
checkmate::assertList(allFeatAnnotations, types = "featureAnnotations", min.len = 2, any.missing = FALSE,
unique = TRUE, .var.name = "...", add = ac)
checkmate::assertCharacter(labels, min.chars = 1, len = length(allFeatAnnotations), null.ok = TRUE, add = ac)
checkmate::assertList(upsetArgs, names = "unique", null.ok = TRUE, add = ac)
checkmate::assertCount(nsets, positive = TRUE)
checkmate::assertCount(nintersects, positive = TRUE, na.ok = TRUE)
checkmate::reportAssertions(ac)
if (is.null(labels))
labels <- make.unique(sapply(allFeatAnnotations, algorithm))
allAnnTabs <- mapply(allFeatAnnotations, labels, SIMPLIFY = FALSE, FUN = function(f, l)
{
ret <- as.data.table(f)
if (nrow(ret) == 0)
ret <- data.table(group = character(), UID = character())
ret <- unique(ret[, c("group", "UID")])[, (l) := 1]
})
annTab <- Reduce(function(f1, f2)
{
merge(f1, f2, by = c("group", "UID"), all = TRUE)
}, allAnnTabs)
annTab <- annTab[, labels, with = FALSE]
for (j in seq_along(annTab))
set(annTab, which(is.na(annTab[[j]])), j, 0)
if (sum(sapply(annTab, function(x) any(x>0))) < 2)
stop("Need at least two non-empty objects to plot")
do.call(UpSetR::upset, c(list(annTab, nsets = nsets, nintersects = nintersects), upsetArgs))
})
setMethod("annScoreNames", "featureAnnotations", function(obj, onlyNums)
{
# NOTE: onlyNums ignored here, mainly for MF
if (length(obj@scoreRanges) == 0)
return(character())
return(unique(unlist(lapply(obj@scoreRanges, names))))
})
setMethod("prepareConsensusLabels", "featureAnnotations", function(obj, ..., labels)
{
if (is.null(labels))
labels <- sapply(list(obj, ...), algorithm)
# in case names are (still) duplicated
labels <- make.unique(labels)
return(labels)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.