#' @include main.R
#' @include feature_groups.R
#' @include feature_groups-set.R
#' @include utils-screening.R
NULL
#' Class for suspect screened feature groups.
#'
#' This class derives from \code{\link{featureGroups}} and adds suspect screening information.
#'
#' @param obj,object,x,fGroups The \code{featureGroupsScreening} object.
#' @param \dots Further arguments passed to the base method.
#' @param onlyHits For \code{as.data.table}: if \code{TRUE} then only feature groups with suspect hits are reported.
#'
#' For \code{filter} \itemize{
#'
#' \item if \code{negate=FALSE} and \code{onlyHits=TRUE} then all feature groups without suspect hits will be removed.
#' Otherwise nothing will be done.
#'
#' \item if \code{negate=TRUE} then \code{onlyHits=TRUE} will select feature groups without suspect hits,
#' \code{onlyHits=FALSE} will only retain feature groups with suspect matches and this filter is ignored if
#' \code{onlyHits=NULL}.
#'
#' }
#'
#' @slot screenInfo A (\code{\link{data.table}}) with results from suspect screening. This table will be amended with
#' annotation data when \code{annotateSuspects} is run.
#' @slot MS2QuantMeta Metadata from \pkg{MS2Quant} filled in by \code{predictRespFactors}.
#'
#' @section Suspect annotation: The \code{annotateSuspects} method is used to annotate suspects after
#' \code{\link{screenSuspects}} was used to collect suspect screening results and other workflow steps such as formula
#' and compound annotation steps have been completed. The annotation results, which can be acquired with the
#' \code{as.data.table} and \code{screenInfo} methods, amends the current screening data with the following columns:
#'
#' \itemize{
#'
#' \item \code{formRank},\code{compRank} The rank of the suspect within the formula/compound annotation results.
#'
#' \item \code{annSimForm},\code{annSimComp},\code{annSimBoth} A similarity measure between measured and annotated
#' MS/MS peaks from annotation of formulae, compounds or both. The similarity is calculated as the spectral similarity
#' between a peaklist with (a) all MS/MS peaks and (b) only annotated peaks. Thus, a value of one means that all MS/MS
#' peaks were annotated. If both formula and compound annotations are available then \code{annSimBoth} is calculated
#' after combining all the annotated peaks, otherwise \code{annSimBoth} equals the available value for
#' \code{annSimForm} or \code{annSimComp}. The similarity calculation can be configured with the \code{specSimParams}
#' argument to \code{annotateSuspects}. Note for annotation with \code{generateCompoundsLibrary} results: the method
#' and default parameters for \code{annSimComp} calculation slightly differs to those from the spectral similarity
#' calculated with compound annotation (\code{libMatch} score), hence small differences in results are typically
#' observed.
#'
#' \item \code{maxFrags} The maximum number of MS/MS fragments that can be matched for this suspect (based on the
#' \code{fragments_*} columns from the suspect list).
#'
#' \item \code{maxFragMatches},\code{maxFragMatchesRel} The absolute and relative amount of experimental MS/MS peaks
#' that were matched from the fragments specified in the suspect list. The value for \code{maxFragMatchesRel} is
#' relative to the value for \code{maxFrags}. The calculation of this column is influenced by the
#' \code{checkFragments} argument to \code{annotateSuspects}.
#'
#' \item \code{estIDLevel} Provides an \emph{estimation} of the identification level, roughly following that of
#' \insertCite{Schymanski2014}{patRoon}. However, please note that this value is only an estimation, and manual
#' interpretation is still necessary to assign final identification levels. The estimation is done through a set of
#' rules, see the \verb{Identification level rules} section below.
#'
#' }
#'
#' Note that only columns are present if sufficient data is available for their calculation.
#'
#' @section Identification level rules: The estimation of identification levels is configured through a YAML file which
#' specifies the rules for each level. The default file is shown below.
#'
#' @eval paste0("@@section Identification level rules: \\preformatted{", patRoon:::readAllFile(system.file("misc",
#' "IDLevelRules.yml", package = "patRoon")), "}")
#'
#' @section Identification level rules: Most of the file should be self-explanatory. Some notes:
#'
#' \itemize{
#'
#' \item Each rule is either a field of \code{suspectFragments} (minimum number of MS/MS fragments matched from
#' suspect list), \code{retention} (maximum retention deviation from suspect list), \code{rank} (the maximum
#' annotation rank from formula or compound annotations), \code{all} (this level is always matched) or any of the
#' scorings available from the formula or compound annotations.
#'
#' \item In case any of the rules could be applied to either formula or compound annotations, the annotation type must
#' be specified with the \code{type} field (\code{formula} or \code{compound}).
#'
#' \item Identification levels should start with a number and may optionally be followed by a alphabetic character.
#' The lowest levels are checked first.
#'
#' \item If \code{relative=yes} then the relative scoring will be used for testing.
#'
#' \item For \code{suspectFragments}: if the number of fragments from the suspect list (\code{maxFrags} column) is
#' less then the minimum rule value, the minimum is adjusted to the number of available fragments.
#'
#' \item The \code{or} and \code{and} keywords can be used to combine multiple conditions.
#' }
#'
#' A template rules file can be generated with the \code{\link{genIDLevelRulesFile}} function, and this file can
#' subsequently passed to \code{annotateSuspects}. The file format is highly flexible and (sub)levels can be added or
#' removed if desired. Note that the default file is currently only suitable when annotation is performed with GenForm
#' and MetFrag, for other algorithms it is crucial to modify the rules.
#'
#' @templateVar class featureGroupsScreening
#' @template class-hierarchy
#'
#' @references \insertAllCited{} \cr \cr \insertRef{Stein1994}{patRoon}
#'
#' @seealso \code{\link{featureGroups}}
#'
#' @export
featureGroupsScreening <- setClass("featureGroupsScreening",
slots = c(screenInfo = "data.table", MS2QuantMeta = "list"),
contains = "featureGroups")
setMethod("initialize", "featureGroupsScreening",
function(.Object, ...) callNextMethod(.Object, algorithm = "screening", ...))
#' @describeIn featureGroupsScreening Returns a table with screening information
#' (see \code{screenInfo} slot).
#' @aliases screenInfo
#' @export
setMethod("screenInfo", "featureGroupsScreening", function(obj) obj@screenInfo)
#' @describeIn featureGroupsScreening Shows summary information for this object.
#' @export
setMethod("show", "featureGroupsScreening", function(object)
{
callNextMethod(object)
doScreeningShow(object)
})
#' @describeIn featureGroupsScreening Subset on analyses, feature groups and/or
#' suspects.
#'
#' @param i,j,rGroups Used for subsetting data analyses, feature groups and
#' replicate groups, see \code{\link{featureGroups}}.
#' @param suspects An optional \code{character} vector with suspect names. If
#' specified, only \code{featureGroups} will be kept that are assigned to
#' these suspects.
#' @param drop Ignored.
#'
#' @export
setMethod("[", c("featureGroupsScreening", "ANY", "ANY", "missing"), function(x, i, j, ..., rGroups,
suspects = NULL, drop = TRUE)
{
checkmate::assertCharacter(suspects, null.ok = TRUE)
x <- callNextMethod(x, i, j, ..., rGroups = rGroups, drop = drop)
if (!is.null(suspects))
x <- x[, x@screenInfo[name %in% suspects]$group]
return(x)
})
#' @rdname featureGroupsScreening-class
#' @export
setMethod("delete", "featureGroupsScreening", function(obj, i = NULL, j = NULL, ...)
{
obj <- callNextMethod()
obj@screenInfo <- obj@screenInfo[group %in% names(obj)]
return(obj)
})
#' @describeIn featureGroupsScreening Obtain a summary table (a \code{\link{data.table}}) with retention, \emph{m/z},
#' intensity and optionally other feature data. Furthermore, the output table will be merged with information from
#' \code{screenInfo}, such as suspect names and other properties and annotation data.
#'
#' @param collapseSuspects If a \code{character} then any suspects that were matched to the same feature group are
#' collapsed to a single row and suspect names are separated by the value of \code{collapseSuspects}. If \code{NULL}
#' then no collapsing occurs, and each suspect match is reported on a single row. See the \verb{Suspect collapsing}
#' section below for additional details.
#'
#' @section {Suspect collapsing}: The \code{as.data.table} method fir \code{featureGroupsScreening} supports an
#' additional format where each suspect hit is reported on a separate row (enabled by setting
#' \code{collapseSuspects=NULL}). In this format the suspect
#' properties from the \code{screenInfo} method are merged with each suspect row. Alternatively, if \emph{suspect
#' collapsing} is enabled (the default) then the regular \code{as.data.table} format is used, and amended with the
#' names of all suspects matched to a feature group (separated by the value of the \code{collapseSuspects} argument).
#'
#' Suspect collapsing also influences how calculated feature concentrations/toxicities are reported (\emph{i.e.}
#' obtained with \code{\link{calculateConcs}}/\code{\link{calculateTox}}). If these values were directly predicted for
#' suspects, \emph{i.e.} by using \code{\link{predictRespFactors}}/\code{\link{predictTox}} on the feature groups
#' object, \emph{and} suspects are \emph{not} collapsed, then the calculated concentration/toxicity reported for each
#' suspect row is not aggregated and specific for that suspect (unless not available). Hence, this allows you to
#' obtain specific concentration/toxicity values for each suspect/feature group pair.
#'
#' @export
setMethod("as.data.table", "featureGroupsScreening", doFGScrAsDataTable)
#' @rdname pred-quant
#' @export
setMethod("predictRespFactors", "featureGroupsScreening", function(obj, calibrants, eluent, organicModifier, pHAq,
concUnit = "ugL", calibConcUnit = concUnit)
{
checkPackage("MS2Quant", "kruvelab/MS2Quant")
ac <- checkmate::makeAssertCollection()
assertQuantEluent(eluent, obj, add = ac)
checkmate::assertChoice(organicModifier, c("MeOH", "MeCN"), add = ac)
checkmate::assertNumber(pHAq, finite = TRUE, add = ac)
aapply(assertConcUnit, . ~ concUnit + calibConcUnit, fixed = list(add = ac))
checkmate::reportAssertions(ac)
calibrants <- assertAndPrepareQuantCalib(calibrants, calibConcUnit)
if (length(obj) == 0)
return(obj)
scr <- screenInfo(obj)
if (is.null(scr[["SMILES"]]) || all(is.na(scr$SMILES)))
stop("Suspects lack necessary SMILES information to perform calculations, aborting...", call. = FALSE)
if (any(is.na(scr$SMILES)))
warning("Some suspect SMILES are NA and will be ignored", call. = FALSE)
inp <- screenInfo(obj)[, c("group", "SMILES"), with = FALSE]
inp <- inp[!is.na(SMILES)]
# avoid duplicate calculations if there happen to be suspects with the same SMILES
inp <- unique(inp, by = c("group", "SMILES"))
printf("Predicting response factors from SMILES with MS2Quant for %d suspects...\n", nrow(inp))
res <- predictRespFactorsSMILES(inp, groupInfo(obj), calibrants, eluent, organicModifier, pHAq, concUnit)
if (!is.null(scr[["RF_SMILES"]]))
scr[, RF_SMILES := NULL] # clearout for merge below
scr <- merge(scr, res$RFs[, c("group", "SMILES", "RF_SMILES"), with = FALSE], by = c("group", "SMILES"),
sort = FALSE, all.x = TRUE)
obj@screenInfo <- scr
obj@MS2QuantMeta <- res$MD
return(obj)
})
#' @rdname pred-tox
#' @export
setMethod("predictTox", "featureGroupsScreening", function(obj, LC50Mode = "static", concUnit = "ugL")
{
checkPackage("MS2Tox", "kruvelab/MS2Tox")
checkmate::assertChoice(LC50Mode, c("static", "flow"))
assertConcUnit(concUnit)
if (length(obj) == 0)
return(obj)
scr <- screenInfo(obj)
if (is.null(scr[["SMILES"]]) || all(is.na(scr$SMILES)))
stop("Suspects lack necessary SMILES information to perform calculations, aborting...", call. = FALSE)
if (any(is.na(scr$SMILES)))
warning("Some suspect SMILES are NA and will be ignored", call. = FALSE)
# avoid duplicate calculations if there happen to be suspects with the same SMILES
inpSMILES <- unique(screenInfo(obj)$SMILES)
inpSMILES <- inpSMILES[!is.na(inpSMILES)]
printf("Predicting LC50 values from SMILES with MS2Tox for %d suspects...\n", length(inpSMILES))
pr <- predictLC50SMILES(inpSMILES, LC50Mode, concUnit)
if (!is.null(scr[["LC50_SMILES"]]))
scr[, LC50_SMILES := NULL] # clearout for merge below
scr <- merge(scr, pr, by = "SMILES", sort = FALSE, all.x = TRUE)
obj@screenInfo <- scr
return(obj)
})
#' @rdname pred-quant
#' @export
setMethod("calculateConcs", "featureGroupsScreening", function(fGroups, featureAnn = NULL, areas = FALSE)
{
checkmate::assertFlag(areas)
if (!is.null(featureAnn) && length(featureAnn) > 0)
fGroups <- callNextMethod()
else
fGroups@concentrations <- data.table()
scr <- screenInfo(fGroups)
if (is.null(scr[["RF_SMILES"]]))
{
cat("Screening results lacks predicted response factors and will not be used for quantitation.",
"You can use predictRespFactors() to add suspect response factors.\n")
return(fGroups)
}
resp <- scr[, c("group", "name", "SMILES", "RF_SMILES"), with = FALSE]
resp[, type := "suspect"]
setnames(resp, c("SMILES", "name", "RF_SMILES"), c("candidate", "candidate_name", "RF"))
concs <- calcFeatureConcs(fGroups, resp, areas)
fGroups@concentrations <- finalizeFeaturePredTab(rbind(fGroups@concentrations, concs, fill = TRUE))
return(fGroups)
})
#' @rdname pred-tox
#' @export
setMethod("calculateTox", "featureGroupsScreening", function(fGroups, featureAnn = NULL)
{
if (!is.null(featureAnn) && length(featureAnn) > 0)
fGroups <- callNextMethod()
else
fGroups@toxicities <- data.table()
scr <- screenInfo(fGroups)
if (is.null(scr[["LC50_SMILES"]]))
{
cat("Screening results lacks predicted toxicity values and will not be used.",
"You can use predictTox() to add suspect toxicity values.\n")
return(fGroups)
}
LC50s <- scr[, c("group", "name", "SMILES", "LC50_SMILES"), with = FALSE]
LC50s[, type := "suspect"]
setnames(LC50s, c("SMILES", "name", "LC50_SMILES"), c("candidate", "candidate_name", "LC50"))
fGroups@toxicities <- finalizeFeaturePredTab(rbind(fGroups@toxicities, LC50s, fill = TRUE))
return(fGroups)
})
#' @describeIn featureGroupsScreening Incorporates annotation data obtained during the workflow to annotate suspects
#' with matched known MS/MS fragments, formula/candidate ranks and automatic estimation of identification levels. See
#' the \verb{Suspect annotation} section for more details. The estimation of identification levels for each suspect is
#' logged in the \code{log/ident} directory.
#'
#' @templateVar normParam compoundsNormalizeScores,formulasNormalizeScores
#' @templateVar noNone TRUE
#' @template norm-args
#'
#' @template specSimParams-arg
#'
#' @param MSPeakLists,formulas,compounds Annotation data (\code{\link{MSPeakLists}}, \code{\link{formulas}} and
#' \code{\link{compounds}}) obtained for this \code{featureGroupsScreening} object. All arguments can be \code{NULL}
#' to exclude it from the annotation.
#' @param absMzDev Maximum absolute \emph{m/z} deviation.
#' @param checkFragments Which type(s) of MS/MS fragments from workflow data should be checked to evaluate the number of
#' suspect fragment matches (\emph{i.e.} from the \code{fragments_mz}/\code{fragments_formula} columns in the suspect
#' list). Valid values are: \code{"mz"}, \code{"formula"}, \code{"compounds"}. The former uses \emph{m/z} values in
#' the specified \code{MSPeakLists} object, whereas the others use the formulae that were annotated to MS/MS peaks in
#' the given \code{formulas} or \code{compounds} objects. Multiple values are possible: in this case the maximum
#' number of fragment matches will be reported.
#' @param IDFile A file path to a YAML file with rules used for estimation of identification levels. See the
#' \verb{Suspect annotation} section for more details. If not specified then a default rules file will be used.
#' @param logPath A directory path to store logging information. If \code{NULL} then logging is disabled.
#'
#' @return \code{annotateSuspects} returns a \code{featureGroupsScreening} object, which is a
#' \code{\link{featureGroups}} object amended with annotation data.
#'
#' @author Rick Helmus <\email{r.helmus@@uva.nl}>, Emma Schymanski <\email{emma.schymanski@@uni.lu}> (contributions to
#' identification level rules), Bas van de Velde (contributions to spectral similarity calculation).
#'
#' @aliases annotateSuspects
#' @export
setMethod("annotateSuspects", "featureGroupsScreening", function(fGroups, MSPeakLists, formulas, compounds,
absMzDev = 0.005,
specSimParams = getDefSpecSimParams(removePrecursor = TRUE),
checkFragments = c("mz", "formula", "compound"),
formulasNormalizeScores = "max",
compoundsNormalizeScores = "max",
IDFile = system.file("misc", "IDLevelRules.yml",
package = "patRoon"),
logPath = file.path("log", "ident"))
{
# NOTE: keep args in sync with sets method
ac <- checkmate::makeAssertCollection()
aapply(checkmate::assertClass, . ~ MSPeakLists + formulas + compounds,
c("MSPeakLists", "formulas", "compounds"), null.ok = TRUE, fixed = list(add = ac))
aapply(checkmate::assertNumber, . ~ absMzDev, lower = 0,
finite = TRUE, fixed = list(add = ac))
assertSpecSimParams(specSimParams, add = ac)
checkmate::assertSubset(checkFragments, c("mz", "formula", "compound"), add = ac)
aapply(assertNormalizationMethod, . ~ formulasNormalizeScores + compoundsNormalizeScores, withNone = FALSE,
fixed = list(add = ac))
checkmate::assertFileExists(IDFile, "r", add = ac)
if (!is.null(logPath))
assertCanCreateDir(logPath, add = ac)
checkmate::reportAssertions(ac)
hash <- makeHash(fGroups, MSPeakLists, formulas, compounds, absMzDev, specSimParams, checkFragments,
formulasNormalizeScores, compoundsNormalizeScores, makeFileHash(IDFile))
cd <- loadCacheData("annotateSuspects", hash)
if (!is.null(cd))
return(cd)
IDLevelRules <- readYAML(IDFile)
if (!checkmate::test_named(IDLevelRules))
stop("No valid rules could be loaded")
if (!all(grepl("^[[:digit:]]+[[:alpha:]]?$", names(IDLevelRules))))
stop("Levels should be defined as a number and may optionally followed by one character (e.g. 3, 2b etc)")
IDLevelRules <- IDLevelRules[order(names(IDLevelRules))] # sort to ensure lowest levels will be tested first
if (nrow(screenInfo(fGroups)) == 0)
{
cat("No suspect hits, nothing to annotate")
return(fGroups)
}
mzWithin <- function(mz1, mz2) abs(mz1 - mz2) <= absMzDev
si <- copy(screenInfo(fGroups))
annCols <- suspAnnCols()
si <- si[, setdiff(names(si), annCols), with = FALSE] # remove any previous results
printf("Annotating %d suspects...\n", nrow(si))
prog <- openProgBar(0, nrow(si))
for (i in seq_len(nrow(si)))
{
gName <- si$group[i]
MSMSList <- if (!is.null(MSPeakLists)) MSPeakLists[[gName]][["MSMS"]] else NULL
fTable <- if (!is.null(formulas)) formulas[[gName]] else NULL
fScRanges <- if (!is.null(formulas)) formulas@scoreRanges[[gName]] else NULL
cTable <- if (!is.null(compounds)) compounds[[gName]] else NULL
cScRanges <- if (!is.null(compounds)) compounds@scoreRanges[[gName]] else NULL
formRank <- NA_integer_; annSimForm <- annSimBoth <- NA_real_
if (!is.null(fTable) && !is.null(si[["formula"]]) && !is.na(si$formula[i]))
{
formRank <- which(si$formula[i] == fTable$neutral_formula)
formRank <- if (length(formRank) > 0) formRank[1] else NA_integer_
if (!is.na(formRank))
annSimForm <- annSimBoth <- annotatedMSMSSimilarity(annotatedPeakList(formulas,
index = formRank,
groupName = gName,
MSPeakLists = MSPeakLists),
specSimParams)
}
suspIK1 <- if (!is.null(si[["InChIKey"]]) && !is.na(si$InChIKey[i])) getIKBlock1(si$InChIKey[i]) else NULL
annSimComp <- NA_real_; compRank <- NA_integer_
if (!is.null(MSMSList) && !is.null(cTable) && !is.null(suspIK1))
{
compRank <- which(suspIK1 == cTable$InChIKey1)
compRank <- if (length(compRank) > 0) compRank[1] else NA_integer_
if (!is.na(compRank) && !is.null(cTable[["fragInfo"]][[compRank]]))
{
annSimComp <- annotatedMSMSSimilarity(annotatedPeakList(compounds, index = compRank,
groupName = gName, MSPeakLists = MSPeakLists),
specSimParams)
if (!is.na(formRank))
annSimBoth <- annotatedMSMSSimilarity(annotatedPeakList(compounds, index = compRank,
groupName = gName, MSPeakLists = MSPeakLists,
formulas = formulas),
specSimParams)
else
annSimBoth <- annSimComp
}
}
fragMZMatches <- fragFormMatches <- fragFormCompMatches <- NA_integer_
fragMZs <- fragForms <- NULL
maxSuspFrags <- maxFragMatches <- NA_integer_
if (!is.null(MSMSList) && !is.null(si[["fragments_mz"]]) &&
!is.na(si[["fragments_mz"]][i]) && nzchar(si[["fragments_mz"]][i]) &&
"mz" %in% checkFragments)
{
fragMZs <- as.numeric(unlist(strsplit(si[["fragments_mz"]][i], ";")))
maxSuspFrags <- length(fragMZs)
maxFragMatches <- sum(sapply(MSMSList$mz, function(mz1) any(sapply(fragMZs, mzWithin, mz1 = mz1))))
}
if (!is.null(si[["fragments_formula"]]) && !is.na(si[["fragments_formula"]][i]) &&
nzchar(si[["fragments_formula"]][i]))
{
fragForms <- unlist(strsplit(si[["fragments_formula"]][i], ";"))
maxSuspFrags <- max(NAToZero(maxSuspFrags), length(fragForms))
if (!is.null(fTable) && "formula" %in% checkFragments && !is.na(formRank) &&
!is.null(fTable[["fragInfo"]][[formRank]]))
maxFragMatches <- max(NAToZero(maxFragMatches), sum(fragForms %in% fTable[["fragInfo"]][[formRank]]$ion_formula))
if (!is.null(cTable) && "compound" %in% checkFragments && !is.na(compRank) &&
!is.null(cTable[["fragInfo"]][[compRank]]))
maxFragMatches <- max(NAToZero(maxFragMatches), sum(fragForms %in% cTable[["fragInfo"]][[compRank]]$ion_formula))
}
maxFragMatchesRel <- NA_real_
if (!is.na(maxFragMatches))
maxFragMatchesRel <- maxFragMatches / maxSuspFrags
estIDLevel <- estimateIdentificationLevel(si$name[i], si$group[i], si$d_rt[i], suspIK1, si$formula[i],
annSimForm, annSimComp, annSimBoth,
maxSuspFrags, maxFragMatches, fTable, formRank,
mFormNames = if (!is.null(formulas)) mergedConsensusNames(formulas) else character(),
fScRanges, formulasNormalizeScores, cTable, compRank,
mCompNames = if (!is.null(compounds)) mergedConsensusNames(compounds) else character(),
cScRanges, compoundsNormalizeScores, absMzDev, IDLevelRules, logPath)
set(si, i,
c("formRank", "compRank", "annSimForm", "annSimComp", "annSimBoth",
"maxFrags", "maxFragMatches", "maxFragMatchesRel", "estIDLevel"),
list(formRank, compRank, annSimForm, annSimComp, annSimBoth, maxSuspFrags, maxFragMatches,
maxFragMatchesRel, estIDLevel))
setTxtProgressBar(prog, i)
}
rmCols <- annCols[sapply(annCols, function(col) !is.null(si[[col]]) && all(is.na(si[[col]])))]
if (length(rmCols) > 0)
si <- si[, setdiff(names(si), rmCols), with = FALSE]
fGroups@screenInfo <- si
close(prog)
saveCacheData("annotateSuspects", fGroups, hash)
return(fGroups)
})
#' @describeIn featureGroupsScreening Performs rule based filtering. This method builds on the comprehensive filter
#' functionality from the base \code{\link{filter,featureGroups-method}}. It adds several filters to select
#' \emph{e.g.} the best ranked suspects or those with a minimum estimated identification level. \strong{NOTE}: most
#' filters \emph{only} affect suspect hits, not feature groups. Set \code{onlyHits=TRUE} to subsequently remove any
#' feature groups that lost any suspect matches due to other filter steps.
#'
#' @param selectHitsBy Should be \code{"intensity"} or \code{"level"}. For cases where the same suspect is matched to
#' multiple feature groups, only the suspect to the feature group with highest mean intensity
#' (\code{selectHitsBy="intensity"}) or best identification level (\code{selectHitsBy="level"}) is kept. In case of
#' ties only the first hit is kept. Set to \code{NULL} to ignore this filter. If \code{negate=TRUE} then only those
#' hits with lowest mean intensity/poorest identification level are kept.
#' @param selectBestFGroups If \code{TRUE} then for any cases where a single feature group is matched to several
#' suspects only the suspect assigned to the feature group with best identification score is kept. In case of ties
#' only the first is kept.
#' @param maxLevel,maxFormRank,maxCompRank,minAnnSimForm,minAnnSimComp,minAnnSimBoth Filter suspects by maximum
#' identification level (\emph{e.g.} \code{"3a"}), formula/compound rank or with minimum formula/compound/combined
#' annotation similarity. Set to \code{NULL} to ignore.
#' @param absMinFragMatches,relMinFragMatches Only retain suspects with this minimum number MS/MS matches with the
#' fragments specified in the suspect list (\emph{i.e.} \code{fragments_mz}/\code{fragments_formula}).
#' \code{relMinFragMatches} sets the minimum that is relative (\samp{0-1}) to the maximum number of MS/MS fragments
#' specified in the \code{fragments_*} columns of the suspect list. Set to \code{NULL} to ignore.
#' @param minRF Filter suspect hits by the given minimum predicted response factor (as calculated by
#' \code{\link[=predictRespFactors]{predictRespFactors}}). Set to \code{NULL} to ignore.
#' @param maxLC50 Filter suspect hits by the given maximum toxicity (LC50) (as calculated by
#' \code{\link[=predictTox]{predictTox}}). Set to \code{NULL} to ignore.
#' @param negate If set to \code{TRUE} then filtering operations are performed in opposite manner.
#'
#' @return \code{filter} returns a filtered \code{featureGroupsScreening} object.
#'
#' @note \code{filter} removes suspect hits with \code{NA} values when any of the filters related to minimum or maximum
#' values are applied (unless \code{negate=TRUE}).
#'
#' @export
setMethod("filter", "featureGroupsScreening", function(obj, ..., onlyHits = NULL,
selectHitsBy = NULL, selectBestFGroups = FALSE,
maxLevel = NULL, maxFormRank = NULL, maxCompRank = NULL,
minAnnSimForm = NULL, minAnnSimComp = NULL, minAnnSimBoth = NULL,
absMinFragMatches = NULL, relMinFragMatches = NULL,
minRF = NULL, maxLC50 = NULL, negate = FALSE)
{
# NOTE: keep args and method in sync with featureGroupsScreeningSet method
ac <- checkmate::makeAssertCollection()
aapply(checkmate::assertFlag, . ~ onlyHits + selectBestFGroups + negate, null.ok = c(TRUE, FALSE, FALSE), fixed = list(add = ac))
checkmate::assertChoice(selectHitsBy, choices = c("intensity", "level"), null.ok = TRUE, add = ac)
aapply(checkmate::assertCount, . ~ maxLevel + maxFormRank + maxCompRank + absMinFragMatches + relMinFragMatches,
null.ok = TRUE, fixed = list(add = ac))
aapply(checkmate::assertNumber, . ~ minAnnSimForm + minAnnSimComp + minAnnSimBoth + minRF + maxLC50, null.ok = TRUE,
fixed = list(add = ac))
checkmate::reportAssertions(ac)
obj <- doSuspectFilter(obj, onlyHits, selectHitsBy, selectBestFGroups, maxLevel, maxFormRank, maxCompRank,
minAnnSimForm, minAnnSimComp, minAnnSimBoth, absMinFragMatches, relMinFragMatches, minRF,
maxLC50, negate)
if (...length() > 0)
obj <- callNextMethod(obj, ..., negate = negate)
return(obj)
})
#' @details \code{screenSuspects} is used to perform suspect screening. The input \code{\link{featureGroups}} object
#' will be screened for suspects by \emph{m/z} values and optionally retention times. Afterwards, any feature groups
#' not matched may be kept or removed, depending whether a full non-target analysis is desired.
#'
#' @param fGroups The \code{\link{featureGroups}} object that should be screened.
#' @param suspects A \code{data.frame} with suspect information. See the \verb{Suspect list format} section below.
#'
#' \setsWF Can also be a \code{list} with suspect lists to be used for each set (otherwise the same suspect lists is
#' used for all sets). The \code{list} can be named with the sets names to mark which suspect list is to be used with
#' which set (\emph{e.g.} \code{suspects=list(positive=suspsPos, negative=suspsNeg)}).
#' @param rtWindow,mzWindow The retention time window (in seconds) and \emph{m/z} window that will be used for matching
#' a suspect (+/- feature data).
#' @param adduct An \code{\link{adduct}} object (or something that can be converted to it with \code{\link{as.adduct}}).
#' Examples: \code{"[M-H]-"}, \code{"[M+Na]+"}. May be \code{NULL}, see \verb{Suspect list format} and \verb{Matching
#' of suspect masses} sections below.
#' @param skipInvalid If set to \code{TRUE} then suspects with invalid data (\emph{e.g.} missing names or other missing
#' data) will be ignored with a warning. Similarly, any suspects for which mass calculation failed (when no \code{mz}
#' column is present in the suspect list), for instance, due to invalid \code{SMILES}, will be ignored with a warning.
#' @param onlyHits If \code{TRUE} then all feature groups not matched by any of the suspects will be removed.
#'
#' @section Suspect list format: the \code{suspects} argument for \code{screenSuspects} should be a \code{data.frame}
#' with the following mandatory and optional columns:
#'
#' \itemize{
#'
#' \item \code{name} The suspect name. Must be file-compatible. (\strong{mandatory})
#'
#' \item \code{rt} The retention time (in seconds) for the suspect. If specified the suspect will only be matched if
#' its retention matches the experimental value (tolerance defined by the \code{rtWindow} argument).
#' (\strong{optional})
#'
#' \item \code{neutralMass},\code{formula},\code{SMILES},\code{InChI} The neutral monoisotopic mass, chemical formula,
#' SMILES or InChI for the suspect. (data from one of these columns are \strong{mandatory} in case no value from the
#' \code{mz} column is available for a suspect)
#'
#' \item \code{mz} The ionized \emph{m/z} of the suspect. (\strong{mandatory} unless it can be calculated from one of
#' the aforementioned columns)
#'
#' \item \code{adduct} A \code{character} that can be converted with \code{\link{as.adduct}}. Can be used to
#' automatically calculate values for the \code{mz} column. (\strong{mandatory} unless data from the \code{mz} column
#' is available, the \code{adduct} argument is set or \code{fGroups} has adduct annotations)
#'
#' \item \code{fragments_mz},\code{fragments_formula} One or more MS/MS fragments (specified as \emph{m/z} or
#' formulae, respectively). Multiple values can be specified by separating them with a semicolon (\verb{;}). This data
#' is used by \code{\link{annotateSuspects}} to report detected MS/MS fragments and calculate identification levels.
#' (\strong{optional})
#'
#' }
#'
#' @section Matching of suspect masses: How the mass of a suspect is matched with the mass of a feature depends on the
#' available data: \itemize{
#'
#' \item If the suspect has data from the \code{mz} column of the suspect list, then this data is matched with the
#' detected feature \emph{m/z}.
#'
#' \item Otherwise, if the suspect has data in the \code{adduct} column of the suspect list, this data is used to
#' calculate its \code{mz} value, which is then used like above.
#'
#' \item In the last case, the neutral mass of the suspect is matched with the neutral mass of the feature. Hence,
#' either the \code{adduct} argument needs to be specified, or the \code{featureGroups} input object must have adduct
#' annotations.
#'
#' }
#'
#' @templateVar whatCP suspect list
#' @template chemPropCalc
#'
#' @return \code{screenSuspects} returns a \code{\link{featureGroupsScreening}} object, which is a copy of the input
#' \code{fGroups} object amended with additional screening information.
#'
#' @seealso \code{featureGroupsScreening}
#'
#' @rdname suspect-screening
#' @aliases screenSuspects
#' @export
setMethod("screenSuspects", "featureGroups", function(fGroups, suspects, rtWindow, mzWindow, adduct, skipInvalid,
prefCalcChemProps, neutralChemProps, onlyHits)
{
checkmate::assertFlag(skipInvalid) # not in assert collection, should fail before assertSuspectList
# NOTE: skip adduct check if fGroups is empty
needsAdduct <- is.null(adduct) && nrow(annotations(fGroups)) == 0 && length(fGroups) > 0
ac <- checkmate::makeAssertCollection()
assertSuspectList(suspects, needsAdduct = needsAdduct, skipInvalid, add = ac)
aapply(checkmate::assertNumber, . ~ rtWindow + mzWindow, lower = 0, finite = TRUE, fixed = list(add = ac))
aapply(checkmate::assertFlag, . ~ skipInvalid + prefCalcChemProps + neutralChemProps + onlyHits,
fixed = list(add = ac))
checkmate::reportAssertions(ac)
if (!is.null(adduct))
adduct <- checkAndToAdduct(adduct, fGroups)
# do this before checking cache to ensure proper errors/warnings are thrown!
suspects <- prepareSuspectList(suspects, adduct, skipInvalid, checkDesc = TRUE,
prefCalcChemProps = prefCalcChemProps, neutralChemProps = neutralChemProps)
hash <- makeHash(fGroups, suspects, rtWindow, mzWindow, adduct, skipInvalid, prefCalcChemProps, neutralChemProps,
onlyHits)
cd <- loadCacheData("screenSuspects", hash)
if (!is.null(cd))
return(cd)
scr <- doScreenSuspects(fGroups, suspects, rtWindow, mzWindow, skipInvalid)
if (onlyHits)
fGroups <- fGroups[, scr$group]
ret <- featureGroupsScreening(screenInfo = scr, groups = copy(groupTable(fGroups)),
analysisInfo = analysisInfo(fGroups), groupInfo = groupInfo(fGroups),
features = getFeatures(fGroups), ftindex = copy(groupFeatIndex(fGroups)),
groupQualities = copy(groupQualities(fGroups)),
groupScores = copy(groupScores(fGroups)), ISTDs = copy(internalStandards(fGroups)),
ISTDAssignments = internalStandardAssignments(fGroups),
annotations = copy(annotations(fGroups)),
concentrations = copy(concentrations(fGroups)),
toxicities = copy(toxicities(fGroups)))
saveCacheData("screenSuspects", ret, hash)
return(ret)
})
#' @param amend If \code{TRUE} then screening results will be \emph{amended} to the original object.
#' @rdname suspect-screening
#' @export
setMethod("screenSuspects", "featureGroupsScreening", function(fGroups, suspects, rtWindow, mzWindow,
adduct, skipInvalid, onlyHits, amend = FALSE)
{
aapply(checkmate::assertFlag, . ~ onlyHits + amend)
fGroupsScreened <- callNextMethod(fGroups, suspects, rtWindow, mzWindow, adduct, skipInvalid, onlyHits)
if (!amend)
return(fGroupsScreened)
# amend screening results
fGroups@screenInfo <- rbind(fGroups@screenInfo, fGroupsScreened@screenInfo, fill = TRUE)
fGroups@screenInfo <- unique(fGroups@screenInfo, by = c("name", "group"))
if (onlyHits)
fGroups <- fGroups[, fGroups@screenInfo$group]
return(fGroups)
})
#' @details \code{numericIDLevel} Extracts the numeric part of a given
#' identification level (\emph{e.g.} \code{"3a"} becomes \samp{3}).
#' @param level The identification level to be converted.
#' @rdname suspect-screening
#' @export
numericIDLevel <- function(level)
{
checkmate::assertCharacter(level, any.missing = TRUE, min.chars = 1)
ret <- integer(length(level))
ret[is.na(level)] <- NA_integer_
ret[!is.na(level)] <- as.integer(gsub("[[:alpha:]]*", "", level[!is.na(level)]))
return(ret)
}
#' @details \code{genIDLevelRulesFile} Generates a template YAML file that is
#' used to configure the rules for automatic estimation of identification
#' levels. This file can then be used as input for
#' \code{\link{annotateSuspects}}.
#' @param out The file path to the target file.
#' @param inLevels,exLevels A \link[=regex]{regular expression} for the
#' identification levels to include or exclude, respectively. For instance,
#' \code{exLevels="4|5"} would exclude level 4 and 5 from the output file. Set
#' to \code{NULL} to ignore.
#' @rdname suspect-screening
#' @export
genIDLevelRulesFile <- function(out, inLevels = NULL, exLevels = NULL)
{
aapply(checkmate::assertCharacter, . ~ inLevels + exLevels, null.ok = TRUE)
checkmate::assertPathForOutput(out, overwrite = TRUE)
defFile <- system.file("misc", "IDLevelRules.yml", package = "patRoon")
if (is.null(inLevels) && is.null(exLevels))
file.copy(defFile, out, overwrite = TRUE)
else
{
rules <- readYAML(defFile)
if (!is.null(inLevels))
rules <- rules[grepl(inLevels, names(rules))]
if (!is.null(exLevels))
rules <- rules[!grepl(exLevels, names(rules))]
# UNDONE: this quotes ID levels without sub-level, fix?
writeYAML(rules, out)
}
invisible(NULL)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.