R/feature_groups.R

#' @include main.R
#' @include features.R
#' @include workflow-step.R
NULL

#' Base class for grouped features.
#'
#' This class holds all the information for grouped features.
#'
#' The \code{featureGroup} class is the workhorse of \pkg{patRoon}: almost all functionality operate on its instantiated
#' objects. The class holds all information from grouped features (obtained from \code{\link{features}}). This class
#' itself is \code{virtual}, hence, objects are not created directly from it. Instead, 'feature groupers' such as
#' \code{\link{groupFeaturesXCMS}} return a \code{featureGroups} derived object after performing the actual grouping of
#' features across analyses.
#'
#' @param fGroups,obj,x,object \code{featureGroups} object to be accessed.
#' @param rGroups For \code{[}: An optional \code{character} vector: if specified only keep results for the given
#'   replicate groups (equivalent to the \code{rGroups} argument to \code{\link[=filter,featureGroups-method]{filter}}).
#' @param \dots For the \code{"["} operator: ignored.
#'
#'   For \code{delete}: passed to the function specified as \code{j}.
#'
#'   For \code{normInts}: passed to \code{\link{screenSuspects}} if \code{featNorm="istd"}.
#'
#'   \setsPassedArgs1{featureGroups}
#' @param average If \code{TRUE} then data within replicate groups are averaged.
#'
#'   For \code{as.data.table}: if \code{features=TRUE} other feature properties are also averaged.
#' @param averageFunc Function used for averaging. Only used when \code{average=TRUE} or \code{FCParams != NULL}.
#' @param areas If set to \code{TRUE} then areas are considered instead of peak intensities.
#'
#'   For \code{as.data.table}: ignored if \code{features=TRUE}, as areas of features are always reported.
#' @param normalized If \code{TRUE} then normalized intensity data is used (see the \verb{Feature intensity
#'   normalization} section.
#'
#'   For \code{as.data.table}: if no normalization data is available (\emph{e.g.} because \code{normInts} was not used)
#'   then an automatic group normalization is performed.
#' @param which A character vector with replicate groups used for comparison.
#'
#'   For \code{overlap}: can also be a \code{list} of \code{character} vectors with replicate groups to compare. For
#'   instance, \code{which=list(c("samp1", "samp2"), c("samp3", "samp4"))} returns the overlap between
#'   \code{"samp1"}+\code{"samp2"} and \code{"samp3"}+\code{"samp4"}.
#' @param FCParams A parameter list to calculate Fold change data. See \code{getFCParams} for more details. Set to
#'   \code{NULL} to not perform FC calculations.
#'
#' @templateVar seli analyses
#' @templateVar selOrderi analyses()
#' @templateVar selj feature groups
#' @templateVar selOrderj names()
#' @templateVar optionalji TRUE
#' @templateVar del TRUE
#' @templateVar deli analyses
#' @templateVar delj feature groups
#' @templateVar deljtype numeric index, logical or character
#' @templateVar delfwhat feature group
#' @templateVar delfa a vector of the group intensities, the group name
#' @templateVar delfr the analyses of the features in the group to be removed (same format as \code{i})
#' @templateVar dollarOpName feature group
#' @template sub_sel_del-args
#'
#' @slot groups Matrix (\code{\link{data.table}}) with intensities for each feature group (columns) per analysis (rows).
#'   Access with \code{groups} method.
#' @slot analysisInfo,features \link[=analysis-information]{Analysis info} and \code{\link{features}} class associated
#'   with this object. Access with \code{analysisInfo} and \code{featureTable} methods, respectively.
#' @slot groupInfo \code{data.frame} with retention time (\code{rts} column, in seconds) and \emph{m/z} (\code{mzs}
#'   column) for each feature group. Access with \code{groupInfo} method.
#' @slot ftindex Matrix (\code{\link{data.table}}) with feature indices for each feature group (columns) per analysis
#'   (rows). Each index corresponds to the row within the feature table of the analysis (see
#'   \code{\link{featureTable}}).
#' @slot groupQualities,groupScores A \code{\link{data.table}} with qualities/scores for each feature group (see the
#'   \code{calculatePeakQualities} method).
#' @slot annotations A \code{\link{data.table}} with adduct annotations for each group (see the \code{selectIons}
#'   method).
#' @slot ISTDs A \code{data.table} with screening results for internal standards (filled in by the \code{normInts}
#'   method).
#' @slot ISTDAssignments A \code{list}, where each item is named by a feature group and consists of a vector with
#'   feature group names of the internal standards assigned to it (filled in by the \code{normInts} method).
#' @slot concentrations,toxicities A \code{data.table} with predicted concentrations/toxicities for each feature group.
#'   Assigned by the \code{\link{calculateConcs}}/\code{\link{calculateTox}} methods. Use the
#'   \code{concentratrions}/\code{toxicities} methods for access.
#'
#' @templateVar class featureGroups
#' @template class-hierarchy
#'
#' @seealso \code{\link{groupFeatures}} for generating feature groups, \link{feature-filtering} and
#'   \link{feature-plotting} for more advanced \code{featureGroups} methods.
#'
#' @export
featureGroups <- setClass("featureGroups",
                          slots = c(groups = "data.table", analysisInfo = "data.frame", groupInfo = "data.frame",
                                    features = "features", ftindex = "data.table", groupQualities = "data.table",
                                    groupScores = "data.table", annotations = "data.table",
                                    ISTDs = "data.table", ISTDAssignments = "list", concentrations = "data.table",
                                    toxicities = "data.table"),
                          contains = c("VIRTUAL", "workflowStep"))

setMethod("initialize", "featureGroups", function(.Object, ...)
{
    args <- list(...)

    # data.table's don't seem to initialize well (gives error that slot is init as list)
    for (s in c("groups", "ftindex", "groupQualities", "groupScores", "annotations", "ISTDs", "concentrations",
                "toxicities"))
    {
        if (is.null(args[[s]]))
            args[[s]] <- data.table()
    }
    
    .Object@ISTDAssignments <- makeEmptyListNamed(.Object@ISTDAssignments)

    .Object <- do.call(callNextMethod, c(list(.Object), args))
    
    if (nrow(.Object@ftindex) > 0)
    {
        ftitr <- transpose(.Object@ftindex)
        gNames <- names(.Object)
        .Object@features@features <- Map(.Object@features@features, ftitr, f = function(feat, inds)
        {
            wh <- which(inds != 0)
            feat <- copy(feat)
            feat[inds[wh], group := gNames[wh]][]
            return(feat)
        })

        # remove unassigned features (eg in case the grouping algorithm already did some cleanup)
        oldfn <- length(.Object@features)
        .Object@features <- delete(getFeatures(.Object), j = function(ft, ...) is.na(ft$group))
        if (oldfn != length(.Object@features))
            .Object <- reGenerateFTIndex(.Object)
    }
    else
    {
        .Object@features@features <- lapply(.Object@features@features, function(feat)
        {
            feat <- copy(feat)
            feat[, group := character()]
        })
    }
    
    return(.Object)
})

#' @describeIn featureGroups Obtain feature group names.
#' @export
setMethod("names", "featureGroups", function(x) names(x@groups))

#' @templateVar class featureGroups
#' @templateVar what analyses
#' @template strmethod
#' @export
setMethod("analyses", "featureGroups", function(obj) analysisInfo(obj)$analysis)

#' @templateVar class featureGroups
#' @templateVar what replicate groups
#' @template strmethod
#' @export
setMethod("replicateGroups", "featureGroups", function(obj) unique(analysisInfo(obj)$group))

#' @describeIn featureGroups Same as \code{names}. Provided for consistency to other classes.
#' @export
setMethod("groupNames", "featureGroups", function(obj) names(obj))

#' @describeIn featureGroups Obtain number of feature groups.
#' @export
setMethod("length", "featureGroups", function(x) ncol(x@groups))

#' @describeIn featureGroups Shows summary information for this object.
#' @export
setMethod("show", "featureGroups", function(object)
{
    callNextMethod(object)
    anaInfo <- analysisInfo(object)
    fCount <- length(getFeatures(object)); gCount <- length(object)
    printf("Feature groups: %s (%d total)\n", getStrListWithMax(names(object), 6, ", "), gCount)
    printf("Features: %d (%.1f per group)\n", fCount, if (gCount > 0) fCount / gCount)
    if (length(object) > 0)
    {
        printf("Has normalized intensities: %s\n", as.character(!is.null(featureTable(object)[[1]][["intensity_rel"]])))
        printf("Internal standards used for normalization: ")
        if (nrow(internalStandards(object)) == 0)
            printf("no\n")
        else
            printf("%s (%d assigned total)\n", getStrListWithMax(unique(internalStandards(object)$name), 6, ", "),
                   nrow(internalStandards(object)))
        printf("Predicted concentrations: ")
        concs <- concentrations(object)
        if (nrow(concs) == 0)
            printf("none\n")
        else
            printf("%d feature groups (%.2f%%)\n", uniqueN(concs$group), uniqueN(concs$group) / gCount * 100)
        printf("Predicted toxicities: ")
        tox <- toxicities(object)
        if (nrow(tox) == 0)
            printf("none\n")
        else
            printf("%d feature groups (%.2f%%)\n", uniqueN(tox$group), uniqueN(tox$group) / gCount * 100)
            
    }
    showAnaInfo(analysisInfo(object))
})

#' @describeIn featureGroups Accessor for \code{groups} slot.
#' @aliases groupTable
#' @export
setMethod("groupTable", "featureGroups", function(object, areas = FALSE, normalized = FALSE)
{
    checkmate::assertFlag(areas)
    checkmate::assertFlag(normalized)

    if (length(object) == 0 || (!areas && !normalized))
        return(object@groups)

    anaInfo <- analysisInfo(object)
    ret <- copy(object@groups)
    ftindex <- object@ftindex
    fTable <- featureTable(object)
    
    if (normalized && is.null(fTable[[1]][["intensity_rel"]]))
        stop("There is no normalized data, did you run normInts()?")
    
    colName <- if (areas && normalized)
        "area_rel"
    else if (areas)
        "area"
    else # if ("normalized")
        "intensity_rel"
    
    for (cl in seq_along(ret))
    {
        ftinds <- ftindex[[cl]]
        anainds <- seq_len(nrow(ret))[ftinds != 0]
        ftinds <- ftinds[ftinds != 0]
        as <- mapply(anainds, ftinds, SIMPLIFY = TRUE, FUN = function(a, i)
        {
            fTable[[anaInfo$analysis[a]]][[colName]][i]
        })
        set(ret, anainds, cl, as)
    }
    
    return(ret)
})

#' @describeIn featureGroups Obtain analysisInfo (see analysisInfo slot in \code{\link{features}}).
#' @export
setMethod("analysisInfo", "featureGroups", function(obj) obj@analysisInfo)

#' @describeIn featureGroups Accessor for \code{groupInfo} slot.
#' @aliases groupInfo
#' @export
setMethod("groupInfo", "featureGroups", function(fGroups) fGroups@groupInfo)

#' @describeIn featureGroups Obtain feature information (see \code{\link{features}}).
#' @export
setMethod("featureTable", "featureGroups", function(obj) featureTable(obj@features))

setReplaceMethod("featureTable", "featureGroups", function(obj, value)
{
    featureTable(obj@features) <- value
    return(obj)
})

#' @describeIn featureGroups Accessor for \code{features} slot.
#' @export
setMethod("getFeatures", "featureGroups", function(obj) obj@features)

#' @describeIn featureGroups Accessor for \code{ftindex} slot.
#' @aliases groupFeatIndex
#' @export
setMethod("groupFeatIndex", "featureGroups", function(fGroups) fGroups@ftindex)

#' @describeIn featureGroups Accessor for \code{groupQualities} slot.
#' @aliases groupQualities
#' @export
setMethod("groupQualities", "featureGroups", function(fGroups) fGroups@groupQualities)

#' @describeIn featureGroups Accessor for \code{groupScores} slot.
#' @aliases groupScores
#' @export
setMethod("groupScores", "featureGroups", function(fGroups) fGroups@groupScores)

#' @describeIn featureGroups Accessor for \code{annotations} slot.
#' @export
setMethod("annotations", "featureGroups", function(obj) obj@annotations)

#' @describeIn featureGroups Accessor for \code{ISTDs} slot.
#' @aliases internalStandards
#' @export
setMethod("internalStandards", "featureGroups", function(fGroups) fGroups@ISTDs)

#' @describeIn featureGroups Accessor for \code{ISTDAssignments} slot.
#' @aliases internalStandardAssignments
#' @export
setMethod("internalStandardAssignments", "featureGroups", function(fGroups) fGroups@ISTDAssignments)

#' @describeIn featureGroups Returns a named \code{character} with adduct annotations assigned to each feature group (if
#'   available).
#' @export
setMethod("adducts", "featureGroups", function(obj)
{
    if (nrow(annotations(obj)) == 0)
        return(character())
    return(setNames(annotations(obj)$adduct, annotations(obj)$group))
})

#' @describeIn featureGroups Sets adduct annotations for feature groups.
#' @param value For \code{adducts<-}: A \code{character} with adduct annotations assigned to each feature group. The
#'   length should equal the number of feature groups. Can be named with feature group names to customize the assignment
#'   order.
#' @export
setReplaceMethod("adducts", "featureGroups", function(obj, value)
{
    checkmate::assertCharacter(value, min.chars = 1, any.missing = FALSE, len = length(obj))
    
    if (checkmate::testNamed(value))
    {
        checkmate::assertNames(names(value), permutation.of = names(obj), .var.name = "value")
        value <- value[names(obj)] # ensure correct order
    }
    else
        names(value) <- names(obj)
    
    obj@annotations <- updateAnnAdducts(annotations(obj), groupInfo(obj), value)
    
    return(obj)
})

#' @describeIn featureGroups Accessor for \code{concentrations} slot.
#' @aliases concentrations
#' @export
setMethod("concentrations", "featureGroups", function(fGroups) fGroups@concentrations)

#' @describeIn featureGroups Accessor for \code{toxicities} slot.
#' @aliases toxicities
#' @export
setMethod("toxicities", "featureGroups", function(fGroups) fGroups@toxicities)

#' @describeIn featureGroups Subset on analyses/feature groups.
#' @param results Optional argument. If specified only feature groups with results in the specified object are kept. The
#'   class of \code{results} should be \code{\link{featureAnnotations}} or \code{\link{components}}. Multiple objects
#'   can be specified in a \code{list}: in this case a feature group is kept if it has a result in \emph{any} of the
#'   objects (equivalent to the \code{results} argument to \code{\link[=filter,featureGroups-method]{filter}}).
#' @export
setMethod("[", c("featureGroups", "ANY", "ANY", "missing"), function(x, i, j, ..., rGroups, results, drop = TRUE)
{
    if (!missing(rGroups))
        x <- filter(x, rGroups = rGroups)
    if (!missing(results))
        x <- filter(x, results = results)
    
    if (!missing(i))
    {
        i <- assertSubsetArgAndToChr(i, analyses(x))
        x <- delete(x, setdiff(analyses(x), i))
    }

    if (!missing(j))
    {
        j <- assertSubsetArgAndToChr(j, names(x))
        x <- delete(x, j = setdiff(names(x), j))
    }

    return(x)
})

#' @describeIn featureGroups Extract intensity values.
#' @export
setMethod("[[", c("featureGroups", "ANY", "ANY"), function(x, i, j)
{
    assertExtractArg(i)

    if (missing(j))
        return(x@groups[[i]])

    assertExtractArg(j)
    if (is.character(i))
        i <- match(i, analyses(x))
    return(x@groups[[i, j]])
})

#' @describeIn featureGroups Extract intensity values for a feature group.
#' @export
setMethod("$", "featureGroups", function(x, name)
{
    eval(substitute(x@groups$NAME_ARG, list(NAME_ARG = name)))
})

#' @templateVar where featureGroups
#' @templateVar what feature groups
#' @template delete
#' @export
setMethod("delete", "featureGroups", function(obj, i = NULL, j = NULL, ...)
{
    anas <- analyses(obj)
    gNames <- names(obj)
    iNULL <- is.null(i); jNULL <- is.null(j)
    
    ac <- checkmate::makeAssertCollection()
    i <- assertDeleteArgAndToChr(i, anas, add = ac)
    if (is.data.table(j))
    {
        # checkmate::assertDataTable(j, types = c("logical", "numeric"), col.names = "unique", add = ac)
        checkmate::assertNames(names(j), identical.to = gNames, what = "colnames", add = ac)
    }
    else if (!is.function(j))
        j <- assertDeleteArgAndToChr(j, gNames, add = ac)
    checkmate::reportAssertions(ac)
    
    # cases:
    # i = vector; j = NULL: subset analyses
    # i = NULL; j = vector: subset groups
    # i = vector; j = vector: remove the same features from analyses i in groups j
    # i = NULL/vector; j = function/data.table: remove specific features from each group (all analyses if i=NULL)

    if ((!iNULL && length(i) == 0) || (!jNULL && length(j) == 0))
        return(obj) # nothing to remove...
    
    ftind <- groupFeatIndex(obj)
    gTable <- groupTable(obj)
    jByIndex <- !is.function(j) && !is.data.table(j)
    isAnaSubSet <- jNULL
    isGrpSubSet <- jByIndex && iNULL
    
    # remove features first
    if (isAnaSubSet)
        obj@features <- delete(getFeatures(obj), i = i)
    else if (jByIndex)
        obj@features <- delete(getFeatures(obj), i = i, j = function(ft, ...) which(ft$group %chin% j))
    else
    {
        if (is.data.table(j))
        {
            featsToRemove <- setnames(transpose(j), i)
            set(featsToRemove, j = "group", value = names(j))
        }
        else
        {
            gt <- gTable[chmatch(i, anas)]
            
            featsToRemove <- Map(j, gt, gNames, MoreArgs = list(...))
            # equalize lengths
            ol <- length(i)
            featsToRemove <- lapply(featsToRemove, function(x)
            {
                # use as.vector(.., "list") as it's a bit faster than as.list
                if (is.logical(x))
                    return(as.vector(rep(x, length.out = ol), "list"))
                if (is.numeric(x))
                    return(as.vector(seq_along(anas) %in% x, "list"))
                return(as.vector(anas %chin% x, "list"))
            })
            featsToRemove <- setnames(rbindlist(featsToRemove), i)
            set(featsToRemove, j = "group", value = gNames)
        }
        
        obj@features <- delete(getFeatures(obj), i = i, j = function(ft, ana)
        {
            return(featsToRemove[chmatch(ft$group, group), ana, with = FALSE][[1]] == TRUE)
        })
    }
    
    # remove analyses
    removedAnas <- setdiff(anas, analyses(getFeatures(obj)))
    if (length(removedAnas) > 0)
    {
        ainds <- chmatch(removedAnas, anas)
        if (length(obj) > 0)
        {
            obj@groups <- obj@groups[-ainds]
            obj@ftindex <- obj@ftindex[-ainds]
        }
        obj@analysisInfo <- obj@analysisInfo[-ainds, , drop = FALSE]
        if (length(obj@concentrations) > 0)
            obj@concentrations <- obj@concentrations[, setdiff(names(obj@concentrations), removedAnas), with = FALSE]
    }
    
    # remove deleted and empty groups
    removedGroups <- character()
    if (isGrpSubSet)
        removedGroups <- j
    else
        removedGroups <- setdiff(gNames, unique(unlist(lapply(featureTable(obj), "[[", "group"))))
    if (length(removedGroups) > 0)
    {
        ginds <- chmatch(removedGroups, gNames)
        if (length(obj) > 0)
        {
            obj@groups <- obj@groups[, -ginds, with = FALSE]
            obj@ftindex <- obj@ftindex[, -ginds, with = FALSE]
        }
        obj@groupInfo <- obj@groupInfo[-ginds, ]
        if (hasFGroupScores(obj))
        {
            obj@groupQualities <- obj@groupQualities[group %in% names(obj@groups)]
            obj@groupScores <- obj@groupScores[group %in% names(obj@groups)]
        }
        if (nrow(obj@annotations) > 0)
            obj@annotations <- obj@annotations[group %in% names(obj@groups)]
        if (nrow(obj@ISTDs) > 0)
        {
            obj@ISTDs <- obj@ISTDs[group %in% names(obj@groups)]
            obj@ISTDAssignments <- internalStandardAssignments(obj)[names(obj@ISTDAssignments) %chin% names(obj@groups)]
        }
        if (nrow(obj@concentrations) > 0)
            obj@concentrations <- obj@concentrations[group %in% names(obj@groups)]
        if (nrow(obj@toxicities) > 0)
            obj@toxicities <- obj@toxicities[group %in% names(obj@groups)]
    }
    
    if (!isAnaSubSet)
    {
        # UNDONE: can we skip updating things based on i/j?
        
        # re-generate feat index table by matching group names
        obj <- reGenerateFTIndex(obj)
        
        # update group intensities: zero missing features
        ftind <- groupFeatIndex(obj) # update var
        gNames <- names(obj) # update var
        # NOTE: if j is a function it's assumed that all groups are affected
        affectedGrps <- if (jByIndex) intersect(j, gNames) else gNames
        obj@groups <- copy(obj@groups)
        # NOTE: assignment with by seems to be the fastest, as it allows some DT optimizations apparently...
        obj@groups[, (affectedGrps) := Map(.SD, affectedGrps, f = function(x, g) fifelse(ftind[[g]] == 0, 0, x)),
                   by = rep(1, nrow(obj@groups)), .SDcols = affectedGrps]
        if (nrow(concentrations(obj)) > 0)
        {
            obj@concentrations <- copy(obj@concentrations)
            anas <- analyses(obj) # update var
            obj@concentrations[group %chin% affectedGrps, (anas) := {
                vals <- mget(anas)
                vals[ftind[[group]] == 0] <- NA_real_
                vals
            }, by = seq_len(sum(obj@concentrations$group %chin% affectedGrps))]
        }
    }
    
    return(obj)
})

# UNDONE: make this public?
setMethod("removeEmptyAnalyses", "featureGroups", function(fGroups)
{
    if (length(fGroups) > 0)
    {
        trGT <- transpose(groupTable(fGroups))

        empty <- trGT[, sapply(.SD, sum) == 0]
        if (any(empty))
            fGroups <- delete(fGroups, empty)
    }
    return(fGroups)
})

setMethod("averageGroups", "featureGroups", function(fGroups, areas, normalized, func)
{
    gTable <- copy(groupTable(fGroups, areas, normalized))
    if (nrow(gTable) == 0)
        return()

    gNames <- names(fGroups)
    anaInfo <- analysisInfo(fGroups)

    gTable[, sgroup := anaInfo$group]

    gTable[, (gNames) := lapply(.SD, function(v) { if (any(v > 0)) func(v[v>0]) else 0 }), by = sgroup, .SDcols = gNames]
    gTable <- unique(gTable, by = "sgroup")
    gTable[, sgroup := NULL]

    return(gTable[])
})

#' @describeIn featureGroups Exports feature groups to a \file{.csv} file that is readable to Bruker ProfileAnalysis (a
#'   'bucket table'), Bruker TASQ (an analyte database) or that is suitable as input for the \verb{Targeted peak
#'   detection} functionality of \href{http://mzmine.github.io/}{MZmine}.
#' @param type The export type: \code{"brukerpa"} (Bruker ProfileAnalysis), \code{"brukertasq"} (Bruker TASQ) or
#'   \code{"mzmine"} (MZmine).
#' @param out The destination file for the exported data.
#' @export
setMethod("export", "featureGroups", function(obj, type, out)
{
    ac <- checkmate::makeAssertCollection()
    checkmate::assertChoice(type, c("brukerpa", "brukertasq", "mzmine"), add = ac)
    checkmate::assertPathForOutput(out, overwrite = TRUE, add = ac) # NOTE: assert doesn't work on Windows...
    checkmate::reportAssertions(ac)

    if (length(obj) == 0)
        stop("Cannot export empty feature groups object")

    if (type == "brukerpa")
    {
        # UNDONE: do we need this?
        #files <- sapply(bucketInfo$fInfo$analysis, function(f) file.path(bucketInfo$dataPath, paste0(f, ".d")), USE.NAMES = F)
        files <- obj@analysisInfo$analysis

        # col.names: if NA an empty initial column is added
        write.table(obj@groups, out, na = "", sep = "\t", quote = FALSE, row.names = files, col.names = NA)
    }
    else if (type == "brukertasq")
    {
        hdr <- c("name", "formula", "m/z", "rt", "rn", "CAS", "Qual1", "Qual2", "Qual3", "Qual4",
                 "Qual5", "Qual6", "quantifier ion 1 low", "quantifier ion 1 up",
                 "quantifier ion 1 formula", "quantifier ion 2 low", "quantifier ion 2 up",
                 "quantifier ion 2 formula", "quantifier ion 3 low", "quantifier ion 3 up",
                 "quantifier ion 3 formula", "precursor ion MS2", "precursor ion MS2 formula",
                 "precursor ion MS3", "precursor ion MS3 formula", "precursor ion MS4",
                 "precursor ion MS4 formula")

        df <- data.frame(matrix(ncol=length(hdr), nrow=ncol(obj@groups)))
        colnames(df) <- hdr

        df["name"] <- colnames(obj@groups)
        df["m/z"] <- obj@groupInfo$mzs
        df["rt"] <- obj@groupInfo$rts / 60

        write.csv(df, out, row.names = FALSE, na = "")
    }
    else if (type == "mzmine")
    {
        df <- obj@groupInfo
        df$name <- rownames(df)
        df <- df[, c("mzs", "rts", "name")]
        df$rts <- df$rts / 60
        write.table(df, out, row.names = FALSE, col.names = FALSE, sep = ",", quote = FALSE)
    }
})

#' @describeIn featureGroups Obtain a summary table (a \code{\link{data.table}}) with retention, \emph{m/z}, intensity
#'   and optionally other feature data.
#' @param features If \code{TRUE} then feature specific data will be added. If \code{average=TRUE} this data will be
#'   averaged for each feature group.
#' @param qualities Adds feature (group) qualities (\code{qualities="quality"}), scores (\code{qualities="score"}) or
#'   both (\code{qualities="both"}), if this data is available (\emph{i.e.} from \code{calculatePeakQualities}). If
#'   \code{qualities=FALSE} then nothing is reported.
#' @param regression Set to \code{TRUE} to add regression data for each feature group. For this a linear model is
#'   created (intensity/area [depending on \code{areas} argument] \emph{vs} concentration). The model concentrations
#'   (e.g. of a set of standards) is derived from the \code{conc} column of the \link[=analysis-information]{analysis
#'   information}. From this model the intercept, slope and R2 is added to the output. In addition, when
#'   \code{features=TRUE}, concentrations for each feature are added. Note that no regression information is added when
#'   no \code{conc} column is present in the analysis information or when less than two concentrations are specified
#'   (\emph{i.e.} the minimum amount).
#' @param concAggrParams,toxAggrParams Parameters to aggregate calculated concentrations/toxicities (obtained with
#'   \code{\link{calculateConcs}}/\code{\link{calculateTox}}). See \link[=pred-aggr-params]{prediction aggregation
#'   parameters} for more information. Set to \code{NULL} to omit this data.
#' @param normConcToTox Set to \code{TRUE} to normalize concentrations to toxicities. Only relevant if this data is
#'   present (see \code{\link{calculateConcs}}/\code{\link{calculateTox}}).
#' @export
setMethod("as.data.table", "featureGroups", function(x, average = FALSE, areas = FALSE, features = FALSE,
                                                     qualities = FALSE, regression = FALSE, averageFunc = mean,
                                                     normalized = FALSE, FCParams = NULL,
                                                     concAggrParams = getDefPredAggrParams(),
                                                     toxAggrParams = getDefPredAggrParams(), normConcToTox = FALSE)
{
    return(doFGAsDataTable(x, average, areas, features, qualities, regression, averageFunc, normalized, FCParams,
                           concAggrParams, toxAggrParams, normConcToTox))
})

#' @describeIn featureGroups Obtain a subset with unique feature groups
#'   present in one or more specified replicate group(s).
#' @param relativeTo A character vector with replicate groups that should be
#'   used for unique comparison. If \code{NULL} then all replicate groups are
#'   used for comparison. Replicate groups specified in \code{which} are
#'   ignored.
#' @param outer If \code{TRUE} then only feature groups are kept which do not
#'   overlap between the specified replicate groups for the \code{which}
#'   parameter.
#' @export
setMethod("unique", "featureGroups", function(x, which, relativeTo = NULL, outer = FALSE)
{
    ac <- checkmate::makeAssertCollection()
    checkmate::assertCharacter(which, min.len = 1, min.chars = 1, any.missing = FALSE, add = ac)
    checkmate::assertSubset(which, replicateGroups(x), empty.ok = FALSE, add = ac)
    checkmate::assertCharacter(relativeTo, min.len = 1, min.chars = 1, any.missing = FALSE,
                               null.ok = TRUE, add = ac)
    checkmate::assertFlag(outer, add = ac)
    checkmate::reportAssertions(ac)

    if (is.null(relativeTo))
        relativeTo <- replicateGroups(x)
    else
    {
        relativeTo <- union(which, relativeTo)
        x <- replicateGroupFilter(x, relativeTo, verbose = FALSE)
    }

    anaInfo <- analysisInfo(x)
    rGroups <- unique(anaInfo$group)

    if (all(rGroups %in% which) && !outer)
        return(x) # nothing to do...

    # Split by selected and other replicate groups
    selFGroups <- replicateGroupFilter(x, which, verbose = FALSE)
    
    otherFGNames <- setdiff(relativeTo, which)
    # NOTE: check length here to avoid ending up with an empty 'otherFGroups'
    # below, which yields warnings with XCMS.
    if (length(otherFGNames) > 0)
    {
        # pick out all feature groups NOT present in others
        otherFGroups <- replicateGroupFilter(x, otherFGNames, verbose = FALSE)
        ret <- selFGroups[, setdiff(names(selFGroups), names(otherFGroups))]
    }
    else
        ret <- selFGroups
    
    # remove all that is in at least 2 replicate groups
    if (outer && length(which) > 1)
        ret <- minReplicatesFilter(ret, absThreshold = 2, negate = TRUE, verbose = FALSE)

    return(ret)
})

#' @describeIn featureGroups Obtain a subset with feature groups that overlap
#'   between a set of specified replicate group(s).
#' @param exclusive If \code{TRUE} then all feature groups are removed that are
#'   not unique to the given replicate groups.
#' @aliases overlap
#' @export
setMethod("overlap", "featureGroups", function(fGroups, which, exclusive)
{
    rGroups <- replicateGroups(fGroups)
    
    ac <- checkmate::makeAssertCollection()
    checkmate::assert(checkmate::checkSubset(which, rGroups, empty.ok = FALSE),
                      checkmate::checkList(which, "character", any.missing = FALSE),
                      .var.name = "which", add = ac)
    checkmate::assertFlag(exclusive, add = ac)
    checkmate::reportAssertions(ac)

    if (length(which) < 2 || length(fGroups) == 0)
        return(fGroups) # nothing to do...

    fGroupsList <- lapply(which, replicateGroupFilter, fGroups = fGroups, verbose = FALSE)
    ov <- Reduce(intersect, lapply(fGroupsList, names))
    ret <- fGroups[, ov]

    if (exclusive)
        ret <- unique(ret, which = unlist(which))
    
    return(ret)
})

#' @describeIn featureGroups Calculates peak and group qualities for all features and feature groups. The peak qualities
#'   (and scores) are calculated with the \link[=calculatePeakQualities,features-method]{features method of this
#'   function}, and subsequently averaged per feature group. Then, \pkg{MetaClean} is used to calculate the
#'   \verb{Elution Shift} and \verb{Retention Time Consistency} group quality metrics (see the \pkg{MetaClean}
#'   publication cited below for more details). Similarly to the \code{\link{features}} method, these metrics are scored
#'   by normalizing qualities among all groups and scaling them from \samp{0} (worst) to \samp{1} (best). The
#'   \verb{totalScore} for each group is then calculated as the weighted sum from all feature (group) scores. The
#'   \code{\link{getMCTrainData}} and \code{\link{predictCheckFeaturesSession}} functions can be used to train and apply
#'   Pass/Fail ML models from \pkg{MetaClean}.
#'
#' @inheritParams calculatePeakQualities,features-method
#' @param avgFunc The function used to average the peak qualities and scores for each feature group.
#'
#' @template parallel-arg
#' 
#' @references \insertRef{Chetnik2020}{patRoon}
#'
#' @return \code{calculatePeakQualities} returns a modified object amended with peak qualities and scores.
#'
#' @export
setMethod("calculatePeakQualities", "featureGroups", function(obj, weights, flatnessFactor, avgFunc = mean,
                                                              parallel = TRUE)
{
    checkPackage("MetaClean")
    
    allScores <- featureQualityNames(scores = TRUE, totScore = FALSE)
    
    ac <- checkmate::makeAssertCollection()
    checkmate::assertNumeric(weights, finite = TRUE, any.missing = FALSE, min.len = 1, names = "unique",
                             null.ok = TRUE, add = ac)
    if (!is.null(weights))
        checkmate::assertNames(names(weights), subset.of = allScores, add = ac)
    checkmate::assertNumber(flatnessFactor, add = ac)
    checkmate::assertFunction(avgFunc, add = ac)
    checkmate::assertFlag(parallel, add = ac)
    checkmate::reportAssertions(ac)
    
    hash <- makeHash(obj, weights, flatnessFactor, avgFunc)
    cd <- loadCacheData("calculatePeakQualities", hash)
    if (!is.null(cd))
        return(cd)
    
    if (length(obj) == 0)
        return(obj)
    
    if (!is.null(weights))
    {
        weights[setdiff(allScores, names(weights))] <- 1
        weights <- weights[allScores]
    }
    
    fs <- featureQualityNames(group = FALSE, scores = TRUE, totScore = FALSE)
    w <- if (!is.null(weights) && any(names(weights) %in% fs)) weights[names(weights) %in% fs] else NULL
    obj@features <- calculatePeakQualities(getFeatures(obj), weights = w, flatnessFactor = flatnessFactor,
                                           parallel = parallel)
    
    ftind <- groupFeatIndex(obj)
    anas <- analyses(obj)
    gNames <- names(obj)
    gCount <- length(obj)
    EICs <- getEICsForFGroups(obj, EICParams = getDefEICParams(rtWindow = 0))
    fgQualities <- featureGroupQualities()
    
    printf("Calculating group peak qualities and scores...\n")
    prog <- openProgBar(0, gCount)
    
    groupQualitiesScores <- rbindlist(sapply(gNames, function(grp)
    {
        featInds <- ftind[[grp]]
        doAna <- anas[featInds != 0]
        featInds <- featInds[featInds != 0]
        fList <- rbindlist(Map(doAna, featInds, f = function(ana, row) obj@features[[ana]][row]))
        featAvgs <- sapply(c(featureQualityNames(group = FALSE),
                             featureQualityNames(group = FALSE, scores = TRUE, totScore = FALSE)), function(q)
        {
            if (all(is.na(fList[[q]])))
                return(NA_real_)
            return(avgFunc(fList[[q]], na.rm = TRUE))
        }, simplify = FALSE)
        
        pdata <- lapply(seq_len(nrow(fList)), function(fti) list(rtmin = fList$retmin[fti],
                                                                 rtmax = fList$retmax[fti]))
        # NOTE: MetaClean expects EIC matrices
        eic <- lapply(doAna, function(a) as.matrix(EICs[[a]][[grp]]))
        gq <- sapply(lapply(fgQualities, "[[", "func"), do.call, list(pdata, eic), simplify = FALSE)
        
        setTxtProgressBar(prog, match(grp, gNames))
        
        return(c(featAvgs, gq))
    }, simplify = FALSE), idcol = "group")
    
    groupQualitiesScores[, (featureQualityNames(feat = FALSE, scores = TRUE, totScore = FALSE)) :=
                             Map(scoreFeatQuality, fgQualities, .SD),
                         .SDcols = featureQualityNames(feat = FALSE)]
    
    obj@groupQualities <- groupQualitiesScores[, c("group", featureQualityNames()), with = FALSE]
    obj@groupScores <- groupQualitiesScores[, c("group", allScores), with = FALSE]
    
    if (is.null(weights))
        obj@groupScores[, totalScore := rowSums(.SD, na.rm = TRUE), .SDcols = allScores][]
    else
    {
        wsc <- obj@groupScores[, allScores, with = FALSE]
        wsc[, (names(wsc)) := Map("*", .SD, weights)]
        set(obj@groupScores, j = "totalScore", value = rowSums(wsc, na.rm = TRUE))
    }
    
    setTxtProgressBar(prog, gCount)
    
    saveCacheData("calculatePeakQualities", obj, hash)
    
    return(obj)
})

#' @describeIn featureGroups uses \link[=generateComponents]{componentization} results to select feature groups with
#'   preferred adduct ion and/or isotope annotation. Typically, this means that only feature groups are kept if they are
#'   (de-)protonated adducts and are monoisotopic. The adduct annotation assignments for the selected feature groups are
#'   copied from the components to the \code{annotations} slot. If the adduct for a feature group is unknown, its
#'   annotation is defaulted to the 'preferred' adduct, and hence, the feature group will never be removed. Furthermore,
#'   if a component does not contain an annotation with the preferred adduct, the most intense feature group is selected
#'   instead. Similarly, if no isotope annotation is available, the feature group is assumed to be monoisotopic and thus
#'   not removed. An important advantage of \code{selectIons} is that it may considerably simplify your dataset.
#'   Furthermore, the adduct assignments allow formula/compound annotation steps later in the workflow to improve their
#'   annotation accuracy. On the other hand, it is important the componentization results are reliable. Hence, it is
#'   highly recommended that, prior to calling \code{selectIons}, the settings to \code{\link{generateComponents}} are
#'   optimized and its results are reviewed with \code{\link{checkComponents}}. Finally, the \code{adducts<-} method can
#'   be used to manually correct adduct assignments afterwards if necessary.
#'
#' @param components The \code{components} object that was generated for the given \code{featureGroups} object.
#'   Obviously, the components must be created with algorithms that support adduct/isotope annotations, such as those
#'   from \pkg{RAMClustR} and \pkg{cliqueMS}.
#' @param prefAdduct The 'preferred adduct' (see method description). This is often \code{"[M+H]+"} or \code{"[M-H]-"}.
#' @param onlyMonoIso Set to \code{TRUE} to only keep feature groups that were annotated as monoisotopic. Feature groups
#'   are never removed by this setting if no isotope annotations are available.
#' @param chargeMismatch Specifies how to deal with a mismatch in charge between adduct and isotope annotations. Valid
#'   values are: \code{"adduct"} (ignore isotope annotation), \code{"isotope"} (ignore adduct annotation), \code{"none"}
#'   (ignore both annotations) and \code{"ignore"} (don't check for charge mismatches). \emph{Important}: when
#'   \command{OpenMS} is used to find features, it already removes any detected non-monoisotopic features by default.
#'   Hence, in such case setting \code{chargeMismatch="adduct"} is more appropriate.
#'
#' @return \code{selectIons} returns a \code{featureGroups} object with only the selected feature groups and amended
#'   with adduct annotations.
#'
#' @aliases selectIons
#' @export
setMethod("selectIons", "featureGroups", function(fGroups, components, prefAdduct, onlyMonoIso = TRUE,
                                                  chargeMismatch = "adduct")
{
    # UNDONE is intensity a proper measure? ie does it allow comparison if
    # isotopes/adducts are taken from different analyses?
    # UNDONE: add logging to see what happens
    
    ac <- checkmate::makeAssertCollection()
    checkmate::assertClass(components, "components", add = ac)
    checkmate::assertFlag(onlyMonoIso, add = ac)
    checkmate::assertChoice(chargeMismatch, c("isotope", "adduct", "none", "ignore"))
    checkmate::reportAssertions(ac)
    
    prefAdduct <- as.character(checkAndToAdduct(prefAdduct))
    
    if (is.null(componentInfo(components)[["neutral_mass"]]))
    {
        cat("No adduct/isotope information available in given components!\n")
        return(fGroups)
    }
    if (length(components) == 0)
    {
        cat("Components are empty, skipping...\n")
        return(fGroups)
    }
    
    cTab <- as.data.table(components)
    cTab <- cTab[group %in% names(fGroups)]
    hasIsos <- !is.null(cTab[["isonr"]]) & !all(is.na(cTab$isonr))
    if (hasIsos)
        cTab <- cTab[!is.na(isonr) | !is.na(adduct_ion)]
    else
        cTab <- cTab[!is.na(adduct_ion)]
    
    cTab[, remove := FALSE]
    
    if (hasIsos && chargeMismatch != "ignore")
    {
        cTab[!is.na(charge) & !is.na(adduct_ion),
             chMismatch := charge != sapply(lapply(adduct_ion, as.adduct), slot, "charge")]
        if (chargeMismatch == "isotope")
            cTab[chMismatch == TRUE, adduct_ion := NA_character_]
        else if (chargeMismatch == "adduct")
            cTab[chMismatch == TRUE, isonr := NA_integer_]
        else # "none"
            cTab[chMismatch == TRUE, remove := TRUE]
    }
    
    if (onlyMonoIso)
    {
        if (!hasIsos)
            cat("No isotope annotations available!\n")
        else
            cTab[remove == FALSE & !is.na(isonr), remove := isonr != 0]
    }
    
    cTab[!is.na(adduct_ion) & remove == FALSE, remove := {
        if (.N == 1)
            FALSE
        # UNDONE: allow >1 pref adducts?
        else if (prefAdduct %in% adduct_ion)
            adduct_ion != prefAdduct
        else # fall back to most intense
            !numEQ(intensity, max(intensity))
    }, by = "name"]
    
    # remove unwanted isotopes/adducts
    fGroups <- fGroups[, setdiff(names(fGroups), cTab[remove == TRUE]$group)]
    
    # annotate remaining
    
    cTabAdd <- cTab[!is.na(adduct_ion) & !remove]

    if (nrow(cTabAdd) == 0)
    {
        fGroups@annotations <- data.table()
        cat("No adduct annotations found!\n")
    }
    else
    {
        fGroups@annotations <- data.table(group = names(fGroups))
        
        if (nrow(cTabAdd) > 0)
        {
            setnames(cTabAdd, "adduct_ion", "adduct")
            fGroups@annotations <- merge(fGroups@annotations, cTabAdd[, c("group", "adduct"), with = FALSE],
                                         by = "group", all.x = TRUE)
            fGroups@annotations[is.na(adduct), adduct := prefAdduct]
        }
        else
            fGroups@annotations[, adduct := prefAdduct]

        fGroups@annotations[, neutralMass := calculateMasses(groupInfo(fGroups)[group, "mzs"],
                                                             lapply(adduct, as.adduct),
                                                             type = "neutral")]
                
        # retain correct order
        fGroups@annotations <- fGroups@annotations[match(names(fGroups), group)]
        
        printf("Removed %d feature groups detected as unwanted adducts/isotopes\n", sum(cTab$remove))
        printf("Annotated %d feature groups with adducts\n", nrow(cTabAdd))
        printf("\tRemaining %d feature groups set as default adduct %s\n", length(fGroups) - nrow(cTabAdd), prefAdduct)
    }
    
    return(fGroups)
})

#' @describeIn featureGroups Provides various methods to normalizes feature intensities for each sample analysis or of
#'   all features within a feature group. See the \verb{Feature intensity normalization} section below.
#'
#' @param featNorm The method applied for feature normalization: \code{"istd"}, \code{"tic"}, \code{"conc"} or
#'   \code{"none"}. See the \verb{Feature intensity normalization} section for details.
#' @param groupNorm If \code{TRUE} then group normalization is performed. See the \verb{Feature intensity normalization}
#'   section for details.
#' @param normFunc A \code{function} to combine data for normalization. See the \verb{Feature intensity normalization}
#'   section for details.
#' @param standards A \code{data.table} (or \code{data.frame}) with all internal standards. Should follow the format of
#'   a \link[=suspect-screening]{suspect list}. Only used if \code{featNorm="istd"}. See the \verb{Feature intensity
#'   normalization} section for details.
#'
#'   \setsWF Can also be a \code{list} with internal standard lists.
#'
#'   See the \code{suspects} argument to \code{\link{screenSuspects}} for more details.
#' @param ISTDRTWindow,ISTDMZWindow The retention time and \emph{m/z} windows for IS selection. Only used if
#'   \code{featNorm="istd"}. See the \verb{Feature intensity normalization} section for details.
#' @param minISTDs The minimum number of IS that should be assigned to each feature (if possible). Only used if
#'   \code{featNorm="istd"}. See the \verb{Feature intensity normalization} section for details.
#'
#' @section Feature intensity normalization: The \code{normInts} method performs normalization of feature intensities
#'   (and areas). These values are amended in the \code{features} slot, while the original intensities/areas are kept.
#'   To use the normalized intensities set \code{normalized=TRUE} to methods such as \code{\link{plotInt}},
#'   \code{\link{generateComponentsIntClust}} and \code{as.data.table}. Please see the \code{normalized} argument
#'   documentation for these methods for more details.
#'
#'   The \code{normInts} method supports several methods to normalize intensities/areas of features within the same
#'   analysis. Most methods are influenced by the \emph{normalization concentration} (\code{norm_conc} in the
#'   \link[=analysis-information]{analysis information}) set for each sample analysis. For \code{NA} or zero values the
#'   output will be zero. If the \code{norm_conc} is completely absent from the analysis information, the normalization
#'   concentration is defaulted to one.
#'
#'   The different normalization methods are:
#'
#'   \enumerate{
#'
#'   \item \code{featNorm="istd"} Uses \emph{internal standards} (IS) for normalization. The IS are screened internally
#'   by the \code{\link{screenSuspects}} function. Hence, the IS specified by the \code{standards} argument should
#'   follow the format of a \link[=suspect-screening]{suspect list}. Note that labelled elements in IS formulae should
#'   be specified with the \CRANpkg{rcdk} format, \emph{e.g.} \code{"[13]C"} for 13C, \code{"[2]H"} for a deuterium etc.
#'   Example IS lists are provided with the \pkg{patRoonData} package.
#'
#'   The assignment of IS to features is automatically performed, using the following criteria: \enumerate{
#'
#'   \item Only analyses are considered with a defined normalization concentration.
#'
#'   \item The IS must be detected in all of the analyses in which the feature was detected.
#'
#'   \item The retention time and \emph{m/z} are reasonably close (\code{ISTDRTWindow}/\code{ISTDMZWindow} arguments).
#'   However, additional IS candidates outside these windows will be chosen if the number of candidates is less than the
#'   \code{minISTDs} argument. In this case the next close(st) candidate(s) will be chosen.
#'
#'   }
#'
#'   Normalization of features within the same feature group always occur with the same IS. If multiple IS are assigned
#'   to a feature then normalization occurs with the combined intensity (area), which is calculated with the function
#'   defined by the \code{normFunc} argument. The (combined) IS intensity is then normalized by the normalization
#'   concentration, and finally used for feature normalization.
#'
#'   \item \code{featNorm="tic"} Uses the Total Ion Current (TIC) to normalize intensities. The TIC is calculated by
#'   combining all intensities with the function defined by the \code{normFunc} argument. For this reason, you may need
#'   to take care to perform normalization before \emph{e.g.} suspect screening or other prioritization techniques. The
#'   TIC normalized intensities are finally divided by the normalization concentration.
#'
#'   \item \code{featNorm="conc"} Simply divides all intensities (areas) with the normalization concentration defined
#'   for the sample.
#'
#'   \item \code{featNorm="none"} Performs no normalization. The raw intensity values are simply copied. This is mainly
#'   useful if you only want to do group normalization (described below).
#'
#'   }
#'
#'   The meaning of the normalization concentration differs for each method: for \code{"istd"} it resembles the IS
#'   concentration of a sample analysis, whereas for \code{"tic"} and \code{"conc"} it is used to normalize different
#'   sample amounts (\emph{e.g.} injection volume).
#'
#'   If \code{groupNorm=TRUE} then feature intensities (areas) will be normalized by the combined values for its feature
#'   group (again, combination occurs with \code{normFunc}). This \emph{group normalization} always occurs \emph{after}
#'   aforementioned normalization methods. Group normalization was the only method with \pkg{patRoon} \samp{<2.1}, and
#'   still occurs automatically if \code{normInts} was not called when a method is executed that requests normalized
#'   data.
#'
#' @return \code{normInts} returns a \code{featureGroups} object, amended with data in the \code{ISTDs} and
#'   \code{ISTDAssignments} slots if \code{featNorm="istd"}.
#'
#' @aliases normInts
#' @export
setMethod("normInts", "featureGroups", function(fGroups, featNorm, groupNorm, normFunc, standards, ISTDRTWindow,
                                                ISTDMZWindow, minISTDs, ...)
{
    # NOTE: keep args in sync with sets method
    
    # UNDONE: default for minISTDs OK? (or no default for minISTDs and ISTDRTWindow?)
    # UNDONE: ISTD: doc that sorting occurs on both RT and m/z deviation
    # UNDONE: add adduct argument here, to make it more clear that it needs to be specified if featNorm=="istd"?
    
    ac <- checkmate::makeAssertCollection()
    checkmate::assertSubset(featNorm, c("tic", "istd", "conc", "none"))
    checkmate::assertFlag(groupNorm, add = ac)
    checkmate::assertFunction(normFunc, add = ac)
    checkmate::assertDataFrame(standards, null.ok = featNorm != "istd", add = ac) # more asserts in screenSuspects()
    aapply(checkmate::assertNumber, . ~ ISTDRTWindow + ISTDMZWindow, lower = 0, finite = TRUE, fixed = list(add = ac))
    checkmate::assertCount(minISTDs, positive = TRUE, add = ac)
    checkmate::reportAssertions(ac)
    
    if (length(fGroups) == 0)
        return(fGroups)
    
    anaInfo <- analysisInfo(fGroups)
    
    normConcs <- anaInfo[["norm_conc"]]
    if (is.null(normConcs))
    {
        printf("NOTE: No normalization concentrations defined (norm_conc column is absent in analysis information). Defaulting to 1.\n")
        normConcs <- rep(1, nrow(anaInfo))
    }
    
    # reset
    fGroups@ISTDs <- data.table()
    fGroups@ISTDAssignments <- list()
    
    if (featNorm == "istd")
    {
        # HACK: what we should do here for screening is exactly the same as screenSuspects(). So simply call that and use
        # its output...
        fGroupsScr <- screenSuspects(fGroups, suspects = standards, ...)
        fGroups@ISTDs <- screenInfo(fGroupsScr)
        origN <- uniqueN(fGroups@ISTDs$name)
        
        # only keep hits that are present in the analyses with non-NA conc
        fGroupsWithISTD <- fGroups[, fGroups@ISTDs$group]
        fGroupsWithISTD <- fGroupsWithISTD[!is.na(normConcs) & normConcs != 0]
        fGroupsWithISTD <- minAnalysesFilter(fGroupsWithISTD, relThreshold = 1, verbose = FALSE)
        
        fGroups@ISTDs <- fGroups@ISTDs[group %in% names(fGroupsWithISTD)]
        
        printf("Removed %d non-ubiquitous internal standards\n", origN - uniqueN(fGroups@ISTDs$name))
        
        gInfo <- groupInfo(fGroups)
        gInfoISTDs <- gInfo[unique(fGroups@ISTDs$group), ]
        fGroups@ISTDAssignments <- setNames(Map(gInfo$rts, gInfo$mzs, f = function(rt, mz)
        {
            # UNDONE: with configurable N, handle duplicate IS assignments differently? (ie select on suspect RT instead of group RT)
            
            # sort by closest eluting ISTDs with closest m/z
            gi <- gInfoISTDs[order(abs(gInfoISTDs$rts - rt), abs(gInfoISTDs$mzs - mz)), ]
            giInRange <- gi[numLTE(abs(gi$rts - rt), ISTDRTWindow) & numLTE(abs(gi$mzs - mz), ISTDMZWindow), ]
            if (nrow(giInRange) >= minISTDs)
                return(rownames(giInRange)) # only take those in range
            if (nrow(gi) > minISTDs)
                gi <- gi[seq_len(minISTDs), ] # just take minimum of ISTDs, even if some are out of range
            return(rownames(gi))
        }), names(fGroups))
        fGroups@ISTDAssignments <- fGroups@ISTDAssignments[lengths(fGroups@ISTDAssignments) > 0]
        fGroups@ISTDAssignments <- fGroups@ISTDAssignments[!names(fGroups@ISTDAssignments) %in% fGroups@ISTDs$group]
        
        fGroups@features@features <- Map(featureTable(fGroups), normConcs, f = function(ft, nconc)
        {
            ft <- copy(ft)
            
            if (is.na(nconc) || nconc == 0)
                ft[, c("intensity_rel", "area_rel") := 0]
            else
            {
                ft[group %in% names(fGroups@ISTDAssignments), intensity_rel := mapply(intensity, group, FUN = function(int, grp)
                {
                    iint <- normFunc(ft[group %in% fGroups@ISTDAssignments[[grp]]]$intensity)
                    return(int / (iint / nconc))
                })]
                ft[group %in% names(fGroups@ISTDAssignments), area_rel := mapply(area, group, FUN = function(ar, grp)
                {
                    iar <- normFunc(ft[group %in% fGroups@ISTDAssignments[[grp]]]$area)
                    return(ar / (iar / nconc))
                })]
                # HACK: don't want NA values
                ft[is.na(intensity_rel), c("intensity_rel", "area_rel") := 0]
            }
            return(ft)
        })
    }
    else if (featNorm == "tic")
    {
        fGroups@features@features <- Map(featureTable(fGroups), normConcs, f = function(ft, nconc)
        {
            ft <- copy(ft)
            nint <- normFunc(ft$intensity) * nconc
            narea <- normFunc(ft$area) * nconc
            if (is.na(nconc) || nconc == 0 || nint == 0)
                ft[, c("intensity_rel", "area_rel") := 0]
            else
                ft[, c("intensity_rel", "area_rel") := .(intensity / nint, area / narea)]
            return(ft)
        })
    }
    else if (featNorm == "conc")
    {
        fGroups@features@features <- Map(featureTable(fGroups), normConcs, f = function(ft, nconc)
        {
            ft <- copy(ft)
            if (is.na(nconc) || nconc == 0)
                ft[, c("intensity_rel", "area_rel") := 0]
            else
                ft[, c("intensity_rel", "area_rel") := .(intensity / nconc, area / nconc)]
            return(ft)
        })
    }
    else # "none"
    {
        fGroups@features@features <- lapply(featureTable(fGroups), function(ft)
        {
            ft <- copy(ft)
            ft[, c("intensity_rel", "area_rel") := .(intensity, area)]
            return(ft)
        })
    }
    
    if (groupNorm)
    {
        gNames <- names(fGroups)
        
        featsPerGroup <- split(rbindlist(featureTable(fGroups)), by = "group")
        nInts <- sapply(lapply(featsPerGroup, "[[", "intensity_rel"), normFunc)
        nAreas <- sapply(lapply(featsPerGroup, "[[", "area_rel"), normFunc)
        
        fGroups@features@features <- lapply(featureTable(fGroups), function(ft)
        {
            ft <- copy(ft)
            ft[, c("intensity_rel", "area_rel") := .(intensity_rel / nInts[group],
                                                     area_rel / nAreas[group])]
            return(ft)
        })
    }
    
    return(fGroups)
})

#' @aliases calculateConcs
#' @rdname pred-quant
#' @export
setMethod("calculateConcs", "featureGroups", function(fGroups, featureAnn, areas = FALSE)
{
    ac <- checkmate::makeAssertCollection()
    checkmate::assertClass(featureAnn, "featureAnnotations", add = ac)
    checkmate::assertFlag(areas, add = ac)
    checkmate::reportAssertions(ac)
    
    if (length(fGroups) == 0)
    {
        cat("No feature groups, nothing to do...\n")
        return(fGroups)
    }
    
    featureAnn <- featureAnn[names(fGroups)]
    
    if (length(featureAnn) == 0)
    {
        cat("No (relevant) feature annotations, nothing to do...\n")
        fGroups@concentrations <- data.table()
        return(fGroups)
    }
    
    annTab <- as.data.table(featureAnn)
    if (is.null(annTab[["RF_SMILES"]]) && is.null(annTab[["RF_SIRFP"]]))
        stop("Feature annotations lack predicted response factors. Please call predictRespFactors() first!",
             call. = FALSE)
    
    concs <- data.table()
    
    if (!is.null(annTab[["RF_SMILES"]]) && any(!is.na(annTab$RF_SMILES)))
    {
        resp <- annTab[!is.na(RF_SMILES), c("group", "SMILES", "RF_SMILES"), with = FALSE]
        resp[, type := "compound"]
        setnames(resp, c("SMILES", "RF_SMILES"), c("candidate", "RF"))
        if (!is.null(annTab[["compoundName"]]))
            resp[, candidate_name := annTab$compoundName]
        concs <- calcFeatureConcs(fGroups, resp, areas)
    }
    
    if (!is.null(annTab[["RF_SIRFP"]]) && any(!is.na(annTab$RF_SIRFP)))
    {
        resp <- annTab[!is.na(RF_SIRFP), c("group", "neutral_formula", "RF_SIRFP"), with = FALSE]
        resp[, type := "SIRIUS_FP"]
        setnames(resp, c("neutral_formula", "RF_SIRFP"), c("candidate", "RF"))
        # UNDONE: collapse compoundName? Results mostly in very long columns.
        # if (!is.null(annTab[["compoundName"]]))
        #     resp[, candidate_name := annTab$compoundName]
        resp <- unique(resp, by = c("group", "candidate"))
        concs <- rbind(concs, calcFeatureConcs(fGroups, resp, areas), fill = TRUE)
    }

    if (nrow(concs) > 0)
        concs <- finalizeFeaturePredTab(concs)
    
    fGroups@concentrations <- concs
    
    return(fGroups)
})

#' @aliases calculateTox
#' @rdname pred-tox
#' @export
setMethod("calculateTox", "featureGroups", function(fGroups, featureAnn)
{
    ac <- checkmate::makeAssertCollection()
    checkmate::assertClass(featureAnn, "featureAnnotations", add = ac)
    checkmate::reportAssertions(ac)
    
    if (length(fGroups) == 0)
    {
        cat("No feature groups, nothing to do...\n")
        return(fGroups)
    }
    
    featureAnn <- featureAnn[names(fGroups)]
    
    if (length(featureAnn) == 0)
    {
        cat("No (relevant) feature annotations, nothing to do...\n")
        fGroups@toxicities <- data.table()
        return(fGroups)
    }
    
    annTab <- as.data.table(featureAnn)
    if (is.null(annTab[["LC50_SMILES"]]) && is.null(annTab[["LC50_SIRFP"]]))
        stop("Feature annotations lack predicted LC50 values. Please call predictTox() first!", call. = FALSE)
    
    toxicities <- data.table()
    
    if (!is.null(annTab[["LC50_SMILES"]]) && any(!is.na(annTab$LC50_SMILES)))
    {
        LC50Tab <- annTab[!is.na(LC50_SMILES), c("group", "SMILES", "LC50_SMILES"), with = FALSE]
        LC50Tab[, type := "compound"]
        setnames(LC50Tab, c("SMILES", "LC50_SMILES"), c("candidate", "LC50"))
        if (!is.null(annTab[["compoundName"]]))
            LC50Tab[, candidate_name := annTab$compoundName]
        toxicities <- LC50Tab
    }
    
    if (!is.null(annTab[["LC50_SIRFP"]]) && any(!is.na(annTab$LC50_SIRFP)))
    {
        LC50Tab <- annTab[!is.na(LC50_SIRFP), c("group", "neutral_formula", "LC50_SIRFP"), with = FALSE]
        LC50Tab[, type := "SIRIUS_FP"]
        setnames(LC50Tab, c("neutral_formula", "LC50_SIRFP"), c("candidate", "LC50"))
        # UNDONE: collapse compoundName? Results mostly in very long columns.
        # if (!is.null(annTab[["compoundName"]]))
        #     LC50Tab[, candidate_name := annTab$compoundName]
        LC50Tab <- unique(LC50Tab, by = c("group", "candidate"))
        toxicities <- rbind(toxicities, LC50Tab, fill = TRUE)
    }
    
    if (nrow(toxicities) > 0)
        toxicities <- finalizeFeaturePredTab(toxicities)
    
    fGroups@toxicities <- toxicities
    
    return(fGroups)
})

#' @describeIn featureGroups Obtain the total ion chromatogram/s (TICs) of the analyses.
#' @param MSLevel Integer vector with the ms levels (i.e., 1 for MS1 and 2 for MS2) 
#' to obtain TIC traces.
#' @param retentionRange Range of retention time (in seconds) to collect TIC traces.
#' Should be a numeric vector with length of two containing the min/max values. 
#' Set to NULL to ignore.
#' @author Ricardo Cunha, \email{cunha@@iuta.de}
#' @aliases getTICs
#' @export
setMethod("getTICs", "featureGroups", function(obj, retentionRange = NULL, MSLevel = c(1, 2))
{
    getTICs(obj@features, retentionRange, MSLevel)
})

#' @describeIn featureGroups Obtain the base peak chromatogram/s (BPCs) of the analyses.
#' @author Ricardo Cunha, \email{cunha@@iuta.de}
#' @aliases getBPCs
#' @export
setMethod("getBPCs", "featureGroups", function(obj, retentionRange = NULL, MSLevel = c(1, 2))
{
    getBPCs(obj@features, retentionRange, MSLevel)
})

#' Grouping of features
#'
#' Group equal features across analyses.
#'
#' After \link[=findFeatures]{features have been found}, the next step is to align and group them across analyses. This
#' process is necessary to allow comparison of features between multiple analyses, which otherwise would be difficult
#' due to small deviations in retention and mass data. Thus, algorithms of 'feature groupers' are used to collect
#' features with similar retention and mass data. In addition, advanced retention time alignment algorithms exist to
#' enhance grouping of features even with relative large retention time deviations (\emph{e.g.} possibly observed from
#' analyses collected over a long period). Like \link{findFeatures}, various algorithms are supported which may have
#' many parameters that can be fine-tuned. This fine-tuning is likely to be necessary, since optimal settings often
#' depend on applied methodology and instrumentation.
#'
#' @templateVar func groupFeatures
#' @templateVar what group features
#' @templateVar ex1 groupFeaturesOpenMS
#' @templateVar ex2 groupFeaturesXCMS3
#' @templateVar algos openms,xcms,xcms3,kpic2
#' @templateVar algosSuffix OpenMS,XCMS,XCMS3,KPIC2,SIRIUS
#' @templateVar noParam TRUE
#' @templateVar ret featureGroups
#' @template generic-algo
#' 
#' @param algorithm A \code{character} that specifies the algorithm to be used: either \code{"openms"}, \code{"xcms"},
#'   \code{"xcms3"} or \code{"kpic2"} (\code{features method}), or \code{"sirius"} (\code{data.frame} method).
#' @param obj Either a \code{\link{features}} object to be grouped, or a \code{data.frame} with
#'   \link[=analysis-information]{analysis info} to be passed to \code{groupFeaturesSIRIUS}
#' @param \dots Further parameters passed to the selected grouping algorithm.
#' @param verbose if \code{FALSE} then no text output will be shown.
#'  
#' @return An object of a class which is derived from \code{\link{featureGroups}}.
#' 
#' @templateVar what groupFeatures
#' @templateVar cl features
#' @template main-rd-method
#' @export
setMethod("groupFeatures", "features", function(obj, algorithm, ..., verbose = TRUE)
{
    ac <- checkmate::makeAssertCollection()
    checkmate::assertChoice(algorithm, c("openms", "xcms", "xcms3", "kpic2"), add = ac)
    checkmate::assertFlag(verbose, add = ac)
    checkmate::reportAssertions(ac)
    
    f <- switch(algorithm,
                openms = groupFeaturesOpenMS,
                xcms = groupFeaturesXCMS,
                xcms3 = groupFeaturesXCMS3,
                kpic2 = groupFeaturesKPIC2)

    f(obj, ..., verbose = verbose)
})

#' @details The \code{data.frame} method for \code{groupFeatures} is a special case that currently only supports the
#'   \code{"sirius"} algorithm.
#' @rdname groupFeatures
#' @export
setMethod("groupFeatures", "data.frame", function(obj, algorithm, ..., verbose = TRUE)
{
    ac <- checkmate::makeAssertCollection()
    checkmate::assertChoice(algorithm, c("sirius"), add = ac)
    checkmate::assertFlag(verbose, add = ac)
    checkmate::reportAssertions(ac)
    
    f <- switch(algorithm,
                sirius = groupFeaturesSIRIUS)
    
    f(obj, ..., verbose = verbose)
})

#' Import feature groups from files
#'
#' Generic function to import feature groups produced by other software from files.
#'
#' @templateVar func importFeatureGroups
#' @templateVar what import feature groups from files
#' @templateVar ex1 importFeatureGroupsBrukerTASQ
#' @templateVar ex2 importFeatureGroupsBrukerPA
#' @templateVar algosSuffix BrukerPA,BrukerTASQ,EnviMass
#' @templateVar ret featureGroups
#' @templateVar noParam TRUE
#' @template generic-algo
#'
#' @param path The path that should be used for importing. See the algorithm specific functions for more details.
#' @param type Which file type should be imported: \code{"brukerpa"} (Bruker ProfileAnalysis), \code{"brukertasq"}
#'   (Bruker TASQ) or \code{"envimass"} (\pkg{enviMass}).
#' @param \dots Further arguments passed to the selected import algorithm function.
#'
#' @inherit groupFeatures return
#'
#' @seealso \code{\link{groupFeatures}} to group features. Other import functions:
#'   \code{\link{importFeatureGroupsXCMS}}, \code{\link{importFeatureGroupsXCMS3}} and
#'   \code{\link{importFeatureGroupsKPIC2}}.
#'
#' @export
importFeatureGroups <- function(path, type, ...)
{
    f <- switch(type,
                brukerpa = importFeatureGroupsBrukerPA,
                brukertasq = importFeatureGroupsBrukerTASQ,
                envimass = importFeatureGroupsEnviMass,
                stop("Invalid algorithm! Should be: brukerpa, brukertasq or envimass"))

    f(path, ...)
}
rickhelmus/patRoon documentation built on April 3, 2024, 6:56 p.m.