R/features-envipick.R

Defines functions importEnviPickPeakList importFeaturesEnviMass findFeaturesEnviPick

Documented in findFeaturesEnviPick importFeaturesEnviMass

#' @include features.R
NULL

#' @rdname features-class
#' @export
featuresEnviPick <- setClass("featuresEnviPick", contains = "features")

setMethod("initialize", "featuresEnviPick",
          function(.Object, ...) callNextMethod(.Object, algorithm = "envipick", ...))


#' Find features using enviPick
#'
#' Uses the \code{\link[enviPick]{enviPickwrap}} function from the \pkg{enviPick} R package to extract features.
#'
#' @templateVar algo enviPick
#' @templateVar do automatically find features
#' @templateVar generic findFeatures
#' @templateVar algoParam envipick
#' @template algo_generator
#'
#' @template centroid_note_mandatory
#'
#' @inheritParams findFeatures
#' 
#' @template parallel-arg
#' 
#' @param \dots Further parameters passed to \code{\link[enviPick]{enviPickwrap}}.
#'
#' @note The analysis files must be in the \code{mzXML} format.
#'
#' @inherit findFeatures return
#'
#' @export
findFeaturesEnviPick <- function(analysisInfo, ..., parallel = TRUE, verbose = TRUE)
{
    checkPackage("enviPick", "blosloos/enviPick")
    
    ac <- checkmate::makeAssertCollection()
    analysisInfo <- assertAndPrepareAnaInfo(analysisInfo, "mzXML", verifyCentroided = TRUE)
    aapply(checkmate::assertFlag, . ~ parallel + verbose, fixed = list(add = ac))
    checkmate::reportAssertions(ac)

    anaCount <- nrow(analysisInfo)
    ret <- featuresEnviPick(analysisInfo = analysisInfo)

    if (verbose)
        printf("Finding features with enviPick for %d analyses ...\n", anaCount)

    anas <- analysisInfo$analysis
    filePaths <- mapply(getMzXMLAnalysisPath, anas, analysisInfo$path)
    baseHash <- makeHash(list(...))
    hashes <- setNames(sapply(filePaths, function(fp) makeHash(baseHash, makeFileHash(fp))), anas)
    cachedData <- lapply(hashes, loadCacheData, category = "featuresEnviPick")
    cachedData <- pruneList(setNames(cachedData, anas))
    
    doFP <- function(fp)
    {
        invisible(utils::capture.output(ep <- enviPick::enviPickwrap(fp, ...)))
        f <- importEnviPickPeakList(ep$Peaklist)
        patRoon:::doProgress()
        return(f)
    }

    anasTBD <- setdiff(anas, names(cachedData))
    if (length(anasTBD) > 0)
    {
        if (parallel)
            feats <- withProg(length(anasTBD), TRUE, future.apply::future_lapply(filePaths[anasTBD], doFP))
        else
            feats <- withProg(length(anasTBD), FALSE, lapply(filePaths[anasTBD], doFP))
        names(feats) <- anasTBD
        for (a in anasTBD)
            saveCacheData("featuresEnviPick", feats[[a]], hashes[[a]])
        
        if (length(cachedData) > 0)
            feats <- c(feats, cachedData)[anas] # merge and re-order
    }
    else
        feats <- cachedData
    
    if (verbose)
    {
        printf("Done!\n")
        printFeatStats(feats)
    }

    ret@features <- feats
    return(ret)
}

# nocov start

#' Imports features from enviMass
#'
#' Imports features from a project generated by the \pkg{enviMass} package.
#'
#' @templateVar algo enviMass
#' @templateVar generic importFeatures
#' @templateVar algoParam envimass
#' @template algo_importer
#'
#' @inheritParams importFeatures
#'
#' @param enviProjPath The path of the enviMass project.
#'
#' @note This functionality has only been tested with older versions of \pkg{enviMass}.
#' 
#' @inherit importFeatures return
#' 
#' @export
importFeaturesEnviMass <- function(analysisInfo, enviProjPath)
{
    ac <- checkmate::makeAssertCollection()
    analysisInfo <- assertAndPrepareAnaInfo(analysisInfo, "mzXML", verifyCentroided = TRUE, add = ac)
    checkmate::assertDirectoryExists(enviProjPath, "r", add = ac)
    checkmate::assertDirectoryExists(file.path(enviProjPath, "peaklist"), "r", .var.name = "enviProjPath", add = ac)
    checkmate::reportAssertions(ac)

    cat("Importing features from enviMass...\n")

    ret <- featuresEnviPick(analysisInfo = analysisInfo)

    fts <- list()
    scount <- nrow(analysisInfo)
    prog <- openProgBar(0, scount)

    for (i in seq_len(nrow(analysisInfo)))
    {
        load(file.path(enviProjPath, "peaklist", analysisInfo$analysis[i])) # load into 'peaklist'
        fts[[analysisInfo$analysis[i]]] <- importEnviPickPeakList(as.data.frame(peaklist))
        setTxtProgressBar(prog, i)
    }

    ret@features <- fts

    setTxtProgressBar(prog, scount)
    close(prog)

    cat("Done!\n")

    return(ret)
}

# nocov end

importEnviPickPeakList <- function(peaklist)
{
    # peaklist is a single number (zero) when no results
    if (length(peaklist) == 1 || nrow(peaklist) == 0)
        return(data.table(ID = character(), ret = numeric(), mz = numeric(), intensity = numeric(),
                          area = numeric(), retmin = numeric(), retmax = numeric(), mzmin = numeric(),
                          mzmax = numeric()))

    ft <- as.data.table(peaklist)

    setnames(ft, c("m/z", "max_int", "sum_int", "RT", "minRT", "maxRT", "peak_ID"),
             c("mz", "intensity", "area", "ret", "retmin", "retmax", "ID"))

    # Estimate mzrange from variance
    s <- sqrt(ft$`var_m/z`)
    ft[, mzmin := mz - 2*s]
    ft[, mzmax := mz + 2*s]

    return(ft[, c("ID", "ret", "mz", "intensity", "area", "retmin", "retmax", "mzmin", "mzmax")])
}
rickhelmus/patRoon documentation built on April 3, 2024, 6:56 p.m.