R/metaboliteset.enrichment.R

Defines functions msea extract.uniq.metabolites calc.fisher.test formatting.results

Documented in msea

##' This function performs Metabolite Set
##' Enrichment Analysis (MSEA) with KEGG COMPOUND IDs.
##'
##' @title identifies biologically meaningful accumulations in metabolomic data
##'
##' @param metabolite.set a list of metabolite sets
##' @param queryset a list of metabolites for which test is to be done
##' @param atype a kind of alternative test (e.g., E, enrichment; D, depletion)
##' @param midp logical. TRUE gives mid p-values
##' @return msea object
##' @export
##' @examples
##' library(MSEApdata)
##' data(mset_SMPDB_format_KEGG)
##' data(kusano)
##'
##' res <- msea(mset_SMPDB_format_KEGG, kusano)
##'
##' @author Atsushi Fukushima
##'
## metaboliteset enrichment analysis based on fisher's exact test
msea <- function(metabolite.set, queryset, atype = "E", midp = FALSE) {
    if (atype == "ED") 
        alternative = "two.sided" 
    else if (atype == "D") 
        alternative = "less" 
    else alternative = "greater"
    if (!is.logical(midp)) 
        stop("'midp' must be logical")
    mets.refset <- extract.uniq.metabolites(metabolite.set)
    res.fisher.test <- lapply(metabolite.set, calc.fisher.test, 
                            queryset, mets.refset, alternative, midp)
    res <- formatting.results(res.fisher.test)
    class(res) <- c("msea", "data.frame")
    return(res)
}

## This function extracts unique metabolites in Metabolite set
extract.uniq.metabolites <- function(metabolite.set) {
    res <- unique(unlist(lapply(metabolite.set, function(x) unlist(x[3]))))
    return(res)
}

## This function performs Fisher's exact test.
calc.fisher.test <- function(list, queryset, mets.refset, alternative, midp) {
    res <- list()
    res$ID <- unlist(list[1])
    res$setname <- unlist(list[2])
    metabolite.set <- unlist(list[3])
    mets.interest <- intersect(queryset, metabolite.set)
    
    yy <- length(intersect(metabolite.set, queryset))
    yn <- length(setdiff(metabolite.set, queryset))
    ny <- length(queryset) - yy
    nn <- length(mets.refset) + yy - length(queryset) - length(metabolite.set)
    
    if (midp) {
        test.res <- exact2x2::fisher.exact(rbind(c(yy, yn), c(ny, nn)), 
                                            alternative = alternative, 
                                            midp = midp)
    } else {
        test.res <- stats::fisher.test(rbind(c(yy, yn), c(ny, nn)), 
                                        alternative = alternative)
    }
    
    res$pvalue <- test.res$p.value
    
    res$total <- length(metabolite.set)
    res$hit <- yy
    
    expected <- length(queryset) * (length(metabolite.set)/length(mets.refset))
    res$expected <- round(expected, digits = 2)
    
    res$overlap.percent <- 
        round(length(mets.interest)/length(metabolite.set) * 100, digits = 2)
    res$overlap.metabolites <- paste(mets.interest, collapse = ", ")
    
    ## Correcting multiple testing problem by BH method
    res$pvalue <- format(res$pvalue, scientific = TRUE, digits = 4)
    
    return(res)
}

## This function formats the results of MSEA.
formatting.results <- function(list) {
    res <- data.frame()
    pathway.ID <- unlist(lapply(list, function(x) unlist(x$ID)))
    Metaboliteset.name <- unlist(lapply(list, function(x) unlist(x$setname)))
    Total <- unlist(lapply(list, function(x) unlist(x$total)))
    Expected <- unlist(lapply(list, function(x) unlist(x$expected)))
    Hit <- unlist(lapply(list, function(x) unlist(x$hit)))
    p.value <- unlist(lapply(list, function(x) unlist(x$pvalue)))
    Holm.p <- stats::p.adjust(unlist(lapply(list, 
                                            function(x) unlist(x$pvalue))), 
                            method = "holm")
    FDR <- stats::p.adjust(unlist(lapply(list, 
                                        function(x) unlist(x$pvalue))), 
                            method = "fdr")
    Overlap.percent <- unlist(lapply(list, 
                                    function(x) unlist(x$overlap.percent)))
    Overlap.metabolites <- unlist(
        lapply(list, function(x) unlist(x$overlap.metabolites)))
    ## URL <- paste('http://www.genome.jp/dbget-bin/www_bget?', 
    ## KEGG.pathway.ID, sep = '')
    res <- data.frame(cbind(pathway.ID, Metaboliteset.name, Total, Expected, 
                            Hit, p.value, Holm.p, FDR, Overlap.percent, 
                            Overlap.metabolites))
    ordered.res <- res[order(as.numeric(as.character(res$p.value))), ]
    return(ordered.res)
}
afukushima/MSEAp documentation built on Sept. 18, 2019, 7:12 p.m.