##' This function performs Metabolite Set
##' Enrichment Analysis (MSEA) with KEGG COMPOUND IDs.
##'
##' @title identifies biologically meaningful accumulations in metabolomic data
##'
##' @param metabolite.set a list of metabolite sets
##' @param queryset a list of metabolites for which test is to be done
##' @param atype a kind of alternative test (e.g., E, enrichment; D, depletion)
##' @param midp logical. TRUE gives mid p-values
##' @return msea object
##' @export
##' @examples
##' library(MSEApdata)
##' data(mset_SMPDB_format_KEGG)
##' data(kusano)
##'
##' res <- msea(mset_SMPDB_format_KEGG, kusano)
##'
##' @author Atsushi Fukushima
##'
## metaboliteset enrichment analysis based on fisher's exact test
msea <- function(metabolite.set, queryset, atype = "E", midp = FALSE) {
if (atype == "ED")
alternative = "two.sided"
else if (atype == "D")
alternative = "less"
else alternative = "greater"
if (!is.logical(midp))
stop("'midp' must be logical")
mets.refset <- extract.uniq.metabolites(metabolite.set)
res.fisher.test <- lapply(metabolite.set, calc.fisher.test,
queryset, mets.refset, alternative, midp)
res <- formatting.results(res.fisher.test)
class(res) <- c("msea", "data.frame")
return(res)
}
## This function extracts unique metabolites in Metabolite set
extract.uniq.metabolites <- function(metabolite.set) {
res <- unique(unlist(lapply(metabolite.set, function(x) unlist(x[3]))))
return(res)
}
## This function performs Fisher's exact test.
calc.fisher.test <- function(list, queryset, mets.refset, alternative, midp) {
res <- list()
res$ID <- unlist(list[1])
res$setname <- unlist(list[2])
metabolite.set <- unlist(list[3])
mets.interest <- intersect(queryset, metabolite.set)
yy <- length(intersect(metabolite.set, queryset))
yn <- length(setdiff(metabolite.set, queryset))
ny <- length(queryset) - yy
nn <- length(mets.refset) + yy - length(queryset) - length(metabolite.set)
if (midp) {
test.res <- exact2x2::fisher.exact(rbind(c(yy, yn), c(ny, nn)),
alternative = alternative,
midp = midp)
} else {
test.res <- stats::fisher.test(rbind(c(yy, yn), c(ny, nn)),
alternative = alternative)
}
res$pvalue <- test.res$p.value
res$total <- length(metabolite.set)
res$hit <- yy
expected <- length(queryset) * (length(metabolite.set)/length(mets.refset))
res$expected <- round(expected, digits = 2)
res$overlap.percent <-
round(length(mets.interest)/length(metabolite.set) * 100, digits = 2)
res$overlap.metabolites <- paste(mets.interest, collapse = ", ")
## Correcting multiple testing problem by BH method
res$pvalue <- format(res$pvalue, scientific = TRUE, digits = 4)
return(res)
}
## This function formats the results of MSEA.
formatting.results <- function(list) {
res <- data.frame()
pathway.ID <- unlist(lapply(list, function(x) unlist(x$ID)))
Metaboliteset.name <- unlist(lapply(list, function(x) unlist(x$setname)))
Total <- unlist(lapply(list, function(x) unlist(x$total)))
Expected <- unlist(lapply(list, function(x) unlist(x$expected)))
Hit <- unlist(lapply(list, function(x) unlist(x$hit)))
p.value <- unlist(lapply(list, function(x) unlist(x$pvalue)))
Holm.p <- stats::p.adjust(unlist(lapply(list,
function(x) unlist(x$pvalue))),
method = "holm")
FDR <- stats::p.adjust(unlist(lapply(list,
function(x) unlist(x$pvalue))),
method = "fdr")
Overlap.percent <- unlist(lapply(list,
function(x) unlist(x$overlap.percent)))
Overlap.metabolites <- unlist(
lapply(list, function(x) unlist(x$overlap.metabolites)))
## URL <- paste('http://www.genome.jp/dbget-bin/www_bget?',
## KEGG.pathway.ID, sep = '')
res <- data.frame(cbind(pathway.ID, Metaboliteset.name, Total, Expected,
Hit, p.value, Holm.p, FDR, Overlap.percent,
Overlap.metabolites))
ordered.res <- res[order(as.numeric(as.character(res$p.value))), ]
return(ordered.res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.