#' Metabolite set enrichment analysis (MSEA) (using a hypergeometric test)
#'
#' A function that returns the pathway enrichment score for all perturbed metabolites in a patient's full metabolomic profile.
#' @param met.profile - A character vector of a patient's metabolomic profile, including KEGG IDs
#' and the associated z-score or p-value describing the level of the metabolite compared to controls.
#' @param type - Either "p-value" or "z-score".
#' @param gene.profile - Default set to NULL, meaning the default enrichment analysis only considers metabolites. However, if you have gene data, too, set
#' this parameter to a character vector of the gene names with found variants in the patient's record. Gene IDs must be converted to Entrez Identifiers.
#' @param paths.hsa - Which pathways do you want to test. The default is all 289 KEGG pathways.
#' @export data.getMSEA
#' @examples
#'
#' paths.hsa = list.files("/Library/Frameworks/R.framework/Versions/3.5/Resources/library/pathview/extdata", pattern=".xml")
#' paths.hsa = as.character(unlist(sapply(paths.hsa, function(i) unlist(strsplit(i, split=".xml"))[1])))
#' pathway.data = data.getMSEA(met.profile, "z-score", NULL, paths.hsa)
data.getMSEA = function(met.profile, type="zscore", gene.profile=NULL, paths.hsa) {
if (type=="zscore") {
perturbed.mets = met.profile[which(abs(met.profile) > 2)]
} else {
perturbed.mets = met.profile[which(met.profile < 0.05)]
}
nms.perturbed.mets = unique(as.character(unlist(sapply(names(perturbed.mets), function(i) strsplit(i, split=";")))))
# The size of the population of total possible metabolites to draw from
population = names(met.profile)
row = 1
pathway.data = data.frame(Pathway=character(), FDR=numeric(), Pvalue=numeric(), Hits=integer(), Size=integer(), stringsAsFactors = FALSE)
for (pathway in 1:length(paths.hsa)) {
xml.file = system.file("extdata", sprintf("%s.xml", paths.hsa[pathway]), package="pathview")
if (xml.file!="") {
node.data = node.info(xml.file)
pathway.compounds = node.data$type[which(node.data$type=="compound")]
pathCompIDs = node.data$labels[names(pathway.compounds)]
# q (sample successes), m (population successes), n (population failures), k (sample size)
sampleSuccesses = length(which(nms.perturbed.mets %in% pathCompIDs))
populationSuccesses = length(intersect(pathCompIDs, population))
N = length(population)
populationFailures=N-populationSuccesses
numDraws=length(perturbed.mets)
if (populationSuccesses>0) {
pathway.data[row, "Pathway"] = paths.hsa[pathway]
pathway.data[row, "Pvalue"] = round(phyper(q=sampleSuccesses-1, m=populationSuccesses, n=populationFailures, k=numDraws, lower.tail=FALSE), 3)
pathway.data[row, "Hits"] = sampleSuccesses
pathway.data[row, "Size"] = populationSuccesses
row = row + 1
}
} else {
print(pathway)
}
}
pathway.data[,"FDR"] = p.adjust(pathway.data[,"Pvalue"], method="fdr")
# Sort by FDR, then Pvalue
pathway.data = pathway.data[order(pathway.data[,"FDR"], pathway.data[,"Pvalue"]),]
# Then, only return rows that have Pvalue < 0.25
pathway.data = pathway.data[which(pathway.data[,"Pvalue"]<0.25),]
return(pathway.data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.