#' A function called from HyperGeneSetCollection_MSF
#' @description This function runs a function that takes a list of gene set collections, a named phenotype vector
#' (with names of the phenotype vector as the universe), a vector of hits (gene names only)
#' and returns the results of hypergeometric and gene set enrichment analyses for all of the gene set collections
#' (with multiple hypothesis testing corrections).
#' @param listOfGeneSetCollections a list of gene set collections (a 'gene set collection' is a list of gene sets).
#' Even if only one collection is being tested, it must be entered as an element of a 1-element list,
#' e.g. ListOfGeneSetCollections = list(YourOneGeneSetCollection).
#' Naming the elements of listOfGeneSetCollections will result in these names being associated
#' with the relevant data frames in the output (meaningful names are advised)
#' @param geneList a numeric or integer vector of phenotypes in descending or ascending order
#' with elements named by their EntrezIds (no duplicates nor NA values)
#' @param hits a character vector of the EntrezIds of hits, as determined by the user
#' @param pAdjustMethod a single character value specifying the p-value adjustment method to be used (see 'p.adjust' for details)
#' @param pValueCutoff numeric value, p value of cut of
#' @param nPermutations numeric value, number of permutations
#' @param minGeneSetSize numeric value, minimun number of genes accepted
#' @param doGSEA if TRUE do GSEA type of analysis
#' @param doGSOA if TRUE do Over representation Analysis using HyperGeom test
#'
analyzeGeneSetCollections_MSF_windows<-function (listOfGeneSetCollections, geneList, hits, pAdjustMethod = "BH",
pValueCutoff = 0.05, nPermutations = 1000, minGeneSetSize = 15,
exponent = 1, verbose = TRUE, doGSOA = TRUE, doGSEA = TRUE)
{ require(HTSanalyzeR)
if ((!doGSOA) && (!doGSEA))
stop("Please choose to perform hypergeometric tests and/or GSEA by specifying 'doGSOA' and 'doGSEA'!\n")
if (doGSOA || doGSEA) {
#paraCheck("gscs", listOfGeneSetCollections)
#paraCheck("genelist", geneList)
#paraCheck("pAdjustMethod", pAdjustMethod)
#paraCheck("pValueCutoff", pValueCutoff)
#paraCheck("minGeneSetSize", minGeneSetSize)
#paraCheck("verbose", verbose)
}
if (doGSOA) {
# paraCheck("hits", hits)
}
if (doGSEA) {
# paraCheck("nPermutations", nPermutations)
# paraCheck("exponent", exponent)
}
numGeneSetCollections <- length(listOfGeneSetCollections)
max.size <- 0
for (l in 1:numGeneSetCollections) {
gs.size <- unlist(lapply(lapply(listOfGeneSetCollections[[l]],
intersect, y = names(geneList)), length))
max.size <- max(max(gs.size), max.size)
gs.id <- which(gs.size >= minGeneSetSize)
n.gs.discarded <- length(listOfGeneSetCollections[[l]]) -
length(gs.id)
listOfGeneSetCollections[[l]] <- listOfGeneSetCollections[[l]][gs.id]
if (verbose && n.gs.discarded > 0)
cat(paste("--", n.gs.discarded, " gene sets don't have >= ",
minGeneSetSize, " overlapped genes with universe in gene",
" set collection named ", names(listOfGeneSetCollections)[l],
"!\n", sep = ""))
}
if (all(unlist(lapply(listOfGeneSetCollections, length)) == 0))
# if (all(unlist(lapply(listOfGeneSetCollections, length)) == 0))
stop(paste("No gene set has >= ", minGeneSetSize, " overlapped ",
"genes with universe!\n The largest number of overlapped ",
"genes between gene sets and universe is: ", max.size,
sep = ""))
result.names <- names(listOfGeneSetCollections)
if (doGSOA) {
#HGTresults <- list()
# cat("-Performing hypergeometric analysis ...\n")
# for (i in 1:length(listOfGeneSetCollections)) {
# if (verbose) {
# cat("--For", names(listOfGeneSetCollections)[i], "\n")
# }
# if (length(listOfGeneSetCollections[[i]]) > 0)
# HGTresults[[i]] <- multiHyperGeoTest(listOfGeneSetCollections[[i]],
# universe = names(geneList), hits = hits, minGeneSetSize = minGeneSetSize,
# pAdjustMethod = pAdjustMethod, verbose = verbose)
#
# else {
# HGTresults[[i]] <- matrix(, nrow = 0, ncol = 7)
# colnames(HGTresults[[i]]) <- c("Universe Size",
# "Gene Set Size", "Total Hits", "Expected Hits",
# "Observed Hits", "Pvalue", "Adjusted.Pvalue")
# }
# }
HGTresults<-lapply(listOfGeneSetCollections, function(L) {
#if (verbose) { cat("--For", , "\n") }
if (length(L) > 0) out<-multiHyperGeoTest(L, universe = names(geneList), hits = hits,
minGeneSetSize = minGeneSetSize, pAdjustMethod = pAdjustMethod, verbose = verbose)
else {
out <- matrix(, nrow = 0, ncol = 7)
colnames(out) <- c("Universe Size", "Gene Set Size", "Total Hits", "Expected Hits",
"Observed Hits", "Pvalue", "Adjusted.Pvalue")
}
out<-cbind(out[,1:5],OR=out[,"Observed Hits"]/out[, "Expected Hits"],out[,6:7])
return(out)
})
pvals <- NULL
sapply(1:numGeneSetCollections, function(i) {
if (nrow(HGTresults[[i]]) > 0) {
pv <- HGTresults[[i]][, "Pvalue"]
names(pv) <- rownames(HGTresults[[i]])
pvals <<- c(pvals, pv)
}
})
HGTpvals <- p.adjust(pvals, method = pAdjustMethod)
sapply(1:numGeneSetCollections, function(i) {
if (nrow(HGTresults[[i]]) > 0) {
ind <- match(rownames(HGTresults[[i]]), names(HGTpvals))
Adjusted.Pvalue <- HGTpvals[ind]
HGTresults[[i]][, "Adjusted.Pvalue"] <<- Adjusted.Pvalue
}
})
names(HGTresults) <- result.names
cat("-Hypergeometric analysis complete\n\n")
sign.hgt <- lapply(HGTresults, function(x) {
if (nrow(x) > 0) {
a <- which(x[, "Pvalue"] < pValueCutoff)
x <- x[a, , drop = FALSE]
if (length(a) > 1) {
x <- x[order(x[, "Pvalue"]), , drop = FALSE]
}
return(x)
}
})
sign.hgt.adj <- lapply(HGTresults, function(x) {
if (nrow(x) > 0) {
a <- which(x[, "Adjusted.Pvalue"] < pValueCutoff)
x <- x[a, , drop = FALSE]
if (length(a) > 1) {
x <- x[order(x[, "Adjusted.Pvalue"]), , drop = FALSE]
}
return(x)
}
})
}
else { HGTresults = NULL }
if (doGSEA) {
GSEA.results.list <- list()
cat("-Performing gene set enrichment analysis ...\n")
# test.collection <- list()
# sapply(1:length(listOfGeneSetCollections), function(i) {
# if (verbose) { cat("--For", names(listOfGeneSetCollections)[i], "\n")}
# if (length(listOfGeneSetCollections[[i]]) > 0)
# test.collection[[i]] <<- collectionGsea(listOfGeneSetCollections[[i]],
# geneList = geneList, exponent = exponent, nPermutations = nPermutations,
# minGeneSetSize = minGeneSetSize, verbose = verbose)
#
# else {test.collection[[i]] <<- list(Observed.scores = NULL, Permutation.scores = NULL)}
# })
test.collection<-lapply(listOfGeneSetCollections, function(L) {
if (verbose) { cat("--For", 'names(listOfGeneSetCollections)[i]', "\n")}
if (length(L) > 0)
collectionGsea(L, geneList = geneList, exponent = exponent, nPermutations = nPermutations,
minGeneSetSize = minGeneSetSize, verbose = verbose)
else {test.collection[[i]] <<- list(Observed.scores = NULL, Permutation.scores = NULL)}
})
obs.scores <- NULL
prm.scores <- NULL
sapply(1:numGeneSetCollections, function(i) {
obs.scores <<- c(obs.scores, test.collection[[i]]$Observed.scores)
prm.scores <<- rbind(prm.scores, test.collection[[i]]$Permutation.scores)
})
if (length(obs.scores) > 0) {
total.test.collect <- list(Observed.scores = obs.scores, Permutation.scores = prm.scores)
test.FDR.collection <- FDRcollectionGsea(permScores = total.test.collect$Permutation.scores,
dataScores = total.test.collect$Observed.scores)
test.pvalues.collection <- permutationPvalueCollectionGsea(permScores = total.test.collect$Permutation.scores,
dataScores = (total.test.collect$Observed.scores))
gsea.adjust.pval <- p.adjust(test.pvalues.collection, method = pAdjustMethod)
test.GSEA.results <- cbind(total.test.collect$Observed.scores, test.pvalues.collection,
gsea.adjust.pval, test.FDR.collection)
colnames(test.GSEA.results) <- c("Observed.score", "Pvalue", "Adjusted.Pvalue", "FDR")
sapply(1:numGeneSetCollections, function(i) {
if (!is.null(test.collection[[i]])) {
match.ind <- match(names(listOfGeneSetCollections[[i]]), rownames(test.GSEA.results))
match.ind <- match.ind[which(!is.na(match.ind))]
GSEA.res.mat <- test.GSEA.results[match.ind, , drop = FALSE]
GSEA.res.mat <- GSEA.res.mat[order(GSEA.res.mat[, "Adjusted.Pvalue"]), , drop = FALSE]
GSEA.results.list[[i]] <<- GSEA.res.mat
}
})
names(GSEA.results.list) <- result.names
}
else {
sapply(1:numGeneSetCollections, function(i) {
GSEA.results.list[[i]] <<- matrix(, nrow = 0, ncol = 4)
colnames(GSEA.results.list[[i]]) <<- c("Observed.score",
"Pvalue", "Adjusted.Pvalue", "FDR")
})
}
sign.gsea <- lapply(GSEA.results.list, function(x) {
if (nrow(x) > 0) {
a <- which(x[, "Pvalue"] < pValueCutoff)
x <- x[a, , drop = FALSE]
if (length(a) > 1) {
x <- x[order(x[, "Pvalue"]), , drop = FALSE]
}
return(x)
}
})
sign.gsea.adj <- lapply(GSEA.results.list, function(x) {
if (nrow(x) > 0) {
a <- which(x[, "Adjusted.Pvalue"] <= pValueCutoff)
x <- x[a, , drop = FALSE]
if (length(a) > 1) {
x <- x[order(x[, "Adjusted.Pvalue"]), , drop = FALSE]
}
return(x)
}
})
}
else {
GSEA.results.list = NULL
}
if (doGSOA && doGSEA) {
overlap <- list()
overlap.adj <- list()
sapply(1:numGeneSetCollections, function(i) {
a1 <- intersect(rownames(sign.gsea[[i]]), rownames(sign.hgt[[i]]))
a2 <- intersect(rownames(sign.gsea.adj[[i]]), rownames(sign.hgt.adj[[i]]))
Hypergeometric.Pvalue <- HGTresults[[i]][a1, "Pvalue",
drop = FALSE]
Hypergeometric.Adj.Pvalue <- HGTresults[[i]][a2,
"Adjusted.Pvalue", drop = FALSE]
GSEA.Pvalue <- GSEA.results.list[[i]][a1, "Pvalue",
drop = FALSE]
GSEA.Adj.Pvalue <- GSEA.results.list[[i]][a2, "Adjusted.Pvalue",
drop = FALSE]
overlap[[i]] <<- cbind(Hypergeometric.Pvalue, GSEA.Pvalue)
colnames(overlap[[i]]) <<- c("HyperGeo.Pvalue", "GSEA.Pvalue")
overlap.adj[[i]] <<- cbind(Hypergeometric.Adj.Pvalue,
GSEA.Adj.Pvalue)
colnames(overlap.adj[[i]]) <<- c("HyperGeo.Adj.Pvalue", "GSEA.Adj.Pvalue")
})
names(overlap) <- result.names
names(overlap.adj) <- result.names
}
else {
overlap = NULL
overlap.adj = NULL
}
cat("-Gene set enrichment analysis complete \n")
final.results <- list(HyperGeo.results = HGTresults, GSEA.results = GSEA.results.list,
Sig.pvals.in.both = overlap, Sig.adj.pvals.in.both = overlap.adj)
print('Finish Running GSOA/GSEA')
return(final.results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.