#
userListEnrichment <- function(geneR, labelR, fnIn = NULL, catNmIn = fnIn, nameOut = "enrichment.csv",
useBrainLists = FALSE, useBloodAtlases = FALSE, omitCategories = "grey",
outputCorrectedPvalues = TRUE, useStemCellLists = FALSE, outputGenes = FALSE,
minGenesInCategory = 1, useBrainRegionMarkers = FALSE, useImmunePathwayLists = FALSE,
usePalazzoloWang = FALSE) {
if (length(geneR) != length(labelR)) {
stop("geneR and labelR must have same number of elements.")
}
if (length(catNmIn) < length(fnIn)) {
catNmIn <- c(catNmIn, fnIn[(length(catNmIn) + 1):length(fnIn)])
write(
"WARNING: not enough category names. \n\t\t\t Naming remaining categories with file names.",
""
)
}
if (is.null(fnIn) & (!(useBrainLists | useBloodAtlases | useStemCellLists | useBrainRegionMarkers
| useImmunePathwayLists | usePalazzoloWang))) {
stop("Either enter user-defined lists or set one of the use_____ parameters to TRUE.")
}
glIn <- NULL
if (length(fnIn) > 0) {
for (i in 1:length(fnIn))
{
ext <- substr(fnIn[i], nchar(fnIn[i]) - 2, nchar(fnIn[i]))
if (ext == "csv") {
datIn <- read.csv(fnIn[i])
if (colnames(datIn)[2] == "Gene") {
datIn <- datIn[, 2:3]
}
else {
datIn <- datIn[, 1:2]
}
}
else {
datIn <- scan(fnIn[i], what = "character", sep = "\n")
datIn <- cbind(datIn[2:length(datIn)], datIn[1])
}
colnames(datIn) <- c("Gene", "Category")
datIn[, 2] <- paste(datIn[, 2], catNmIn[i], sep = "__")
glIn <- rbind(glIn, datIn)
}
glIn <- cbind(glIn, Type = rep("User", nrow(glIn)))
}
if (useBrainLists) {
if (!(exists("BrainLists"))) BrainLists <- NULL
data("BrainLists", envir = sys.frame(sys.nframe()))
write(
"See help file for details regarding brain list references.",
""
)
glIn <- rbind(glIn, cbind(BrainLists, Type = rep("Brain", nrow(BrainLists))))
}
if (useBloodAtlases) {
if (!(exists("BloodLists"))) BloodLists <- NULL
data("BloodLists", envir = sys.frame(sys.nframe()))
write(
"See help file for details regarding blood atlas references.",
""
)
glIn <- rbind(glIn, cbind(BloodLists, Type = rep("Blood", nrow(BloodLists))))
}
if (useStemCellLists) {
if (!(exists("SCsLists"))) SCsLists <- NULL
data("SCsLists", envir = sys.frame(sys.nframe()))
write(
"See help file for details regarding stem cell list references.",
""
)
glIn <- rbind(glIn, cbind(SCsLists, Type = rep("StemCells", nrow(SCsLists))))
}
if (useBrainRegionMarkers) {
if (!(exists("BrainRegionMarkers"))) BrainRegionMarkers <- NULL
data("BrainRegionMarkers", envir = sys.frame(sys.nframe()))
write(
"Brain region markers from http://human.brain-map.org/ -- see help file for details.",
""
)
glIn <- rbind(glIn, cbind(BrainRegionMarkers, Type = rep("HumanBrainRegions", nrow(BrainRegionMarkers))))
}
if (useImmunePathwayLists) {
if (!(exists("ImmunePathwayLists"))) ImmunePathwayLists <- NULL
data("ImmunePathwayLists", envir = sys.frame(sys.nframe()))
write(
"See help file for details regarding immune pathways.",
""
)
glIn <- rbind(glIn, cbind(ImmunePathwayLists, Type = rep("Immune", nrow(ImmunePathwayLists))))
}
if (usePalazzoloWang) {
if (!(exists("PWLists"))) PWLists <- NULL
data("PWLists", envir = sys.frame(sys.nframe()))
write(
"See help file for details regarding Palazzolo / Wang lists from CHDI.",
""
)
write(
"---- there are many of these gene sets so the function may take several minutes to run.",
""
)
glIn <- rbind(glIn, cbind(PWLists, Type = rep("PW_Lists", nrow(PWLists))))
}
# removeDups = unique(paste(as.character(glIn[, 1]), as.character(glIn[,
# 2]), as.character(glIn[, 3]), sep = "@#$%"))
# if (length(removeDups) < length(glIn[, 1]))
# glIn = t(as.matrix(as.data.frame(strsplit(removeDups, "@#$%", fixed = TRUE))))
glIn.2 <- glIn[!duplicated(as.data.frame(glIn)), ]
geneIn <- as.character(glIn.2[, 1])
labelIn <- as.character(glIn.2[, 2])
geneAll <- sort(unique(geneR))
keep <- is.element(geneIn, geneAll)
geneIn <- geneIn[keep]
labelIn <- labelIn[keep]
catsR <- sort(unique(labelR))
omitCategories <- c(omitCategories, "background")
catsR <- catsR[!is.element(catsR, omitCategories)]
catsIn <- sort(unique(labelIn))
typeIn <- glIn.2[keep, ][match(catsIn, labelIn), 3]
lenAll <- length(geneAll)
nCols.pValues <- 5
nComparisons <- length(catsR) * length(catsIn)
nIn <- length(catsIn)
nR <- length(catsR)
index <- 1
nOverlap <- rep(0, nComparisons)
pValues <- rep(1, nComparisons)
ovGenes <- vector(mode = "list", length = nComparisons)
isI <- matrix(FALSE, lenAll, nIn)
for (i in 1:nIn)
{
isI[, i] <- is.element(geneAll, geneIn[labelIn == catsIn[i]])
}
for (r in 1:length(catsR))
{
isR <- is.element(geneAll, geneR[(labelR == catsR[r])])
for (i in 1:length(catsIn))
{
isI.1 <- isI[, i]
lyn <- sum(isR & (!isI.1))
lny <- sum(isI.1 & (!isR))
lyy <- sum(isR & isI.1)
gyy <- geneAll[isR & isI.1]
lnn <- lenAll - lyy - lyn - lny
pv <- fisher.test(matrix(c(lnn, lny, lyn, lyy), 2, 2), alternative = "greater")$p.value
nOverlap[index] <- lyy
pValues[index] <- pv
ovGenes[[index]] <- gyy
index <- index + 1
}
}
results <- list(
pValues = data.frame(
InputCategories = rep(catsR, rep(nIn, nR)),
UserDefinedCategories = rep(catsIn, nR),
Type = rep(typeIn, nR),
NumOverlap = nOverlap,
Pvalues = pValues,
CorrectedPvalues = ifelse(pValues * nComparisons > 1, 1,
pValues * nComparisons
)
),
ovGenes = ovGenes
)
namesOv <- paste(results$pValues$InputCategories, "--", results$pValues$UserDefinedCategories)
names(results$ovGenes) <- namesOv
if (outputCorrectedPvalues) {
results$sigOverlaps <- results$pValues[results$pValues$CorrectedPvalues < 0.05, c(1, 2, 3, 6)]
} else {
results$sigOverlaps <- results$pValues[results$pValues$Pvalues < 0.05, c(1, 2, 3, 5)]
write(
"Note that outputted p-values are not corrected for multiple comparisons.",
""
)
}
results$sigOverlaps <- results$sigOverlaps[order(results$sigOverlaps[, 4]), ]
row.names(results$sigOverlaps) <- NULL
rSig <- results$sigOverlaps
nSig <- nrow(rSig)
if (nSig > 0) {
rCats <- paste(rSig$InputCategories, "--", rSig$UserDefinedCategories)
rNums <- rep(0, nSig)
rGenes <- rep("", nSig)
for (i in 1:nSig)
{
rGn <- results$ovGenes[[which(names(results$ovGenes) == rCats[i])]]
rNums[i] <- length(rGn)
rGenes[i] <- paste(rGn, collapse = ", ")
}
rSig$NumGenes <- rNums
rSig$CategoryGenes <- rGenes
rSig <- rSig[rSig$NumGenes >= minGenesInCategory, ]
if (!outputGenes) rSig <- rSig[, 1:4]
results$sigOverlaps <- rSig
write.csv(results$sigOverlaps, file = nameOut, row.names = FALSE)
}
write(
paste(length(namesOv), "comparisons were successfully performed."),
""
)
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.