README.md
In miccec/yaGST: Competitive gene set and regulon tests.

yaGST

This is a collection of wrappers to the Wilcoxon test to run competitive gene set tests and comparison between imbalanced datasets.

library(devtools)
install_github("miccec/yaGST")

library(yaGST)
data("rankedList")
# generate a random data set of dimension 100
geneSet <- sample(head(names(rankedList), 5000), 100)
ans <- mwwGST(rankedList, geneSet, moreDetails = TRUE)
ans
plot(ans)

# generate a second gene set
geneSet <- sample(tail(names(rankedList), 5000), 100)
ans <- mwwGST(rankedList, geneSet, moreDetails = TRUE)
plot(ans)

You may need to download the collection of gene-sets, for example from the Molecular Signatures Database at the Broad Institute

h.all.v6.0.symbols.gmt

In case the output of the analysis has to be saved in an .xls file, you need to install the WriteXLS package.

load the GO collections

library(yaGST)
GO <- gmt2GO("~/pathToYourFiles/h.all.v6.0.symbols.gmt")
length(GO)

load the ranked list

data("rankedList")
head(rankedList)
tail(rankedList)

run the mwwGST

ans <- lapply(GO, function(x) mwwGST(rankedList, x, minLenGeneSet = 15, verbose = FALSE))

Now, arrange the results in a table

actualGeneSetSize <- unlist(sapply(ans, function(x) x$actualGeneSetCount))
originalGeneSetSize <- unlist(sapply(ans, function(x) x$originalGeneSetCount))
pValue <- unlist(sapply(ans, function(x) x$p.value))
qValue <- p.adjust(pValue)
nes <- unlist(sapply(ans, function(x) x$nes))
pu <- unlist(sapply(ans, function(x) x$pu))
log2.pu <- unlist(sapply(ans, function(x) x$log.pu))
result_table <- cbind(originalGeneSetSize, actualGeneSetSize,  
  round(nes, 4), round(pu, 4), round(log2.pu, 4), round(pValue, 6), round(qValue, 6))
colnames(result_table) <- c("originalGeneSetSize", "actualGeneSetSize", "NES", "pu", "log2_pu", "pValue", "qValue")
result_table <- result_table[order(result_table[, "NES"], decreasing = TRUE),]

If needed, the table can be stored in an .xls file as follows

```{r, eval = FALSE} library(WriteXLS) gst_results <- as.data.frame(result_table) WriteXLS("gst_results", ExcelFileName = "~/pathToYourFiles/aNameForTheAnalysis.xls", row.names = TRUE)


filter and plot the results
```{r}
qValue_treshold <- 0.05
NES_treshold <- 0.6
selected <- which((result_table[, "NES"] > NES_treshold) & (result_table[, "qValue"] < qValue_treshold))
result_table[selected,]
first_geneSet <- names(selected)[1]
plot(ans[[first_geneSet]], rankedList = rankedList, main = first_geneSet)

data("rankedList")
positive_gs <- sample(head(names(rankedList), 10000), 200)
negative_gs <- sample(tail(names(rankedList), 10000), 200)
ans <- mwwExtGST(rankedList, positive_gs, negative_gs, moreDetails = TRUE)
ans
plot(ans)

nr <- 100; nc <- 1000
# generate a data-matrix with nr samples, and nc features
exprData <- matrix(rpois(nc * nr, 100), nrow = nr, ncol = nc)
colnames(exprData) <- paste0("feat", 1:nc)
rownames(exprData) <- paste0("sam", 1:nr)

# increase the first 3 samples (minority set) of 10\% of the original intensity
# of the first 30 features (later the gene-set)
exprData[1, 1:30] <- exprData[1, 1:30]* runif(30, min = 1, max = 1.10)
exprData[2, 1:30] <- exprData[1, 1:30]* runif(30, min = 1, max = 1.10)
exprData[3, 1:30] <- exprData[1, 1:30]* runif(30, min = 1, max = 1.10)
samples_of_interest <- rownames(exprData)[1:3] # minority set

# running in parallel
library(doParallel)
# adjust the number of CPUs as needed
cl <- makePSOCKcluster(3)
clusterApply(cl, floor(runif(length(cl), max = 10000000)), set.seed)
registerDoParallel(cl)
ans_eeMWW <- eeMWW(exprData, samples_of_interest)
stopCluster(cl)

# set the gene-set and run the enrichment analysis
geneSet <- colnames(exprData)[1:30]
(tmp <- mwwGST(ans_eeMWW, geneSet))
plot(tmp, rankedList = ans_eeMWW)

miccec/yaGST documentation built on May 23, 2019, 7:35 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

Tweet to @rdrrHQ

GitHub issue tracker

ian@mutexlabs.com