#' Run gene set enrichement on Gene Ontology categories
#'
#' A wrapper function to run Fisher's test for enrichement
#' on Gene Ontology (GO) categories.
#' Depends on the bioconductor \code{org.xx.eg.db} and \code{GO.db} packages.
#'
#' @param foreground_ids A list of ENTREZ ids of interest
#' (e.g. significantly differentially expressed genes).
#' @param background_ids A list of background ENTREZ ids.
#' against which enrichment will be tested (i.e., the gene universe).
#' (see \code{fetchAnnotation}).
#' @param gene_id_type Either "entrez" (default") or "ensembl".
#' @param species Species identifier (only "hs" or "mm" are supported).
#' @export
#'
#' @seealso
#' \code{\link{runFisherTests}}.
#'
#' @author Steve Sansom
#'
#' @examples
#' # TODO
#
#' \dontrun{
#' # TODO
#' }
runGO <- function(
foreground_ids,
background_ids,
gene_id_type=c("entrez","ensembl"),
species=c("hs","mm"),
...
){
require(GO.db)
species <- match.arg(species)
gene_id_type <- match.arg(gene_id_type)
foreground_ids <- getEntrez(foreground_ids, gene_id_type, species)
background_ids <- getEntrez(background_ids, gene_id_type, species)
## Get the gene sets
if(!hasArg(genesets))
{
message("getting genesets")
genesets <- getGO(species)
}
if(!hasArg(SYMBOL))
{
message("getting symbols")
SYMBOL <- getSYMBOL(species)
}
## run the fisher tests
result_table <- runFisherTests(
genesets, foreground_ids, background_ids, SYMBOL)
## retrieve additional info about the GO categories
## and add to the results table
message("annotating the results")
go_info <- AnnotationDbi::select(
GO.db, keys=result_table$geneset_id,
columns=c("TERM", "ONTOLOGY"), keytype="GOID")
## ensure congruence
rownames(go_info) <- go_info$GOID
result_table$description <- go_info[result_table$geneset_id, "TERM"]
result_table$ontology <- go_info[result_table$geneset_id, "ONTOLOGY"]
first_cols <- c("geneset_id", "description", "ontology")
other_cols <- colnames(result_table)[!colnames(result_table) %in% first_cols]
result_table <- result_table[,c(first_cols, other_cols)]
return(result_table)
}
#' Run GO analysis on a multi-sample results table.
#' @param results A multi-sample results table
#' @param species Either "mm" or "hs"
#' @param background_ids A vector of gene ids. If NULL, taken from results.
#' @param gene_id_col The name of the column containing the gene identifiers.
#' @param gene_id_type Either "entrez" (default) or "ensembl".
#' @param sample_col The column in results that indicates the sample
#' @param p_col The column containing the p-values to use
#' @param p_threshold The significance threshold.
#'
#' @export
runGO.all <- function(results=NULL,
species=c("mm","hs"),
background_ids=NULL,
gene_id_col="gene_id",
gene_id_type=c("entrez","ensembl"),
sample_col="cluster",
p_col="p_val_adj",
p_threshold=0.1)
{
begin <- TRUE
species <- match.arg(species)
gene_id_type <- match.arg(gene_id_type)
genesets <- getGO(species)
SYMBOL <- getSYMBOL(species)
for(sample in unique(results[[sample_col]]))
{
message("working on ", sample_col,": ", sample)
data <- results[results[[sample_col]]==sample,]
if(is.null(background_ids))
{
background <-data[[gene_id_col]]
} else {
background <- background_ids
}
foreground <- data[[gene_id_col]][data[[p_col]] <= p_threshold]
if(length(foreground)==0) {
message("!! Skipping ",sample_col,": ",sample,", no significant genes !!")
next
}
tmp <- runGO(foreground_ids = foreground,
background_ids = background,
gene_id_type = gene_id_type,
species = species,
genesets,
SYMBOL)
tmp[[sample_col]] <- sample
if(begin) {
res <- tmp
begin <- FALSE
} else {
res <- rbind(res,tmp)
}
}
res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.