#' GOfuncR
#' @title GOfuncR gene ontology enrichment testing
#' @description Perform Gene Ontology enrichment analysis of DMRs using \code{GOfuncR}.
#' @param sigRegions \code{GRanges} object of DMRs.
#' @param regions \code{GRanges} object of background regions.
#' @param n_randsets Number specifying the number of random sets for calculating the FWER.
#' @param upstream Numeric of how many bases to extend upstream from gene body for mapping DMRs to genes.
#' @param downstream Numeric of how many bases to extend downstream from gene body for mapping DMRs to genes.
#' @param annoDb Character specifying \code{OrgDb} annotation package for species of interest.
#' @param TxDb \code{TxDb} or \code{EnsDb} annotation package for genome of interest.
#' @param ... Additional arguments passed onto \code{GOfuncR::go_enrich()}.
#' @import GOfuncR
#' @import GenomicRanges
#' @rawNamespace import(ensembldb, except = c(select, filter))
#' @importFrom GenomicFeatures genes
#' @importFrom GenomeInfoDb keepStandardChromosomes as.data.frame seqlevelsStyle genome
#' @importFrom glue glue
#' @importFrom magrittr %>%
#' @importFrom dplyr as_tibble mutate distinct select filter
#' @importFrom tidyr unite
#' @importFrom plyranges count_overlaps
#' @importFrom purrr pluck
#' @export GOfuncR
#'
GOfuncR <- function(sigRegions = sigRegions,
regions = regions,
n_randsets = 1000,
upstream = 5000,
downstream = 1000,
annoDb = annoDb,
TxDb = TxDb,
...){
print(glue::glue("Selecting annotation databases..."))
if(is(TxDb, "TxDb")){
print(glue::glue("Obtaining UCSC gene annotations..."))
gene_coords <- TxDb %>%
GenomicFeatures::genes() %>%
GenomeInfoDb::keepStandardChromosomes(pruning.mode = "coarse") %>%
DMRichR::extend(upstream = upstream, downstream = downstream) %>%
dplyr::as_tibble() %>%
dplyr::mutate(gene_id = as.integer(.$gene_id)) %>%
dplyr::mutate(gene_id = GOfuncR:::entrez_to_symbol(.$gene_id, get(annoDb))[,2]) %>%
dplyr::distinct(gene_id, .keep_all = TRUE) %>%
dplyr::select(symbol = gene_id, seqnames, start, end) %>%
as.data.frame()
print(glue::glue("{nrow(gene_coords)} unique genes will be utilized for GOfuncR..."))
}else if(is(TxDb, "EnsDb")){
print(glue::glue("Obtaining ENSEMBL gene annotations..."))
genes <- TxDb %>%
ensembldb::genes(., filter = GeneBiotypeFilter("protein_coding")) %>%
GenomeInfoDb::keepStandardChromosomes(pruning.mode = "coarse")
GenomeInfoDb::seqlevelsStyle(genes) <- "UCSC"
gene_coords <- genes %>%
DMRichR::extend(upstream = upstream, downstream = downstream) %>%
dplyr::as_tibble() %>%
dplyr::distinct(gene_name, .keep_all = TRUE) %>%
dplyr::select(symbol = gene_name, seqnames, start, end) %>%
dplyr::filter(symbol != "") %>%
as.data.frame()
}
coord <- regions %>%
plyranges::mutate(candidate = plyranges::count_overlaps(., sigRegions)) %>%
plyranges::mutate(candidate = dplyr::case_when(candidate != 0 ~ 1,
candidate == 0 ~ 0)) %>%
GenomeInfoDb::as.data.frame() %>%
dplyr::select(seqnames, start, end, candidate) %>%
tidyr::unite(c("seqnames","start"), col = "seqstart", sep = ":") %>%
tidyr::unite(c("seqstart","end"), col = "coordinate", sep = "-")
coord$candidate %>%
table()
print(glue::glue("Performing enrichment testing..."))
GOfuncResults <- GOfuncR::go_enrich(genes = coord,
test = 'hyper',
n_randsets = n_randsets,
regions = TRUE,
gene_coords = gene_coords,
circ_chrom = TRUE, # Sample from the same chromosome
gene_len = TRUE,
orgDb = annoDb, # Blocking this makes it use human mappings, which can be better for non-model organisms if their org.dbs warn of old GO mappings
#txDb = TxDb, # Not used for custom gene coords
silent = TRUE,
...)
print(glue::glue("GOfuncR mapped {sigRegions} DMRs to within \\
{upstream} Kb upstream and {downstream} Kb downstream of {foregroundGenes} genes \\
and tested them against a universe of {regions} background regions mapping to within \\
{upstream} Kb upstream and {downstream} Kb downstream of {backgroundGenes} genes \\
from a total of {geneAnnotations} possible gene annotations in {genome}",
sigRegions = length(sigRegions),
regions = length(regions),
foregroundGenes = GOfuncResults %>%
purrr::pluck("genes") %>%
dplyr::filter(score == 1) %>%
nrow(),
backgroundGenes = GOfuncResults %>%
purrr::pluck("genes") %>%
nrow(),
geneAnnotations = nrow(gene_coords),
genome = TxDb %>%
GenomeInfoDb::genome() %>%
unique()
)
)
return(GOfuncResults)
}
#' slimGO
#' @title Slim GO results
#' @description Slims top significant Gene Ontology terms from \code{enrichR}, \code{rGREAT},
#' and \code{GOfuncR} using \code{rrvgo}.
#' @param GO A dataframe or list of dataframes returned from \code{enrichR::enrichr()},
#' \code{rGREAT::getEnrichmentTables()},or \code{GOfuncR::go_enrich()}.
#' @param tool A character vector of the name of the database (enrichR, rGREAT, or GOfuncR).
#' @param annoDb Character specifying \code{OrgDb} annotation package for species of interest.
#' @param plots Logical indicating if scatter and treemap plots should be generated.
#' @param threshold Numeric indicating similarity threshold (0-1) for \code{rrvgo::reduceSimMatrix()}.
#' 0.9 is large, 0.7 is medium, 0.5 is small, and 0.4 is tiny. Default is 0.7.
#' @return A \code{tibble} of top distinct and significant GO terms from an \code{enrichR},
#' \code{rGREAT} or \code{GOfuncR} analysis.
#' @importFrom rrvgo calculateSimMatrix reduceSimMatrix scatterPlot treemapPlot
#' @importFrom magrittr %>%
#' @importFrom dplyr filter as_tibble mutate select
#' @importFrom data.table rbindlist
#' @importFrom glue glue
#' @importFrom purrr set_names map_dfr
#' @export slimGO
#'
slimGO <- function(GO = GO,
tool = c("enrichR", "rGREAT", "GOfuncR"),
annoDb = annoDb,
plots = FALSE,
threshold = 0.7){
if(tool == "enrichR"){
GO <- GO %>%
data.table::rbindlist(idcol = "Gene Ontology") %>%
dplyr::as_tibble() %>%
dplyr::filter(`Gene Ontology` %in% c("GO_Biological_Process_2018",
"GO_Cellular_Component_2018",
"GO_Molecular_Function_2018")) %>%
dplyr::mutate(Term = stringr::str_extract(.$Term, "\\(GO.*")) %>%
dplyr::mutate(Term = stringr::str_replace_all(.$Term, "[//(//)]",""), "") %>%
dplyr::mutate("Gene Ontology" = dplyr::case_when(`Gene Ontology` == "GO_Biological_Process_2018" ~ "BP",
`Gene Ontology` == "GO_Cellular_Component_2018" ~ "CC",
`Gene Ontology` == "GO_Molecular_Function_2018" ~ "MF")) %>%
dplyr::select(p = P.value, go = Term, "Gene Ontology") %>%
dplyr::filter(p < 0.05)
}else if(tool == "rGREAT"){
GO <- GO %>%
data.table::rbindlist(idcol = "Gene Ontology") %>%
dplyr::as_tibble() %>%
dplyr::mutate("Gene Ontology" = dplyr::case_when(`Gene Ontology` == "GO Biological Process" ~ "BP",
`Gene Ontology` == "GO Cellular Component" ~ "CC",
`Gene Ontology` == "GO Molecular Function" ~ "MF")) %>%
dplyr::select(p = Hyper_Raw_PValue, go = ID, "Gene Ontology") %>%
dplyr::filter(p < 0.05)
}else if(tool == "GOfuncR"){
GO <- GO$results %>%
dplyr::as_tibble() %>%
dplyr::mutate("Gene Ontology" = dplyr::case_when(ontology == "biological_process" ~ "BP",
ontology == "cellular_component" ~ "CC",
ontology == "molecular_function" ~ "MF")) %>%
dplyr::select(p = raw_p_overrep, go = node_id, "Gene Ontology") %>%
dplyr::filter(p < 0.05)
}else{
stop(glue("{tool} is not supported, please choose either enrichR, rGREAT, or GOfuncR [Case Sensitive]"))
}
print(glue::glue("Submiting results from {tool} to rrvgo..."))
.slim <- function(GO = GO,
ont = ont,
annoDb = annoDb,
plots = plots,
tool = tool,
threshold = threshold){
GO <- GO %>%
dplyr::filter(`Gene Ontology` == ont)
print(glue::glue("rrvgo is now slimming {ont} GO terms from {tool}"))
simMatrix <- rrvgo::calculateSimMatrix(GO$go,
orgdb = annoDb,
ont = ont,
method = "Rel")
reducedTerms <- rrvgo::reduceSimMatrix(simMatrix,
setNames(-log10(GO$p), GO$go),
threshold = threshold,
orgdb = annoDb)
if(plots == TRUE){
p <- rrvgo::scatterPlot(simMatrix, reducedTerms) # Doesn't plot otherwise
plot(p)
rrvgo::treemapPlot(reducedTerms)
}
print(glue::glue("There are {max(reducedTerms$cluster)} clusters in your GO {ont} terms from {tool}"))
reducedTerms %>%
dplyr::as_tibble() %>%
return()
}
slimmed <- GO %>%
dplyr::select(`Gene Ontology`) %>%
table() %>%
names() %>%
purrr::set_names() %>%
purrr::map_dfr(~.slim(GO = GO,
ont = .,
annoDb = annoDb,
tool = tool,
plots = plots,
threshold = threshold),
.id = "Gene Ontology") %>%
dplyr::inner_join(GO) %>%
dplyr::filter(term == as.character(parentTerm)) %>%
dplyr::mutate("-log10.p-value" = -log10(p)) %>%
dplyr::mutate("Gene Ontology" = dplyr::recode_factor(`Gene Ontology`,
"BP" = "Biological Process",
"CC" = "Cellular Component",
"MF" = "Molecular Function")) %>%
dplyr::arrange(dplyr::desc(`-log10.p-value`)) %>%
dplyr::select("Gene Ontology", Term = term, "-log10.p-value") %>%
return()
}
#' GOplot
#' @title Plot slimmed GO results
#' @description Plots top significant slimmed Gene Ontology terms from from \code{enrichR},
#' \code{rGREAT}, and \code{GOfuncR}.
#' @param slimmedGO A \code{tibble} from \code{DMRichR::rrvgo()} or \code{DMRichR::REVIGO()}.
#' @return A \code{ggplot} object of top significant GO and pathway terms from an \code{enrichR},
#' \code{GOfuncR}, or \code{rGREAT} analysis that can be viewed by calling it, saved with
#' \code{ggplot2::ggsave()}, or further modified by adding \code{ggplot2} syntax.
#' @import ggplot2
#' @importFrom magrittr %>%
#' @importFrom dplyr mutate select group_by slice ungroup
#' @importFrom forcats fct_rev
#' @importFrom ggsci scale_fill_d3
#' @importFrom Hmisc capitalize
#' @importFrom stringr str_trim str_trunc
#' @importFrom glue glue
#' @export GOplot
#'
GOplot <- function(slimmedGO = slimmedGO){
slimmedGO %>%
dplyr::group_by(`Gene Ontology`) %>%
dplyr::slice(1:7) %>%
dplyr::ungroup() %>%
dplyr::mutate(Term = stringr::str_trim(.$Term)) %>%
dplyr::mutate(Term = Hmisc::capitalize(.$Term)) %>%
dplyr::mutate(Term = stringr::str_wrap(.$Term, 50)) %>%
#dplyr::mutate(Term = stringr::str_trunc(.$Term, 45, side = "right")) %>%
dplyr::mutate(Term = factor(.$Term, levels = unique(.$Term[order(forcats::fct_rev(.$`Gene Ontology`), .$`-log10.p-value`)]))) %>%
ggplot2::ggplot(ggplot2::aes(x = Term,
y = `-log10.p-value`,
fill = `Gene Ontology`,
group = `Gene Ontology`)) +
ggplot2::geom_bar(stat = "identity",
position = ggplot2::position_dodge(),
color = "Black") +
ggplot2::coord_flip() +
ggplot2::scale_y_continuous(expand = c(0, 0, 0.1, 0)) +
ggsci::scale_fill_d3() +
ggplot2::labs(y = expression("-log"[10](p))) +
ggplot2::theme_classic() +
ggplot2::theme(axis.text = ggplot2::element_text(size = 14),
axis.title.y = ggplot2::element_blank(),
legend.text = ggplot2::element_text(size = 14),
legend.title = ggplot2::element_text(size = 14),
strip.text = ggplot2::element_text(size = 14)) %>%
return()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.