Nothing
globalVariables(c('Description', 'group', 'pvalue',"geneID"))
#' Perform GO/KEGG Enrichment Analysis for Multiple Clusters
#' @author JunZhang
#'
#' This function performs Gene Ontology (GO) or KEGG enrichment analysis, or custom gene set enrichment,
#' on clustered genes. It supports multiple clusters, incorporating cluster-specific results into its analysis.
#'
#' @param object An object containing clustering results.
#' This is clusterData object. Alternatively, it can be a `CellDataSet` object, in which case the function can also visualize pseudotime data.
#' @param type Character. The type of enrichment analysis to perform. Options include:
#' - `"BP"`: Biological Process (GO)
#' - `"MF"`: Molecular Function (GO)
#' - `"CC"`: Cellular Component (GO)
#' - `"KEGG"`: KEGG Pathway analysis
#' - `"ownSet"`: Custom gene set enrichment, requiring `TERM2GENE` and optionally `TERM2NAME`.
#' @param TERM2GENE A data frame containing mappings of terms to genes. Required when `type = "ownSet"`.
#' This must be a two-column data frame, where the first column is the term and the second column is the gene.
#' @param TERM2NAME A data frame containing term-to-name mappings. Optional when `type = "ownSet"`.
#' This must also be a two-column data frame, where the first column is the term and the second column is the name.
#' @param OrgDb An organism database object (e.g., `org.Hs.eg.db` for human or `org.Mm.eg.db` for mouse),
#' used for GO or KEGG enrichment analysis.
#' @param id.trans Logical. Whether to perform gene ID transformation. Default is `TRUE`.
#' @param fromType Character. The type of the input gene IDs (e.g., `"SYMBOL"`, `"ENSEMBL"`). Default is `"SYMBOL"`.
#' @param toType Character. The target ID type for transformation using `clusterProfiler::bitr`
#' (e.g., `"ENTREZID"`). Default is `"ENTREZID"`.
#' @param readable Logical. Whether to convert the enrichment result IDs back to a readable format (e.g., SYMBOL).
#' Only applicable for GO and KEGG analysis. Default is `TRUE`.
#' @param organism Character. The KEGG organism code (e.g., `"hsa"` for human, `"mmu"` for mouse). Required when
#' performing KEGG enrichment. Default is `"hsa"`.
#' @param pvalueCutoff Numeric. The p-value cutoff for enriched terms to be included in the results. Default is `0.05`.
#' @param topn Integer or vector. The number of top enrichment results to extract. If a single value, it is applied
#' to all clusters. Otherwise, it should match the number of clusters. Default is `5`.
#' @param seed Numeric. Seed for random operations to ensure reproducibility. Default is `5201314`.
#' @param add.gene Logical. Whether to include the list of genes associated with each enriched term in the results.
#' Default is `FALSE`.
#' @param heatmap.type Character. The type of heatmap visualization to use when input data is a `CellDataSet` object.
#' Options include:
#' - `"plot_pseudotime_heatmap2"`
#' - `"plot_genes_branched_heatmap2"`
#' - `"plot_multiple_branches_heatmap2"`
#' @param ... Additional arguments passed to plot_pseudotime_heatmap2/plot_genes_branched_heatmap2/plot_multiple_branches_heatmap2 functions.
#'
#'
#' @return a data.frame.
#' @export
#'
enrichCluster <- function(object = NULL,
type = c("BP","MF","CC","KEGG","ownSet"),
TERM2GENE = NULL,
TERM2NAME = NULL,
OrgDb = NULL,
id.trans = TRUE,
fromType = "SYMBOL",
toType = c("ENTREZID"),
readable = TRUE,
# for kegg mouse(mmu)
organism = "hsa",
pvalueCutoff = 0.05,
topn = 5,
seed = 5201314,
add.gene = FALSE,
heatmap.type = c("plot_pseudotime_heatmap2",
"plot_genes_branched_heatmap2",
"plot_multiple_branches_heatmap2"),
...){
type <- match.arg(type,c("BP","MF","CC","KEGG","ownSet"))
heatmap.type <- match.arg(heatmap.type,c("plot_pseudotime_heatmap2",
"plot_genes_branched_heatmap2",
"plot_multiple_branches_heatmap2"))
# check datatype
cls <- class(object)
# check heatmap.type
if(cls == "CellDataSet"){
if(heatmap.type %in% c("plot_pseudotime_heatmap2", "plot_genes_branched_heatmap2")){
extra_params <- list(cds_subset = object,...)
object <- do.call(plot_pseudotime_heatmap2,extra_params)
}else{
extra_params <- list(cds = object,...)
object <- do.call(plot_pseudotime_heatmap2,extra_params)
}
}
# get data
enrich.data <- object$wide.res
# check geneType
if(startsWith(object$geneType,"nounique")){
split = unlist(strsplit(object$geneType,split = "\\|"))[2]
enrich.data$gene <- sapply(strsplit(as.character(enrich.data$gene),split = split),"[",1)
}
# loop for enrich
purrr::map_df(1:length(unique(enrich.data$cluster)),function(x){
# filter
tmp <- enrich.data %>%
dplyr::filter(cluster == unique(enrich.data$cluster)[x])
# =============================================
# enrich
# entriz id transformation
id.trans <- ifelse(type == "ownSet",FALSE,TRUE)
if(id.trans == TRUE){
gene.ent <- clusterProfiler::bitr(tmp$gene,
fromType = fromType,
toType = toType,
OrgDb = OrgDb)
tartget.gene = unlist(gene.ent[,c(toType)])
}else{
tartget.gene = tmp$gene
}
# GO enrich
if(type %in% c("BP","MF","CC")){
set.seed(seed)
ego <- clusterProfiler::enrichGO(gene = tartget.gene,
keyType = toType,
OrgDb = OrgDb,
ont = type,
pAdjustMethod = "BH",
pvalueCutoff = 1,
qvalueCutoff = 1,
readable = readable)
}else if(type == "ownSet"){
set.seed(seed)
ego <- clusterProfiler::enricher(gene = tartget.gene,
TERM2GENE = TERM2GENE,
TERM2NAME = TERM2NAME,
pvalueCutoff = 1,
pAdjustMethod = "BH",
qvalueCutoff = 1)
}else{
set.seed(seed)
ego <- clusterProfiler::enrichKEGG(gene = tartget.gene,
keyType = "kegg",
organism = organism,
universe = NULL,
pvalueCutoff = 1,
pAdjustMethod = "BH",
qvalueCutoff = 1)
# transform gene id
if(readable == TRUE){
ego <- clusterProfiler::setReadable(ego,OrgDb = OrgDb,keyType = toType)
}else{
ego <- ego
}
}
# to data.frame
enrich_res <- data.frame(ego)
if(nrow(enrich_res) > 0){
df <- data.frame(ego) %>%
dplyr::filter(pvalue < pvalueCutoff) %>%
dplyr::mutate(group = paste("C",unique(enrich.data$cluster)[x],sep = '')) %>%
dplyr::arrange(pvalue)
# whether save all res
if(!is.null(topn)){
# top n selection
if(length(topn) == 1){
top = topn
}else{
top = topn[x]
}
df <- df %>%
# dplyr::select(group,Description,pvalue) %>%
dplyr::slice_head(n = top)
# add gene enrich ratio
df1 <- purrr::map_df(1:nrow(df),function(x){
tmp1 <- df[x,]
size = unlist(strsplit(as.character(tmp1$GeneRatio),split = '/'))
tmp1$ratio <- (as.numeric(size[1])/as.numeric(size[2]))*100
return(tmp1)
})
# select columns
if(add.gene == TRUE){
df2 <- df1 %>% dplyr::select(group,Description,pvalue,ratio,geneID)
}else{
df2 <- df1 %>% dplyr::select(group,Description,pvalue,ratio)
}
}else{
df2 <- df
}
# remove no pass threshold terms
df2 <- df2 %>% stats::na.omit()
# results
return(df2)
}else{
return(NULL)
}
})
}
#' This is a test data for this package
#' test data describtion
#'
#' @name net
#' @docType data
#' @author Junjun Lao
"net"
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.