#' GO_visualization
#'
#' This function provides a dot plot chart of the GO enrichments after semantic similarity analysis and a recalculation
#' of enrichments based on new gene categories derived from the semantic similarity analysis.
#' @param cluster_enriched_df second object provided by the enrich_test _function
#' @param markers_df data frame resulting from FindAllMarkers function in the Seurat package, required if a clust_list is not provided
#' @param clust_list a list of gene vectors associated with each cluster.
#' @param numcats number of catergorical terms to plot on x-axis. Function is setup to select the the most frequent terms. Default is 15.
#' @param GOcats product of the msigdb_gsets funciton using GOBP. Can also be user supplied annotation in the same format.
#' @param org_db annotation package from Bioconductor of matching species to the species_x parameter.
#' @param chosen_cats User supplied catergorical terms to plot on x-axis. Default is null which lets the function choose the categories.
#' @param goterms a vector with GOIDS as object and the GO terms as names
#' @param species_x species names, default is "Homo sapiens", refer to ?msigdbr::msigdbr for available species
#' @import 'foreach'
#' @import 'ggplot2'
#' @import 'rrvgo'
#' @import 'LSAfun'
#' @import 'hypeR'
#' @import 'stringr'
#' @importFrom foreach %do%
#' @importFrom foreach %dopar%
#' @importFrom stats as.dist
#' @importFrom stats cutree
#' @importFrom stats hclust
#' @importFrom stringr str_squish
#' @return prints a plot and returns a list with three objects. The first object is a list of data frames generated by the semantic similarity analysis based on GO:BP enrichments, the second is a dataframe used for ggplot,
#' and the third object is a ggplot2 object that was plotted suitable for fine tuning the appearance of the plot.
#' @export
#' @examples
#' \dontrun{
#' GO_visualization(enrich_results$Enriched_df,markers_df=markers,goterms=goterms,numcats=10)
#' }
GO_visualization<-function(cluster_enriched_df,markers_df=NULL,clust_list=NULL,numcats=15,GOcats=NULL,chosen_cats=NULL,org_db="org.Hs.eg.db",goterms=NULL,species_x="Homo sapiens") {
i<-b<-NULL
if(is.null(goterms)) stop("A named GO annotation vector must be included with GO ids making up the vector and GO term names as the vector names")
if(is.null(markers_df) && is.null(clust_list)) stop("Need either a cluster gene list or a a Seurat markers data frame")
if(is.null(markers_df)) {clust_list} else {
x<-markers_df[markers_df$p_val_adj<0.1,]
clust_list<-foreach(i=1:length(levels(markers_df$cluster)),.packages="foreach")%do% {x[x$cluster==levels(x$cluster)[i],]$gene}
names(clust_list)<-levels(markers_df$cluster)
clust_list<-clust_list[lengths(clust_list)>0]
}
if(is.null(GOcats)) {GOcats <- msigdb_gsets(species=species_x, category="C5", subcategory="BP")}
cats<-GOcats$genesets
y<-substring(names(cats),6)
y<-breakdown(y)
y<-stringr::str_squish(y)
y<-goterms[y]
names(cats)<-y
Go.list.viz<-foreach(i=1:length(cluster_enriched_df),.packages="foreach") %do% {
print(i)
y<-rownames(cluster_enriched_df[[i]][[2]])
if(length(y)==0) {"No Enrichments"} else{
y<-unlist(substring(y,6))
y<-breakdown(y)
y<-stringr::str_squish(y)
y<-goterms[y]
scores<-cluster_enriched_df[[i]][[2]]$overlap
scores<-scores[!is.na(y)]
sizes<-cluster_enriched_df[[i]][[2]]$geneset
sizes<-sizes[!is.na(y)]
y<-y[!is.na(y)]
names(sizes)<-y
names(scores)<-y
if(length(y)<=1) {"No Enrichments"} else {
sm<-calculateSimMatrix(y,orgdb=org_db,ont="BP",method="Rel")
sizes<-sizes[intersect(rownames(sm),names(sizes))]
scores<-scores[intersect(rownames(sm),names(scores))]
o <- rev(order(scores, sizes, na.last = FALSE))
sm <- sm[o, o]
clustree <- cutree(hclust(as.dist(1 - sm)), h = .7)
clusterRep <- tapply(rownames(sm), clustree, function(x) x[which.max(scores[x])])
red<-data.frame(go = rownames(sm),
clustree = clustree,
parent = clusterRep[clustree],
parentSimScore = unlist(Map(seq_len(nrow(sm)),clusterRep[clustree],f = function(i, j) sm[i,j])),
score = scores[match(rownames(sm),names(scores))],
size = sizes[match(rownames(sm), names(sizes))],
term = getGoTerm(rownames(sm)),
parentTerm = getGoTerm(clusterRep[clustree]))
}
}
}
names(Go.list.viz)<-names(cluster_enriched_df)
df1<-foreach(i=1:length(Go.list.viz),.combine='rbind',.packages="foreach") %do% {
if(is.data.frame(Go.list.viz[[i]])){
x<-Go.list.viz[[i]]
x$cluster<-names(Go.list.viz)[i]
x} else {}
}
if(is.null(chosen_cats)){
x<-foreach(i=1:length(Go.list.viz),.combine='c',.packages="foreach") %do% {
suppressWarnings(if(!is.data.frame(Go.list.viz[[i]])) {}else{unique(Go.list.viz[[i]]$parentTerm)})
}
x<-sort(table(x),decreasing=T)
chosen_cats<-x[1:numcats]
}
chosen_cats<-chosen_cats[!(is.na(chosen_cats))]
x<-foreach(i=1:length(unique(df1$parentTerm)),.packages="foreach") %dopar% {
fin<-foreach(b=1:length(Go.list.viz),.combine='c',.packages="foreach") %do% {
g1<-Go.list.viz[[b]]
if(is.character(g1)) {} else{
g1<-g1[which(g1$parentTerm==unique(df1$parentTerm)[i]),]$go}
}
unique(fin)
}
cats2<-foreach(i=1:length(x),.packages="foreach") %do% {
x1<-x[[i]]
x2<-unlist(cats[x1])
unique(x2)
}
names(cats2)<-unique(df1$parentTerm)
G1<-foreach(i=1:length(clust_list),.packages=c("hypeR","foreach")) %do% {
g3<-hypeR(clust_list[[i]], cats2, background=23459, fdr=1)
Percentage<-g3$data$overlap/g3$data$geneset*100
FDR<-g3$data$fdr
data.frame(g3$data$label,Percentage,FDR)
}
names(G1)<-names(clust_list)
G2<-foreach(i=1:length(clust_list),.combine='rbind',.packages="foreach") %do% {
GO_Names<-G1[[i]]$g3.data.label
Percentage<-G1[[i]]$Percentage
Clusters<-as.character(rep(names(G1)[i],length(Percentage)))
FDR<-G1[[i]]$FDR
data.frame(GO_Names,Percentage,Clusters,FDR)
}
G2$Clusters<-factor(G2$Clusters,
levels=names(G1))
y<-names(chosen_cats)
G3<-foreach(i=1:length(y),.combine='rbind',.packages="foreach") %do% {
G2[G2$GO_Names==y[i],]
}
G3$GO_Names<-factor(G3$GO_Names,levels=y)
p<-ggplot(G3, aes(GO_Names,-log10(FDR)))
p<-p+theme_light()
p<-p+geom_jitter(height=0,width=0.3,aes_string(color="Clusters",size="Percentage"))
p<-p +theme(axis.text.x = element_text(angle=90,size=10),axis.title.x = element_blank())
p<-p+ylab("-log10(FDR)")
p<-p+geom_hline(yintercept=-log10(0.1),color="red",linetype="dashed")
p
fin<-list(df1,G2,p)
names(fin)<-c("GO_sem","GO_df","plot")
return(fin)
}
#' This function provides a dot plot chart of the GO enrichments of user defined Parent term categories from the results of GO_visualizatiohn function
#' @param GO_viz_results object provided by the Go_visulaization function
#' @param markers_df data frame resulting from FindAllMarkers function in the Seurat package, required if a clust_list is not provided
#' @param clust_list a list of gene vectors associated with each cluster.
#' @param chosen_cats User supplied catergorical terms to plot on x-axis. Default is null which lets the function choose the categories.
#' @param GOcats product of the msigdb_gsets funciton using GOBP. Can also be user supplied annotation in the same format.
#' @param goterms a vector with GOIDS as object and the GO terms as names
#' @param species_x species names, default is "Homo sapiens", refer to ?msigdbr::msigdbr for available species
#' @import 'foreach'
#' @import 'ggplot2'
#' @import 'hypeR'
#' @importFrom foreach %do%
#' @importFrom foreach %dopar%
#' @return prints a plot and returns a ggplot2 object suitable for fine tuning the appearance of the plot.
#' @export
#' @examples
#' \dontrun{
#' GO_viz_choose(go_vis_res1,chosen_cats=newcats,markers_df=markers)
#' }
GO_viz_choose<-function(GO_viz_results,chosen_cats,clust_list=NULL,markers_df=NULL,GOcats=NULL,species_x="Homo sapiens", goterms=NULL){
i<-NULL
if(is.null(goterms)) stop("A named GO annotation vector must be included with GO ids making up the vector and GO term names as the vector names")
if(is.null(markers_df) && is.null(clust_list)) stop("Need either a cluster gene list or a a Seurat markers data frame")
if(is.null(markers_df)) {clust_list} else {
x<-markers_df[markers_df$p_val_adj<0.1,]
clust_list<-foreach(i=1:length(levels(markers_df$cluster)),.packages="foreach")%do% {x[x$cluster==levels(x$cluster)[i],]$gene}
names(clust_list)<-levels(markers_df$cluster)
clust_list<-clust_list[lengths(clust_list)>0]
}
if(is.null(GOcats)) {GOcats <- msigdb_gsets(species=species_x, category="C5", subcategory="BP")}
cats<-GOcats$genesets
y<-unlist(substring(names(cats),6))
y<-breakdown(y)
y<-stringr::str_squish(y)
y<-goterms[y]
names(cats)<-y
res1<-GO_viz_results$GO_sem
g1<-foreach(i=1:length(unique(res1$parentTerm))) %dopar% {
res1[which(res1$parentTerm==unique(res1$parentTerm)[i]),]$go}
cats2<-foreach(i=1:length(g1)) %do% {
x1<-g1[[i]]
x2<-unlist(cats[x1])
unique(x2)
}
names(cats2)<-unique(res1$parentTerm)
G1<-foreach(i=1:length(clust_list),.packages="hypeR") %do% {
g3<-hypeR(clust_list[[i]], cats2, background=23459, fdr=1)
Percentage<-g3$data$overlap/g3$data$geneset*100
FDR<-g3$data$fdr
data.frame(g3$data$label,Percentage,FDR)
}
names(G1)<-names(clust_list)
G2<-foreach(i=1:length(clust_list),.combine='rbind') %do% {
GO_Names<-G1[[i]]$g3.data.label
Percentage<-G1[[i]]$Percentage
Clusters<-as.character(rep(names(G1)[i],length(Percentage)))
FDR<-G1[[i]]$FDR
data.frame(GO_Names,Percentage,Clusters,FDR)
}
G2$Clusters<-factor(G2$Clusters,
levels=names(G1))
G3<-foreach(i=1:length(chosen_cats),.combine='rbind') %do% {
G2[G2$GO_Names==chosen_cats[i],]
}
G3$GO_Names<-factor(G3$GO_Names,levels=chosen_cats)
p<-ggplot(G3, aes(GO_Names,-log10(FDR)))
p<-p+theme_light()
p<-p+geom_jitter(height=0,width=0.3,aes_string(color="Clusters",size="Percentage"))
p<-p +theme(axis.text.x = element_text(angle=90,size=10),axis.title.x = element_blank())
p<-p+ylab("-log10(FDR)")
p<-p+labs(size="Percentage")
p<-p+geom_hline(yintercept=-log10(0.1),color="red",linetype="dashed")
p
fin<-list(G2,p)
names(fin)<-c("GO_df","plot")
return(fin)
}
#' getGoTerm
#' Get the description of a GO term
#'
#' @param x GO terms
#' @import GO.db
#' @return the Term slot in GO.db::GOTERM[[x]]
getGoTerm <- function(x) {
sapply(x, function(x) GO.db::GOTERM[[x]]@Term)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.