R/module_enrichGO_utils.R

Defines functions wego_plot do_enrichGO

Documented in do_enrichGO wego_plot

#' Do enrichGO
#' 
#' This is a wrapper function to do \code{\link[clusterProfiler]{enrichGO}} on 
#' all ontology terms and convert the gene IDs to gene symbles.
#' 
#' @param ... Arguments pass to \code{\link[clusterProfiler]{enrichGO}} and 
#'  \code{\link[clusterProfiler]{setReadable}}
#'
#' @return A list of enrichGO objects
do_enrichGO <- function(...){
  go_obj <- list()
  go_obj_2 <- list()
  dots <- list(...)
  for(ont in c("BP", "CC", "MF")){
    SigBio:::sigbio_message(paste0("Doing enrichGO for: ", ont))
    go_obj[[ont]] <- clusterProfiler::enrichGO(keyType = "ENTREZID",ont = ont, ...)
    #print(go_obj[ont])
    SigBio:::sigbio_message("Converting entrezids to readable gene ids (gene symbles) ")
    go_obj_2[[ont]] <- clusterProfiler::setReadable(go_obj[[ont]], OrgDb = dots$OrgDb, keyType = "ENTREZID")
  }
  return(list("go_bp" = go_obj_2$BP,
              "go_cc" = go_obj_2$CC,
              "go_mf" = go_obj_2$MF))
}

# the arguments are result data frames returned by clusterProfiler::enrichGO objects
# it retuns a ploting object

#' Plot WEGO
#' 
#' This make a WEGO alike plot.
#' 
#' @param BP An enrichGO object for Biological Process.
#' @param CC An enrichGO object for Cellular Components.
#' @param MF An enrichGO object for Molecular Functions.
#' 
#' @return A WEGO alike ggplot plot object.
#' 
#' @import dplyr
#' @import ggplot2
#' @import forcats
#'

wego_plot <- function(BP=go_table, CC=go_table, MF=go_table){
  
  # add domain information to an extra column to each dataframe
  BP_2 <- cbind(BP, c(rep("Biological Process", nrow(BP))))
  colnames(BP_2)[10] <- "Domain"
  CC_2 <- cbind(CC, c(rep("Cellular Components", nrow(CC))))
  colnames(CC_2)[10] <- "Domain"
  MF_2 <- cbind(MF, c(rep("Molecular Functions", nrow(MF))))
  colnames(MF_2)[10] <- "Domain"
  
  # merge all GO data in a single obejct ----
  all_go_data <- rbind(BP_2, CC_2, MF_2)
  
  # make wego style output ----
  wego_style_go <- all_go_data %>% 
    dplyr::select(ID, Domain, Count, GeneRatio, Description)
  # get total number of genes 
  total <-as.numeric(gsub("[1-9]/", "", wego_style_go[1,4]))
  wego_style_go <- cbind(wego_style_go[-4], (wego_style_go$Count/total)*100)
  colnames(wego_style_go)[3] <- "Number of genes"
  colnames(wego_style_go)[5] <- "Percentage of genes"
  
  # Reference blog: https://sarahpenir.github.io/r/WEGO/
  
  # Preparing the table for plotting ----
  wego_style_go_2 <- wego_style_go %>%
    ## Group the entries by "Domain"
    group_by(Domain) %>%
    ## Take the top 5 entries per "Domain" according to "Percentage of genes"
    top_n(5, `Percentage of genes`) %>% 
    ## Ungroup the entries
    ungroup() %>% 
    ## Arrange the entries by "Domain", then by "Percentage of genes"
    arrange(Domain, `Percentage of genes`) %>% 
    ## Take note of the arrangement by creating a "Position" column
    mutate(Position = n():1)
  
  wego_style_go_2 <- wego_style_go_2[, c(1,2,3,5,4,6)]
  
  # making wego alike plot ----
  
  ## Calculate the normalizer to make "Number of genes" proportional to 
  ## "Percentage of genes" for the plotting of the second y-axis
  normalizer <- max(wego_style_go_2$`Number of genes`)/max(wego_style_go_2$`Percentage of genes`)
  
  ## Plot "Description" in the x-axis following the order stated in the "Position" column
  ## vs "Percentage of genes" in the first y-axis
  wego_alike_plot <- ggplot(data = wego_style_go_2, aes(x = forcats::fct_reorder(Description, desc(Position)), y = `Percentage of genes`, fill = Domain)) +
    ## Plot "Description" in the x-axis following the order stated in the "Position" column
    ## vs normalized "Number of genes" in the second y-axis
    geom_col(data = wego_style_go_2, aes(x = forcats::fct_reorder(Description, desc(Position)), y = `Number of genes`/normalizer)) +
    ## Add a second y-axis based on the transformation of "Percentage of genes" to "Number of genes".
    ## Notice that the transformation undoes the normalization for the earlier geom_col.
    scale_y_continuous(sec.axis = sec_axis(trans = ~.*normalizer, name = "Number of genes")) +
    ## Modify the aesthetic of the theme
    theme(text = element_text(size=13),
          axis.text = element_text(size = 13),
          axis.text.x = element_text(angle = 60, hjust = 1), 
          axis.title.y = element_text(size = 13),
          legend.text = element_text(size = 13), 
          legend.title = element_text(size = 13),
          legend.key.size =  unit(0.2, "in"), 
          plot.title = element_text(size = 13, hjust = 0.5),
          plot.margin = unit(c(1,1,1,5), "cm"),
          panel.background = element_rect(fill = "white", colour = "grey50")) +
    ## Add a title to the plot
    labs(x = NULL, title = "Gene Ontology")
  
  return(wego_alike_plot)
}
sk-sahu/sig-bio-shiny documentation built on June 12, 2020, 8:06 p.m.