R/GOenrichment.R

#' @title DEGsEnrich
#' @description GO enrichment analysis for differentially expressed genes
#' @param obj DEG matrix
#' @param fc cutoff of fold change of gene expression to filter genes
#' @param adj.p cutoff of adjusted p value to filter genes
#' @param min.percent minimum percent of cells in any cluster expressed a gene
#' @param pval use a specified p value to filter genes
#' @param term.key term key of GO
#' @examples
#' \dontrun{
#' DEGsEnrich(obj)
#' }
#' @export
#' @import enrichR
#' @import ggthemes
DEGsEnrich <- function(obj, fc=log(2), adj.p=0.05, min.percent=0.1, pval=NULL, term.key="GO_Biological_Process_2018"){
  clusters <- unique(obj$cluster)
  enrich.result <- lapply(clusters, function(x){
    this.data <- obj[ obj$cluster == x,]
    enrichR(this.data, fc=fc, adj.p=adj.p, min.percent=min.percent,pval=pval, term.key=term.key)
  })
  names(enrich.result) <- clusters
  return(enrich.result)
}

# GO enrich
enrichR <- function(obj, fc=log(2), adj.p=0.05, min.percent=0.1, pval=NULL, term.key="GO_Biological_Process_2018"){
  if(! ("gene" %in% names(obj))){
    obj$gene <- row.names(obj)
  }
  if(!is.null(pval)){
    up.idx <- which(obj$avg_logFC >= fc & (obj$pct.1 >= min.percent | obj$pct.2 >= min.percent) & obj$p_val <= pval)
    down.idx <- which(obj$avg_logFC <= -fc & (obj$pct.1 >= min.percent | obj$pct.2 >= min.percent) & obj$p_val <= pval)
    total.idx <- c(up.idx, down.idx)
  } else {
    up.idx <- which(obj$avg_logFC >= fc & (obj$pct.1 >= min.percent | obj$pct.2 >= min.percent) & obj$p_val_adj <= adj.p)
    down.idx <- which(obj$avg_logFC <= -fc & (obj$pct.1 >= min.percent | obj$pct.2 >= min.percent) & obj$p_val_adj <= adj.p)
    total.idx <- c(up.idx, down.idx)
  }

  gene.up <- obj$gene[up.idx]
  gene.down <- obj$gene[down.idx]
  gene.total <- obj$gene[total.idx]

  enrich.up <- NULL
  enrich.down <- NULL
  enrich.total <- NULL

  if(length(gene.up) > 0){
    enrich.up <- enrichr(gene.up, term.key)
  }
  if(length(gene.down) > 0){
    enrich.down <- enrichr(gene.down, term.key)
  }
  if(length(gene.total) > 0){
    enrich.total <- enrichr(gene.total, term.key)
  }

  list("Up"=enrich.up, "Down"=enrich.down, "Total"=enrich.total, "Gene.up"=gene.up, "Gene.down"=gene.down)
}

#' @title viewGO
#' @description Visualization of GO enrichment for each cluster
#' @param obj enrichment obtained from function DEGs.enrich
#' @param tag stamp for output file name
#' @param adjust.Pcut cutoff of adjusted p value to filter genes
#' @param term.key term key of GO
#' @param deg.num minimum number of DEGs in each enriched term
#' @examples
#' \dontrun{
#' viewGO(obj, tag="test")
#' }
#' @export
#' @import ggplot2
viewGO <- function(obj, tag, iterm.num=10, deg.num=3, term.key="GO_Biological_Process_2018", adjust.Pcut=0.05){
  for(i in 1:length(obj)){
    gps <- obj[[i]]
    gpr.num <- names(obj)[i]
    # gather data for each set of GO enrichment
    enrich.terms <- list("Term"=NULL, "Overlap"=NULL, "Genes"=NULL, "UpDown"=NULL, "AdjustPvalue"=NULL)
    enrich.terms.names <- names(enrich.terms)
    for(type in c("Up", "Down", "Total")){
      if(!is.null(gps[[type]])){
        this.gps <- gps[[type]][[term.key]]
        # sort by adjusted p values
        this.gps <- this.gps[order(this.gps[["Adjusted.P.value"]], decreasing=T), ]
        this.gps <- tail(this.gps, n=iterm.num)
        enrich.terms[["Term"]] <- c(enrich.terms[["Term"]], this.gps[["Term"]])
        enrich.terms[["Overlap"]] <- c(enrich.terms[["Overlap"]], this.gps[["Overlap"]])
        enrich.terms[["Genes"]] <- c(enrich.terms[["Genes"]], this.gps[["Genes"]])
        enrich.terms[["UpDown"]] <- c(enrich.terms[["UpDown"]], rep(type, length(this.gps[["Term"]])))
        enrich.terms[["AdjustPvalue"]] <- c(enrich.terms[["AdjustPvalue"]], -log10(this.gps[["Adjusted.P.value"]]))
      }
    }

    # data frame preparation
    term.num <- length(enrich.terms$Term)
    enrich.terms <- data.frame(matrix(unlist(enrich.terms), nrow=term.num, byrow=F),stringsAsFactors=FALSE)
    names(enrich.terms) <- enrich.terms.names
    enrich.terms$AdjustPvalue <- as.numeric(enrich.terms$AdjustPvalue)
    enrich.terms$Term <- gsub(" \\(.*\\)", "", enrich.terms$Term)
    DEG.num <- sapply(enrich.terms$Overlap, function(x){
      unlist(strsplit(x = x, split = "/", fixed = T))[1]
    })
    enrich.terms <- enrich.terms[DEG.num >= deg.num, ]
    write.table(enrich.terms, file=paste0("./GOenrich.", tag, "_", gpr.num, ".xls"), quote=F, sep="\t", col.names=T, row.names=F)
    enrich.terms <- enrich.terms[ enrich.terms$AdjustPvalue >= abs(log10(adjust.Pcut)), ]
    p <- ggplot(enrich.terms, aes(x=factor(Term, levels = unique(enrich.terms$Term), ordered = T), y=AdjustPvalue)) +
      geom_bar(aes(fill=UpDown), stat="identity") +
      geom_hline(aes(yintercept = 2), linetype="dashed") +
      scale_fill_manual(values=c("Up"="red", "Down"="blue", "Total"="brown"), guide=F) +
      ylab("Enrichment (log10 adjusted P)") + xlab("") +
      theme_Publication() +
      coord_flip()
    if(length(unique(enrich.terms$UpDown)) > 1){
      p <- p + facet_wrap(~factor(UpDown, levels=c("Up", "Down","Total")), scales = "free", ncol=1)
    }
    p
    ggsave(paste0("./GOenrich.", tag, "_", gpr.num, ".pdf"), width=10, height=6, onefile=F)
  }
}

#' @title viewMergeGO
#' @description Merged visualization of GO enrichment for all cluster
#' @param obj enrichment obtained from function DEGs.enrich
#' @param label stamp for output file name
#' @param topN top terms to show
#' @param adjP cutoff of adjusted p value to select terms
#' @param term.key term key of GO
#' @param deg.num minimum number of DEGs in each enriched term
#' @examples
#' \dontrun{
#' viewMergeGO(obj, label="test")
#' }
#' @export
#' @import ggplot2
viewMergeGO <- function(obj, label, topN = 5, adjP = 0.05, deg.num=3, term.key="GO_Biological_Process_2018", groupOrder=names(obj)){
  topTerms <- list()
  for(i in names(obj)){
    for(j in c("Up", "Down")){
      if(!is.null(obj[[i]][[j]])){
        this.enrich <- obj[[i]][[j]][[term.key]]
        DEG.num <- sapply(this.enrich$Overlap, function(x){
          unlist(strsplit(x = x, split = "/", fixed = T))[1]
        })
        this.enrich <- this.enrich[DEG.num >= deg.num, ]
        this.enrich <- this.enrich[this.enrich$Adjusted.P.value <= adjP,]
        topTerms[[j]] <- unique(c(topTerms[[j]], tail(this.enrich$Term[ order(this.enrich$Adjusted.P.value, decreasing = T)], n=topN)))
      }
    }
  }

  mergedTerms <- c()
  for(i in names(obj)){
    for(j in c("Up", "Down")){
      if(!is.null(obj[[i]][[j]])){
        this.enrich <- obj[[i]][[j]][[term.key]]
        this.idx <- which(this.enrich$Term %in% topTerms[[j]])
        mergedTerms <- rbind(mergedTerms, data.frame(
          "term" = gsub(" \\(.*\\)", "", this.enrich$Term[this.idx]),
          "score" = this.enrich$Combined.Score[this.idx],
          "adjP" = -log10(this.enrich$Adjusted.P.value[this.idx]),
          "cluster" = rep(i, length(this.idx)),
          "change" = rep(j, length(this.idx))
        ))
      }
    }
  }

  pMaxCut <- 5
  mergedTerms$adjP[ mergedTerms$adjP >= pMaxCut] <- pMaxCut

  for( i in c("Up", "Down")){
    ggplot(subset(mergedTerms, change==i), aes(x=factor(cluster,levels=groupOrder), y=term))+
      geom_point(aes(color=adjP, size=score), alpha=0.5) +
      scale_colour_gradientn(colors=c("white", "orange", "red")) +
      xlab("") + ylab("") +
      theme(axis.text=element_text(size=14),
            axis.text.x=element_text(angle=45, hjust=1, vjust=1))
    ggsave(paste0("./", label, ".", i, "_GO_merged.pdf"), width=15)
  }
}

# reference: https://rpubs.com/Koundy/71792
theme_Publication <- function(base_size=14, base_family="sans") {
  (theme_foundation(base_size=base_size, base_family=base_family)
   + theme(plot.title = element_text( face = "bold", size = rel(1.2), hjust = 0.5),
           text = element_text(),
           panel.background = element_rect(colour = NA),
           plot.background = element_rect(colour = NA),
           panel.border = element_rect(colour = NA),
           axis.title = element_text(face = "bold",size = rel(1)),
           axis.title.y = element_text(angle=90,vjust =2),
           axis.title.x = element_text(vjust = -0.2),
           axis.text = element_text(),
           axis.line = element_line(colour="black"),
           axis.ticks = element_line(),
           #panel.grid.major = element_line(colour="#f0f0f0"),
           panel.grid.major = element_blank(),
           panel.grid.minor = element_blank(),
           legend.key = element_rect(colour = NA),
           legend.position = "top",
           #legend.direction = "horizontal",
           legend.key.size= unit(0.2, "cm"),
           legend.margin = margin(0),
           legend.title = element_text(face="italic"),
           plot.margin=unit(c(10,5,5,5),"mm"),
           strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
           strip.text = element_text(face="bold")
   ))
}
zhupinglab/Aodav documentation built on June 10, 2019, 2:31 a.m.