R/ggenrichTree_pre.R

Defines functions ggenrichTree_pre

Documented in ggenrichTree_pre

#'ggenrichTree_pre function
#'@export
#'@description
#'ggenrichTree_pre function is used to draw the similarity of terms in the result of enrichment analysis.
#'@param inputfile the input file,(the ggenrichData function param: type: cluster; format:R_ClusterProfiler or DAVID)
#'@param type the way to get the results of the GO analysis.This param should correspond to the ggenrichData function.(DAVID,ClusterProfiler)
#'@param species default human. (human,mouse)
#'@param ont enrichment analysis parameter.default BP. (BP,CC,MF)
#'@importFrom utils write.table
#'@importFrom stats as.dist
#'@importFrom graphics plot
#'@return a tree plot.
#'@examples
#input <- ggenrichData(system.file("extdata", "easy_input.csv", package = "ggenrich",mustWork = TRUE),format="DAVID")
#result <- ggenrichTree_pre(input,type="DAVID",species="human")
#inputfile <- ggenrichData(system.file("extdata", c("enrichGO1.csv","enrichGO2.csv","enrichGO3.csv"), package = "ggenrich",mustWork = TRUE),format="R_ClusterProfiler",type="cluster",GENE_MAX=400,GENE_MIN=200)
#result1 <- ggenrichTree_pre(inputfile,type="ClusterProfiler",species="mouse")






ggenrichTree_pre <- function(inputfile,type,species="human",ont="BP"){
  if(type == "DAVID"){
    get_terms_info <- inputfile$Annotation_information
    unique_terms_info <- unique(get_terms_info)
    terms <- sort(unique_terms_info)
    get_gene_identifier <- inputfile$Gene_identifier
    unique_gene_identifier <- unique(get_gene_identifier)
    genes <- sort(unique_gene_identifier)
    nterms <- length(terms)
    ngenes <- length(genes)
    kappa <- matrix(0,nterms,nterms)
    rownames(kappa) <- terms
    colnames(kappa) <- terms
    for (i in 1:nterms){
      for (j in 1:nterms){
        if (i==j){
          kappa[i,j] <- 1
        }else if(i<j){
          termA <- terms[[i]]
          termB <- terms[[j]]
          if (requireNamespace("dplyr", quietly = TRUE)) {
            geneA <- (dplyr::filter(.data=inputfile,Annotation_information==termA))[,"Gene_identifier"]
            geneB <- (dplyr::filter(.data=inputfile,Annotation_information==termB))[,"Gene_identifier"]
          }
          a <- length(intersect(geneA,geneB))
          b <- length(setdiff(geneB,geneA))
          c <- length(setdiff(geneA,geneB))
          d <- length(setdiff(genes,union(geneA,geneB)))
          OA <- (a+d)/ngenes
          AC <- ((a+c)*(a+b)+(c+d)*(b+d))/ngenes^2
          kappa[i,j] <- (OA-AC)/(1-AC)
          kappa[j,i] <- kappa[i,j]
        }
      }
    }
    write.table(kappa,
                "kappa_matrix.txt",
                sep="\t",
                quote = F,
                col.names = F,
                row.names = F)
    if (requireNamespace("ape", quietly = TRUE)) {
      tree <- ape::nj(as.dist(1-kappa))
    }
  }
  if(type == "ClusterProfiler"){
    if(species == "human"){
      if (requireNamespace("GOSemSim", quietly = TRUE)) {
        if (requireNamespace("org.Hs.eg.db", quietly = TRUE)) {
          hgGO <- GOSemSim::godata('org.Hs.eg.db', ont=ont)
        }
          ego.sim <- GOSemSim::mgoSim(as.character(inputfile$ID),
                                      as.character(inputfile$ID),
                                      semData=hgGO,
                                      measure="Wang",
                                      combine=NULL)
      }
    }
    if(species == "mouse"){
      if (requireNamespace("GOSemSim", quietly = TRUE)) {
        if (requireNamespace("org.Mm.eg.db", quietly = TRUE)) {
          mmGO <- GOSemSim::godata('org.Mm.eg.db', ont=ont)
        }
        ego.sim <- GOSemSim::mgoSim(as.character(inputfile$ID),
                                    as.character(inputfile$ID),
                                    semData=mmGO,
                                    measure="Wang",
                                    combine=NULL)
      }
    }
    rownames(ego.sim) <- inputfile$Description
    colnames(ego.sim) <- inputfile$Description
    if (requireNamespace("ape", quietly = TRUE)) {
        tree <- ape::nj(as.dist(1-ego.sim))
    }
  }

  if (requireNamespace("ggtree", quietly = TRUE)) {
    if (requireNamespace("ggplot2", quietly = TRUE)) {
      p <- ggtree::ggtree(tree) +
        ggtree::geom_tiplab() +
        ggtree::geom_text2(ggplot2::aes(subset=!isTip, label=node), hjust=-.3) +
        ggplot2::coord_cartesian(xlim=c(-.1,1.3))
    }
  }
  plot(p)
  return(tree)
}
ying-ge/ggEnrich documentation built on Nov. 24, 2019, 12:34 p.m.