R/multiOutput.R

Defines functions heatmap.3 ACStransform genePM mdsModel clustModelOne clustModel SA_algo heatmapPathway mdsPathway ACS_ADS_DE_cluster clustNumber clustPathway multiOutput

Documented in multiOutput

##' Downstream visualization tools: visualization outputs including overall
##' pathway clustering and output for each pathway
##' The \code{multiOutput} is function to visualize pathway level c-scores and d-scoresincluding
##' pathway clustering results with co-membership heatmaps,
##' within-pathway MDS plot (on studies),
##' within-pathway clustering heatmap (on studies),
##' within-pathway posterior DE heatmap,
##' KEGG and Reactome topology plots for each pathway.
##' @title Downstream visualization tools
##' @param mcmc.merge.list: a list of merged MCMC output matrices.
##' @param dataset.names: a vector of dataset names matched with the mcmc.merge.list.
##' @param select.pathway.list: a list of selected pathways (containing gene components) for clustering/visualizations.
##' @param ACS_ADS_pathway: a list of four data frames: pathway-specific c-scores, pathway-specific d-scores
##' and their permuted p-value (each row is a pathway and each column is a study).
##' @param output: choose from: "clustPathway" (pathway clustering),
##' "mdsModel"(within-pathway MDS plot on studies),
##' "clustModel" (within-pathway clustering heatmap),
##' "genePM" (within-pathway posterior DE heatmap),
##' "keggView" (KEGG pathway topology, default is human - hsa, for other species,
##'  KEGG organism name and gene Entrez ID needs to be provided as 'KEGG.dataG2EntrezID'),
##' "reactomeView" (Reactome pathway topology, default is human - HSA, for other species, Reactome organism name needs
##' to be provided as "reactome.species").
##' Clustering analysis is not applicable when the number of studies is smaller than 3. "output" cannot be empty.
##' @param optK: Optimal number of clusters. For "clustPathway" output only.
##' @param sil_cut: silhouette index to control scatterness. Larger value indicates tigher cluster and
##'  more scattered pathways.
##' @param use_ADS: whether use d-scores for clustering/visualizations. Default is FALSE.
##' @param hashtb: a flat noun-pathway table for text mining. Prepared tables from KEGG and Reactome pathway
##' descriptions for 5 species "hsa", "mmu", "rno", "cel" and "dme"are provided by \code{data(hashtb_hsa)},
##' \code{data(hashtb_mmu)}, \code{data(hashtb_rno)}, \code{data(hashtb_cel)} and \code{data(hashtb_dme)}.
##' Please refer to Zeng, Xiangrui, et al. "Comparative Pathway Integrator: a framework
##' of meta-analytic integration of multiple transcriptomic studies for consensual and differential pathway
##' analysis." Genes 11.6 (2020): 696.
##' @param keywords_cut: keywords above this cut will be shown in the text mining spreadsheet output.
##' @param text.permutation: select from "all" or "enriched". In text mining, "all" permutates pathways from
##' full pathway.list provided while "enriched" permutates from selected pathways. "all" is suitable for
##' cross-species comparision while "enriched" is recommended for within-species comparision.
##' @param comemberProb_cut: probability below this cut will be colored blue in comembership heatmaps.
##' @param ViewPairSelect: which two datasets to view in the KEGG/Reactome topology plot. All pairs will be
##' considered under default (may take a while).
##' @param kegg.species: KEGG species abbreviation. For "keggView" only. Default is "hsa".
##' @param KEGG.dataGisEntrezID: whether gene names in data are EntrezID. Default is FALSE.
##' @param KEGG.dataG2EntrezID: a data frame which maps gene names in mcmc.merge.list (first column) to
##' Entrez IDs (second column). If NULL & KEGG.dataGisEntrezID=F & kegg.species is one of "hsa", "mmu",
##' "rno", "cel" or "dme", gene symbols will be automatically mapped to EntrezID by Bioconductor packages
##' "org.Hs.eg.db", "org.Mm.eg.db", "org.Rn.eg.db", "org.Ce.eg.db" or org.Dm.eg.db". For "keggView" only.
##' @param KEGG.pathID2name: a list where each element is a KEGG pathway name with correponding pathway ID
##' as its name. ID will be retrieved from KEGGREST if this is NULL.
##' @param reactome.species: Reactome species abbreviation. For "reactomeView" only. Default is "HSA".
##' @param Reactome.dataG2TopologyGtype: a data frame which maps gene names in mcmc.merge.list (first
##' column) and select.pathway.list to gene name types in Reactome topology (second column). For "reactomeView" only.
##' @param Reactome.pathID2name: a list where each element is a Reactome pathway name with correponding
##' pathway ID as its name. ID will be retrieved from reactome.db if this is NULL.
##'
##' @return all figures and tables are stored in created folders in the current directory.
##' @export
##' @examples
##' \dontrun{
##' #mcmc.merge.list from the merge step (see the example in function 'merge')
##' #select.pathway.list from the pathSelect step (see the example in function 'pathSelect')
##' #ACS_ADS_pathway from the multi_ACS_ADS_pathway step (see example in 'multi_ACS_ADS_pathway')
##' data(hashtb_hsa) #include hashtb & pathways for text mining
##' dataset.names = c("hb","hs","ht","ha","hi","hl",
##'                   "mb","ms","mt","ma","mi","ml")
##' #1. step1: select K by elbow plot from consensus clustering
##' ACSpvalue.mat = ACS_ADS_pathway$ACSpvalue.mat
##' results = ConsensusClusterPlus(d=t(-log10(ACSpvalue.mat)),maxK=10,reps=50,pItem=0.8,
##' pFeature=1,title="Consensus Clustering",clusterAlg="hc",innerLinkage="ward.D2",
##' finalLinkage="ward.D2",seed=12345,plot="png")
##'
##' #2. step2: run multiOutput with pre-selected K=4
##' multiOutput(mcmc.merge.list,dataset.names,select.pathway.list,ACS_ADS_pathway,
##'            output=c("clustPathway","mdsModel","clustModel","genePM","keggView"),
##'            hashtb=hashtb,optK = 4,keywords_cut=0.2,comemberProb_cut=0.6)
##' }
multiOutput <- function(mcmc.merge.list,dataset.names,select.pathway.list,ACS_ADS_pathway,
                        output=c("clustPathway","mdsModel","clustModel","genePM","keggView","reactomeView"),
                        optK=NULL,sil_cut=0.1,use_ADS=FALSE,hashtb=NULL,keywords_cut=0.05,
                        text.permutation = "all",comemberProb_cut=0.7,
                        ViewPairSelect = NULL,kegg.species="hsa",KEGG.dataGisEntrezID=FALSE,KEGG.dataG2EntrezID=NULL,KEGG.pathID2name=NULL,
                        reactome.species="HSA",Reactome.dataG2TopologyGtype=NULL,Reactome.pathID2name=NULL) {#ViewPairSelect:a subset of dataset.names for keggView

  if(length(output)==0 || is.null(output)){
    stop("at least one type of output has to be chosen")
  }

  orig.path <- getwd()
  pathway.name <- names(select.pathway.list)
  K <- length(pathway.name)

  if(use_ADS == TRUE){
    AS.mat = ACS_ADS_pathway$ADS.mat
    ASpvalue.mat = ACS_ADS_pathway$ADSpvalue.mat
  }else{
    AS.mat = ACS_ADS_pathway$ACS.mat
    ASpvalue.mat = ACS_ADS_pathway$ACSpvalue.mat
  }

  M <- length(mcmc.merge.list)
  P <- ncol(AS.mat)

  if( (M<=2) & (length(intersect(output,c("mdsModel","clustModel"))) != 0)){
    print("Co-membership heatmap, individual MDS and clustering are not applicable to 2 DTS's case, removed from 'output'...")
    output = intersect(output,c('clustPathway','genePM','keggView'))
  }

  if("clustPathway" %in% output){
    dir.path <- "clustPathway"
    if (!file.exists(dir.path)) dir.create(dir.path)
    setwd(paste(orig.path,"/",dir.path,sep=""))

    #1. consensus clustering
    results <- clustPathway(ASpvalue.mat)

    #2. determine optimal number of clusters
    if(is.null(optK)){
      optK <- clustNumber(results)
    }
    cluster.assign <- results[[optK]]$consensusClass

    #3. identify scattered objects
    scatter.index <- scatter(-log10(ASpvalue.mat), cluster.assign, sil_cut=sil_cut)
    cluster = list(cluster.assign=cluster.assign,scatter.index=scatter.index)
    save(cluster,file = "cluster_labels.RData")

    cluster.assign2 = cluster.assign[-scatter.index]
    rmClust = setdiff(unique(cluster.assign),unique(cluster.assign2))
    if(length(rmClust) != 0){
      msg = paste0("Cluster ",rmClust," is removed because of scatterness. Please consider a smaller cluster number.")
      print(msg)
    }else{
      msg = NULL
    }

    #4. mds plot
    res <- mdsPathway(acsPvalue=ASpvalue.mat,
                      cluster.assign=cluster.assign,
                      scatter.index=scatter.index)
    #5. text mining
    if(!is.null(hashtb)){
      pathways = hashtb[match(1:max(hashtb[,3]),hashtb[,3]),"pathway"]
      tm_filtered = textMine(hashtb=hashtb,pathways=pathways,cluster.assign=cluster.assign,scatter.index=scatter.index,thres=keywords_cut,permutation=text.permutation)
      save(tm_filtered,file = "TextMine_results.RData")

    }

    #6. heatmap
    res <- heatmapPathway(acsPvalue=ASpvalue.mat,
                          cluster.assign=cluster.assign,
                          scatter.index=scatter.index)

    #7. ACS_ADS_DE plot (colored by cluster)
    res <- ACS_ADS_DE_cluster(mcmc.merge.list=mcmc.merge.list,ACS_ADS_pathway=ACS_ADS_pathway,
                              dataset.names=dataset.names,select.pathway.list=select.pathway.list,
                              cluster.assign=cluster.assign,scatter.index=scatter.index,plot.path=NULL)

    if(M>2){
      print("Construct comembership matrix...")
      dir.path <- "comemberPlot"
      if (!file.exists(paste(orig.path,"/",dir.path,sep=""))) dir.create(paste(orig.path,"/",dir.path,sep=""))
      setwd(paste(orig.path,"/",dir.path,sep=""))

      model.cluster.result <- vector("list",length=K)
      names(model.cluster.result) <- rownames(ASpvalue.mat)

      for(k in 1:K) {
        model.cluster.result[[k]] <- SA_algo(unlist(c(ASpvalue.mat[k,])),dataset.names,sep="_")
      }

      if(is.null(scatter.index)){
        pathway.cluster.assign = cluster.assign
        Cvec = 1:optK
      }else{
        pathway.cluster.assign = cluster.assign
        pathway.cluster.assign[scatter.index] = "scatter"
        Cvec = sort(unique(pathway.cluster.assign))
      }

      comember.list <- vector("list",length=length(Cvec))
      for (c in 1:length(Cvec)){
        select.pathways <- names(pathway.cluster.assign)[pathway.cluster.assign==Cvec[c]]
        denom <- length(select.pathways)
        model.result.pathways <- matrix(unlist(model.cluster.result[select.pathways]),nrow=denom,ncol=M,byrow =T )
        rownames(model.result.pathways) <- select.pathways
        colnames(model.result.pathways) <- dataset.names
        comember.mat <- matrix(1,M,M)
        rownames(comember.mat) <- colnames(comember.mat) <- dataset.names
        for (i in 1:(M-1)){
          for (j in (i+1):M){
            if (denom==1){
              comember.mat[j,i] = 1
            }else{
              model1 <- dataset.names[i]
              model2 <- dataset.names[j]
              twomodel.result <- model.result.pathways[,c(model1,model2)]
              comember.mat[j,i] <- comember.mat[i,j] <- sum(apply(twomodel.result,1,function(x) x[1]==x[2]))/denom
            }
          }
        }
        comember.list[[c]] <- comember.mat
      }
      names(comember.list) = Cvec
      save(comember.list,file="comember.list.RData")

      #1. thresholding:
      threshold = comemberProb_cut

      mat.thres <- function(mat, threshold){
        mat[which(mat <= threshold)] <- 0
        return(mat)
      }

      #2. matrix multiplication
      for(i in 1:length(comember.list)){
        mat <- comember.list[[i]]

        par(font.main=2)
        par(font.axis=2)
        par(xpd=FALSE)

        #thresholding

        thres.mat <- mat.thres(mat,threshold)

        #pdf(paste("ComemMat_cluster_",names(comember.list)[i],"_threshold_",threshold,".pdf",sep=""))
        jpeg(paste("ComemMat_cluster_",names(comember.list)[i],"_threshold_",threshold,".jpeg",sep=""),quality = 100)


        hm <- heatmap.3(thres.mat,
                        main=NULL,
                        cexCol=1.2,cexRow=1.2,
                        colsep=1:nrow(mat),
                        rowsep=1:ncol(mat),
                        sepwidth=c(0.02, 0.02),  # width of the borders
                        sepcolor='black',
                        symbreaks=T,key=T, keysize=1,symkey=F,
                        dendrogram=c('none'),density.info="none",
                        trace="none",Rowv=T,Colv=T,symm=F,
                        #srtCol=50,
                        col=bluered,breaks=seq(0,1,by=0.01),
                        margins=c(12,14))

        dev.off()
      }
    }
  }
  setwd(orig.path)

  if("mdsModel" %in% output){
    dir.path <- "mdsModel"
    if (!file.exists(dir.path)) dir.create(dir.path)
    setwd(paste(orig.path,"/",dir.path,sep=""))

    for(k in 1:K){
      print(paste("mdsModel",k,sep=":"))
      pathk.name <- pathway.name[k]
      res <- mdsModel(unlist(c(AS.mat[k,])),dataset.names,pathk.name,sep="_")
    }
  }
  setwd(orig.path)

  if("clustModel" %in% output){
    dir.path <- "clustModel"
    if (!file.exists(dir.path)) dir.create(dir.path)
    setwd(paste(orig.path,"/",dir.path,sep=""))

    for(k in 1:K){
      print(paste("clustModel",k,sep=":"))
      pathk.name <- pathway.name[k]
      cluster.assign.path <- try(SA_algo(unlist(c(ASpvalue.mat[k,])),dataset.names,sep="_"))
      if(length(unique(cluster.assign.path))>1 && class(cluster.assign.path) != "try-error" ){
        res <- clustModel(unlist(c(ASpvalue.mat[k,])),dataset.names,cluster.assign.path,
                          pathk.name,sep="_")
      }
      if(length(unique(cluster.assign.path))==1 || class(cluster.assign.path) == "try-error" ){
        warning("clustModel only identifies one cluster")
        res <- clustModelOne(unlist(c(ASpvalue.mat[k,])),dataset.names,
                             pathk.name,sep="_")
      }
    }
  }
  setwd(orig.path)


  if("genePM" %in% output){
    dir.path <- "genePM"
    if (!file.exists(dir.path)) dir.create(dir.path)
    setwd(paste(orig.path,"/",dir.path,sep=""))
    for(k in 1:K){
      print(paste("genePM",k,sep=":"))
      pathk.name <- pathway.name[k]
      pathway.genes <- select.pathway.list[[k]]
      signPM.list <- lapply(mcmc.merge.list,function(x) apply(x,1,mean))
      names(signPM.list) <- dataset.names
      hm <- genePM(signPM.list, pathway.genes=pathway.genes,
                   pathway.name=pathk.name)
    }
  }
  setwd(orig.path)

  if("keggView" %in% output){
    if(sum(grepl("KEGG",pathway.name))==0) {
      warning("No KEGG pathways")
    } else{
      #dir.path <- "keggView"
      #if (!file.exists(dir.path)) dir.create(dir.path)
      #setwd(paste(orig.path,"/",dir.path,sep=""))

      #map.ls = as.list(as.list(org.Hs.egALIAS2EG))

      if(is.null(KEGG.pathID2name)){
        message("Retrieve KEGG pathway IDs from KEGGREST...")
        KEGG.pathID2name = lapply(KEGGREST::keggList("pathway",kegg.species),function(x) strsplit(x," - ")[[1]][-length(strsplit(x," - ")[[1]])])
        names(KEGG.pathID2name) = gsub("path:","",names(KEGG.pathID2name))
      }

      if(is.null(ViewPairSelect)){
        data.pair = combn(dataset.names,2)
      }else{
        data.pair = combn(ViewPairSelect,2)
      }
      select.kegg.path.name = pathway.name[grep("KEGG",pathway.name)]## KEGG pathways selected
      K_KEGG = length(select.kegg.path.name)
      for(k in 1:K_KEGG){
        print(paste("keggView",k,sep=":"))
        keggk.name = select.kegg.path.name[k]
        keggk.name.clean = gsub("KEGG ","",keggk.name)
        keggk.name0 = gsub(" / ","_",keggk.name,fixed = T)

        pathwayID = gsub(kegg.species,"",names(KEGG.pathID2name)[which(KEGG.pathID2name == keggk.name.clean)])
        if(length(pathwayID) == 0){
          print(paste0("No KEGG pathway ID found for ",keggk.name,", skipped. Please check R package 'KEGGREST' for correct pathway name."))
        }else{
          dir.path <- paste0(orig.path,"/keggView/", keggk.name0)
          if (!file.exists(dir.path)) dir.create(dir.path,recursive = T)
          setwd(dir.path)
          for (i in 1:ncol(data.pair)) {
            dat1.name = data.pair[1,i]
            dat2.name = data.pair[2,i]
            dat1 = mcmc.merge.list[[match(dat1.name,dataset.names)]]
            dat2 = mcmc.merge.list[[match(dat2.name,dataset.names)]]
            overlap.genes <- intersect(rownames(dat1),select.pathway.list[[keggk.name]])
            signPM.mat <- cbind(apply(dat1[overlap.genes,],1,mean),
                                apply(dat2[overlap.genes,],1,mean))
            colnames(signPM.mat) <- c(dat1.name,dat2.name)
            std.genes <- rownames(signPM.mat)
            if(KEGG.dataGisEntrezID == TRUE){
              message("'KEGG.dataGisEntrezID == TRUE', gene names are used for KEGG pathview directly.")

            }else if(!is.null(KEGG.dataG2EntrezID)){
              entrezID = KEGG.dataG2EntrezID[match(rownames(signPM.mat),KEGG.dataG2EntrezID[,1]),2]
              rownames(signPM.mat) = entrezID

              message("Gene names are converted to EntrezID by the provided data frame KEGG.dataG2EntrezID.")

            }else if(kegg.species == "hsa"){
              map.ls = as.list(as.list(org.Hs.eg.db::org.Hs.egALIAS2EG))
              entrezID = sapply(rownames(signPM.mat),function(g) map.ls[[g]][[1]])
              rownames(signPM.mat) = entrezID

              message("Gene names are converted to EntrezID by org.Hs.eg.db::org.Hs.egALIAS2EG.")

            }else if(kegg.species == "mmu"){
              map.ls = as.list(as.list(org.Mm.eg.db::org.Mm.egALIAS2EG))
              entrezID = sapply(rownames(signPM.mat),function(g) map.ls[[g]][[1]])
              rownames(signPM.mat) = entrezID

              message("Gene names are converted to EntrezID by org.Hs.eg.db::org.Mm.egALIAS2EG.")

            }else if(kegg.species == "rno"){
              map.ls = as.list(as.list(org.Rn.eg.db::org.Rn.egALIAS2EG))
              entrezID = sapply(rownames(signPM.mat),function(g) map.ls[[g]][[1]])
              rownames(signPM.mat) = entrezID

              message("Gene names are converted to EntrezID by org.Hs.eg.db::org.Rn.egALIAS2EG")

            }else if(kegg.species == "cel"){
              map.ls = as.list(as.list(org.Ce.eg.db::org.Ce.egALIAS2EG))
              entrezID = sapply(rownames(signPM.mat),function(g) map.ls[[g]][[1]])
              rownames(signPM.mat) = entrezID

              message("Gene names are converted to EntrezID by org.Hs.eg.db::org.Ce.egALIAS2EG")

            }else if(kegg.species == "dme"){
              map.ls = as.list(as.list(org.Dm.eg.db::org.Dm.egALIAS2EG))
              entrezID = sapply(rownames(signPM.mat),function(g) map.ls[[g]][[1]])
              rownames(signPM.mat) = entrezID

              message("Gene names are converted to EntrezID by org.Hs.eg.db::org.Dm.egALIAS2EG")

            }

            if(kegg.species == "hsa"|kegg.species == "mmu"){
              #Average DE genes in each node
              #Only applicable to human and mouse because both their XML genes and pathview package use EntrezID
              download.kegg(pathway.id = pathwayID, kegg.species, kegg.dir = ".", file.type="xml")
              parsePathway = KEGGgraph::parseKGML(paste(getwd(), "/",kegg.species, pathwayID,".xml",sep = ""))
              parsePathway = KEGGgraph::splitKEGGgroup(parsePathway)

              entries = KEGGgraph::nodes(parsePathway)
              types = sapply(entries, KEGGgraph::getType)
              entryNames = as.list(sapply(entries, KEGGgraph::getName))
              if(any(types == "group") || any(types=="map")){
                entryNames = entryNames[!(types %in% c("group","map"))]
              }
              entryIds = names(entryNames)
              entryNames = lapply(1:length(entryNames), function(i) paste(entryNames[[i]],collapse="_"))
              names(entryNames) = entryIds

              entryNames.unique = unique(entryNames)

              xmlG = gsub(paste0(kegg.species,":"),"",unlist(entryNames.unique))
              xmlG.ls = lapply(xmlG, function(x){
                strsplit(x,"_")[[1]]
              })

              mergePMls = lapply(1:length(xmlG.ls), function(x){
                genes = xmlG.ls[[x]]
                cmG = intersect(rownames(signPM.mat),genes)
                if(length(cmG) !=0){
                  sub.signPM.mat = matrix(signPM.mat[cmG,],ncol = 2)
                  avgPM = apply(sub.signPM.mat, 2, mean)
                  return(avgPM)
                }
              })
              names(mergePMls) = xmlG
              mergePMmat = do.call(rbind,mergePMls)

              row.names(mergePMmat) = sapply(row.names(mergePMmat), function(x) strsplit(x,"_")[[1]][1])
              signPM.mat = mergePMmat
            }
            res = pathview(gene.data = signPM.mat, pathway.id = pathwayID,
                           species = kegg.species, out.suffix = "", kegg.native = T,
                           key.pos = "bottomright", map.null=T,cex = 0.15)

            keggID = paste(kegg.species,pathwayID,sep="")
            file.rename(paste(keggID,"..multi.png",sep=""),
                        paste(keggk.name0,"_",dat1.name,"_",dat2.name,".png",sep=""))
            file.remove(paste(keggID,".xml",sep=""))
            file.remove(paste(keggID,".png",sep=""))
          }
        }
      }
    }
  }
  setwd(orig.path)

  if("reactomeView" %in% output){
    if(sum(grepl("Reactome",pathway.name))==0) {
      warning("No Reactome pathways")
    } else{
      source_python(system.file("ImageProcess.py", package = "CAMO"))

      #genes in mcmc.merge.list, pathway.list should both be the type shown on Reactome topology
      if(!is.null(Reactome.dataG2TopologyGtype)){
        #match mcmc.merge.list genes
        mcmcG = row.names(mcmc.merge.list[[1]])
        mcmcG.topology = Reactome.dataG2TopologyGtype[match(mcmcG,Reactome.dataG2TopologyGtype[,1]),2]
        na.index = which(is.na(mcmcG.topology))
        if(length(na.index) !=0){
          mcmcG.topology.rmna = mcmcG.topology[-na.index]
          mcmc.merge.list.topology = lapply(mcmc.merge.list, function(x) {
            x_rm = x[-na.index,]
            row.names(x_rm) = mcmcG.topology.rmna
            return(x_rm)
          })
        }else{
          mcmc.merge.list.topology = lapply(mcmc.merge.list, function(x) {
            row.names(x) = mcmcG.topology
            return(x)
          })
        }
        #match pathway genes
        select.pathway.list.topology = lapply(select.pathway.list, function(x){
          pathwayG.topology = Reactome.dataG2TopologyGtype[match(x,Reactome.dataG2TopologyGtype[,1]),2]
          pathwayG.topology = pathwayG.topology[-which(is.na(pathwayG.topology)|pathwayG.topology == "")]
          return(pathwayG.topology)
        })
      }else{
        message("Reactome.dataG2TopologyGtype is not provided, gene names are used to hightlight genes Reactome topology plots directly.")

        mcmc.merge.list.topology = mcmc.merge.list
        select.pathway.list.topology = select.pathway.list
      }

      if(is.null(Reactome.pathID2name)){
        message("Retrieve Reactome pathway IDs from KEGGREST...")

        pathid2name = as.list(reactomePATHID2NAME)
        pathid2name_species = pathid2name[grep(reactome.species,names(pathid2name))]
        Reactome.pathID2name = lapply(pathid2name_species, function(x) strsplit(x,": ")[[1]][2])
      }

      pathway.name.topology = names(select.pathway.list.topology)
      select.path.name = gsub("Reactome ","",pathway.name.topology[grep("Reactome",pathway.name.topology)])
      select.path.name.intersect = intersect(Reactome.pathID2name,select.path.name)
      select.path.id.intersect = names(Reactome.pathID2name)[match(select.path.name.intersect,Reactome.pathID2name)]

      if(length(select.path.id.intersect) == 0){
        print(paste0("No matched Reactome pathway IDs for pathways provided. Please check R package 'reactome.db' for correct pathway name."))
      }else{
        print(paste0("Matched ",length(select.path.id.intersect)," Reactome pathway IDs"))

        select.path.name.intersect2 = paste0("Reactome ",select.path.name.intersect)
        reactome.index = match(select.path.name.intersect2, pathway.name.topology)
        reactome.df = data.frame(ID = as.character(select.path.id.intersect),
                                 Genes = as.character(sapply(select.pathway.list.topology[select.path.name.intersect2], function(x) paste(x,collapse = " "))))

        if(is.null(ViewPairSelect)){
          data.pair = combn(dataset.names,2)
        }else{
          data.pair = combn(ViewPairSelect,2)
        }
        for(i in 1:length(select.path.id.intersect)){
          print(paste("reactomeView",i,sep=":"))
          aID = select.path.id.intersect[i]
          aname = select.path.name.intersect2[i]
          if(grepl("/|:|,", aname)) {
            aname1 = gsub("/|:|,", "", aname)
          } else {
            aname1 = aname
          }
          dir.path = paste0(orig.path,"/reactomeView/", aname1)
          if (!file.exists(dir.path)) dir.create(dir.path,recursive = T)
          setwd(dir.path)
          file.copy(from = system.file("pallete.jpeg", package = "CAMO"),
                    to   = getwd())
          for (j in 1:ncol(data.pair)) {
            tryCatch({
              # aID = select.path.id.intersect[i]
              # aname = select.path.name.intersect2[i]
              dat1.name = data.pair[1,j]
              dat2.name = data.pair[2,j]
              dat1 = mcmc.merge.list.topology[[match(dat1.name,dataset.names)]]
              dat2 = mcmc.merge.list.topology[[match(dat1.name,dataset.names)]]
              print(paste0(aID," ",dat1.name," ",dat2.name))
              overlap.genes0 = intersect(rownames(dat1),select.pathway.list.topology[[aname]])
              if(length(overlap.genes0) != 0){
                signPM.mat = cbind(apply(dat1[overlap.genes0,],1,mean),
                                   apply(dat2[overlap.genes0,],1,mean))
                signPM = data.frame(Genes = row.names(signPM.mat),signPM.mat)
                colnames(signPM) = c("Genes","dat1","dat2")


                CallFromR(signPM=signPM,ReactomePath=reactome.df,datadir=paste0(getwd(),"/"),pathwayID=aID)

                file.rename(paste(aID,"New.jpeg",sep=""),
                            paste(aID,"_",dat1.name,"_",dat2.name,".jpeg",sep=""))
                file.remove(paste(aID,".jpeg",sep=""))
                file.remove(paste(aID,"(1).jpeg",sep=""))
                file.remove(paste(aID,".sbgn",sep=""))
              }
            }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
          }
          file.remove("pallete.jpeg")
          setwd(orig.path)
        }
      }
    }
  }
  setwd(orig.path)

  print("Multiple pairs analysis completed.")

}

#internal functions for multiOutput:

clustPathway <- function(acsPvalue) {
  #require(ConsensusClusterPlus)
  #set your working dir, automatically save there

  #check if pearson correlation is applicable
  sd_check = apply(acsPvalue, 1, sd)
  if(!all(sd_check != 0) | ncol(acsPvalue) == 1){
    results = ConsensusClusterPlus(d=t(-log10(acsPvalue)),maxK=10,reps=50,pItem=0.8,pFeature=1,title="Pathway clustering",clusterAlg="hc",innerLinkage="ward.D2",finalLinkage="ward.D2",seed=15213,plot="png",distance = "euclidean")
    message("Exist pathways with zero standard deviation. Euclidean distance is used for clustring. Please consider a larger permutaion number if need a pearson distance.")

  }else{
    results = ConsensusClusterPlus(d=t(-log10(acsPvalue)),maxK=10,reps=50,pItem=0.8,pFeature=1,title="Pathway clustering",clusterAlg="hc",innerLinkage="ward.D2",finalLinkage="ward.D2",seed=15213,plot="png")
  }

  return(results) ## a list of K elements (each k represents number of clusters)
}

clustNumber <- function(results){
  Kvec = 2:length(results);
  x1 = 0.1; x2 = 0.9 # threshold defining the intermediate sub-interval
  PAC = rep(NA,length(Kvec))
  names(PAC) = paste("K=",Kvec,sep="") # from 2 to 10 (maxK)
  for(i in Kvec){
    M = results[[i]]$consensusMatrix
    Fn = ecdf(M[lower.tri(M)])
    PAC[i-1] = Fn(x2) - Fn(x1)
  }#end for i
  # The optimal K
  optK = Kvec[which.min(PAC)-2]
  return(optK)
}

ACS_ADS_DE_cluster = function(mcmc.merge.list,ACS_ADS_pathway,dataset.names,
                              select.pathway.list,cluster.assign,scatter.index,
                              plot.path=NULL){

  if(is.null(plot.path)){
    plot.path = getwd()
  }

  ACS_pvalue = ACS_ADS_pathway$ACSpvalue.mat
  ACSlog10p.mat = -log10(ACS_pvalue)
  ADS_pvalue = ACS_ADS_pathway$ADSpvalue.mat
  ADSlog10p.mat = -log10(ADS_pvalue)

  P <- nrow(ACSlog10p.mat)
  allgenes <- rownames(mcmc.merge.list[[1]])
  pm.list <- lapply(1:length(mcmc.merge.list), function(x)
    apply(mcmc.merge.list[[x]],1,mean))
  names(pm.list) <- dataset.names


  DEevid <- matrix(NA,P,length(dataset.names))
  rownames(DEevid) = row.names(ACS_pvalue)
  colnames(DEevid) = dataset.names
  for(j in 1:P){
    pathj <- rownames(ACSlog10p.mat)[j]
    genej <- select.pathway.list[[pathj]]
    intergenej <- intersect(genej,allgenes)

    for(ds in dataset.names){
      DEevid[j,ds] <-  mean(abs(pm.list[[ds]])[intergenej],na.rm=T)
    }
  }
  write.csv(DEevid,paste0(plot.path,"/DEevid_abs.csv"))

  cluster.lb = cluster.assign
  cluster.lb[scatter.index] = "scatter"
  K = length(unique(cluster.assign))

  dpairs = combn(dataset.names,2)
  dpairs.index = combn(length(dataset.names),2)
  Plist = lapply(1:ncol(dpairs), function(x) {
    ds1 <- dpairs[1,x]
    ds2 <- dpairs[2,x]
    DEevid1 = DEevid[,ds1]
    DEevid2 = DEevid[,ds2]
    ACSp <- ACS_pvalue[,paste(ds1,ds2,sep="_")]
    ADSp <- ADS_pvalue[,paste(ds1,ds2,sep="_")]

    #index_pos = index_neg =1:P
    #index_pos[-c(highlight_index_pos,highlight_index_neg)] = ""
    #index_neg[-c(highlight_index_pos,highlight_index_neg)] = ""

    plist = ACS_ADS_DE(ds1,ds2,DEevid1,DEevid2,ACSp,ADSp,cluster.lb,size.scale = 1)
    return(plist)
  })

  Pcordi = cbind(rep(seq(1:length(dataset.names)),each = length(dataset.names)),
                 rep(seq(1:length(dataset.names)),times = length(dataset.names)))
  Plist.org = list()
  for (i in 1:nrow(Pcordi)) {
    if (Pcordi[i,1] < Pcordi[i,2]){
      plist.index = which(sapply(1:ncol(dpairs.index),function(c) {
        all(dpairs.index[,c] == Pcordi[i,])
      }))
      Plist.org[[i]] = Plist[[plist.index]][[1]]
    } else if (Pcordi[i,1] > Pcordi[i,2]){
      plist.index = which(sapply(1:ncol(dpairs.index),function(c) {
        (dpairs.index[1,c] == Pcordi[i,2])&(dpairs.index[2,c] == Pcordi[i,1])
      }))
      Plist.org[[i]] = Plist[[plist.index]][[2]]
    } else {
      Plist.org[[i]] = grid::rectGrob(gp=gpar(fill="white"))
    }
  }
  lay = matrix(seq(1:length(Plist.org)),
               nrow = length(dataset.names),
               ncol = length(dataset.names), byrow = T)

  pdf(paste0(plot.path,"/multiPlot_ACS_ADS_DE_K",K,".pdf"),width = 50,height = 50)
  gridExtra::grid.arrange(grobs = Plist.org, layout_matrix = lay)
  dev.off()

}

mdsPathway <- function(acsPvalue,cluster.assign,scatter.index=NULL) {

  ## plot MDS for all pathways
  ## acsPvalue is a matrix of K (pathways) rows and choose(M,2) columns
  ## cluster.assign is the result from consensus clustering

  C <- length(unique(cluster.assign)) #number of clusters
  if(C>9){
    warning("Too many clusters, not enough colors")
  }

  dist.mat <-  dist(-log10(acsPvalue),method = "euclidean",
                    upper = TRUE, diag = TRUE)

  fit <- cmdscale(dist.mat,k=2)
  x <- fit[,1]
  y <- fit[,2]
  xlimit <- ifelse(abs(min(x))>abs(max(x)),abs(min(x)),abs(max(x)))
  ylimit <- ifelse(abs(min(y))>abs(max(y)),abs(min(y)),abs(max(y)))
  xcenter <- tapply(x,as.factor(cluster.assign),mean)
  ycenter <- tapply(y,as.factor(cluster.assign),mean)

  if(!is.null(scatter.index)){
    cluster.assign[scatter.index] <- -1
  }

  unique.color <- rainbow(C)
  unique.shape <- 1:C
  sizes <- shapes <- colors <- cluster.assign
  for(i in 1:(C+1)){
    colors[cluster.assign==i] <- unique.color[i]
    shapes[cluster.assign==i] <- unique.shape[i]
    sizes[cluster.assign==i] <- 2
    if(i== (C+1)){
      colors[cluster.assign== -1] <- "gray50"
      shapes[cluster.assign== -1] <- 20
      sizes[cluster.assign== -1] <- 1
    }
  }
  #pdf(paste("mdsPathway","_K_",C,".pdf",sep=""))
  jpeg(paste("mdsPathway","_K_",C,".jpeg",sep=""),quality = 100)

  p <- ggplot() +
    ggtitle("") +
    xlab("Coordinate 1") + ylab("Coordinate 2") +
    xlim(c(-xlimit,xlimit)) + ylim(c(-ylimit,ylimit)) +
    geom_point(aes(x, y), shape=shapes,
               color = colors ,size=sizes) +
    geom_point(aes(xcenter,ycenter),
               shape=unique.shape, color = unique.color,
               size =5) +
    theme_bw() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          plot.title = element_text(size = 15, hjust=0.5,face="bold"),
          axis.text.x = element_text(size = 12),
          axis.text.y = element_text(size = 12))
  print(p)
  dev.off()
  return(p)
}

heatmapPathway <- function(acsPvalue, cluster.assign,scatter.index=NULL){

  ## cluster.assign is the result from consensus clustering
  acsPvalue <- data.matrix(acsPvalue)
  C <- length(unique(cluster.assign)) #number of clusters
  dataOrder <- -log10(acsPvalue)[unlist(sapply(1:C,function(x) which(cluster.assign==x))),]
  colnames(dataOrder) <- colnames(acsPvalue)
  row.sep <-  c(0,cumsum(unlist(sapply(1:C,function(x) sum(cluster.assign==x)))) )

  if(!is.null(scatter.index)){
    cluster.assign[scatter.index] <- -1
    dataOrder <- -log10(acsPvalue)[unlist(sapply(c(1:C,-1),function(x) which(cluster.assign==x))),]
    colnames(dataOrder) <- colnames(acsPvalue)
    row.sep <-  c(0,cumsum(unlist(sapply(c(1:C,-1),function(x) sum(cluster.assign==x)))) )
  }
  #rownames(dataOrder) <- sapply(rownames(dataOrder),function(x) substr(x,1,15))
  ordered.cluster.assign <- cluster.assign[rownames(dataOrder)]
  row.colors <- rep(NA, length(ordered.cluster.assign) )
  for(i in 1:length(row.colors)){
    if(ordered.cluster.assign[i]== -1){
      row.colors[i] <- "gray"
    } else {
      row.colors[i] <- rainbow(C)[ordered.cluster.assign[i]]
    }
  }
  #pdf(paste("heatmapPathway","_K_",C,".pdf",sep=""))
  jpeg(paste("heatmapPathway","_K_",C,".jpeg",sep=""),quality = 100)
  par(cex.main=1, font.lab=2, font.axis=2)
  hm<-gplots::heatmap.2(dataOrder, symm=F,main=NULL,
                cexCol=0.7,cexRow=0.3,adjCol= c(NA,-1),
                rowsep=row.sep,
                sepwidth=c(0.1, 0.3),  # width of the borders
                sepcolor=c('white'),scale='none',
                symbreaks=T,key=T, keysize=1,symkey=F,
                dendrogram=c('none'),density.info="none",
                trace="none",Rowv=F,Colv=T,
                srtCol=50,RowSideColors=row.colors,
                col=greenred,breaks=seq(0,max(dataOrder),by=0.01),
                key.ytickfun=function(){
                  side = 2
                } )
  dev.off()

  return(hm)

}

SA_algo <- function(acsPvaluePath,model.name,sep,Tm=10,P=0.5,
                    mu=0.9,epsilon=1e-5,N=1000,seed=12345){
  ## Total possible configurations = choose(n,1) + choose(n,2) + ...
  ## clustering models using SA algorithm
  ## delta.mat is a matrix of pairwise -log10(acsPvalue) of M rows and M columns
  ## with model names
  M <- length(model.name)
  distF <- -log10(acsPvaluePath)
  delta.mat <- matrix(NA,nrow=M,ncol=M)
  rownames(delta.mat) <- colnames(delta.mat) <- model.name

  for(i in 1:(M-1)){
    for(j in (i+1):M){
      name1 <- rownames(delta.mat)[i]
      name2 <- rownames(delta.mat)[j]
      delta.mat[name1,name2] <- delta.mat[name2,name1] <- distF[paste(name1,name2,sep=sep)]
    }
  }
  diag(delta.mat) <- max(delta.mat,na.rm=T)

  n <- nrow(delta.mat) # =M
  obs.name <- rownames(delta.mat)

  ## initialize
  hc <- hclust(d=dist((max(delta.mat)-delta.mat)^2))
  K <- 3
  a <- cutree(hc, k = K)
  names(a) <- obs.name
  delta.est <- Est_mean(delta.mat,a)
  names(delta.est) <- c(0,1:K)

  count <- 0 #initial
  Jc <- E_tot(delta.mat,a,delta.est)
  pi <- exp(-Jc/Tm) ## Boltzmann dist

  while((count < N) && (Tm >= epsilon)) {
    ##New trial
    u <- runif(1)
    if(u>0.5){
      a_new <- Split(a)
    } else{
      a_new <- Relocate(a)
    }
    names(a_new) <- obs.name
    K_new <- length(unique(a_new))
    delta.est_new <- Est_mean(delta.mat,a_new)
    Jn <- E_tot(delta.mat, a_new, delta.est_new)
    pi_new <- exp(-Jn/Tm)

    if(Jn < Jc) {
      ##accept
      Jc <- Jn;
      a <- a_new;
      K <- K_new;
      delta.est <- delta.est_new;
    } else {
      count <- count + 1;
      r <- min(1,pi_new/pi); ## acceptance prob.
      u <- runif(1);
      if(u>r) {
        ##not accept
        Tm <- Tm*mu
      } else {
        Jc <- Jn;
        a <- a_new;
        K <- K_new;
        delta.est <- delta.est_new;
      }
    }
  }
  return(a) #cluster assigned
}

clustModel <- function(acsPvaluePath,model.name, cluster.assign,pathway.name,sep){
  ## delta.mat is a matrix of pairwise -log(acsPvalue) of M rows and M columns
  ## with model names
  ## cluster.assign: results from SA algorithm
  ## pathway.name: pathway name

  M <- length(model.name)
  distF <- -log10(acsPvaluePath)
  delta.mat <- matrix(NA,nrow=M,ncol=M)
  rownames(delta.mat) <- colnames(delta.mat) <- model.name

  for(i in 1:(M-1)){
    for(j in (i+1):M){
      name1 <- rownames(delta.mat)[i]
      name2 <- rownames(delta.mat)[j]
      delta.mat[name1,name2] <- delta.mat[name2,name1] <- distF[paste(name1,name2,sep=sep)]
    }
  }
  diag(delta.mat) <- max(delta.mat,na.rm=T)

  if(grepl("/",pathway.name)){
    pathway.name <- gsub("/","-",pathway.name)
  }
  if(max(delta.mat)<1){
    breaks=seq(0,max(delta.mat),length.out = 10)
  }else{
    breaks=seq(0,round(max(delta.mat)),by=0.01)
  }
  png(paste(pathway.name,'.png',sep="_"))
  hm <- gplots::heatmap.2(delta.mat[order(cluster.assign),order(cluster.assign)],
                  main=pathway.name,
                  cexCol=1,cexRow=1,
                  colsep=cumsum(table(cluster.assign)),
                  rowsep=cumsum(table(cluster.assign)),
                  sepwidth=c(0.05, 0.05),  # width of the borders
                  sepcolor=c('white'),
                  symbreaks=T,key=T, keysize=1,symkey=F,
                  dendrogram=c('none'),density.info="none",
                  trace="none",Rowv=F,Colv=F,
                  srtCol=50, symm=F,
                  col=greenred,breaks = breaks)
  dev.off()
  return(hm)
}

clustModelOne <- function(acsPvaluePath,model.name, pathway.name,sep){
  ## delta.mat is a matrix of pairwise -log(acsPvalue) of M rows and M columns
  ## with model names
  ## cluster.assign: results from SA algorithm
  ## pathway.name: pathway name

  M <- length(model.name)
  distF <- -log10(acsPvaluePath)
  delta.mat <- matrix(NA,nrow=M,ncol=M)
  rownames(delta.mat) <- colnames(delta.mat) <- model.name

  for(i in 1:(M-1)){
    for(j in (i+1):M){
      name1 <- rownames(delta.mat)[i]
      name2 <- rownames(delta.mat)[j]
      delta.mat[name1,name2] <- delta.mat[name2,name1] <- distF[paste(name1,name2,sep=sep)]
    }
  }
  diag(delta.mat) <- max(delta.mat,na.rm=T)
  if(max(delta.mat)<1){
    breaks=seq(0,max(delta.mat),length.out = 10)
  }else{
    breaks=seq(0,round(max(delta.mat)),by=0.01)
  }
  if(grepl("/",pathway.name)){
    pathway.name <- gsub("/","-",pathway.name)
  }

  png(paste(pathway.name,'.png',sep="_"))
  hm <- gplots::heatmap.2(delta.mat,
                  main=pathway.name,
                  cexCol=1,cexRow=1,
                  symbreaks=T,key=T, keysize=1,symkey=F,
                  dendrogram=c('none'),density.info="none",
                  trace="none",Rowv=F,Colv=F,
                  srtCol=50, symm=F,
                  col=greenred,breaks=seq(0,round(max(delta.mat)),by=0.01) )
  dev.off()
  return(hm)
}

mdsModel <- function(acsPath,model.name,pathway.name,sep) {
  ## for each pathway, plot MDS for all models
  ## acsP is a vector of choose(M,2) elements (named in paste(name1,name2,sep=""))
  ## model.name is a vector of model names
  M <- length(model.name)
  distF <- ACStransform(acsPath)
  d <- matrix(NA,nrow=M,ncol=M)
  rownames(d) <- colnames(d) <- model.name

  for(i in 1:(M-1)){
    for(j in (i+1):M){
      name1 <- rownames(d)[i]
      name2 <- rownames(d)[j]
      d[name1,name2] <- d[name2,name1] <- distF[paste(name1,name2,sep=sep)]
    }
  }

  diag(d) <- 0
  dist <- as.dist(d,upper = TRUE, diag = TRUE)
  fit <- sammon(d=dist, y= jitter(cmdscale(dist, 2)), k=2) # k is the number of dim

  x <- fit$points[,1]
  y <- fit$points[,2]
  xlimit <- ifelse(abs(min(x))>abs(max(x)),abs(min(x)),abs(max(x)))
  ylimit <- ifelse(abs(min(y))>abs(max(y)),abs(min(y)),abs(max(y)))

  if(grepl("/",pathway.name)){
    pathway.name <- gsub("/","-",pathway.name)
  }

  color <- rainbow(M,s=0.5,v=1,alpha=1)
  png(paste(pathway.name,".png",sep=""))
  p<-ggplot() +
    ggtitle(pathway.name) +
    xlab("Coordinate 1") + ylab("Coordinate 2") +
    xlim(c(-xlimit-0.5,xlimit+0.5)) + ylim(c(-ylimit-0.5,ylimit+0.5)) +
    geom_point(aes(x, y), color = color  ,size=6) +
    geom_text_repel(aes(x, y, label = rownames(d),fontface="bold"),size=8) +
    theme(plot.title = element_text(size = 15, hjust=0.5,face="bold"),
          axis.text.x = element_text(size = 12),
          axis.text.y = element_text(size = 12))
  print(p)
  dev.off()
}

genePM <- function(signPM.list, pathway.genes, pathway.name){

  M <- length(signPM.list)
  model.name <- names(signPM.list)
  std.genes <- intersect(names(signPM.list[[1]]),pathway.genes)
  G <- length(std.genes)

  mat <- matrix(0,nrow=G,ncol=M)
  rownames(mat) <- std.genes
  colnames(mat) <- model.name

  for (m in 1:M){
    mat[std.genes,m] <- signPM.list[[m]][std.genes]
  }

  if(grepl("/",pathway.name)){
    pathway.name <- gsub("/","-",pathway.name)
  }

  pdf(paste(pathway.name,'.pdf',sep=""))
  #jpeg(paste(pathway.name,".jpeg",sep=""),quality = 100)
  p = pheatmap::pheatmap(mat, clustering_method = "complete",main=pathway.name,
                     color = colorRampPalette(c("green","grey","red"))(n = 499),
                     breaks = seq(-1,1,length.out = 500))
  print(p)
  dev.off()
  return(p)
}

ACStransform <- function(ACS, theta=7) {
  trun.ACS <-ifelse(ACS<0,0,ACS)
  trsf.ACS <- theta*exp(-theta*trun.ACS)
  return(trsf.ACS)
}

heatmap.3 <- function(x,
                      Rowv = TRUE, Colv = if (symm) "Rowv" else TRUE,
                      distfun = dist,
                      hclustfun = hclust,
                      dendrogram = c("both","row", "column", "none"),
                      symm = FALSE,
                      scale = c("none","row", "column"),
                      na.rm = TRUE,
                      revC = identical(Colv,"Rowv"),
                      add.expr,
                      breaks,
                      symbreaks = max(x < 0, na.rm = TRUE) || scale != "none",
                      col = "heat.colors",
                      colsep,
                      rowsep,
                      sepcolor = "white",
                      sepwidth = c(0.05, 0.05),
                      cellnote,
                      notecex = 1,
                      notecol = "cyan",
                      na.color = par("bg"),
                      trace = c("none", "column","row", "both"),
                      tracecol = "cyan",
                      hline = median(breaks),
                      vline = median(breaks),
                      linecol = tracecol,
                      margins = c(5,5),
                      ColSideColors,
                      RowSideColors,
                      side.height.fraction=0.3,
                      cexRow = 0.2 + 1/log10(nr),
                      cexCol = 0.2 + 1/log10(nc),
                      labRow = NULL,
                      labCol = NULL,
                      key = TRUE,
                      keysize = 1.5,
                      density.info = c("none", "histogram", "density"),
                      denscol = tracecol,
                      symkey = max(x < 0, na.rm = TRUE) || symbreaks,
                      densadj = 0.25,
                      main = NULL,
                      xlab = NULL,
                      ylab = NULL,
                      lmat = NULL,
                      lhei = NULL,
                      lwid = NULL,
                      ColSideColorsSize = 1,
                      RowSideColorsSize = 1,
                      KeyValueName="Value",...){

  invalid <- function (x) {
    if (missing(x) || is.null(x) || length(x) == 0)
      return(TRUE)
    if (is.list(x))
      return(all(sapply(x, invalid)))
    else if (is.vector(x))
      return(all(is.na(x)))
    else return(FALSE)
  }

  x <- as.matrix(x)
  scale01 <- function(x, low = min(x), high = max(x)) {
    x <- (x - low)/(high - low)
    x
  }
  retval <- list()
  scale <- if (symm && missing(scale))
    "none"
  else match.arg(scale)
  dendrogram <- match.arg(dendrogram)
  trace <- match.arg(trace)
  density.info <- match.arg(density.info)
  if (length(col) == 1 && is.character(col))
    col <- get(col, mode = "function")
  if (!missing(breaks) && (scale != "none"))
    warning("Using scale=\"row\" or scale=\"column\" when breaks are",
            "specified can produce unpredictable results.", "Please consider using only one or the other.")
  if (is.null(Rowv) || is.na(Rowv))
    Rowv <- FALSE
  if (is.null(Colv) || is.na(Colv))
    Colv <- FALSE
  else if (Colv == "Rowv" && !isTRUE(Rowv))
    Colv <- FALSE
  if (length(di <- dim(x)) != 2 || !is.numeric(x))
    stop("`x' must be a numeric matrix")
  nr <- di[1]
  nc <- di[2]
  if (nr <= 1 || nc <= 1)
    stop("`x' must have at least 2 rows and 2 columns")
  if (!is.numeric(margins) || length(margins) != 2)
    stop("`margins' must be a numeric vector of length 2")
  if (missing(cellnote))
    cellnote <- matrix("", ncol = ncol(x), nrow = nrow(x))
  if (!inherits(Rowv, "dendrogram")) {
    if (((!isTRUE(Rowv)) || (is.null(Rowv))) && (dendrogram %in%
                                                 c("both", "row"))) {
      if (is.logical(Colv) && (Colv))
        dendrogram <- "column"
      else dedrogram <- "none"
      warning("Discrepancy: Rowv is FALSE, while dendrogram is `",
              dendrogram, "'. Omitting row dendogram.")
    }
  }
  if (!inherits(Colv, "dendrogram")) {
    if (((!isTRUE(Colv)) || (is.null(Colv))) && (dendrogram %in%
                                                 c("both", "column"))) {
      if (is.logical(Rowv) && (Rowv))
        dendrogram <- "row"
      else dendrogram <- "none"
      warning("Discrepancy: Colv is FALSE, while dendrogram is `",
              dendrogram, "'. Omitting column dendogram.")
    }
  }
  if (inherits(Rowv, "dendrogram")) {
    ddr <- Rowv
    rowInd <- order.dendrogram(ddr)
  }
  else if (is.integer(Rowv)) {
    hcr <- hclustfun(distfun(x))
    ddr <- as.dendrogram(hcr)
    ddr <- reorder(ddr, Rowv)
    rowInd <- order.dendrogram(ddr)
    if (nr != length(rowInd))
      stop("row dendrogram ordering gave index of wrong length")
  }
  else if (isTRUE(Rowv)) {
    Rowv <- rowMeans(x, na.rm = na.rm)
    hcr <- hclustfun(distfun(x))
    ddr <- as.dendrogram(hcr)
    ddr <- reorder(ddr, Rowv)
    rowInd <- order.dendrogram(ddr)
    if (nr != length(rowInd))
      stop("row dendrogram ordering gave index of wrong length")
  }
  else {
    rowInd <- nr:1
  }
  if (inherits(Colv, "dendrogram")) {
    ddc <- Colv
    colInd <- order.dendrogram(ddc)
  }
  else if (identical(Colv, "Rowv")) {
    if (nr != nc)
      stop("Colv = \"Rowv\" but nrow(x) != ncol(x)")
    if (exists("ddr")) {
      ddc <- ddr
      colInd <- order.dendrogram(ddc)
    }
    else colInd <- rowInd
  }
  else if (is.integer(Colv)) {
    hcc <- hclustfun(distfun(if (symm)
      x
      else t(x)))
    ddc <- as.dendrogram(hcc)
    ddc <- reorder(ddc, Colv)
    colInd <- order.dendrogram(ddc)
    if (nc != length(colInd))
      stop("column dendrogram ordering gave index of wrong length")
  }
  else if (isTRUE(Colv)) {
    Colv <- colMeans(x, na.rm = na.rm)
    hcc <- hclustfun(distfun(if (symm)
      x
      else t(x)))
    ddc <- as.dendrogram(hcc)
    ddc <- reorder(ddc, Colv)
    colInd <- order.dendrogram(ddc)
    if (nc != length(colInd))
      stop("column dendrogram ordering gave index of wrong length")
  }
  else {
    colInd <- 1:nc
  }
  retval$rowInd <- rowInd
  retval$colInd <- colInd
  retval$call <- match.call()
  x <- x[rowInd, colInd]
  x.unscaled <- x
  cellnote <- cellnote[rowInd, colInd]
  if (is.null(labRow))
    labRow <- if (is.null(rownames(x)))
      (1:nr)[rowInd]
  else rownames(x)
  else labRow <- labRow[rowInd]
  if (is.null(labCol))
    labCol <- if (is.null(colnames(x)))
      (1:nc)[colInd]
  else colnames(x)
  else labCol <- labCol[colInd]
  if (scale == "row") {
    retval$rowMeans <- rm <- rowMeans(x, na.rm = na.rm)
    x <- sweep(x, 1, rm)
    retval$rowSDs <- sx <- apply(x, 1, sd, na.rm = na.rm)
    x <- sweep(x, 1, sx, "/")
  }
  else if (scale == "column") {
    retval$colMeans <- rm <- colMeans(x, na.rm = na.rm)
    x <- sweep(x, 2, rm)
    retval$colSDs <- sx <- apply(x, 2, sd, na.rm = na.rm)
    x <- sweep(x, 2, sx, "/")
  }
  if (missing(breaks) || is.null(breaks) || length(breaks) < 1) {
    if (missing(col) || is.function(col))
      breaks <- 16
    else breaks <- length(col) + 1
  }
  if (length(breaks) == 1) {
    if (!symbreaks)
      breaks <- seq(min(x, na.rm = na.rm), max(x, na.rm = na.rm),
                    length = breaks)
    else {
      extreme <- max(abs(x), na.rm = TRUE)
      breaks <- seq(-extreme, extreme, length = breaks)
    }
  }
  nbr <- length(breaks)
  ncol <- length(breaks) - 1
  if (class(col) == "function")
    col <- col(ncol)
  min.breaks <- min(breaks)
  max.breaks <- max(breaks)
  x[x < min.breaks] <- min.breaks
  x[x > max.breaks] <- max.breaks
  if (missing(lhei) || is.null(lhei))
    lhei <- c(keysize, 4)
  if (missing(lwid) || is.null(lwid))
    lwid <- c(keysize, 4)
  if (missing(lmat) || is.null(lmat)) {
    lmat <- rbind(4:3, 2:1)

    if (!missing(ColSideColors)) {
      #if (!is.matrix(ColSideColors))
      #stop("'ColSideColors' must be a matrix")
      if (!is.character(ColSideColors) || nrow(ColSideColors) != nc)
        stop("'ColSideColors' must be a matrix of nrow(x) rows")
      lmat <- rbind(lmat[1, ] + 1, c(NA, 1), lmat[2, ] + 1)
      #lhei <- c(lhei[1], 0.2, lhei[2])
      lhei=c(lhei[1], side.height.fraction*ColSideColorsSize/2, lhei[2])
    }

    if (!missing(RowSideColors)) {
      #if (!is.matrix(RowSideColors))
      #stop("'RowSideColors' must be a matrix")
      if (!is.character(RowSideColors) || ncol(RowSideColors) != nr)
        stop("'RowSideColors' must be a matrix of ncol(x) columns")
      lmat <- cbind(lmat[, 1] + 1, c(rep(NA, nrow(lmat) - 1), 1), lmat[,2] + 1)
      #lwid <- c(lwid[1], 0.2, lwid[2])
      lwid <- c(lwid[1], side.height.fraction*RowSideColorsSize/2, lwid[2])
    }
    lmat[is.na(lmat)] <- 0
  }

  if (length(lhei) != nrow(lmat))
    stop("lhei must have length = nrow(lmat) = ", nrow(lmat))
  if (length(lwid) != ncol(lmat))
    stop("lwid must have length = ncol(lmat) =", ncol(lmat))
  op <- par(no.readonly = TRUE)
  on.exit(par(op))

  layout(lmat, widths = lwid, heights = lhei, respect = FALSE)

  if (!missing(RowSideColors)) {
    if (!is.matrix(RowSideColors)){
      par(mar = c(margins[1], 0, 0, 0.5))
      image(rbind(1:nr), col = RowSideColors[rowInd], axes = FALSE)
    } else {
      par(mar = c(margins[1], 0, 0, 0.5))
      rsc = t(RowSideColors[,rowInd, drop=F])
      rsc.colors = matrix()
      rsc.names = names(table(rsc))
      rsc.i = 1
      for (rsc.name in rsc.names) {
        rsc.colors[rsc.i] = rsc.name
        rsc[rsc == rsc.name] = rsc.i
        rsc.i = rsc.i + 1
      }
      rsc = matrix(as.numeric(rsc), nrow = dim(rsc)[1])
      image(t(rsc), col = as.vector(rsc.colors), axes = FALSE)
      if (length(rownames(RowSideColors)) > 0) {
        axis(1, 0:(dim(rsc)[2] - 1)/max(1,(dim(rsc)[2] - 1)), rownames(RowSideColors), las = 2, tick = FALSE)
      }
    }
  }

  if (!missing(ColSideColors)) {

    if (!is.matrix(ColSideColors)){
      par(mar = c(0.5, 0, 0, margins[2]))
      image(cbind(1:nc), col = ColSideColors[colInd], axes = FALSE)
    } else {
      par(mar = c(0.5, 0, 0, margins[2]))
      csc = ColSideColors[colInd, , drop=F]
      csc.colors = matrix()
      csc.names = names(table(csc))
      csc.i = 1
      for (csc.name in csc.names) {
        csc.colors[csc.i] = csc.name
        csc[csc == csc.name] = csc.i
        csc.i = csc.i + 1
      }
      csc = matrix(as.numeric(csc), nrow = dim(csc)[1])
      image(csc, col = as.vector(csc.colors), axes = FALSE)
      if (length(colnames(ColSideColors)) > 0) {
        axis(2, 0:(dim(csc)[2] - 1)/max(1,(dim(csc)[2] - 1)), colnames(ColSideColors), las = 2, tick = FALSE)
      }
    }
  }

  par(mar = c(margins[1], 0, 0, margins[2]))
  x <- t(x)
  cellnote <- t(cellnote)
  if (revC) {
    iy <- nr:1
    if (exists("ddr"))
      ddr <- rev(ddr)
    x <- x[, iy]
    cellnote <- cellnote[, iy]
  }
  else iy <- 1:nr
  image(1:nc, 1:nr, x, xlim = 0.5 + c(0, nc), ylim = 0.5 + c(0, nr), axes = FALSE, xlab = "", ylab = "", col = col, breaks = breaks, ...)
  retval$carpet <- x
  if (exists("ddr"))
    retval$rowDendrogram <- ddr
  if (exists("ddc"))
    retval$colDendrogram <- ddc
  retval$breaks <- breaks
  retval$col <- col
  if (!invalid(na.color) & any(is.na(x))) { # load library(gplots)
    mmat <- ifelse(is.na(x), 1, NA)
    image(1:nc, 1:nr, mmat, axes = FALSE, xlab = "", ylab = "",
          col = na.color, add = TRUE)
  }
  axis(1, 1:nc, labels = labCol, las = 2, line = -0.5, tick = 0,
       cex.axis = cexCol)
  if (!is.null(xlab))
    mtext(xlab, side = 1, line = margins[1] - 1.25)
  axis(4, iy, labels = labRow, las = 2, line = -0.5, tick = 0,
       cex.axis = cexRow)
  if (!is.null(ylab))
    mtext(ylab, side = 4, line = margins[2] - 1.25)
  if (!missing(add.expr))
    eval(substitute(add.expr))
  if (!missing(colsep))
    for (csep in colsep) rect(xleft = csep + 0.5, ybottom = rep(0, length(csep)), xright = csep + 0.5 + sepwidth[1], ytop = rep(ncol(x) + 1, csep), lty = 1, lwd = 1, col = sepcolor, border = sepcolor)
  if (!missing(rowsep))
    for (rsep in rowsep) rect(xleft = 0, ybottom = (ncol(x) + 1 - rsep) - 0.5, xright = nrow(x) + 1, ytop = (ncol(x) + 1 - rsep) - 0.5 - sepwidth[2], lty = 1, lwd = 1, col = sepcolor, border = sepcolor)
  min.scale <- min(breaks)
  max.scale <- max(breaks)
  x.scaled <- scale01(t(x), min.scale, max.scale)
  if (trace %in% c("both", "column")) {
    retval$vline <- vline
    vline.vals <- scale01(vline, min.scale, max.scale)
    for (i in colInd) {
      if (!is.null(vline)) {
        abline(v = i - 0.5 + vline.vals, col = linecol,
               lty = 2)
      }
      xv <- rep(i, nrow(x.scaled)) + x.scaled[, i] - 0.5
      xv <- c(xv[1], xv)
      yv <- 1:length(xv) - 0.5
      lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
    }
  }
  if (trace %in% c("both", "row")) {
    retval$hline <- hline
    hline.vals <- scale01(hline, min.scale, max.scale)
    for (i in rowInd) {
      if (!is.null(hline)) {
        abline(h = i + hline, col = linecol, lty = 2)
      }
      yv <- rep(i, ncol(x.scaled)) + x.scaled[i, ] - 0.5
      yv <- rev(c(yv[1], yv))
      xv <- length(yv):1 - 0.5
      lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
    }
  }
  if (!missing(cellnote))
    text(x = c(row(cellnote)), y = c(col(cellnote)), labels = c(cellnote),
         col = notecol, cex = notecex)
  par(mar = c(margins[1], 0, 0, 0))
  if (dendrogram %in% c("both", "row")) {
    plot(ddr, horiz = TRUE, axes = FALSE, yaxs = "i", leaflab = "none")
  }
  else plot.new()
  par(mar = c(0, 0, if (!is.null(main)) 5 else 0, margins[2]))
  if (dendrogram %in% c("both", "column")) {
    plot(ddc, axes = FALSE, xaxs = "i", leaflab = "none")
  }
  else plot.new()
  if (!is.null(main))
    title(main, cex.main = 1.5 * op[["cex.main"]])
  if (key) {
    par(mar = c(5, 4, 2, 1), cex = 0.75)
    tmpbreaks <- breaks
    if (symkey) {
      max.raw <- max(abs(c(x, breaks)), na.rm = TRUE)
      min.raw <- -max.raw
      tmpbreaks[1] <- -max(abs(x), na.rm = TRUE)
      tmpbreaks[length(tmpbreaks)] <- max(abs(x), na.rm = TRUE)
    }
    else {
      min.raw <- min(x, na.rm = TRUE)
      max.raw <- max(x, na.rm = TRUE)
    }

    z <- seq(min.raw, max.raw, length = length(col))
    image(z = matrix(z, ncol = 1), col = col, breaks = tmpbreaks,
          xaxt = "n", yaxt = "n")
    par(usr = c(0, 1, 0, 1))
    lv <- pretty(breaks)
    xv <- scale01(as.numeric(lv), min.raw, max.raw)
    axis(1, at = xv, labels = lv)
    if (scale == "row")
      mtext(side = 1, "Row Z-Score", line = 2)
    else if (scale == "column")
      mtext(side = 1, "Column Z-Score", line = 2)
    else mtext(side = 1, KeyValueName, line = 2)
    if (density.info == "density") {
      dens <- density(x, adjust = densadj, na.rm = TRUE)
      omit <- dens$x < min(breaks) | dens$x > max(breaks)
      dens$x <- dens$x[-omit]
      dens$y <- dens$y[-omit]
      dens$x <- scale01(dens$x, min.raw, max.raw)
      lines(dens$x, dens$y/max(dens$y) * 0.95, col = denscol,
            lwd = 1)
      axis(2, at = pretty(dens$y)/max(dens$y) * 0.95, pretty(dens$y))
      title("Color Key\nand Density Plot")
      par(cex = 0.5)
      mtext(side = 2, "Density", line = 2)
    }
    else if (density.info == "histogram") {
      h <- hist(x, plot = FALSE, breaks = breaks)
      hx <- scale01(breaks, min.raw, max.raw)
      hy <- c(h$counts, h$counts[length(h$counts)])
      lines(hx, hy/max(hy) * 0.95, lwd = 1, type = "s",
            col = denscol)
      axis(2, at = pretty(hy)/max(hy) * 0.95, pretty(hy))
      title("Color Key\nand Histogram")
      par(cex = 0.5)
      mtext(side = 2, "Count", line = 2)
    }
    else title("Color Key")
  }
  else plot.new()
  retval$colorTable <- data.frame(low = retval$breaks[-length(retval$breaks)],
                                  high = retval$breaks[-1], color = retval$col)
  invisible(retval)
}
CAMO-R/Rpackage documentation built on July 20, 2023, 6:04 a.m.