R/plottingFuncs.R

Defines functions plotTreeCategorical plotTreeHighlightBranches nvmaster plotRers returnRersAsTreesAll returnRersAsNewickStrings returnRersAsTree treePlotGG treePlotRers treePlotNew treePlot

Documented in plotRers plotTreeCategorical plotTreeHighlightBranches returnRersAsNewickStrings returnRersAsTree returnRersAsTreesAll treePlotNew treePlotRers

#' RERconverge
#'
#'
#' @docType package
#' @author
#' @import Rcpp
#' @importFrom Rcpp evalCpp
#' @useDynLib RERconverge
#' @name RERconverge
#comment everything out

require(dplyr)
require(ggplot2)
require(ggtree)

if(F){
multiplot = function(..., plotlist=NULL, file, cols=1, layout=NULL, widths=NULL, heights=NULL, flip=F) {
  require(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  if(flip){
    layout=t(layout)
  }
  if (numPlots==1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()



    if(!is.null(widths)){
      widths=widths
    }
    else{
      widths = unit(rep_len(1, ncol(layout)), "null")
    }
    if(!is.null(heights)){
      heights=heights
    }
    else{
      heights = unit(rep_len(1, nrow(layout)), "null")
    }
    show(widths)
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout), widths = widths, heights = heights )))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

plotAllHists=function(resAll){
  plotlist=list()
  count=0;
  for(i in 1:length(resAll$res)){
    count=count+1
    p=resAll$res[[i]]$P
    s= sign(resAll$res[[i]]$Rho)
    ii=which(s!=0)
    p=p[ii]
    s=s[ii]
    plotlist[[count]]=plotHists(p, as.factor(s))+ggtitle(names(resAll$res)[i])
  }
  return(plotlist)

}

plotAllHistsWithPerm=function(resAll, resAllSim, resAllPerm=NULL, updown=T){
  plotlist=list()
  count=1;
  for(i in 1:length(resAll$res)){
    #  count=count+1
    realp=resAll$res[[i]]$P
    reals= sign(resAll$res[[i]][[1]])
    ii=which(reals!=0)
    realp=realp[ii]
    reals=reals[ii]


    simp=resAllSim[[i]]
    sims=sign(simp)
    simp=abs(simp)
    if(updown){
      show("UpDown")
      if(!is.null(resAllPerm)){
        permp=resAllPerm[[i]]
        perms=sign(permp)
        permp=abs(permp)

        vals=c(tmp1<-realp[reals>0], tmp2<-permp[perms>0], tmp3<-simp[sims>0])
        vals.grp=ordered(rep(c("Real", "Permuted","Simulated"), c(length(tmp1), length(tmp2),length(tmp3)), levels=c("Real", "Simulated","Permuted")))
      }
      else{
        vals=c(tmp1<-realp[reals>0], tmp3<-simp[sims>0])
        vals.grp=rep(c("Real","Simulated"), c(length(tmp1), length(tmp3)))
      }

      plotlist[[count]]=plotHists(vals, as.factor(vals.grp))+ggtitle(paste(names(resAll$res)[i], "Positive"))+theme_bw()+ theme(legend.position="none")
      count=count+1

      if(!is.null(resAllPerm)){
        vals=c(tmp1<-realp[reals<0], tmp2<-permp[perms<0], tmp3<-simp[sims<0])
        vals.grp=ordered(rep(c("Real", "Permuted","Simulated"), c(length(tmp1), length(tmp2),length(tmp3)), levels=c("Real", "Simulated","Permuted")))
      }
      else{
        vals=c(tmp1<-realp[reals<0], tmp3<-simp[sims<0])
        vals.grp=rep(c("Real","Simulated"), c(length(tmp1), length(tmp3)))
      }

      plotlist[[count]]=plotHists(vals, as.factor(vals.grp))+ggtitle(paste(names(resAll$res)[i], "Negative"))+theme_bw()
      count=count+1

    }

    else{
      show("All")
      if(!is.null(resAllPerm)){
        permp=resAllPerm[[i]]
        perms=sign(permp)
        permp=abs(permp)

        vals=c(tmp1<-realp[], tmp2<-permp[], tmp3<-simp[])
        vals.grp=ordered(rep(c("Real", "Permuted","Simulated"), c(length(tmp1), length(tmp2),length(tmp3)), levels=c("Real", "Simulated","Permuted")))
      }
      else{
        vals=c(tmp1<-realp[], tmp3<-simp[])
        vals.grp=rep(c("Real","Simulated"), c(length(tmp1), length(tmp3)))
      }
      show(count)
      plotlist[[count]]=plotHists(vals, as.factor(vals.grp))+ggtitle(paste(names(resAll$res)[i], "All"))+theme_bw()
      count=count+1
      show(count)
    }
  }
  return(plotlist)

}


plotHists=function(data, grp, ...){
  ii=which(is.na(data))
  if(length(ii)>0){
    data=data[-ii]
    grp=grp[-ii]
  }
  x=data.frame(data=data)
  x$grp=grp
  ggplot(x, aes(data, fill=grp))+geom_histogram(aes(y=..density..),position="dodge",  alpha=1,right=T, origin=0, ...)
}



plotContinuousChar=function(gene, treeObj, tip.vals, tip.vals.ref=NULL, rank=F, nlevels=8, type="c", col=NULL, residfun=residLO, useDiff=T){
  #get the tree projection
  tip.vals=tip.vals[!is.na(tip.vals)]
  op <- par(no.readonly = TRUE)
  on.exit(par(op))
  stopifnot(gene %in% names(treeObj$trees))
  tree=treeObj$trees[[gene]]
  stopifnot(!is.null(names(tip.vals)))
  both=intersect(tree$tip.label, names(tip.vals))

  stopifnot(length(both)>10)


  torm=setdiff(treeObj$masterTree$tip.label, both)
  tree=unroot(pruneTree(tree, both))
  allreport=treeObj$report[,both]
  ss=rowSums(allreport)
  iiboth=which(ss==length(both))


  ee=edgeIndexRelativeMaster(tree, treeObj$masterTree)
  ii= match(namePaths(ee,T), colnames(treeObj$paths))

  allbranch=treeObj$paths[iiboth,ii]

  allbranch=scaleMat_c(allbranch)
  show(sum(is.na(allbranch)))
  nv=projection(t(allbranch), method="AVE", returnNV = T)

  proj=residfun(tree$edge.length, nv)
  treeChar=edgeVars(tree, tip.vals, useDiff=useDiff)


  nn=nameEdges(tree)
  nn[nn!=""]=speciesNames[nn[nn!=""], ]
  par(mfrow=c(2,2), mai=rep(0.7,4))

  plotWtext(sqrt(nv), sqrt(tree$edge.length), xlab="char", ylab="Gene branch length", labels = nn)
  if(!is.null(tip.vals.ref)){
    treeCharRef=edgeVars(tree, tip.vals.ref, useDiff=useDiff)
    proj=resid(rbind(proj), model.matrix(~1+treeCharRef$edge.length))[1,]
  }
  plotWtext(treeChar$edge.length, proj, xlab="char", ylab="Gene branch length", labels = nn)
  stat=cor.test(treeChar$edge.length, proj, method="s")
  mtext(paste0("r=", round(stat$estimate,2), ";  p-value=", format.pval(stat$p.value)), side = 3, line=1)



  tree$tip.label=speciesNames[tree$tip,1]
  col=colorpanel(nlevels, "blue", "red", "yellow3")


  vals=treeChar$edge.length
  if(rank){
    vals=rank(vals)
  }
  plot.phylo(tree, use.edge.length = F,type=type,edge.color=col[cut(vals, nlevels)], edge.width=3.5, lab4ut="axial", cex=0.6)
  title("character")

  vals=proj
  if(rank){
    vals=rank(vals)
  }
  plot.phylo(tree, use.edge.length = F,type=type,edge.color=col[cut(vals, nlevels)], edge.width=3.5, lab4ut="axial", cex=0.6)
  title("projection")



}

treePlot=function(tree, vals=NULL,rank=F, nlevels=5, type="c", col=NULL){
  op <- par(no.readonly = TRUE)
  on.exit(par(op))
  if(is.null(vals)){
    vals=tree$edge.length
  }
  vals=as.numeric(vals)
  if(rank){
    vals=rank(vals)
  }
  layout(matrix(c(1,2), ncol=1),heights=c(10,2))
  if(is.null(col)){
    col=colorpanel(nlevels, "blue", "red")
  }
  tree$tip.label=speciesNames[tree$tip,1]
  plot.phylo(tree, use.edge.length = F,type=type,edge.color=col[cut(vals, nlevels)], edge.width=3.5, lab4ut="axial", cex=0.6)


  min.raw <- min(vals, na.rm = TRUE)
  max.raw <- max(vals, na.rm = TRUE)
  z <- seq(min.raw, max.raw, length = length(col))

  par(mai=c(1,0.5,0,0.5))
  image(z = matrix(z, ncol = 1), col = col, breaks = seq(min.raw, max.raw, length.out=nlevels+1),
        xaxt = "n", yaxt = "n")
  #par(usr = c(0, 1, 0, 1))
  lv <- pretty(seq(min.raw, max.raw, length.out=nlevels+1))
  scale01 <- function(x, low = min(x), high = max(x)) {
    x <- (x - low)/(high - low)
    x
  }
  xv <- scale01(as.numeric(lv), min.raw, max.raw)
  axis(1, at = xv, labels = lv)
}

}
treePlot=function(tree, vals=NULL,rank=F, nlevels=5, type="c", col=NULL){
  op <- par(no.readonly = TRUE)
  on.exit(par(op))
  if(is.null(vals)){
    vals=tree$edge.length
  }
  vals=as.numeric(vals)
  if(rank){
    vals=rank(vals)
  }
  layout(matrix(c(1,2), ncol=1),heights=c(10,2))
  if(is.null(col)){
    col=colorpanel(nlevels, "blue", "red")
  }
  #tree$tip.label=speciesNames[tree$tip,1]
  plot.phylo(tree, use.edge.length = F,type=type,edge.color=col[cut(vals, nlevels)], edge.width=3.5, lab4ut="axial", cex=0.6)


  min.raw <- min(vals, na.rm = TRUE)
  max.raw <- max(vals, na.rm = TRUE)
  z <- seq(min.raw, max.raw, length = length(col))

  par(mai=c(1,0.5,0,0.5))
  image(z = matrix(z, ncol = 1), col = col, breaks = seq(min.raw, max.raw, length.out=nlevels+1),
        xaxt = "n", yaxt = "n")
  #par(usr = c(0, 1, 0, 1))
  lv <- pretty(seq(min.raw, max.raw, length.out=nlevels+1))
  scale01 <- function(x, low = min(x), high = max(x)) {
    x <- (x - low)/(high - low)
    x
  }
  xv <- scale01(as.numeric(lv), min.raw, max.raw)
  axis(1, at = xv, labels = lv)
}

#Version of tree plotting function with more options for colors and tip labels
#Used for plotting trees for PON1 manuscript
#Issue with invalid graphics state after running: Solve by only plotting legend if
#margins are not too large (how to test?)

#' Plot `tree` with branch labels colored in a heatmap based on values in `vals`
#'
#' @param tree. A phylo object, used for topology
#' @param vals. Values to use for heatmap to color branches, in same order as edges of tree
#' @param rank. Whether to plot ranks instead of values
#' @param nlevels. How many colors to use in the heatmap
#' @param type. Type for plot.phylo
#' @param col. Vector of user-defined colors
#' @param maintitle. Main title label
#' @param useedge. Whether to use edge lengths in `tree` for plotting
#' @param doreroot. Whether to re-root the tree before  plotting
#' @param rerootby. If re-rooting, what to use to root the tree (verify by checking against unrooted plot)
#' @param useSpecies. A vector of species to include in the plot (verify by checking against full set)
#' @param species.names. Data fram for converting names in `tree` to names to be plotted
#' (row names should be tip labels of tree, and first column should contain the
#' corresponding desired tip labels)
#' @param speclist1. A vector of tip labels to highlight in bold, blue text
#' @param speclist2. A vector of tip labels to which to add an asterisk
#' @param aligntip. Whether to align tip labels (default FALSE)
#' @param colpan1. Color for lowest value in heatmap (default blue)
#' @param colpan2. Color for highest value in heatmap (default rose)
#' @param colpanmid. Color for middle value in heatmap (default gray)
#' @param plotspecies. A vector of tip labels to display on the tree (the remainder will be masked,
#' but the corresponding tips will be plotted on the tree)
#' @param edgetype. Vector of line type for edges.
#' @param textsize. cex value for tip labels (default 0.6)
#' @param colbarlab. Label for the color bar legend
#' @param splist2sym. A value within species names to display as a symbol
#' @param dolegend. Whether to display the heatmap legend.
#' @param nacol. Color to display for any edges with length NA
#' @param figwid. Adjust x limits of plot.phylo by 1/figwid. May be related to figure width
#' (requires some optimization)
#' @param .... further arguments to be passed to `plot` or to `plot.phylo`
#' @return Plots a cladogram of `tree` with branch colors determined by `vals`
#' @export

treePlotNew=function(tree, vals=NULL, rank=F, nlevels=9, type="c", col=NULL,
                     maintitle= NULL, useedge=F, doreroot=F, rerootby=NULL, useSpecies=NULL,
                     species.names=NULL, speclist1=NULL, speclist2=NULL, aligntip=F,
                    colpan1=rgb(0,119,187,maxColorValue=255),
                    colpan2=rgb(204,51,17,maxColorValue=255),
                    colpanmid=rgb(187,187,187,maxColorValue=255),plotspecies=NULL,
                    edgetype=NULL,textsize=0.6,colbarlab="",splist2sym="psi",
                    dolegend=T,nacol=rgb(0,0,0),figwid=10,...){
 #bold speclist1, star speclist2
 #reroot before plotting to match up vals
  require(gplots)
  if(is.null(vals)){
    vals=tree$edge.length
  }
  vals=as.numeric(vals)
  if (rank) {
    vals = rank(vals)
  }
  faketree=tree
  faketree$edge.length=vals
  #layout(matrix(c(1,2), ncol=1),heights=c(10,2))
  #layout(matrix(c(1,2), ncol=1),heights=c(7,1))
  if(is.null(col)){
    if (is.null(colpanmid)) {
      col=colorpanel(nlevels, colpan1, colpan2)
    } else {
      col=colorpanel(nlevels, colpan1, colpanmid, colpan2)
    }
  }
  if (!is.null(useSpecies)) {
    message("Pruning may influence color plotting; greater accuracy with branches displayed as NA")
    tree=pruneTree(tree,useSpecies)
    faketree=pruneTree(tree,useSpecies)
    vals=faketree$edge.length
  }
  if (doreroot) {
    message("Rooting may influence color plotting; greater accuracy with original topology")
    rerootby=intersect(rerootby,tree$tip.label)
    if (length(rerootby) > 0) {
        tree=root(tree,rerootby,resolve.root=T)
        #want to distribute vals along branches instead
        faketree=root(faketree,rerootby,resolve.root=T)
        vals=faketree$edge.length
    } else {
      print("No species in rerootby in tree! Leaving tree unrooted.")
    }
  }
  if (!is.null(species.names)) {
    for(s in 1:length(tree$tip.label)) {
      if (tree$tip.label[s] %in% row.names(species.names)) {
        tree$tip.label[s] <- species.names[,1][which(row.names(species.names)==tree$tip.label[s])]
        }
    }
  }
  pfonts <- c(rep(1,length(tree$tip.label)))
  tipcol <- c(rep("black",length(tree$tip.label)))
  if (!is.null(speclist1)) {
    pfonts[which(tree$tip.label %in% speclist1)] <- 2
    tipcol[which(tree$tip.label %in% speclist1)] <- "blue"
  }
  if (! is.null(plotspecies)) { #only plot certain species names by making others white
    tipcol[which(tree$tip.label %in% plotspecies == FALSE)] <- "white"
  }
  if (!is.null(speclist2)) {
    toadd <- which(tree$tip.label %in% speclist2)
    tree$tip.label[toadd] <- str_replace_all(tree$tip.label[toadd]," ","~")
    tree$tip.label[toadd] <- as.expression(parse(text=paste(tree$tip.label[toadd],"~",splist2sym,sep="")))
  }
  if (is.null(edgetype)) {
    edgetype = c(rep(1,length(vals))) #does not play well with rerootby
  }
  calcoff <- quantile(tree$edge.length[tree$edge.length>0],0.25,na.rm=T)
  if (min(vals,na.rm=T)>=0) {
    forbreaks = quantile(vals, probs = seq(0,1, length.out = nlevels+1), na.rm=T)
  } else {
    #separately estimate quantiles below and above 0
    negvals = vals[vals < 0]
    posvals = vals[vals >= 0]
    negbreaks = quantile(negvals, probs = seq(0,1, length.out = nlevels+1),
                         na.rm=T)[seq(1,nlevels+1,2)]
    posbreaks = quantile(posvals, probs = seq(0,1, length.out = nlevels+1),
                         na.rm=T)[seq(nlevels %% 2 + 1,nlevels+1,2)]
    forbreaks = unique(c(negbreaks,posbreaks))
  }
  eccalc = col[cut(vals, breaks = forbreaks, include.lowest = T, right = T)]
  edgetype[which(is.na(eccalc))] = 3 #set na branches to dashed
  eccalc[which(is.na(eccalc))] = nacol #set na branches to some other color (black?)

  #Plotting horizontally messes up the margins, so re-set x limits *before* setting layout
  #dev.new(width=10,height=9)
  pdf()
  dev.control('enable')
  layout(matrix(c(1,2), nrow=1),widths=c(5,1)) #switching to horizontal
  par(mar=c(2,1,0,0.2)+0.1)
  par(omi=c(0,0,0,0.0001))
  forx = plot.phylo(tree, use.edge.length = useedge,type=type,
                       edge.width=4, edge.lty=edgetype,lab4ut="axial", cex=textsize,
                       align.tip.label=aligntip,font=pfonts,label.offset=calcoff,
                       no.margin=T,plot=F,...)
  #print(forx$x.lim)
  dev.off()
  #dev.new(width=10,height=9)
  layout(matrix(c(1,2), nrow=1),widths=c(5,1)) #switching to horizontal
  par(mar=c(2,1,0,0.2)+0.1)
  #oldpar = par()
  #par(omi=c(0,0,0,0))
  par(omi=c(0,0,0,0.0001))
  #plotobj = plot.phylo(tree, use.edge.length = useedge,type=type,edge.color=col[cut(vals, nlevels)], edge.width=4, edge.lty=edgetype,lab4ut="axial", cex=textsize, align.tip.label=aligntip,font=pfonts,label.offset=calcoff,
  #tip.color=tipcol,no.margin=T,plot=T, main = maintitle)

  #Plotting horizontally messes up the margins, so re-set x limits
  plotobj = plot.phylo(tree, use.edge.length = useedge,type=type,
                       edge.color=eccalc,
                       edge.width=4, edge.lty=edgetype,lab4ut="axial", cex=textsize,
                       align.tip.label=aligntip,font=pfonts,label.offset=calcoff,
                       tip.color=tipcol,no.margin=T,plot=T, x.lim=forx$x.lim/figwid,main = maintitle,...)
  #par(mar=oldpar)
  if (dolegend) {
    min.raw <- min(vals, na.rm = TRUE)
    max.raw <- max(vals, na.rm = TRUE)
    z <- seq(min.raw, max.raw, length = length(col))
    #z <- quantile(vals, probs = seq(0,1,length = length(col)))
    par(mai=c(0.25,0.01,0.25,0.9))
    #Optimal margins depend upon graphics window
    #Try to exit if the plot cannot be produced, so that the device does not remain open
    #image(z = matrix(z, ncol = 1), col = col, breaks = seq(min.raw, max.raw, length.out=nlevels+1),
    #      xaxt = "n", yaxt = "n")
    #image(z = matrix(z, ncol = 1), col = col, breaks = quantile(vals, probs = seq(0, 1, length.out=nlevels+1)),
    #      xaxt = "n", yaxt = "n")
    #par(usr = c(0, 1, 0, 1))
    lv <- pretty(seq(min.raw, max.raw, length.out=nlevels+1))
    lv1 <- seq(min.raw, max.raw, length.out=nlevels+1)
    #print(lv)
    lv2 <- quantile(vals, probs = seq(0,1, length.out = nlevels+1), na.rm = T)
    #print(lv2)
    scale01 <- function(x, low = min(x), high = max(x)) {
      x <- (x - low)/(high - low)
      x
    }
    xv <- scale01(as.numeric(lv), min.raw, max.raw)
    xv1 <- scale01(as.numeric(lv1), min.raw, max.raw)
    image(z = matrix(z, nrow = 1), col = col, breaks = seq(min.raw, max.raw, length.out=nlevels+1),
          xaxt = "n", yaxt = "n")
    xvadj = 1/(2*nlevels)
    #axis(1, at = round(xv1,3), labels = round(lv2,3), cex.axis=0.8)
    #axis(4, at = seq(0-xvadj,1+xvadj,length.out=(nlevels+1)), labels = round(lv2,3), cex.axis=0.8,
    #     las=1)
    axis(4, at = seq(0-xvadj,1+xvadj,length.out=(nlevels+1)), labels = round(forbreaks,3),
         cex.axis=0.8, las=1)
    #axis(1, at = xv/2, labels = lv, cex.axis=0.8)
    mtext(colbarlab,at=0.5)
  }
  return(plotobj)
}

#' Plot a cladogram with RERs shown as either labels (type="label") or a color
#' heatmap along the branches (type="color")
#' Wraps around \code{\link{returnRersAsTree}} or \code{\link{treePlotNew}}, respectively
#'
#' @param treesObj. A treesObj created by \code{\link{readTrees}}
#' @param rermat. A residual matrix, output of the getAllResiduals() function
#' @param index. A character denoting the name of gene, or a numeric value
#' corresponding to the gene's row index in the residuals matrix
#' @param type. Whether to display RERs as branch labels ('label') or a heatmap ('color')
#' @param phenv. A phenotype vector returned by \code{\link{tree2Paths}} or \code{\link{foreground2Paths}}
#' @param .... Additional parameters to be passed to \code{\link{returnRersAsTree}}
#' or \code{\link{treePlotNew}}
#' #' @param figwid. Adjust x limits of plot.phylo by 1/figwid. May be related to figure width
#' (requires some optimization)
#' @return Plots a cladogram of the master tree with RERs displayed as branch labels or colors
#' @export

treePlotRers <- function(treesObj, rermat=NULL, index=NULL, type=c("label","color"),
                         phenv=NULL, figwid=10, ...) {
  type = match.arg(type)
  if (type=="label") {
    #use returnRersAsTree with plot = TRUE
    tmpout = returnRersAsTree(treesObj, rermat, index, phenv, plot = T, ...)
  }
  if (type=="color") {
    #use treePlotNew with edges colored by RER
    rerstoplot = returnRersAsTree(treesObj, rermat, index, phenv, plot = F)
    tmpout = treePlotNew(treesObj$trees[[index]], rerstoplot$edge.length,
                         colbarlab="RER", figwid=figwid, ...)
  }
  #return(tmpout)
}

#Plotting function using ggtree, with branch colors (currently) from edge lengths
#Demonstrates how to map edge order in R object to edge order in ggtree
treePlotGG = function(traittree, tiplabels = FALSE, title=NULL) {
  #use ggtree to make a pretty circular tree with branch lengths colored
  #currently bases colors on edge lengths in trait tree
  require(ggtree)
  forcol = c(rep("black",length(traittree$edge.length)))
  forcol[which(traittree$edge.length == -1)] = "turquoise"
  forcol[which(traittree$edge.length == 1)] = "red"
  forcol[which(is.na(traittree$edge.length))] = "gray"
  forcol = c(forcol,"goldenrod") #check to make sure the extra color is not included

  #Colors appear to be assigned in the following order:
  #1) terminal branches (in order of tip labels)
  #2) internal branches (by number of child node)
  #This means there may be a number skipped, and that they need to be mapped to edges somehow.
  #Make a vector of which color index corresponds to which edge index
  forcolgg = c(rep("black",max(traittree$edge)))
  for (g in c(1:length(forcolgg))) {
    #get the color based on the original forcol
    wgg = which(traittree$edge[,2] == g)
    if (length(wgg) > 0) {
      forcolgg[g] = forcol[wgg]
    }
  }

  #Title plots using ggtitle (optionally)
  treeplot = ggtree(traittree,layout='circular',branch.length='none',color=forcolgg)
  if (!is.null(title)) {
    treeplot = treeplot + ggtitle(title)
  }
  if (tiplabels) {
    treeplot = treeplot + geom_tiplab(aes(angle=angle),size=1.5)
  }
  return(treeplot)
}

#' Produce a gene tree with branch lengths representing RERs and (optionally)
#' display these RERs as branch labels
#'
#' @param treesObj. A treesObj created by \code{\link{readTrees}}
#' @param rermat. A residual matrix, output of the getAllResiduals() function
#' @param index. A character denoting the name of gene, or a numeric value corresponding to the gene's row index in the residuals matrix
#' @param phenv. A phenotype vector returned by \code{\link{tree2Paths}} or \code{\link{foreground2Paths}}
#' @param rer.cex. Numeric expansion for RER labels
#' @param tip.cex. Numeric expansion for tip labels
#' @param nalab. Label given to any NA RERs
#' @param plot. Whether to produce a plot displaying the RERs on the gene tree
#' @return An object of class "phylo" with edge lengths representing RERs for the given gene
#' @return If plot = TRUE, also displays a plot of the gene tree with edges labeled with RERs
#' @export

returnRersAsTree <- function(treesObj, rermat, index, phenv = NULL, rer.cex = 0.7,
                             tip.cex = 0.7, nalab = 'NA', plot = T){
  trgene <- treesObj$trees[[index]]
  trgene$edge.length <- rep(2,nrow(trgene$edge))
  ee=edgeIndexRelativeMaster(trgene, treesObj$masterTree)
  ii= treesObj$matIndex[ee[, c(2,1)]]
  rertree=rermat[index,ii]
  rertree[is.nan(rertree)]=NA #replace NaNs from C functions
  if (plot) {
    par(mar = c(1,1,1,0))
    edgcols <- rep('black', nrow(trgene$edge))
    edgwds <- rep(1, nrow(trgene$edge))
    if(!is.null(phenv)){
      edgcols <- rep('black', nrow(trgene$edge))
      edgwds <- rep(1, nrow(trgene$edge))
      edgcols[phenv[ii]==1] <- 'red'
      edgwds[phenv[ii]==1] <- 2
    }
    plot.phylo(trgene, font = 2, edge.color = edgcols, edge.width = edgwds, cex = tip.cex)
    rerlab <- round(rertree,3)
    rerlab[is.na(rerlab)] <- nalab
    edgelabels(rerlab, bg = NULL, adj = c(0.5,0.9), col = edgcols, frame = 'none',cex = rer.cex, font =2)
  }
  trgene$edge.length <- rertree
  return(trgene)
}

#' Produce a vector of newick strings representing gene trees where the edge lengths
#' correspond to RERs
#'
#' @param treesObj. A treesObj created by \code{\link{readTrees}}
#' @param rermat. A residual matrix, output of the getAllResiduals() function
#' @return A named character vector of newick strings, one per gene, representing RERs as edge lengths'
#' @export
#'
returnRersAsNewickStrings <- function(treesObj, rermat){
  rerNwkstrings <- sapply(names(treesObj$trees), function(index){
    trgene <- treesObj$trees[[index]]
    ee=edgeIndexRelativeMaster(trgene, treesObj$masterTree)
    ii= treesObj$matIndex[ee[, c(2,1)]]
    rertree=rermat[index,ii]
    rertree[is.nan(rertree)]=NA
    trgene$edge.length <- rertree
    write.tree(trgene)
  })
}

#' Produce a multiPhylo object of all gene trees with branch lengths representing RERs
#' for each gene
#' @param treesObj. A treesObj created by \code{\link{readTrees}}
#' @param rermat. A residual matrix, output of the getAllResiduals() function
#' @return An object of class "multiPhylo" of named gene trees with edge lengths
#' representing RERs for the given gene
#' @export

returnRersAsTreesAll <- function(treesObj, rermat){
  allrers = lapply(names(treesObj$trees),returnRersAsTree,treesObj=treesObj,
                   rermat=rermat,plot=F)
  names(allrers)=names(treesObj$trees)
  class(allrers)<-"multiPhylo"
  return(allrers)
}

#' Plot the residuals reflecting the relative evolutionary rates (RERs) of a gene across
#' species present in the gene tree
#' @param rermat. A residual matrix, output of the getAllResiduals() function
#' @param index. A character denoting the name of gene, or a numeric value corresponding to the gene's row index in the residuals matrix
#' @param phenv. A phenotype vector returned by \code{\link{tree2Paths}} or \code{\link{foreground2Paths}}
#' @return A plot of the RERs with foreground species labelled in red, and the rest in blue
#' @export

plotRers <- function(rermat=NULL, index= NULL, phenv = NULL, rers= NULL, method = 'k', xlims = NULL, plot = 1, xextend = 0.2, sortrers = F){
  # check if data is categorical
  if(!is.null(phenv) && length(unique(phenv[!is.na(phenv)])) > 2) {
    categorical = TRUE
    if(method != "aov") {
      method = "kw"
    }
  } else {
    categorical = FALSE
  }

  if(is.null(rers)){
      e1 = rermat[index,][!is.na(rermat[index,])]
      colids = !is.na(rermat[index,])
      e1plot <- e1
      #print(e1plot)
      if(exists('speciesNames')){
          names(e1plot) <- speciesNames[names(e1),]
      }
      if(is.numeric(index)){
         gen = rownames(rermat)[index]
      }else{
         gen = index
      }
     }else{
      e1plot = rers
      gen = 'rates'
     }
     names(e1plot)[is.na(names(e1plot))]=""
     if(!is.null(phenv)){
      phenvid = phenv[colids]

      if(categorical) {
        fgdcor = getAllCor(rermat[index,,drop=F],phenv, method = method)[[1]]
      } else {
        fgdcor = getAllCor(rermat[index,,drop=F],phenv, method = method) # get cors (to get rho and p)
      }

      plottitle = paste0(gen, ': rho = ',round(fgdcor$Rho,4),', p = ',round(fgdcor$P,4))
      #fgd = setdiff(names(e1plot)[phenvid == 1],"")

      # make color palette
      if(categorical){
        n = length(unique(phenvid))
        if(n > length(palette())) {
          pal = colorRampPalette(palette())(n)
        } else{
          pal = palette()[1:n]
        }
      }

      if(categorical) {
        df <- data.frame(species = names(e1plot), rer = e1plot, stringsAsFactors=FALSE)  %>%
          mutate(mole = as.factor(phenvid))
      } else {
        df <- data.frame(species = names(e1plot), rer = e1plot, stringsAsFactors=FALSE)  %>%
          mutate(mole = as.factor(ifelse(phenvid > 0,2,1))) # returns vector same length as phenv, if > 0 it is 2, otherwise it is 1, then converts these to factors
      }

     }else{
      plottitle = gen
      #fgd = NULL
      df <- data.frame(species = names(e1plot), rer = e1plot, stringsAsFactors=FALSE) %>%
          mutate(mole = as.factor(ifelse(0,2,1)))
     }
     #print(plottitle)
     if(sortrers){
      df = filter(df, species!="") %>%
          arrange(desc(rer))
     }
     #print(df)
     #df <- data.frame(species = names(e1plot), rer = e1plot, stringsAsFactors=FALSE) %>%
     #     mutate(mole = as.factor(ifelse(names(e1plot) %in% fgd,2,1)))
     #ll=c(min(df$rer)*1.1, max(df$rer)+xextend)
     if(is.null(xlims)){
          ll=c(min(df$rer)*1.1, max(df$rer)+xextend)
     }else{
          ll=xlims
     }
     # create the plot
     if(categorical) {
       g  <- ggplot(df, aes(x = rer, y=factor(species, levels = unique(ifelse(rep(sortrers, nrow(df)), species[order(rer)], sort(unique(species)))) ), col=mole, label=species)) + scale_size_manual(values=c(1,1,1,1))+ geom_point(aes(size=mole))+
         scale_color_manual(values = pal) +
         scale_x_continuous(limits=ll)+
         # scale_x_continuous(expand = c(.1,.1))+
         # geom_text(hjust=1, size=5)+
         geom_text(hjust=1, size=2)+
         ylab("Branches")+
         xlab("relative rate")+
         ggtitle(plottitle)+
         geom_vline(xintercept=0, linetype="dotted")+
         theme(axis.ticks.y=element_blank(),axis.text.y=element_blank(),legend.position="none",
               panel.background = element_blank(),
               axis.text=element_text(size=18,face='bold',colour = 'black'),
               axis.title=element_text(size=24,face="bold"),
               plot.title= element_text(size = 24, face = "bold"))+
         theme(axis.line = element_line(colour = 'black',size = 1))+
         theme(axis.line.y = element_blank())
     } else {
       g  <- ggplot(df, aes(x = rer, y=factor(species, levels = unique(ifelse(rep(sortrers, nrow(df)), species[order(rer)], sort(unique(species)))) ), col=mole, label=species)) + scale_size_manual(values=c(1,1,1,1))+ geom_point(aes(size=mole))+
         scale_color_manual(values = c("deepskyblue3", "brown1"))+
         scale_x_continuous(limits=ll)+
         # scale_x_continuous(expand = c(.1,.1))+
         # geom_text(hjust=1, size=5)+
         geom_text(hjust=1, size=2)+
         ylab("Branches")+
         xlab("relative rate")+
         ggtitle(plottitle)+
         geom_vline(xintercept=0, linetype="dotted")+
         theme(axis.ticks.y=element_blank(),axis.text.y=element_blank(),legend.position="none",
               panel.background = element_blank(),
               axis.text=element_text(size=18,face='bold',colour = 'black'),
               axis.title=element_text(size=24,face="bold"),
               plot.title= element_text(size = 24, face = "bold"))+
         theme(axis.line = element_line(colour = 'black',size = 1))+
         theme(axis.line.y = element_blank())
     }
     if(plot){
      print(g)
     }
     else{
      g
     }
}


nvmaster <- function(treesObj, useSpecies = NULL, fgd = NULL, plot = 0){
     #treesObj = simtrees
     #useSpecies = NULL
     #fgd = paste0('species',fgdbranchnums1)
     #fgd = matrix(paste0('species',fgdcomb),4)
     control = NULL
     if (is.null(useSpecies)){
          useSpecies=treesObj$masterTree$tip.label
     }
     both=intersect(treesObj$master$tip.label, useSpecies)
     allreport=treesObj$report[,both]
     ss=rowSums(allreport)
     iiboth=which(ss==length(both))
     ee=edgeIndexRelativeMaster(treesObj$masterTree, treesObj$masterTree)
     ii= treesObj$matIndex[ee[, c(2,1)]]
     allbranch=treesObj$paths[iiboth,ii]
     nv=t(projection(t(allbranch), method="AVE", returnNV = T))
     nv=as.vector(nv)
     mastertree=treesObj$master
     mastertree$edge.length=nv
     nn=character(length(nv))
     iim=match(1:length(treesObj$masterTree$tip.label), treesObj$masterTree$edge[,2])
     nn[iim]=treesObj$masterTree$tip.label
     names(mastertree$edge.length) = nn
     nvplot2 = sort(mastertree$edge.length)
     nvplot=nvplot2[names(nvplot2)!=""]
     if(plot){
          barcols = rep('black',length(nv))
          avlcols <- c('red','green','blue','yellow','orange','purple')
          nfgd = ifelse(!is.null(dim(fgd)),dim(fgd)[1],1)
          if(nfgd > 1){
               for(ii in 1:nfgd){
                    barcols[names(nvplot) %in% fgd[ii,]] = avlcols[ii]
               }
          }else{
               barcols[names(nvplot) %in% fgd] = avlcols[1]
          }
          bpl <- barplot(nvplot, horiz = T, col = barcols, names.arg = F, cex.axis = 1,
                         cex.lab = 2, xlab = 'average rate', space = 2, width = 0.5,
                         xlim = c(0,1.07*max(nvplot)))
          text( nvplot+rep(0.00,length(nvplot)) , bpl, labels = names(nvplot), srt = 0, pos = 4, cex = 0.75)
     }
     return(mastertree)
}


#' Plot the provided tree, (optionally) rerooted, with specified branches highlighted
#'
#' @param tree. A tree object.
#' @param outgroup. A vector of species to use to root the tree. If not provided, the tree remains unrooted.
#' @param hlspecies. A vector of species whose terminal branches to highlight, or a vector of branch numbers within the tree.
#' @param hlcols. Colors to use in highlighting the branches. If not specified, will use default R colors.
#' @param main. Main text for plot.
#' @param useGG option to creat plot with ggtree. Improves plot readability for most data sets.
#' @return A plot of the the (optionally rerooted) tree, with branches highlighted.
#' @export

plotTreeHighlightBranches <- function(tree, outgroup=NULL, hlspecies, hlcols=NULL, main="", useGG=FALSE){

  if (is.null(hlcols)) {
    hlcols <- c(2:(length(hlspecies)+1))
  }
  if (length(hlcols) < length(hlspecies)) {
    hlcols <- rep_len(hlcols, length(hlspecies))
  }
  if (!is.null(outgroup)) {
    outgroup <- outgroup[outgroup %in% tree$tip.label]
    if (length(outgroup) > 0) {
      #root the tree
      if (is.numeric(hlspecies)) {
        #track the branches
        dummytree <- tree
        dummytree$edge.length <- c(rep(1,nrow(dummytree$edge)))
        for (i in c(1:length(hlspecies))) {
          dummytree$edge.length[hlspecies] <- i+1
        }
        dummyrooted <- root(dummytree, outgroup)
      }
      rooted <- root(tree, outgroup)
    } else {
      print("No members of requested outgroup found in tree; keeping unrooted.")
      rooted <- tree
      outgroup <- NULL
    }
  } else {
    rooted <- tree
  }

  if(useGG){
    #if hlcols is numeric make branches blue
    if (is.numeric(hlcols)) {
      hlcols <- rep_len("#0000ff", length(hlspecies)) #if there are fewer colors than species to highlight, repeat colors
    }

    #create hlspecies_named
    hlspecies_named <- vector(mode="character")
    if(is.numeric(hlspecies)){
      for(i in 1: length(hlspecies))
      {
        hlspecies_named[i] <- tree$tip.label[hlspecies[i]]
      }
    }else
      hlspecies_named <- hlspecies

    #Make branches of length 0 just *slightly* larger values to visualize tree
    rooted2 <- rooted
    mm <- min(rooted2$edge.length[rooted2$edge.length>0])
    rooted2$edge.length[rooted2$edge.length==0] <- max(0.02,mm/20)


    #vector of tip label colors
    tipCols <- vector(mode = "character")
    x <- 1
    for(i in 1: length(tree$tip.label))
    {
      if(tree$tip.label[i] %in% hlspecies_named)
      {
        tipCols[i] <- hlcols[x]
        x <- x+1
      }
      else tipCols[i] <- "black"
    }

    #number of labels
    nlabel <- rooted2$Nnode + length(rooted2$tip.label)

    edgeCols <- vector(mode="character", length=nlabel)
    x <- 1
    for(i in 1: nlabel)
    {
      if(i < length(rooted2$tip.label))
      {
        if(rooted2$tip.label[i] %in% hlspecies_named)
        {
          edgeCols[i] <- hlcols[x]
          x <- x+1
        }else
          edgeCols[i] <- "Black"
      }else
        edgeCols[i] <- "Black"
    }

    plotobj = ggtree(rooted2, color = edgeCols)
    plotobj = plotobj + geom_tiplab(color= tipCols, geom="text", cex = 3) + labs(title = main)
    return(plotobj)
  }else{ #This is executed when useGG is FALSE
    colMaster <- c(rep("black",nrow(rooted$edge)))
    if (is.numeric(hlspecies)) {
      if (!is.null(outgroup)) {
        hlcols <- c("black",hlcols)
        colMaster <- hlcols[dummyrooted$edge.length]
      } else {
        for (i in 1:length(hlspecies)) {
          colMaster[hlspecies[i]] <- hlcols[i]
        }
      }
    } else {
      wspmr <- rooted$tip.label[rooted$edge[,2]] #order of tips in edges
      for (i in 1:length(hlspecies)) {
        colMaster[which(wspmr==hlspecies[i])] <- hlcols[i]
      }
    }
    termedge <- order(rooted$edge[,2])[1:length(rooted$tip.label)] #which edge corresponds to each terminal branch
    colMasterTip <- colMaster[termedge]
    #Make branches of length 0 just *slightly* larger values to visualize tree
    rooted2=rooted
    mm=min(rooted2$edge.length[rooted2$edge.length>0])
    rooted2$edge.length[rooted2$edge.length==0]=max(0.02,mm/20)
    plotobj = plot.phylo(rooted2, main = main, edge.color=colMaster,
                         tip.color = colMasterTip, edge.width = 2, cex=0.8)

    return(plotobj)
  }

}

#' Plot a phenotype tree generated by \code{\link{char2TreeCategorical}} with the branches colored by category
#' @param tree The phenotype tree returned by \code{\link{char2TreeCategorical}}
#' @param category_names The names of the categories in order of the corresponding numerical labels
#' @param master If provided, the tree will be plotted using the branch lengths of that tree, coloring branches by the phenotype in the phenotype tree. The master tree and phenotype tree should have the same topology and the same species. If not provided, use.edge.length is set to FALSE.
#' @param node_states A vector of the phenotype state at each node in the tree (in order of the nodes). If provided, vertical lines representing nodes in the tree are colored by state. It must be numerical and the integers must have the same mapping to the category names as in the phenotype tree.
#' @return A plot of a phylogenetic tree with branches colored by phenotype category.
#' @export
plotTreeCategorical = function(tree, category_names = NULL, master = NULL,
                               node_states = NULL) {
  # generate a color palette
  n = length(unique(tree$edge.length))
  if(n > length(palette())) {
    colors = colorRampPalette(palette())(n)
  } else{
    colors = palette()[1:n]
  }

  # generate a colors vector of the same length as edge.length with the colors mapped accordingly
  edge_colors = tree$edge.length
  edge_colors = sapply(edge_colors, function(x){colors[x]})

  # add space in right margin for legend
  par(mar = c(5, 4, 4, 10), xpd = TRUE)

  # plot tree using the plot.phylo function
  if(!is.null(master)) {
    # prune the master tree to only include species in the phenotype tree
    cm = intersect(master$tip.label, tree$tip.label)
    master = pruneTree(master, cm)
    # plot(master, cex = 0.25, edge.color = edge_colors)
    if(!is.null(node_states)) {
      node_colors = node_states
      node_colors = sapply(node_colors, function(x){colors[x]})
      plot(master, cex = 0.25, edge.color = edge_colors, node.color = node_colors)
    } else {
      plot(master, cex = 0.25, edge.color = edge_colors)
    }
  } else {
    # plot(tree, cex = 0.25, edge.color = edge_colors, use.edge.length = FALSE)
      if(!is.null(node_states)) {
        node_colors = node_states
        node_colors = sapply(node_colors, function(x){colors[x]})
        plot(tree, cex = 0.25, edge.color = edge_colors, use.edge.length = FALSE,
             node.depth = 2, node.color = node_colors)
      } else {
        plot(tree, cex = 0.25, edge.color = edge_colors, use.edge.length = FALSE,
             node.depth = 2)
      }
  }

  # add a legend if category names are provided in the order of the numerical labels
  if(!is.null(category_names)) {
    legend(x = "bottomright",
           inset = c(-0.25, 0),
           cex = 0.5,
           legend = category_names,
           col = colors,
           lwd = 2)
  }
}
nclark-lab/RERconverge documentation built on March 2, 2024, 8:51 a.m.