R/shinyModules.R

Defines functions plot_compareClusts plot_compareClusts_volcano plot_compareClusts_DEscatter plot_compareClusts_MAplot compareClusts_DF plot_GEboxplot geneSearch plot_clusterGenes_search plot_clusterGenes_DEgenes plot_clusterGenes_markers singleDot doubleDot lower.half.circle upper.half.circle plot_deDotplot dotplotDEgenes plot_mdPerClust plot_mdBoxplot plot_mdBarplot plot_mdCompare plot_mdBoxplotY plot_mdBoxplotX plot_mdScatter plot_tsne tsne_labels plot_sil plot_clustSep

Documented in dotplotDEgenes plot_clusterGenes_DEgenes plot_clusterGenes_markers plot_clusterGenes_search plot_clustSep plot_compareClusts plot_deDotplot plot_GEboxplot plot_mdCompare plot_mdPerClust plot_sil plot_tsne tsne_labels

#' @include deTest.R spreadLabels2.R
NULL


# Cluster Solution DE boxplots -------------

#' scClustViz plot: Cluster separation boxplots
#'
#' This function plots metrics of cluster solution cohesion or overfitting as a
#' function of the number of clusters found.
#'
#' @param sCVdL A named list of sCVdata objects, output of
#'   \code{\link{CalcAllSCV}}.
#' @param DEtype One of "DEneighb", "DEmarker", or "silWidth". "DEneighb" shows
#'   number of significantly differentially expressed genes between nearest
#'   neighbouring clusters. "DEmarker" shows number of marker genes per cluster,
#'   significantly positively differentially expressed genes in all pairwise
#'   comparisons with other clusters. "silWidth" shows silhouette widths with
#'   average silhouette width as a trace across all clustering solutions. (see
#'   \code{\link[cluster]{silhouette}}).
#' @param FDRthresh Default=0.05. The false discovery rate threshold for
#'   determining significance of differential gene expression.
#' @param res Optional. Name of cluster resolution to highlight. Must be one of
#'   \code{names(sCVdL)}.
#' @param Xlim Optional. Passed to
#'   \code{\link[graphics]{plot.default}(xlim=Xlim)}.
#' @param Ylim Optional. Passed to
#'   \code{\link[graphics]{plot.default}(ylim=Ylim)}.
#'
#' @examples
#' \dontrun{
#' plot_clustSep(sCVdL,DEtype="DEneighb",FDRthresh=0.05,res="res.0.8")
#' }
#'
#' @export

plot_clustSep <- function(sCVdL,DEtype,FDRthresh=0.05,res,Xlim,Ylim) {
  if (missing(Xlim)) { Xlim <- NULL }
  if (missing(Ylim)) { Ylim <- NULL }
  if (missing(res)) { res <- "" }
  if (!res %in% c(names(sCVdL),"")) {
    warning(paste(paste0("res = '",res,"' not found in cluster resolutions."),
                  "Cluster resolutions are names(sCVdL):",
                  paste(names(sCVdL),collapse=", "),sep="\n  "))
  }
  if (!DEtype %in% c("DEneighb","DEmarker","silWidth")) {
    stop('DEtype must be one of "DEneighb", "DEmarker", or "silWidth".')
  }
  numClust <- sapply(sCVdL,function(X) length(levels(Clusters(X))))
  for (X in unique(numClust[duplicated(numClust)])) {
    numClust[numClust == X] <- seq(X-.25,X+.25,length.out=sum(numClust == X))
  }
  
  if (is.null(Xlim)) { Xlim <- range(numClust) }
  bpData <- sapply(sCVdL,function(X) switch(DEtype,
                                            # DR=DEdist(X,"DR"),
                                            # MGE=DEdist(X,"MGE"),
                                            # PCA=DEdist(X,getEmb(inD,Param(sCVdL[[1]],"DRforClust"))),
                                            # scoreDE=as.vector(as.dist(DEdist(X))),
                                            DEneighb=sapply(DEneighb(X,FDRthresh),nrow),
                                            DEmarker=sapply(DEmarker(X,FDRthresh),nrow),
                                            silWidth=Silhouette(X)[,"sil_width"]),
                   simplify=F)
  if (is.null(Ylim)) { Ylim <- range(unlist(bpData)) }
  
  if (grepl("^Comp",res)) {
    par(mar=c(3,3,2,1))
    plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA)
    text(.5,.5,paste("Press 'View clusters at this resolution'",
                     "to view the comparison",
                     sub("Comp.","",res,fixed=T),sep="\n"))
  } else {
    par(mar=c(3,3,2,1),mgp=2:0)
    if (DEtype == "silWidth") {
      plot(x=NA,y=NA,xlim=Xlim + c(-.5,.5),ylim=Ylim,xaxt="n",
           xlab="Number of clusters",ylab="Silhouette width per cluster")
    } else {
      plot(x=numClust,y=sapply(bpData,median),type="l",xaxt="n",
           xlim=Xlim + c(-.5,.5),ylim=Ylim,xlab="Number of clusters",
           ylab=switch(DEtype,
                       # DR="Distance between clusters by gene detection rates",
                       # MGE="Distance between clusters by mean gene expression",
                       scoreDE="Distance between clusters by differential expression score",
                       DEmarker="Positive DE genes per cluster to all other clusters",
                       DEneighb="Positive DE genes per cluster to nearest cluster"))
    }
    axis(side=3,at=seq(round(min(numClust)) - 0.5,round(max(numClust)) + 0.5,by=1),
         labels=F,tick=T,pos=par("usr")[3])
    axis(side=1,at=seq(round(min(numClust)) - 0.5,round(max(numClust)) + 0.5,by=1),
         labels=F,tick=T,pos=par("usr")[3])
    axis(side=1,at=seq(round(min(numClust)),round(max(numClust)),by=1),labels=T,tick=F)
    
    abline(h=seq(0,max(unlist(bpData)),switch(as.character(diff(Ylim) > 1000),
                                              "FALSE"=10,"TRUE"=100)),
           lty=3,col=alpha("black",0.3))
    for (i in names(bpData)[names(bpData) != res]) {
      boxplot(bpData[[i]],add=T,at=numClust[i],yaxt="n",col=alpha("white",.5))
    }
    if (any(names(bpData) == res)) {
      if (DEtype == "silWidth") {
        boxplot(bpData[[res]],add=T,at=numClust[res],border="red")
      } else {
        boxplot(bpData[[res]],add=T,at=numClust[res],border="red",outline=F)
        points(jitter(rep(numClust[res],length(bpData[[res]])),amount=.2),
               bpData[[res]],col=alpha("red",.5),pch=20)
      }
    }
    if (DEtype == "silWidth") {
      temp_avSil <- sapply(bpData,mean)
      lines(numClust,y=temp_avSil,type="b",col="darkred",pch=16)
      points(numClust[res],temp_avSil[res],col="red",pch=16)
      legend(x=par("usr")[2],y=par("usr")[4],
             xjust=1,yjust=0.2,xpd=NA,bty="n",horiz=T,
             legend=c("Average silhouette width",
                      paste("Selected resolution:",res)),
             col=c("darkred","red"),pch=c(16,0),lty=c(1,NA))
    } else {
      legend(x=par("usr")[2],y=par("usr")[4],
             xjust=1,yjust=0.2,xpd=NA,bty="n",
             legend=paste("Selected resolution:",res),
             col="red",pch=0)
    }
  }
}


# Silhouette plot ------

#' scClustViz plot: Silhouette plot
#'
#' This function is a wrapper to \code{plot(silhouette(x))}.
#'
#' @param sCVd An \code{\link{sCVdata}} object with a non-null \code{Silhouette}
#'   slot.
#'
#' @export

plot_sil <- function(sCVd) {
  if (is.null(attr(Clusters(sCVd),"ClusterColours"))) {
    cols <- colorspace::qualitative_hcl(length(levels(Clusters(sCVd))),
                                        palette="Dark 3")
  } else {
    cols <- attr(Clusters(sCVd),"ClusterColours")
  }
  par(mar=c(4.5,.5,1.5,1.5),mgp=2:0)
  plot(Silhouette(sCVd),
       beside=T,border=NA,main=NA,
       col=cols,
       do.n.k=T)
}


# tsnePlot -------------------

#' scClustViz plot element: Cluster names on cluster centroid.
#'
#' See \code{\link{plot_tsne}} for application.
#'
#' @param sCVd An sCVdata object.
#' @param cell_coord A numeric matrix where named rows are cells, and two
#'   columns are the x and y dimensions of the cell embedding.
#' @param lab_type One of "ClusterNames", "ClusterNamesAll", or "Clusters".
#'   "ClusterNames" places cluster names (added to sCVdata object by
#'   \code{\link{labelCellTypes}}) at the centroid of all points sharing that
#'   cluster name (can span clusters). "ClusterNamesAll" places cluster names at
#'   the centroid of each cluster. "Clusters" places cluster ID
#'   (\code{levels(Clusters(sCVd))}) at the centroid of each cluster.
#'   
#' @export

tsne_labels <- function(sCVd,cell_coord,lab_type) {
  if (!lab_type %in% c("ClusterNames","ClusterNamesAll","Clusters")) {
    stop('lab_type must be one of "ClusterNames","ClusterNamesAll","Clusters"')
  }
  if (lab_type == "ClusterNames") {
    temp_labelNames <- sapply(unique(attr(Clusters(sCVd),"ClusterNames")),function(X) 
      names(which(attr(Clusters(sCVd),"ClusterNames") == X)),simplify=F)
    temp_labels <- apply(cell_coord,2,function(Y) 
      tapply(Y,apply(sapply(temp_labelNames,function(X) Clusters(sCVd) %in% X),1,which),mean))
    if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) }
    rownames(temp_labels) <- names(temp_labelNames)
  } else if (lab_type == "ClusterNamesAll") {
    temp_labels <- apply(cell_coord,2,function(X) tapply(X,Clusters(sCVd),mean))
    if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) }
    rownames(temp_labels) <- attr(Clusters(sCVd),"ClusterNames")
  } else if (lab_type == "Clusters") {
    temp_labels <- apply(cell_coord,2,function(X) tapply(X,Clusters(sCVd),mean))
    if (!is.matrix(temp_labels)) { temp_labels <- rbind(temp_labels) }
    rownames(temp_labels) <- levels(Clusters(sCVd))
  } else {
    stop("lab_type should be one of 'ClusterNames', 'ClusterNamesAll', or 'Clusters'.")
  }
  return(temp_labels)
}

#' scClustViz plot: Plot cell embedding in 2D
#'
#' This function plots cells in two dimensions, with various overlays.
#'
#' @param cell_coord A numeric matrix where named rows are cells, and two
#'   columns are the x and y dimensions of the cell embedding.
#' @param md The overlay information. Either a factor or numeric vector matching
#'   the rows (cells) of the \code{cell_coord} matrix. If this is a factor, the
#'   cells will be coloured by the factor levels. If a positive numeric vector,
#'   the cells will be coloured using the Viridis sequential colourscale
#'   implemented in \code{\link[colorspace]{sequential_hcl}}. Otherwise a
#'   diverging red-blue colourscale from \code{\link[colorspace]{diverging_hcl}}
#'   will be used.
#' @param md_title NULL or a character vector of one. If NULL, \code{md} is
#'   assumed to be cluster assignments. Otherwise this should be the title of
#'   the overlay represented by \code{md}.
#' @param md_cols Default = \code{NULL}. A vector of colours used to label the
#'   clusters. Only used in md_title is NULL.
#' @param md_log Default=FALSE. Logical vector of length one indicating whether
#'   \code{md} should be log-transformed. Only to be used when \code{md} is
#'   numeric.
#' @param label Default=NULL. The output of \code{\link{tsne_labels}} to have
#'   cluster names overlaid on the plot.
#' @param sel_cells Optional. A character vector of cell names (rownames of
#'   \code{cell_coord}) to highlight in the plot.
#' @param sel_cells_A Optional. Alternative highlighting method to sel_cells,
#'   can be used in conjunction. Meant for indicating a selected set of cells
#'   when building manual cell set comparisons, in conjunction with
#'   \code{sel_cells_B}.
#' @param sel_cells_B Optional. See \code{sel_cells_A}.
#'
#' @examples
#' \dontrun{
#' # Cluster overlay:
#' plot_tsne(cell_coord=getEmb(input_data_obj,"tsne"),
#'           md=Clusters(sCVdata),
#'           md_title=NULL,
#'           label=tsne_labels(sCVd=sCVdata,
#'                             cell_coord=getEmb(input_data_obj,"tsne"),
#'                             lab_type="ClusterNames"))
#'
#' # Metadata overlay:
#' plot_tsne(cell_coord=getEmb(input_data_obj,"tsne"),
#'           md=getMD(input_data_obj)$total_counts,
#'           md_title="Library Size",
#'           md_log=TRUE,
#'           label=tsne_labels(sCVd=sCVdata,
#'                             cell_coord=getEmb(input_data_obj,"tsne"),
#'                             lab_type="ClusterNames"))
#'
#' # Gene expression overlay:
#' plot_tsne(cell_coord=getEmb(input_data_obj,"tsne"),
#'           md=getExpr(input_data_obj,Param(sCVdata,"assayType"))["Actb",],
#'           md_title="Actb")
#' }
#'
#' @export

plot_tsne <- function(cell_coord,md,md_title,
                      md_cols=NULL,md_log=F,label=NULL,
                      sel_cells,sel_cells_A,sel_cells_B) {
  if (is.null(md_title)) {
    id <- as.factor(md)
    if (is.null(md_cols)) {
      idcol <- colorspace::qualitative_hcl(length(levels(id)),
                                           palette="Dark 3")
    } else {
      idcol <- md_cols
    }
    if (any(is.na(id))) {
      levels(id) <- c(levels(id),"Unselected")
      id[is.na(id)] <- "Unselected"
      idcol <- c(idcol,"grey80")
    }
    par(mar=c(3,3,2.5,1),mgp=2:0)
  } else if (is.factor(md) | is.character(md)) {
    id <- as.factor(md)
    idcol <- colorspace::qualitative_hcl(length(levels(id)),palette="Dark 3")
    par(mar=c(3,3,ceiling(length(levels(id))/4)+1,1),mgp=2:0)
  } else if (any(md < 0)) {
    if (md_log) {
      warning("Can't log-scale md because it contains negative values.")
    }
    temp_down <- cut(c(0,md[md <= 0]),50,labels=F)[-1]
    temp_up <- cut(c(0,md[md > 0]),50,labels=F)[-1]
    id <- rep(NA,length(md))
    id[md <= 0] <- temp_down
    id[md > 0] <- temp_up + 50
    idcol <- colorspace::diverge_hcl(100,palette="Blue-Red")
    par(mar=c(3,3,2.5,1),mgp=2:0)
  } else{
    if (md_log) {
      id <- cut(log10(md),100)
    } else {
      id <- cut(md,100)
    }
    idcol <- colorspace::sequential_hcl(100,palette="Viridis",rev=T)
    par(mar=c(3,3,2.5,1),mgp=2:0)
  }
  if (missing(sel_cells)) { sel_cells <- character() }
  if (nrow(cell_coord) > 1e4) {
    temp_pch <- "."
    temp_cex <- 2
  } else {
    temp_pch <- 21
    temp_cex <- 1
  }
  
  plot(x=NULL,y=NULL,xlab=colnames(cell_coord)[1],ylab=colnames(cell_coord)[2],
       xlim=range(cell_coord[,1]),ylim=range(cell_coord[,2]))
  if (length(sel_cells) > 0) {
    points(cell_coord[!rownames(cell_coord) %in% sel_cells,,drop=F],pch=temp_pch,cex=temp_cex,
           col=alpha(idcol,.6)[id[!rownames(cell_coord) %in% sel_cells]],
           bg=alpha(idcol,0.3)[id[!rownames(cell_coord) %in% sel_cells]])
    points(cell_coord[sel_cells,,drop=F],pch=temp_pch,cex=temp_cex + .5,
           col=alpha(idcol,1)[id[rownames(cell_coord) %in% sel_cells]],
           bg=alpha(idcol,0.6)[id[rownames(cell_coord) %in% sel_cells]])
  } else {
    points(cell_coord,pch=temp_pch,cex=temp_cex,col=alpha(idcol,.8)[id],bg=alpha(idcol,0.4)[id])
  }
  
  if (!missing(sel_cells_A) & !missing(sel_cells_B)) {
    points(x=cell_coord[sel_cells_A,1],
           y=cell_coord[sel_cells_A,2],
           pch=19,col="#a50026")
    points(x=cell_coord[sel_cells_B,1],
           y=cell_coord[sel_cells_B,2],
           pch=19,col="#313695")
    points(x=cell_coord[intersect(sel_cells_A,sel_cells_B),1],
           y=cell_coord[intersect(sel_cells_A,sel_cells_B),2],
           pch=19,col="#ffffbf")
    points(x=cell_coord[intersect(sel_cells_A,sel_cells_B),1],
           y=cell_coord[intersect(sel_cells_A,sel_cells_B),2],
           pch=4,col="red")
  }
  if (!is.null(label)) {
    text(label,labels=rownames(label),font=2,cex=1.2)
  }
  if (is.null(md_title)) {
  } else if (is.factor(md) | is.character(md)) {
    legend(x=par("usr")[2],y=par("usr")[4],
           xjust=1,yjust=0.2,xpd=NA,bty="n",
           ncol=switch(as.character(length(levels(id)) < 4),"TRUE"=length(levels(id)),"FALSE"=4),
           legend=levels(id),pch=21,col=idcol,pt.bg=alpha(idcol,0.5))
    mtext(md_title,side=3,adj=0,font=2,line=ceiling(length(levels(id))/4)-1,cex=1.2)
  } else if (any(md < 0)) {
    temp_x <- c(
      seq(from=par("usr")[1] + (par("usr")[2] - par("usr")[1]) * .15,
          to=par("usr")[1] + (par("usr")[2] - par("usr")[1]) / 2 - strwidth("0"),
          length.out=51),
      seq(from=par("usr")[2] - (par("usr")[2] - par("usr")[1]) / 2 + strwidth("0"),
          to=par("usr")[2] - (par("usr")[2] - par("usr")[1]) * .15,
          length.out=51)
    )
    for (i in 1:50) {
      rect(xleft=temp_x[i],xright=temp_x[i+1],
           ybottom=par("usr")[4] + (par("usr")[4] - par("usr")[3]) * .001,
           ytop=par("usr")[4] + strheight(md_title),
           col=idcol[i],border=NA,xpd=NA)
    }
    for (i in 52:102) {
      rect(xleft=temp_x[i],xright=temp_x[i+1],
           ybottom=par("usr")[4] + (par("usr")[4] - par("usr")[3]) * .001,
           ytop=par("usr")[4] + strheight(md_title),
           col=idcol[i-1],border=NA,xpd=NA)
    }
    mtext(round(min(md),2),side=3,line=0,at=temp_x[1],adj=1.1)
    mtext(round(max(md),2),side=3,line=0,at=temp_x[102],adj=-0.1)
    mtext(0,side=3,line=0,adj=.5,
          at=par("usr")[1] + (par("usr")[2] - par("usr")[1]) / 2)
    mtext(md_title,side=3,line=1,adj=.5,font=2,cex=1.2,
          at=par("usr")[1] + (par("usr")[2] - par("usr")[1]) / 2)
  } else {
    if (md_log) { md_title <- paste(md_title,"(log scale)") } 
    temp_x <- seq(from=par("usr")[1] + (par("usr")[2] - par("usr")[1]) * .15,
                  to=par("usr")[2] - (par("usr")[2] - par("usr")[1]) * .15,
                  length.out=101)
    for (i in seq_along(idcol)) {
      rect(xleft=temp_x[i],xright=temp_x[i+1],
           ybottom=par("usr")[4] + (par("usr")[4] - par("usr")[3]) * .001,
           ytop=par("usr")[4] + strheight(md_title),
           col=idcol[i],border=NA,xpd=NA)
    }
    mtext(round(min(md),2),side=3,line=0,at=temp_x[1],adj=1.1)
    mtext(round(max(md),2),side=3,line=0,at=temp_x[101],adj=-0.1)
    mtext(md_title,side=3,line=1,at=temp_x[51],adj=.5,font=2,cex=1.2)
  }
}


# Metadata plots ----------
# ^ mdCompare -----------
plot_mdScatter <- function(MD,sel_clust,md_log) {
  if (nrow(MD) > 1e4) {
    temp_pch <- "."
    temp_cex <- 2
  } else {
    temp_pch <- 21
    temp_cex <- 1
  }
  temp_par <- par(no.readonly=T)
  layout(matrix(c(2,1,0,3),2),c(5,1),c(1,5))
  par(mar=c(3,3,0,0),mgp=2:0,cex=1.1)
  plot(MD[!MD$sel_cells,1:2,drop=F],log=md_log,xlim=range(MD[,1]),ylim=range(MD[,2]),
       pch=temp_pch,cex=temp_cex,col=alpha("black",0.2),bg=alpha("black",0.1))
  points(MD[MD$sel_cells,1:2,drop=F],pch=temp_pch,cex=temp_cex + .5,
         col=alpha("red",0.4),bg=alpha("red",0.2))
  par(mar=c(0,3,1,0))
  boxplot(tapply(MD[,1],MD$sel_cells,c),log=sub("y","",md_log),
          horizontal=T,xaxt="n",yaxt="n",border=c("black","red"))
  if (any(MD$sel_cells)) {
    legend(x=switch(sub("y","",md_log),"x"=10^par("usr")[1],par("usr")[1]),y=par("usr")[4],
           xjust=0,yjust=0.2,xpd=NA,bty="n",pch=21,col="red",pt.bg=alpha("red",0.5),
           legend=paste("Cluster",names(sel_clust),sel_clust))
  }
  par(mar=c(3,0,0,1))
  boxplot(tapply(MD[,2],MD$sel_cells,c),log=sub("x","",md_log),
          horizontal=F,xaxt="n",yaxt="n",border=c("black","red"))
  par(temp_par)
}

plot_mdBoxplotX <- function(MD,sel_clust,md_log) {
  temp_par <- par(no.readonly=T)
  par(mar=c(3,3,2,1),mgp=2:0,cex=1.1)
  if (any(MD$sel_cells)) {
    temp1 <- tapply(MD[!MD$sel_cells,2],as.factor(MD[!MD$sel_cells,1]),c)
    temp2 <- tapply(MD[MD$sel_cells,2],as.factor(MD[MD$sel_cells,1]),c)
    plot(x=NULL,y=NULL,ylim=range(MD[,2]),
         xlim=c(0,length(levels(as.factor(MD[,1]))) * 3),
         log=md_log,xaxt="n",
         xlab=names(MD)[1],ylab=names(MD)[2])
    boxplot(temp1,add=T,xaxt="n",
            at=seq(1,length(levels(as.factor(MD[,1]))) * 3,by=3))
    boxplot(temp2,add=T,xaxt="n",border="red",
            at=seq(2,length(levels(as.factor(MD[,1]))) * 3,by=3))
    axis(side=1,at=seq(1.5,length(levels(as.factor(MD[,1]))) * 3,by=3),
         labels=names(temp1))
    legend(x=par("usr")[1],y=switch(sub("x","",md_log),"y"=10^par("usr")[4],par("usr")[4]),
           xjust=0,yjust=0.2,xpd=NA,bty="n",pch=0,col="red",pt.bg=alpha("red",0.5),
           legend=paste("Cluster",names(sel_clust),sel_clust))
    
  } else {
    boxplot(tapply(MD[,2],as.factor(MD[,1]),c),log=md_log,
            xlab=names(MD)[1],ylab=names(MD)[2])
  }
  par(temp_par)
}

plot_mdBoxplotY <- function(MD,sel_clust,md_log) {
  temp_par <- par(no.readonly=T)
  par(mar=c(3,3,2,1),mgp=2:0,cex=1.1)
  if (any(MD$sel_cells)) {
    temp1 <- tapply(MD[!MD$sel_cells,1],as.factor(MD[!MD$sel_cells,2]),c)
    temp2 <- tapply(MD[MD$sel_cells,1],as.factor(MD[MD$sel_cells,2]),c)
    plot(x=NULL,y=NULL,xlim=range(MD[,1]),
         ylim=c(0,length(levels(as.factor(MD[,2]))) * 3),
         log=md_log,yaxt="n",
         xlab=names(MD)[1],ylab=names(MD)[2])
    boxplot(temp1,add=T,yaxt="n",horizontal=T,
            at=seq(1,length(levels(as.factor(MD[,2]))) * 3,by=3))
    boxplot(temp2,add=T,yaxt="n",border="red",horizontal=T,
            at=seq(2,length(levels(as.factor(MD[,2]))) * 3,by=3))
    axis(side=2,at=seq(1.5,length(levels(as.factor(MD[,2]))) * 3,by=3),
         labels=names(temp1))
    legend(x=switch(sub("y","",md_log),"x"=10^par("usr")[1],par("usr")[1]),y=par("usr")[4],
           xjust=0,yjust=0.2,xpd=NA,bty="n",pch=0,col="red",pt.bg=alpha("red",0.5),
           legend=paste("Cluster",names(sel_clust),sel_clust))
    
  } else {
    boxplot(tapply(MD[,1],as.factor(MD[,2]),c),log=md_log,
            horizontal=T,xlab=names(MD)[1],ylab=names(MD)[2])
  }
  par(temp_par)
}


#' scClustViz plot: Plot to compare cell metadata
#'
#' This function makes scatter/boxplots comparing cellular metadata.
#'
#' @param MD A dataframe of cellular metadata. See \code{\link{getMD}}.
#' @param mdX A character vector of one refering to the variable name from
#'   \code{MD} to plot on the x-axis.
#' @param mdY A character vector of one refering to the variable name from
#'   \code{MD} to plot on the y-axis.
#' @param sel_cells Optional. A character vector of cell names (rownames of
#'   \code{MD}) to highlight in the plot.
#' @param sel_clust Optional. The name of the selected cluster
#'   (\code{sel_cells}) to include in the legend. If
#'   \code{\link{labelCellTypes}} has been run, pass the appropriate element of
#'   \code{attr(Clusters(sCV),"ClusterNames")} to this argument to show both
#'   cluster number and cell type label in the legend.
#' @param md_log Optional. A character vector indicating which axes should be
#'   log scaled. \code{c("x","y")} to log-scale both axes.
#'
#' @examples
#' \dontrun{
#' plot_mdCompare(MD=getMD(input_data_obj),
#'                mdX="total_counts",
#'                mdY="total_features",
#'                sel_cells=names(Clusters(sCVdata))[Clusters(sCVdata) == "1"],
#'                sel_clust="1",
#'                md_log="x")
#' }
#'
#' @export

plot_mdCompare <- function(MD,mdX,mdY,sel_cells,sel_clust,md_log) {
  if (missing(sel_cells)) { sel_cells <- "" }
  if (missing(sel_clust)) { sel_clust <- "" }
  if (missing(md_log)) { md_log <- "" }
  MD <- data.frame(MD[,c(mdX,mdY)])
  MD$sel_cells <- rownames(MD) %in% sel_cells
  if ("x" %in% md_log & !(is.factor(MD[,1]) | is.character(MD[,1]))) {
    tempLX <- "x"
    if (any(MD[,1] <= 0)) {
      names(MD)[1] <- paste(names(MD)[1],
                            paste0("(log scale: ",sum(MD[,1] <= 0),
                                   " values <= 0 omitted)"))
      MD <- MD[MD[,1] > 0,]
    } else {
      names(MD)[1] <- paste(names(MD)[1],"(log scale)")
    }
  } else {
    tempLX <- ""
  }
  if ("y" %in% md_log & !(is.factor(MD[,2]) | is.character(MD[,2]))) { 
    tempLY <- "y" 
    if (any(MD[,2] <= 0)) {
      names(MD)[2] <- paste(names(MD)[2],
                            paste0("(log scale: ",sum(MD[,2] <= 0),
                                   " values <= 0 omitted)"))
      MD <- MD[MD[,2] > 0,]
    } else {
      names(MD)[2] <- paste(names(MD)[2],"(log scale)")
    } 
  } else {
    tempLY <- ""
  }
  md_log <- paste(c(tempLX,tempLY),collapse="")
  
  if ((is.factor(MD[,1]) | is.character(MD[,1])) &
      (is.factor(MD[,2]) | is.character(MD[,2]))) {
    plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA)
    text(.5,.5,"This figure is not designed to compare to categorical variables.")
  } else if (is.factor(MD[,1]) | is.character(MD[,1])) {
    plot_mdBoxplotX(MD,sel_clust,md_log)
  } else if (is.factor(MD[,2]) | is.character(MD[,2])) {
    plot_mdBoxplotY(MD,sel_clust,md_log)
  } else {
    plot_mdScatter(MD,sel_clust,md_log)
  }
  
}


# ^ mdPerClust -------
plot_mdBarplot <- function(MD,opt,cols) {
  temp_par <- par(no.readonly=T)
  if (is.null(cols)) {
    cols <- colorspace::qualitative_hcl(length(levels(MD$cl)),
                                        palette="Dark 3")
  }
  id0 <- as.factor(MD[,1])
  id <- switch(opt,
               "relative"=tapply(id0,MD$cl,function(X) table(X) / length(X)),
               "absolute"=tapply(id0,MD$cl,table))
  if (is.list(id)) { id <- do.call(cbind,id) }
  idylab <- switch(opt,
                   "relative"="Proportion of cells per cluster",
                   "absolute"="Number of cells per cluster")
  idcol <- colorspace::qualitative_hcl(length(levels(id0)),palette="Dark 3")
  par(mar=c(3,3,ceiling(length(levels(id0))/4)+.5,1),mgp=2:0,cex=1.1)
  barplot(id,col=idcol,ylab=idylab,xlab="Clusters",yaxt="n",mgp=c(2,0,0),
          legend.text=levels(id0),font=2,
          args.legend=list(x=par("usr")[2],y=par("usr")[4],
                           xjust=1,yjust=0.2,xpd=NA,ncol=4,bty="n"))
  axis(2)
  barplot(rep(par("usr")[3]*4.6,length(levels(MD$cl))),add=T,
          col=alpha(cols,alpha=.5),
          border=alpha(cols,alpha=.5))
  abline(h=0)
  mtext(names(MD)[1],side=3,adj=0,font=2,line=ceiling(length(levels(id0))/4)-.8,cex=1.2)
  par(temp_par)
}

plot_mdBoxplot <- function(MD,opt,cols) {
  temp_par <- par(no.readonly=T)
  if (is.null(cols)) {
    cols <- colorspace::qualitative_hcl(length(levels(MD$cl)),
                                        palette="Dark 3")
  }
  par(mar=c(3,3,2,1),mgp=2:0,cex=1.1)
  boxplot(tapply(MD[,1],MD$cl,c),log=opt,
          ylab=names(MD)[1],xlab="Clusters",
          border=cols,
          col=alpha(cols,alpha=.3))
  par(temp_par)
}


#' scClustViz plot: Plot to view cellular metadata by cluster
#'
#' This function makes boxplots / stacked barplots of cellular metadata
#' separated by cluster.
#'
#' @param MD A dataframe of cellular metadata. See \code{\link{getMD}}.
#' @param sel A character vector of one refering to the variable name from
#'   \code{MD} to plot.
#' @param cl A factor of cluster assignments. See \code{\link{Cluster}}.
#' @param opt Default="absolute". A character vector of plotting options. One of
#'   \code{"absolute"}, \code{"relative"}, or \code{"y"}. \code{"y"} sets
#'   log-scales the data for postive numerical metadata. For categorical
#'   metadata, \code{"absolute"} plots a stacked barplot of raw counts, whereas
#'   \code{"relative"} plots the proportion of each cluster represented by each
#'   category.
#' @param cols Default = \code{NULL}. A vector of colours used to label the clusters.
#'
#' @examples
#' \dontrun{
#' plot_mdPerClust(MD=getMD(input_data_obj),
#'                 sel="cyclonePhases",
#'                 cl=Clusters(sCVdata),
#'                 opt="relative")
#' }
#'
#' @export

plot_mdPerClust <- function(MD,sel,cl,opt="absolute",cols=NULL) {
  MD <- MD[sel]
  MD$cl <- cl
  if ("y" %in% opt & !(is.factor(MD[,1]) | is.character(MD[,1]))) { 
    if (any(MD[,1] <= 0)) {
      names(MD)[1] <- paste(names(MD)[1],
                            paste0("(log scale: ",sum(MD[,1] <= 0),
                                   " values <= 0 omitted)"))
      MD <- MD[MD[,1] > 0,]
    } else {
      names(MD)[1] <- paste(names(MD)[1],"(log scale)")
    } 
  }
  if (is.factor(MD[,1]) | is.character(MD[,1])) {
    plot_mdBarplot(MD,opt,cols)
  } else {
    plot_mdBoxplot(MD,opt,cols)
  }
}


# DE gene dotplot -----------

#' scClustViz plot helper function: Return DE genes per cluster
#'
#' This function returns a named numeric vector of FDR-corrected p-values for
#' statistically significant differentially expressed genes for a set comparison
#' type and FDR threshold. For \code{"DEmarker"}, the returned value is the max
#' of all comparisons.
#'
#' @param sCVd The sCVdata object.
#' @param DEtype One of: \code{"DEvsRest"} - see \code{\link{DEvsRest}};
#'   \code{"DEneighb"} - see \code{\link{DEneighb}}; \code{"DEmarker"} - see
#'   \code{\link{DEmarker}}.
#' @param FDRthresh A numeric vector of length 1 setting a false discovery rate
#'   threshold for statistical significance.
#'
#' @examples
#' \dontrun{
#' dotplotDEgenes(sCVdata,
#'                DEtype="DEneighb",
#'                FDRthresh=0.01)
#' }
#'
#' @export

dotplotDEgenes <- function(sCVd,DEtype,FDRthresh) {
  if (missing(FDRthresh)) { FDRthresh <- 1 }
  if (DEtype == "DEvsRest") {
    return(lapply(DEvsRest(sCVd),function(X) {
      temp <- X[which(X$FDR <= FDRthresh),"FDR",drop=F]
      out <- unlist(temp,use.names=F)
      names(out) <- rownames(temp)
      return(sort(out))
    }))
  } else if (DEtype == "DEneighb") {
    outL <- lapply(DEneighb(sCVd,FDRthresh), function(X) {
      if (nrow(X) < 1) { return(numeric(0)) }
      out <- X[,grep("^FDR_",names(X))]
      names(out) <- rownames(X)
      return(sort(out))
    })
    names(outL) <- levels(Clusters(sCVd))
    return(outL)
  } else if (DEtype == "DEmarker") {
    outL <- lapply(DEmarker(sCVd,FDRthresh), function(X) {
      if (nrow(X) < 1) { return(numeric(0)) }
      out <- apply(X[,grep("^FDR_",names(X)),drop=F],1,max)
      return(sort(out))
    })
    return(outL)
  }
}


#' scClustViz plot: Plot gene expression dotplots.
#'
#' This function makes dotplots (a heatmap analogue) showing gene expression for
#' a set of genes across all clusters.
#'
#' When generated in an interactive context (i.e. RStudio), this can sometimes
#' result in a \code{figure margins too large} error. See example for suggested
#' dimensions of the graphic device.
#'
#' @param sCVd The sCVdata object.
#' @param DEgenes The output of \code{\link{dotplotDEgenes}}.
#' @param DEnum Single integer representing the maximum number of DE genes per
#'   cluster to include in the dotplot.
#'
#' @examples
#' \dontrun{
#' pdf("filepath.pdf",width=11,height=7)
#' plot_deDotplot(sCVd=sCVdata,
#'                DEgenes=dotplotDEgenes(sCVdata,
#'                                       DEtype="DEneighb",
#'                                       FDRthresh=0.01)
#'                DEnum=5)
#' dev.off()
#' }
#'
#' @export

plot_deDotplot <- function(sCVd,DEgenes,DEnum) {
  # ^ Setup ----
  heatGenes <- unique(unlist(lapply(DEgenes,function(X) names(X)[1:DEnum])))
  heatGenes <- heatGenes[!is.na(heatGenes)]
  
  if (is.null(heatGenes)) {
    plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA)
    text(.5,.5,
         "No genes were statistically significant at the current false discovery rate")
    return(invisible())
  }
  
  temp_DR <- sapply(ClustGeneStats(sCVd),function(X) X[heatGenes,"DR"])
  if (is.vector(temp_DR)) {
    temp_DR <- matrix(temp_DR,1,dimnames=list(NULL,names(temp_DR))) 
  }
  temp_MDGE <- sapply(ClustGeneStats(sCVd),function(X) X[heatGenes,"MDGE"])
  if (is.vector(temp_MDGE)) {
    temp_MDGE <- matrix(temp_MDGE,1,dimnames=list(NULL,names(temp_MDGE))) 
  }
  rownames(temp_DR) <- rownames(temp_MDGE) <- heatGenes
  
  if (nrow(temp_DR) > 1) {
    hG <- hclust(dist(temp_DR),"complete")
  } else {
    hG <- list(order=1)
  }
  
  hC <- hclust(dist(t(sapply(ClustGeneStats(sCVd),function(X) X$MGE))))
  #  if (length(ClustGeneStats(sCVd)) > 2) {
  # hC <- hclust(as.dist(DEdist(sCVd)),"single")
  # } else {
  #    hC <- hclust(dist(t(temp_DR)))
  #  }  
  
  if (is.null(attr(Clusters(sCVd),"ClusterColours"))) {
    clustCols <- colorspace::qualitative_hcl(length(levels(Clusters(sCVd))),
                                             palette="Dark 3")
  } else {
    clustCols <- attr(Clusters(sCVd),"ClusterColours")
  }
  
  dC <- dendrapply(as.dendrogram(hC),function(X) {
    if (is.leaf(X)) {
      attr(X,"edgePar") <- list(
        lwd=2,
        col=clustCols[which(attr(X,"label") == levels(Clusters(sCVd)))]
      )
      attr(X,"nodePar") <- list(
        pch=NA,lab.font=2,lab.cex=1.2,
        lab.col=clustCols[which(attr(X,"label") == levels(Clusters(sCVd)))])
      if (attr(X,"label") != "Unselected") {
        if (attr(X,"label") %in% names(DEgenes)) {
          attr(X,"label") <- paste0(attr(X,"label"),": ",
                                    length(DEgenes[[attr(X,"label")]])," DE")
        } else {
          attr(X,"label") <- paste0(
            attr(X,"label"),": ",
            length(DEgenes[[which(attr(X,"label") ==
                                    sapply(strsplit(names(DEgenes),"-"),
                                           function(X) X[1]))]]),
            " DE")
        }
      }
    }
    return(X)
  })
  
  if ("genes" %in% names(ClustGeneStats(sCVd)[[1]])) {
    tempLabCol <- ClustGeneStats(sCVd)[[1]][heatGenes,"genes"]
  } else {
    tempLabCol <- rownames(ClustGeneStats(sCVd)[[1]][heatGenes,])
  }
  DR <- temp_DR[hG$order,hC$order,drop=F]
  temp <- range(sapply(ClustGeneStats(sCVd),function(X) X[,"MDGE"]))
  temp <- seq(temp[1],temp[2],length.out=101)
  MDGE <- findInterval(as.vector(temp_MDGE[hG$order,hC$order]),
                       vec=temp,all.inside=T)
  
  # ^ Plot dotplot ----
  temp_par <- par(no.readonly=T)
  if (length(levels(Clusters(sCVd))) <= 1) {
    plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA)
    text(.5,.5,paste("Heatmap cannot be computed",
                     "with less than two clusters.",sep="\n"))
  } else if (length(heatGenes) < 1) {
    plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA)
    text(.5,.5,paste("There are no differentially expressed genes at",
                     "false discovery rate threshold."))
  } else {
    layout(matrix(c(0,2,3,1),2),widths=c(1,5),heights=c(1,5))
    par(mar=c(9,0,0,.5))
    plot(x=NULL,y=NULL,xlim=c(0.5,nrow(DR)+.5),ylim=c(0.5,ncol(DR)+.5),
         xaxs="i",yaxs="i",xaxt="n",yaxt="n",xlab=NA,ylab=NA,bty="n")
    abline(v=1:nrow(DR),col="grey90")
    symbols(x=rep(1:nrow(DR),ncol(DR)),
            y=as.vector(sapply(1:ncol(DR),function(X) rep(X,nrow(DR)))),
            circles=as.vector(DR)/2,inches=F,add=T,xpd=NA,
            fg=colorspace::sequential_hcl(100,palette="Viridis",rev=T)[MDGE],
            bg=colorspace::sequential_hcl(100,palette="Viridis",rev=T)[MDGE])
    axis(side=1,at=1:nrow(DR),lwd=0,labels=tempLabCol[hG$order],las=2,cex.axis=1.2)
    
    # Legend:
    tx0 <- par("usr")[1]
    tx <- (par("usr")[2] - par("usr")[1])
    ty0 <- par("usr")[3]
    ty <- par("usr")[4] - par("usr")[3]
    segments(x0=tx0 - seq(.15,.03,length.out=1000) * tx,
             y0=ty0 - 0.02 * ty,y1=ty0 - 0.05 * ty,
             col=colorspace::sequential_hcl(1000,palette="Viridis",rev=T),xpd=NA)
    text(x=tx0 - c(.15,.09,.03) * tx,
         y=ty0 - c(0.035,0.02,0.035) * ty,
         labels=c(round(min(temp_MDGE),2),
                  "Mean detected expression",
                  round(max(temp_MDGE),2)),pos=2:4,xpd=NA)
    symbols(x=tx0 - c(.15,.09,.03) * tx,
            y=ty0 - rep(.14,3) * ty,add=T,xpd=NA,
            circles=c(0.25,0.5,0.75)/2,inches=F,bg="black")
    text(x=tx0 - c(.149,.089,0.029,.09) * tx,
         y=ty0 - c(rep(.23,3),.26) * ty,xpd=NA,
         labels=c("25%","50%","75%","Detection Rate"))
    
    
    par(mar=c(9,0,0,7))
    plot(dC,horiz=T,xpd=NA,
         ylim=c(0.5,length(hC$order)+.5),yaxs="i",yaxt="n")
    
    par(mar=c(0,0,0,.5))
    if (class(hG) == "hclust") {
      plot(as.dendrogram(hG),leaflab="none",
           xlim=c(0.5,length(hG$order)+.5),xaxs="i",yaxt="n")
    }
  }
  par(temp_par)
}

#plot_deDotplot(sCVdL$res.0.4,NULL,"deMarker",5,0.001)


# Cluster-wise gene expression scatterplot -----
# ^ dot types
upper.half.circle <- function(col1){  
  rs <- seq(0,pi,len=100) + pi/2
  xc <- 0+cos(rs) 
  yc <- 0+sin(rs) 
  polygon(xc,yc,col=col1,border=NA)
} 
lower.half.circle <- function(col2){ 
  rs <- seq(0,pi,len=100) + pi/2
  xc <- 0-cos(rs) 
  yc <- 0-sin(rs) 
  polygon(xc,yc,col=col2,border=NA)
}
doubleDot <- function(col1,col2) {
  upper.half.circle(col1)
  lower.half.circle(col2)
  rs <- seq(0,2*pi,len=200)
  polygon(cos(rs),sin(rs),border="white")
}
singleDot <- function(col1){  
  rs <- seq(0,2*pi,len=200)
  xc <- 0+cos(rs) 
  yc <- 0+sin(rs) 
  polygon(xc,yc,col=col1,border="white")
} 


#' scClustViz plot: Plot within-cluster gene expression highlighting marker
#' genes
#'
#' This function makes a scatterplot of gene detection rate vs. mean detected
#' gene abundance, highlighting genes identified as cell type specific markers
#' by the user. \strong{This function will not work unless
#' \code{\link{addCellMarkersToCGS}} has been run on the sCVdata object prior.}
#'
#' @param sCVd The sCVdata object.
#' @param selClust A named character vector representing the cluster to be
#'   displayed. If \code{\link{labelCellTypes}} has been run, pass the
#'   appropriate element of \code{attr(Clusters(sCV),"ClusterNames")} to this
#'   argument to show both cluster number and cell type label in the legend.
#' @param cellMarkers The \code{cellMarkers} argument from
#'   \code{\link{runShiny}}. A list of marker genes for expected cell types.
#'
#' @examples
#' \dontrun{
#' cellMarkers <- list("Cortical precursors"=c("Mki67","Sox2","Pax6",
#'                                                   "Pcna","Nes","Cux1","Cux2"),
#'                           "Interneurons"=c("Gad1","Gad2","Npy","Sst","Lhx6",
#'                                            "Tubb3","Rbfox3","Dcx"),
#'                           "Cajal-Retzius neurons"="Reln",
#'                           "Intermediate progenitors"="Eomes",
#'                           "Projection neurons"=c("Tbr1","Satb2","Fezf2",
#'                                                  "Bcl11b","Tle4","Nes",
#'                                                  "Cux1","Cux2","Tubb3",
#'                                                  "Rbfox3","Dcx")
#'                           )
#' sCVdata <- labelCellTypes(sCVdata,
#'                           cellMarkers=cellMarkers,
#'                           symbolMap=NULL)
#' 
#' pdf("filepath.pdf",width=12,height=7)
#' plot_clusterGenes_markers(sCVd=sCVdata,
#'                           selClust="1",
#'                           cellMarkers=cellMarkers)
#' dev.off()
#' }
#'
#' @export

plot_clusterGenes_markers <- function(sCVd,selClust,cellMarkers) {
  if (length(cellMarkers) < 1) {
    cellMarkersS <- cellMarkersU <- list()
  } else {
    cellMarkersS <- apply(combn(seq_along(cellMarkers),2),2,
                          function(X) do.call(intersect,unname(cellMarkers[X])))
    try(names(cellMarkersS) <- apply(combn(seq_along(cellMarkers),2),2,
                                     function(X) paste(X,collapse="&")),silent=T)
    cellMarkersS <- cellMarkersS[sapply(cellMarkersS,length) > 0]
    cellMarkersU <- lapply(cellMarkers,function(X) X[!X %in% unlist(cellMarkersS)])
  }
  
  cellMarkCols <- colorspace::qualitative_hcl(length(cellMarkersU),palette="Dark 3")
  par(mar=c(3,3,3,20),mgp=2:0)
  if (selClust == "") {
    plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA)
    text(.5,.5,paste("Click a cell from a cluster on the tSNE plot above",
                     "or select a cluster from the drop-down list above left",
                     "to see gene expression for that cluster.",sep="\n"))
  } else {
    CGS <- ClustGeneStats(sCVd)[[selClust]]
    # if (nrow(CGS) > 1e4) {
      temp_pch <- "."
      temp_cex <- 3
    # } else {
    #   temp_pch <- 20
    #   temp_cex <- 1
    # }
    temp_ylab <- switch(as.character(Param(sCVd,"exponent") == exp(1)),
                        "TRUE"="(natural log scale)",
                        "FALSE"=paste0("(log",Param(sCVd,"exponent")," scale)"),
                        "NA"="(log2 scale)")
    if (is.null(attr(Clusters(sCVd),"ClusterColours"))) {
      temp_clustCol <- colorspace::qualitative_hcl(length(levels(Clusters(sCVd))),
                                                   palette="Dark 3")
    } else {
      temp_clustCol <- attr(Clusters(sCVd),"ClusterColours")
    }
    plot(MDGE~DR,
         data=CGS[!((CGS$cMu | CGS$cMs) & CGS$overCut),],
         xlim=range(CGS$DR),ylim=range(CGS$MDGE),
         col=alpha("black",0.2),pch=temp_pch,cex=temp_cex,
         xlab="Proportion of cells in which gene was detected",
         ylab=paste("Mean normalized gene expression where detected",temp_ylab))
    title(paste0("Cluster ", selClust,": ",attr(Clusters(sCVd),"ClusterNames")[selClust]),cex=1.2)
    mtext(paste("Cells:",sum(Clusters(sCVd) == selClust),
                "   Genes detected:",sum(CGS$DR > 0)),side=3,line=0,cex=0.9)
    box(col=temp_clustCol[selClust],lwd=2)
    for (x in which(CGS$cMu)) {
      TeachingDemos::my.symbols(x=CGS$DR[x],y=CGS$MDGE[x],
                                symb=singleDot,inches=0.1,
                                MoreArgs=list(
                                  col1=cellMarkCols[which(
                                    sapply(cellMarkersU,function(X) 
                                      CGS$genes[x] %in% X)
                                  )]
                                ))
    }
    for (x in which(CGS$cMs)) {
      temp <- unlist(strsplit(names(which(sapply(cellMarkersS,function(X) 
        CGS$genes[x] %in% X))),"&"))
      TeachingDemos::my.symbols(x=CGS$DR[x],
                                y=CGS$MDGE[x],
                                symb=doubleDot,inches=0.1,
                                MoreArgs=list(col1=cellMarkCols[as.integer(temp[1])],
                                              col2=cellMarkCols[as.integer(temp[2])]))
    }
    tempLabels <- spreadLabels2(CGS[(CGS$cMu | CGS$cMs) & CGS$overCut,"DR"],
                                CGS[(CGS$cMu | CGS$cMs) & CGS$overCut,"MDGE"],
                                CGS[(CGS$cMu | CGS$cMs) & CGS$overCut,"genes"],
                                str.cex=1.2,str.font=2)
    rownames(tempLabels) <- CGS[(CGS$cMu | CGS$cMs) & CGS$overCut,"genes"]                        
    for (gn in CGS[CGS$cMu & CGS$overCut,"genes"]) {
      rect(xleft=tempLabels[gn,1] - strwidth(gn,cex=1.2,font=2) * .5,
           xright=tempLabels[gn,1] + strwidth(gn,cex=1.2,font=2) * .5,
           ybottom=tempLabels[gn,2] - strheight(gn,cex=1.2,font=2) * .5,
           ytop=tempLabels[gn,2] + strheight(gn,cex=1.2,font=2) * .5,
           border=NA,col=alpha("white",0.5))
      text(tempLabels[gn,,drop=F],labels=gn,cex=1.2,font=2,
           col=cellMarkCols[which(sapply(cellMarkersU,function(X) 
             gn %in% X))])
    }
    for (gn in CGS[CGS$cMs & CGS$overCut,"genes"]) {
      rect(xleft=tempLabels[gn,1] - strwidth(gn,cex=1.2,font=2) * .5,
           xright=tempLabels[gn,1] + strwidth(gn,cex=1.2,font=2) * .5,
           ybottom=tempLabels[gn,2] - strheight(gn,cex=1.2,font=2) * .5,
           ytop=tempLabels[gn,2] + strheight(gn,cex=1.2,font=2) * .5,
           border=NA,col=alpha("white",0.5))
      text(tempLabels[gn,,drop=F],labels=gn,cex=1.2,font=2,
           col=cellMarkCols[as.integer(temp[2])])
    }
    legend(x=1.05,y=max(CGS$MDGE),xpd=NA,bty="n",ncol=1,
           pch=19,col=cellMarkCols,legend=names(cellMarkersU))
  }
}


#' scClustViz plot: Plot within-cluster gene expression highlighting DE genes
#'
#' This function makes a scatterplot of gene detection rate vs. mean detected
#' gene abundance, highlighting differentially expressed genes.
#'
#' @param sCVd The sCVdata object.
#' @param selClust A named character vector representing the cluster to be
#'   displayed. If \code{\link{labelCellTypes}} has been run, pass the
#'   appropriate element of \code{attr(Clusters(sCV),"ClusterNames")} to this
#'   argument to show both cluster number and cell type label in the legend.
#' @param DEgenes The output of \code{\link{dotplotDEgenes}}.
#' @param DEnum Single integer representing the maximum number of DE genes per
#'   cluster to include in the plot.
#' @param DEtype One of: \code{"DEvsRest"} - see \code{\link{DEvsRest}};
#'   \code{"DEneighb"} - see \code{\link{DEneighb}}; \code{"DEmarker"} - see
#'   \code{\link{DEmarker}}.
#'
#' @examples
#' \dontrun{
#' pdf("filepath.pdf",width=12,height=7)
#' plot_clusterGenes_DEgenes(sCVd=sCVdata,
#'                           selClust="1",
#'                           DEgenes=dotplotDEgenes(sCVdata,
#'                                                  DEtype="DEneighb",
#'                                                  FDRthresh=0.01),
#'                           DEnum=5,
#'                           DEtype="DEneighb")
#' dev.off()
#' }
#'
#' @export

plot_clusterGenes_DEgenes <- function(sCVd,selClust,DEgenes,DEnum,DEtype) {
  par(mar=c(3,3,3,20),mgp=2:0)
  if (selClust == "") {
    plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA)
    text(.5,.5,paste("Click a cell from a cluster on the tSNE plot above",
                     "or select a cluster from the drop-down list above left",
                     "to see gene expression for that cluster.",sep="\n"))
  } else {
    CGS <- ClustGeneStats(sCVd)[[selClust]]
    # if (nrow(CGS) > 1e4) {
    temp_pch <- "."
    temp_cex <- 3
    # } else {
    #   temp_pch <- 20
    #   temp_cex <- 1
    # }
    temp_ylab <- switch(as.character(Param(sCVd,"exponent") == exp(1)),
                        "TRUE"="(natural log scale)",
                        "FALSE"=paste0("(log",Param(sCVd,"exponent")," scale)"),
                        "NA"="(log2 scale)")
    if (is.null(attr(Clusters(sCVd),"ClusterColours"))) {
      temp_clustCol <- colorspace::qualitative_hcl(length(levels(Clusters(sCVd))),
                                                   palette="Dark 3")
    } else {
      temp_clustCol <- attr(Clusters(sCVd),"ClusterColours")
    }
    plot(MDGE~DR,
         data=CGS[!rownames(CGS) %in% names(DEgenes[[selClust]])[1:DEnum],],
         xlim=range(CGS$DR),ylim=range(CGS$MDGE),
         col=alpha("black",0.2),pch=temp_pch,cex=temp_cex,
         xlab="Proportion of cells in which gene was detected",
         ylab=paste("Mean normalized gene expression where detected",temp_ylab))
    title(paste0("Cluster ", selClust,": ",attr(Clusters(sCVd),"ClusterNames")[selClust]),cex=1.2)
    mtext(paste("Cells:",sum(Clusters(sCVd) == selClust),
                "   Genes detected:",sum(CGS$DR > 0)),side=3,line=0,cex=0.9)
    box(col=temp_clustCol[selClust],lwd=2)
    if (length(DEgenes[[selClust]]) > 0) {
      DEG <- names(DEgenes[[selClust]])[1:DEnum]
      DEG <- DEG[!is.na(DEG)]
      points(x=CGS[DEG,"DR"],y=CGS[DEG,"MDGE"],
             pch=16,cex=1.2,col="firebrick2")
      if (!"overCut" %in% names(CGS)) { CGS$overCut <- T }
      if (any(CGS[DEG,"overCut"])) {
        labelDF <- CGS[DEG,]
        labelDF <- labelDF[labelDF$overCut,]
        if (!"genes" %in% names(labelDF)) { labelDF$genes <- rownames(labelDF) }
        tempLabels <- spreadLabels2(x=labelDF$DR,y=labelDF$MDGE,
                                    label=labelDF$genes,
                                    str.cex=1.2,str.font=2)
        rect(xleft=tempLabels[,1] - 
               strwidth(labelDF$genes,cex=1.2,font=2) * .5,
             xright=tempLabels[,1] + 
               strwidth(labelDF$genes,cex=1.2,font=2) * .5,
             ybottom=tempLabels[,2] - 
               strheight(labelDF$genes,cex=1.2,font=2) * .5,
             ytop=tempLabels[,2] +
               strheight(labelDF$genes,cex=1.2,font=2) * .5,
             border=NA,col=alpha("white",0.5))
        text(tempLabels,cex=1.2,font=2,col="firebrick2",
             labels=labelDF$genes)
      }
    }
    temp_n <- length(DEgenes[[selClust]])
    temp_lab <- switch(DEtype,
                       DEvsRest=" DE genes vs rest of cells in sample",
                       DEmarker=" marker genes",
                       DEneighb=" DE genes vs nearest neighbouring cluster")
    legend("top",bty="n",pch=16,col="firebrick2",
           legend=paste0(temp_n,temp_lab," (showing top ",
                         min(temp_n,DEnum),")"))
    
  }
}


#' scClustViz plot: Plot within-cluster gene expression highlighting selected genes
#'
#' This function makes a scatterplot of gene detection rate vs. mean detected
#' gene abundance, highlighting specified genes.
#'
#' @param sCVd The sCVdata object.
#' @param selClust A named character vector representing the cluster to be
#'   displayed. If \code{\link{labelCellTypes}} has been run, pass the
#'   appropriate element of \code{attr(Clusters(sCV),"ClusterNames")} to this
#'   argument to show both cluster number and cell type label in the legend.
#' @param GOI A character vector of gene names to highlight.
#'
#' @examples
#' \dontrun{
#' pdf("filepath.pdf",width=12,height=7)
#' plot_clusterGenes_search(sCVd=sCVdata,
#'                          selClust="1",
#'                          GOI=c("Actb","Sox2"))
#' dev.off()
#' }
#'
#' @export

plot_clusterGenes_search <- function(sCVd,selClust,GOI) {
  par(mar=c(3,3,3,20),mgp=2:0)
  if (selClust == "") {
    plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA)
    text(.5,.5,paste("Click a cell from a cluster on the tSNE plot above",
                     "or select a cluster from the drop-down list above left",
                     "to see gene expression for that cluster.",sep="\n"))
  } else {
    CGS <- ClustGeneStats(sCVd)[[selClust]]
    # if (nrow(CGS) > 1e4) {
    temp_pch <- "."
    temp_cex <- 3
    # } else {
    #   temp_pch <- 20
    #   temp_cex <- 1
    # }
    if (!"genes" %in% names(CGS)) { CGS$genes <- rownames(CGS) }
    temp_ylab <- switch(as.character(Param(sCVd,"exponent") == exp(1)),
                        "TRUE"="(natural log scale)",
                        "FALSE"=paste0("(log",Param(sCVd,"exponent")," scale)"),
                        "NA"="(log2 scale)")
    if (is.null(attr(Clusters(sCVd),"ClusterColours"))) {
      temp_clustCol <- colorspace::qualitative_hcl(length(levels(Clusters(sCVd))),
                                                   palette="Dark 3")
    } else {
      temp_clustCol <- attr(Clusters(sCVd),"ClusterColours")
    }
    plot(MDGE~DR,
         data=CGS[!CGS$genes %in% GOI,],
         col=alpha("black",0.2),pch=temp_pch,cex=temp_cex,
         xlim=range(CGS$DR),ylim=range(CGS$MDGE),
         xlab="Proportion of cells in which gene was detected",
         ylab=paste("Mean normalized gene expression where detected",temp_ylab))
    title(paste0("Cluster ", selClust,": ",attr(Clusters(sCVd),"ClusterNames")[selClust]),cex=1.2)
    mtext(paste("Cells:",sum(Clusters(sCVd) == selClust),
                "   Genes detected:",sum(CGS$DR > 0)),side=3,line=0,cex=0.9)
    box(col=temp_clustCol[selClust],lwd=2)
    GOI <- GOI[GOI %in% CGS$genes]
    if (length(GOI) > 0) {
      points(x=CGS[GOI,"DR"],y=CGS[GOI,"MDGE"],
             pch=16,cex=1.2,col="firebrick2")
      
      labelDF <- CGS[GOI,]
      if (!"genes" %in% names(labelDF)) { labelDF$genes <- rownames(labelDF) }
      tempLabels <- spreadLabels2(x=labelDF$DR,y=labelDF$MDGE,
                                  label=labelDF$genes,
                                  str.cex=1.2,str.font=2)
      rect(xleft=tempLabels[,1] - 
             strwidth(labelDF$genes,cex=1.2,font=2) * .5,
           xright=tempLabels[,1] + 
             strwidth(labelDF$genes,cex=1.2,font=2) * .5,
           ybottom=tempLabels[,2] - 
             strheight(labelDF$genes,cex=1.2,font=2) * .5,
           ytop=tempLabels[,2] +
             strheight(labelDF$genes,cex=1.2,font=2) * .5,
           border=NA,col=alpha("white",0.5))
      text(tempLabels,cex=1.2,font=2,col="firebrick2",
           labels=labelDF$genes)
    }
  }
}


# Gene search function -------
geneSearch <- function(txt,st,CGS) {
  if (length(txt) < 1) { txt <- ""}
  geneNames <- rownames(CGS)
  names(geneNames) <- toupper(CGS$genes)
  temp <- switch(st,
                 comma={
                   temp_in <- strsplit(txt,split="[\\s,]",perl=T)[[1]]
                   temp_out <- geneNames[toupper(temp_in)]
                   names(temp_out) <- CGS[temp_out,"genes"]
                   temp_out
                 },
                 regex={
                   temp_in <- grep(txt,names(geneNames),ignore.case=T)
                   temp_out <- geneNames[temp_in]
                   names(temp_out) <- CGS[temp_out,"genes"]
                   temp_out
                 })
  temp <- temp[!is.na(temp)]
  if (length(temp) > 0) {
    return(temp)
  } else {
    return(switch(st,
                  comma={
                    temp_in <- strsplit(txt,split="[\\s,]",perl=T)[[1]]
                    return(geneNames[which(toupper(geneNames) %in% toupper(temp_in))])
                  },
                  regex=grep(txt,geneNames,value=T,ignore.case=T)))
  }
}


# Gene expression boxplots --------

#' scClustViz plot: Compare gene expression across clusters
#'
#' This function generates boxplots comparing normalized gene abundance across
#' all clusters.
#'
#' @param nge The gene expression matrix, see \code{\link{getExprs}}.
#' @param sCVd The sCVdata object.
#' @param gene The gene to display.
#' @param geneName Optional. A named character vector of length one. The element
#'   is the full gene name, and the name is the gene symbol.
#' @param opts Default=\code{c("sct","dr")}. A character vector with plotting
#'   options. If it includes \code{"sct"}, data points will be overlaid as a
#'   jitter over the boxplot. If it includes \code{"dr"}, detection rate per
#'   cluster will be plotted as a small black bar over each boxplot, with the
#'   corresponding axis on the right.
#'
#' @examples
#' \dontrun{
#' plot_GEboxplot(getExpr(input_data_obj),
#'                sCVd=sCVdata,
#'                gene="Actb")
#' }
#'
#' @export

plot_GEboxplot <- function(nge,sCVd,gene,geneName,opts=c("sct","dr")) {
  if (gene == "") {
    plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA)
    text(.5,.5,paste("Select a gene by either clicking on the plot above",
                     "or searching for genes of interest in the search bar above,",
                     "then pick the gene from the list just above this figure",
                     "to see a comparison of that gene's expression across all clusters.",
                     sep="\n"))
  } else {
    # ^ setup -----
    hC <- hclust(as.dist(DEdist(sCVd)),"single")
    temp_ylab <- switch(as.character(Param(sCVd,"exponent") == exp(1)),
                        "TRUE"="(natural log scale)",
                        "FALSE"=paste0("(log",Param(sCVd,"exponent")," scale)"),
                        "NA"="(log2 scale)")
    temp_pos <- switch(as.character(length(levels(Clusters(sCVd))) > 1),
                       "TRUE"=hC$order,"FALSE"=1)
    if (is.null(attr(Clusters(sCVd),"ClusterColours"))) {
      clustCol <- colorspace::qualitative_hcl(length(levels(Clusters(sCVd))),
                                              palette="Dark 3")
    } else {
      clustCol <- attr(Clusters(sCVd),"ClusterColours")
    }
    if ("sct" %in% opts) {
      bxpCol <- alpha(clustCol,alpha=.2)
    } else {
      bxpCol <- alpha(clustCol,alpha=.8)
    }
    
    # ^ plot boxplot -----
    temp_par <- par(no.readonly=T)
    layout(matrix(2:1,nrow=2),heights=c(1,4))
    par(mar=c(3,3,0,3),mgp=2:0)
    suppressWarnings(boxplot(
      vector("list",length(levels(Clusters(sCVd))[levels(Clusters(sCVd)) != "Unselected"])),
      ylim=switch(any(is.na(c(Param(sCVd,"exponent"),Param(sCVd,"pseudocount")))) + 1,
                  range(nge[gene,]),
                  range(log2(nge[gene,] + 1))),
      xaxt="n",xlab=NA,
      ylab=paste(gene,"normalized gene expression",temp_ylab)
    ))
    mtext(levels(Clusters(sCVd))[temp_pos],side=1,line=0,at=seq_along(temp_pos))
    mtext("Clusters, ordered by heatmap dendrogram",side=1,line=1)
    if (missing(geneName)) { geneName <- NULL }
    if (is.null(geneName)) { 
      mtext(paste(gene,collapse="\n"),side=1,line=2,font=2) 
    } else {
      mtext(paste(paste0(names(geneName),": ",geneName),collapse="\n"),
            side=1,line=2,font=2) 
    }
    for (i in temp_pos) {
      if ("sct" %in% opts) {
        points(x=jitter(rep(which(temp_pos == i),
                          sum(Clusters(sCVd) %in% levels(Clusters(sCVd))[i])),
                      amount=.2),
               y=switch(any(is.na(c(Param(sCVd,"exponent"),Param(sCVd,"pseudocount")))) + 1,
                        nge[gene,Clusters(sCVd) %in% levels(Clusters(sCVd))[i]],
                        log2(nge[gene,Clusters(sCVd) %in% levels(Clusters(sCVd))[i]] + 1)),
               pch=".",cex=3,
               col=alpha(clustCol,alpha=.4)[i])
      }
      boxplot(switch(any(is.na(c(Param(sCVd,"exponent"),Param(sCVd,"pseudocount")))) + 1,
                     nge[gene,Clusters(sCVd) %in% levels(Clusters(sCVd))[i]],
                     log2(nge[gene,Clusters(sCVd) %in% levels(Clusters(sCVd))[i]] + 1)),
              add=T,at=which(temp_pos == i),col=bxpCol[i],outline=F)
    }
    if ("dr" %in% opts) {
      points(x=seq_along(ClustGeneStats(sCVd)),
             y=sapply(ClustGeneStats(sCVd)[temp_pos],function(X) X[gene,"DR"]) * 
               switch(any(is.na(c(Param(sCVd,"exponent"),Param(sCVd,"pseudocount")))) + 1,
                      max(nge[gene,]) + min(nge[gene,]),
                      log2(max(nge[gene,]) + 1) + log2(min(nge[gene,]) + 1)),
             pch="-",cex=2)
      axis(at=seq(0,1,.25) * 
             switch(any(is.na(c(Param(sCVd,"exponent"),Param(sCVd,"pseudocount")))) + 1,
                    max(nge[gene,]) + min(nge[gene,]),
                    log2(max(nge[gene,]) + 1) + log2(min(nge[gene,]) + 1)),
           side=4,labels=paste0(seq(0,1,.25) * 100,"%"))
      mtext(side=4,line=2,
            text=expression("Gene detection rate per cluster ( "~bold("-")~")"))
    }
    if (length(temp_pos) > 1) { 
      par(new=F,mar=c(0,3,1,3))
      plot(as.dendrogram(hC),leaflab="none") 
    }
    par(temp_par)
  }
}


# Cluster comparisons --------
compareClusts_DF <- function(sCVd,clA,clB,dataType) {
  if (dataType %in% c("MGE","MDGE","DR")) {
    loc1 <- c(paste(clA,clB,sep="-"),paste(clB,clA,sep="-"))
    loc <- loc1[loc1 %in% names(DEcombn(sCVd))]
    loc1 <- which(loc1 %in% names(DEcombn(sCVd)))
    if (loc1 == 2) { loc1 <- -1 }
    if ("Wstat" %in% colnames(DEcombn(sCVd)[[loc]])) {
      tempW <- DEcombn(sCVd)[[loc]]$Wstat - 
        DEcombn(sCVd)[[loc]]$Wstat[which.max(DEcombn(sCVd)[[loc]]$pVal)]
    } else {
      tempW <- DEcombn(sCVd)[[loc]]$logGER
    }
    temp <- data.frame(x_diff=ClustGeneStats(sCVd)[[clA]][,dataType] - 
                         ClustGeneStats(sCVd)[[clB]][,dataType],
                       y_mean=rowMeans(cbind(ClustGeneStats(sCVd)[[clA]][,dataType],
                                             ClustGeneStats(sCVd)[[clB]][,dataType])),
                       logGER=NA,FDR=NA,dir=NA)
    rownames(temp) <- rownames(ClustGeneStats(sCVd)[[clA]])
    temp[rownames(DEcombn(sCVd)[[loc]]),"logGER"] <- DEcombn(sCVd)[[loc]]$logGER
    temp[rownames(DEcombn(sCVd)[[loc]]),"FDR"] <- DEcombn(sCVd)[[loc]]$FDR
    temp[rownames(DEcombn(sCVd)[[loc]]),"dir"] <- c(clB,clA)[(tempW * loc1 > 0) + 1]
    return(temp)
  } else if (dataType %in% c("GERvDDR","logGER","dDR")) {
    loc1 <- which(c(paste(clA,clB,sep="-"),paste(clB,clA,sep="-")) %in% names(DEcombn(sCVd)))
    if (loc1 == 2) { loc1 <- -1 }
    loc <- which(names(DEcombn(sCVd)) %in% c(paste(clA,clB,sep="-"),paste(clB,clA,sep="-")))
    temp <- DEcombn(sCVd)[[loc]][,c("logGER","dDR","FDR")]
    temp <- as.data.frame(mapply("*",temp,c(loc1,loc1,1))) 
    rownames(temp) <- rownames(DEcombn(sCVd)[[loc]])
    if ("Wstat" %in% colnames(DEcombn(sCVd)[[loc]])) {
      tempW <- DEcombn(sCVd)[[loc]]$Wstat - 
        DEcombn(sCVd)[[loc]]$Wstat[which.max(DEcombn(sCVd)[[loc]]$pVal)]
    } else {
      tempW <- DEcombn(sCVd)[[loc]]$logGER
    }
    temp$dir <- c(clB,clA)[(tempW * loc1 > 0) + 1]
    return(temp)
  } 
}

plot_compareClusts_MAplot <- function(sCVd,clA,clB,dataType,labType,labNum,labGenes) {
  # ^ setup -----
  CGS <- compareClusts_DF(sCVd,clA,clB,dataType)
  temp_exp <- switch(as.character(Param(sCVd,"exponent") == exp(1)),
                     "TRUE"="(natural log scale)",
                     "FALSE"=paste0("(log",Param(sCVd,"exponent")," scale)"),
                     "NA"="(log2 scale)")
  temp_label <- switch(dataType,
                       "MGE"=paste("mean normalized gene expression",temp_exp),
                       "MDGE"=paste("mean normalized gene expression where detected",temp_exp),
                       "DR"="proportion of cells in which gene was detected")
  if (labType == "diff") {
    gnA <- rownames(head(CGS[order(CGS$x_diff,decreasing=T),],labNum))
    gnB <- rownames(tail(CGS[order(CGS$x_diff,decreasing=T),],labNum))
  } else if (labType == "de") {
    ts <- order(CGS$FDR,na.last=T)
    gnA <- rownames(CGS)[ts[CGS[ts,"dir"] == clA][1:labNum]]
    gnB <- rownames(CGS)[ts[CGS[ts,"dir"] == clB][1:labNum]]
  }
  if (is.null(attr(Clusters(sCVd),"ClusterColours"))) {
    clustCols <- colorspace::qualitative_hcl(length(levels(Clusters(sCVd))),
                                        palette="Dark 3")
  } else {
    clustCols <- attr(Clusters(sCVd),"ClusterColours")
  }
  
  # ^ plot -----
  par(mar=c(3,3,3.5,1),mgp=c(2,1,0))
  plot(y_mean~x_diff,data=CGS,
       xlab=paste0("Difference in ",temp_label," (",clA," - ",clB,")"),
       ylab=paste0("Average of ",temp_label," between ",clA," & ",clB),
       main=paste0("Modified MA plot of ",
                   switch(dataType,
                          "MGE"="mean gene expression",
                          "MDGE"="mean detected gene expression",
                          "DR"="detection rate"),
                   " (",clA," vs. ",clB,")"),
       pch=20,col=alpha("black",0.3))
  abline(v=0,col="gray50")
  lines(x=c(par("usr")[2],par("usr")[2]),y=c(par("usr")[3],par("usr")[4]),lwd=2,xpd=NA,
        col=clustCols[which(levels(Clusters(sCVd)) == clA)])
  lines(x=c(par("usr")[1],par("usr")[1]),y=c(par("usr")[3],par("usr")[4]),xpd=NA,lwd=2,
        col=clustCols[which(levels(Clusters(sCVd)) == clB)])
  if (labType == "search") {
    if (length(labGenes) > 0) {
      labGenes <- labGenes[labGenes %in% rownames(CGS)]
      points(y_mean~x_diff,data=CGS[labGenes,],pch=16,col=alpha("firebrick2",0.8))
      tempLabel <- spreadLabels2(x=CGS[labGenes,"x_diff"],y=CGS[labGenes,"y_mean"],
                                 label=labGenes,str.cex=1.2,str.font=2)
      text(tempLabel,labels=labGenes,col="firebrick2",cex=1.2,font=2)
    }
  } else {
    points(y_mean~x_diff,data=CGS[gnA,],pch=16,
           col=alpha(clustCols,alpha=.8)[which(levels(Clusters(sCVd)) == clA)])
    points(y_mean~x_diff,data=CGS[gnB,],pch=16,
           col=alpha(clustCols,alpha=.8)[which(levels(Clusters(sCVd)) == clB)])
    tempLabel <- spreadLabels2(x=CGS[c(gnA,gnB),"x_diff"],y=CGS[c(gnA,gnB),"y_mean"],
                               label=c(gnA,gnB),str.cex=1.2,str.font=2)
    rownames(tempLabel) <- c(gnA,gnB)
    text(tempLabel[gnA,],labels=gnA,cex=1.2,font=2,
         col=clustCols[which(levels(Clusters(sCVd)) == clA)])
    text(tempLabel[gnB,],labels=gnB,cex=1.2,font=2,
         col=clustCols[which(levels(Clusters(sCVd)) == clB)])
  }
  mtext(paste("Higher in",clA),side=1,line=-1.1,adj=.99,font=2,
        col=clustCols[which(levels(Clusters(sCVd)) == clA)])
  mtext(paste("Higher in",clB),side=1,line=-1.1,adj=0.01,font=2,
        col=clustCols[which(levels(Clusters(sCVd)) == clB)])
  mtext(paste(
    paste("Cosine similarity of comparison:",
          round(cosineSim(ClustGeneStats(sCVd)[[clA]][,dataType], 
                          ClustGeneStats(sCVd)[[clB]][,dataType]),2)),
    paste("Spearman's Rho of comparison:",
          round(cor(x=ClustGeneStats(sCVd)[[clA]][,dataType], 
                    y=ClustGeneStats(sCVd)[[clB]][,dataType],
                    method="spearman"),2)),
    sep="  -  "),
    side=3)
} 

plot_compareClusts_DEscatter <- function(sCVd,clA,clB,dataType,labType,
                                         labTypeDiff,labNum,labGenes) {
  # ^ setup -----
  CGS <- compareClusts_DF(sCVd,clA,clB,dataType)
  temp_exp <- switch(as.character(Param(sCVd,"exponent") == exp(1)),
                     "TRUE"="(natural log scale)",
                     "FALSE"=paste0("(log",Param(sCVd,"exponent")," scale)"),
                     "NA"="(log2 scale)")
  if (labType == "diff") {
    gnA <- rownames(head(CGS[order(CGS[[labTypeDiff]],decreasing=T),],labNum))
    gnB <- rownames(tail(CGS[order(CGS[[labTypeDiff]],decreasing=T),],labNum))
  } else if (labType == "de") {
    ts <- order(CGS$FDR,na.last=T)
    gnA <- rownames(CGS)[ts[CGS[ts,"dir"] == clA][1:labNum]]
    gnB <- rownames(CGS)[ts[CGS[ts,"dir"] == clB][1:labNum]]
  }
  # Adding a colour scale for FDR
  # CGS <- CGS[order(CGS$FDR,decreasing=T,na.last=F),]
  # temp_col <- colorspace::sequential_hcl(100,palette="Viridis",alpha=0.3,rev=T)[cut(-log10(CGS$FDR),100,labels=F)]
  # temp_col[is.na(temp_col)] <- alpha("grey90",0.3)
  if (is.null(attr(Clusters(sCVd),"ClusterColours"))) {
    clustCols <- colorspace::qualitative_hcl(length(levels(Clusters(sCVd))),
                                             palette="Dark 3")
  } else {
    clustCols <- attr(Clusters(sCVd),"ClusterColours")
  }
  
  # ^ plot -----
  par(mar=c(3,3,3.5,1),mgp=c(2,1,0))
  plot(logGER~dDR,data=CGS,
       xlab=paste0("Difference in detection rate (",clA," - ",clB,")"),
       ylab=paste0("Gene expression ratio (",clA," : ",clB,") ",temp_exp),
       main=paste0("Expression difference effect sizes (",clA," vs. ",clB,")"),
       pch=20,col=alpha("black",0.3)) # col=temp_col)
  abline(v=0,h=0,col="gray50")
  lines(x=c(par("usr")[2],par("usr")[2]),y=c(0,par("usr")[4]),lwd=2,xpd=NA,
        col=clustCols[which(levels(Clusters(sCVd)) == clA)])
  lines(x=c(0,par("usr")[2]),y=c(par("usr")[4],par("usr")[4]),lwd=2,xpd=NA,
        col=clustCols[which(levels(Clusters(sCVd)) == clA)])
  lines(x=c(par("usr")[1],par("usr")[1]),y=c(par("usr")[3],0),xpd=NA,lwd=2,
        col=clustCols[which(levels(Clusters(sCVd)) == clB)])
  lines(x=c(par("usr")[1],0),y=c(par("usr")[3],par("usr")[3]),xpd=NA,lwd=2,
        col=clustCols[which(levels(Clusters(sCVd)) == clB)])
  if (labType == "search") {
    if (length(labGenes) > 0) {
      labGenes <- labGenes[labGenes %in% rownames(CGS)]
      points(logGER~dDR,data=CGS[labGenes,],pch=16,col=alpha("firebrick2",0.8))
      tempLabel <- spreadLabels2(CGS[labGenes,"dDR"],CGS[labGenes,"logGER"],
                                 label=labGenes,str.cex=1.2,str.font=2)
      text(tempLabel,labels=labGenes,col="firebrick2",cex=1.2,font=2)
    }
  } else {
    points(logGER~dDR,data=CGS[gnA,],pch=16,
           col=alpha(clustCols,alpha=.8)[which(levels(Clusters(sCVd)) == clA)])
    points(logGER~dDR,data=CGS[gnB,],pch=16,
           col=alpha(clustCols,alpha=.8)[which(levels(Clusters(sCVd)) == clB)])
    tempLabel <- spreadLabels2(CGS[c(gnA,gnB),"dDR"],CGS[c(gnA,gnB),"logGER"],
                               label=c(gnA,gnB),str.cex=1.2,str.font=2)
    rownames(tempLabel) <- c(gnA,gnB)
    text(tempLabel[gnA,],labels=gnA,cex=1.2,font=2,
         col=clustCols[which(levels(Clusters(sCVd)) == clA)])
    text(tempLabel[gnB,],labels=gnB,cex=1.2,font=2,
         col=clustCols[which(levels(Clusters(sCVd)) == clB)])
  }
  mtext(paste("Higher in",clA),side=3,line=-1.1,adj=.99,font=2,
        col=clustCols[which(levels(Clusters(sCVd)) == clA)])
  mtext(paste("Higher in",clB),side=1,line=-1.1,adj=0.01,font=2,
        col=clustCols[which(levels(Clusters(sCVd)) == clB)])
}

plot_compareClusts_volcano <- function(sCVd,clA,clB,dataType,labType,labNum,labGenes) {
  # ^ setup -----
  CGS <- compareClusts_DF(sCVd,clA,clB,dataType)
  CGS <- CGS[!is.na(CGS$FDR),]
  CGS$FDR <- -log10(CGS$FDR)
  CGS$FDR[CGS$FDR == Inf] <- 300 # Min positive non-zero value is 1e-300
  temp_exp <- switch(as.character(Param(sCVd,"exponent") == exp(1)),
                     "TRUE"="(natural log scale)",
                     "FALSE"=paste0("(log",Param(sCVd,"exponent")," scale)"),
                     "NA"="(log2 scale)")
  if (labType == "diff") {
    gnA <- rownames(head(CGS[order(CGS[[dataType]],decreasing=T),],labNum))
    gnB <- rownames(tail(CGS[order(CGS[[dataType]],decreasing=T),],labNum))
  } else if (labType == "de") {
    ts <- order(CGS$FDR,decreasing=T,na.last=T)
    gnA <- rownames(CGS)[ts[CGS[ts,"dir"] == clA][1:labNum]]
    gnB <- rownames(CGS)[ts[CGS[ts,"dir"] == clB][1:labNum]]
  }
  if (is.null(attr(Clusters(sCVd),"ClusterColours"))) {
    clustCols <- colorspace::qualitative_hcl(length(levels(Clusters(sCVd))),
                                             palette="Dark 3")
  } else {
    clustCols <- attr(Clusters(sCVd),"ClusterColours")
  }
  
  # ^ plot -----
  par(mar=c(3,3,3.5,1),mgp=c(2,1,0))
  plot(x=CGS[[dataType]],y=CGS$FDR,
       xlab=switch(dataType,
                   dDR=paste0("Difference in detection rate (",clA," - ",clB,")"),
                   logGER=paste0("Gene expression ratio (",clA," : ",clB,") ",temp_exp)),
       ylab="-log10 FDR-adjusted p-value",
       main=paste0("Volcano plot of differentially expressed genes (",clA," vs. ",clB,")"),
       pch=20,col=alpha("black",0.3))
  abline(v=0,col="gray50")
  lines(x=c(par("usr")[2],par("usr")[2]),y=c(par("usr")[3],par("usr")[4]),lwd=2,xpd=NA,
        col=clustCols[which(levels(Clusters(sCVd)) == clA)])
  lines(x=c(par("usr")[1],par("usr")[1]),y=c(par("usr")[3],par("usr")[4]),xpd=NA,lwd=2,
        col=clustCols[which(levels(Clusters(sCVd)) == clB)])
  if (labType == "search") {
    if (length(labGenes) > 0) {
      labGenes <- labGenes[labGenes %in% rownames(CGS)]
      points(x=CGS[labGenes,dataType],y=CGS[labGenes,"FDR"],
             pch=16,col=alpha("firebrick2",0.8))
      tempLabel <- spreadLabels2(x=CGS[labGenes,dataType],y=CGS[labGenes,"FDR"],
                                 label=labGenes,str.cex=1.2,str.font=2)
      text(tempLabel,labels=labGenes,col="firebrick2",cex=1.2,font=2)
    }
  } else {
    points(CGS[gnA,dataType],y=CGS[gnA,"FDR"],pch=16,
           col=alpha(clustCols,alpha=.8)[which(levels(Clusters(sCVd)) == clA)])
    points(CGS[gnB,dataType],y=CGS[gnB,"FDR"],pch=16,
           col=alpha(clustCols,alpha=.8)[which(levels(Clusters(sCVd)) == clB)])
    tempLabel <- spreadLabels2(CGS[c(gnA,gnB),dataType],CGS[c(gnA,gnB),"FDR"],
                               label=c(gnA,gnB),str.cex=1.2,str.font=2)
    rownames(tempLabel) <- c(gnA,gnB)
    text(tempLabel[gnA,],labels=gnA,cex=1.2,font=2,
         col=clustCols[which(levels(Clusters(sCVd)) == clA)])
    text(tempLabel[gnB,],labels=gnB,cex=1.2,font=2,
         col=clustCols[which(levels(Clusters(sCVd)) == clB)])
  }
  mtext(paste("Higher in",clA),side=1,line=-1.1,adj=.99,font=2,
        col=clustCols[which(levels(Clusters(sCVd)) == clA)])
  mtext(paste("Higher in",clB),side=1,line=-1.1,adj=0.01,font=2,
        col=clustCols[which(levels(Clusters(sCVd)) == clB)])
}


#' scClustViz plot: Volcano and MA-style plots to compare clusters
#'
#' This function generates scatterplots inspired by volcano and MA plots for
#' comparing gene expression between pairs of clusters.
#'
#' @param sCVd The sCVdata object.
#' @param clA Cluster identifier for side A of the comparison.
#' @param clB Cluster identifier for side B of the comparison.
#' @param dataType For MA-style plots comparing difference and mean of gene
#'   summary statistics, one of: \code{"DR"} (detection rate); \code{"MGE"}
#'   (mean gene expression); \code{"MDGE"} (mean detected gene expression). For
#'   volcano plots, the effect size measure can be one of: \code{"dDR"}
#'   (difference in detection rate); \code{"logGER"} (log gene expression
#'   ratio). To compare relationship between difference in detection rate and
#'   log gene expression ratio, use \code{"GERvDDR"}.
#' @param labType Default="de". A character vector indicating which genes to
#'   highlight. One of \code{"de"} (most statistically significant genes),
#'   \code{"diff"} (most different by dataType shown), or \code{"search"}
#'   (specified genes).
#' @param labGenes Only required if \code{labType="search"}. Gene names to
#'   highlight.
#' @param labNum Default=5. Number of genes to highlight per side.
#' @param labTypeDiff Default="logGER". Only required if
#'   \code{dataType="GERvDDR"} and \code{labType="diff"}. Which axis to use for
#'   difference calculation. One of \code{"dDR"} (difference in detection rate)
#'   or \code{"logGER"} (log gene expression ratio).
#'
#' @examples
#' \dontrun{
#' plot_compareClusts(sCVdata,
#'                    clA="1",
#'                    clB="2",
#'                    dataType="GERvDDR",
#'                    labType="search",
#'                    labGenes="Actb")
#' }
#'
#' @export

plot_compareClusts <- function(sCVd,clA,clB,dataType,
                               labType="de",labGenes,
                               labNum=5,labTypeDiff="logGER") {
  if (clA %in% levels(Clusters(sCVd)) & 
      clB %in% levels(Clusters(sCVd))) {
    if (dataType %in% c("MGE","MDGE","DR")) {
      plot_compareClusts_MAplot(sCVd,clA,clB,dataType,labType,labNum,labGenes)
    } else if (dataType == "GERvDDR") {
      plot_compareClusts_DEscatter(sCVd,clA,clB,dataType,labType,labTypeDiff,labNum,labGenes)
    } else if (dataType %in% c("logGER","dDR")) {
      plot_compareClusts_volcano(sCVd,clA,clB,dataType,labType,labNum,labGenes)
    }
  } else {
    plot(x=NA,y=NA,xlim=0:1,ylim=0:1,xaxt="n",yaxt="n",xlab=NA,ylab=NA)
    text(.5,.5,paste("Select two clusters to compare in this plot",
                     "using the pulldown menus on the right.",
                     "(Cluster A & Cluster B)",sep="\n"))
  }
}
BaderLab/scClustViz documentation built on Sept. 10, 2023, 11:51 p.m.