R/plot.R
In grandR: Comprehensive Analysis of Nucleotide Conversion Sequencing Data

Documented in density2d FormatCorrelation PlotHeatmap PlotPCA PlotScatter Transform.logFC Transform.no Transform.VST Transform.Z

#' Density estimation in 2d
#'
#' Estimate point densities on a regular grid for.
#'
#' @param x x coordinates
#' @param y y coordinates
#' @param facet factor: estimate for each unique factor; can be NULL
#' @param n size of the grid
#' @param margin one of 'n','x' or 'y'; should the density be computed along both axes ('n'), or along 'x' or 'y' axis only
#'
#' @return a density value for each point
#' @export
#'
#' @concept helper
density2d=function(x, y, facet=NULL, n=100, margin='n') {
  bandwidth.nrd.ex=function (x)
  {
    r <- range(x)
    h <- (r[2L] - r[1L])/1.34
    4 * 1.06 * min(sqrt(var(x)), h) * length(x)^(-1/5)
  }

  if (is.null(facet)) {
        r=rep(NA,length(x))
        use=is.finite(x+y)
        if (sum(use)==0) return(r)
        if (min(x[use])==max(x[use])) {
          x[use]=0:1
        }
        if (min(y[use])==max(y[use])) {
          y[use]=0:1
        }
        bw.x=MASS::bandwidth.nrd(x[use])
        if (bw.x==0) bw.x=bandwidth.nrd.ex(x[use])
        bw.y=MASS::bandwidth.nrd(y[use])
        if (bw.y==0) bw.y=bandwidth.nrd.ex(y[use])
        d=MASS::kde2d(x[use], y[use], h=c(bw.x,bw.y), n=n)
        if (margin=='x') d$z=d$z/apply(d$z,1,max)
        else if (margin=='y') d$z=t(t(d$z)/apply(d$z,2,max))
        else d$z=d$z/max(d$z)
        r[use]=d$z[cbind(findInterval(x[use], d$x),findInterval(y[use], d$y))]
	r=r/max(r,na.rm=T)
        return(r)
    }
    re<-rep(NA,length(x))
    for (f in unique(facet)) re[f==facet]=density2d(x[f==facet],y[f==facet],n=n)
    re
}


#' Make a PCA plot
#' @param data the grandR object that contains the data to plot
#' @param mode.slot the mode and slot of data to plot; slot in the grandr object (eg "count")
#' @param ntop how many genes to use
#' @param aest parameter to set the visual attributes
#' @param x number of principal component to show on the x axis (numeric)
#' @param y number of principal component to show on the y axis (numeric)
#' @param columns which columns (i.e. samples or cells) to perform PCA on (see details)
#' @param do.vst perform a variance stabilizing transformation for count data?
#'
#' @details Columns can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently build a logical vector (e.g., columns=Condition=="x").
#'
#' @return a PCA plot
#' @export
#'
#' @concept globalplot
PlotPCA=function(data, mode.slot=DefaultSlot(data), ntop=500,aest=NULL,x=1,y=2,columns=NULL,do.vst=TRUE) {

  aest=setup.default.aes(data,aest)

  columns=substitute(columns)
  columns=if (is.null(columns)) colnames(data) else eval(columns,Coldata(data),parent.frame())
  columns=Columns(data,columns)

  mat=as.matrix(GetTable(data,type=mode.slot,columns=columns,ntr.na = FALSE))

	cd=do.call("rbind",lapply(1:length(mode.slot), function(i) cbind(data$coldata,data.frame(type=mode.slot[i]))))


	mat=mat[,columns]
	cd=cd[columns,]

	rm.na=!apply(is.na(mat),2,sum)==nrow(mat)
	mat=mat[,rm.na]
	cd=cd[rm.na,]
	if (do.vst) {
	  checkPackages("DESeq2")
	  mat <- DESeq2::vst(cnt(mat))
	}

	checkPackages("matrixStats")
	rv <- matrixStats::rowVars(mat)

  	select <- order(rv, decreasing=TRUE)[seq_len(min(ntop, length(rv)))]
  	pca <- prcomp(t(mat[select,]))
	percentVar <- pca$sdev^2 / sum( pca$sdev^2 )
	d <- as.data.frame(pca$x)
	names(d)=paste0("PC",1:dim(d)[2])
	d=cbind(d, cd)
	ggplot(d,utils::modifyList(aes(!!sym(paste0("PC",x)),!!sym(paste0("PC",y))),aest))+cowplot::theme_cowplot()+
	  geom_point(size=3)+xlab(paste0("PC",x,": ",round(percentVar[x] * 100),"% variance"))+ylab(paste0("PC",y,": ",round(percentVar[y] * 100),"% variance"))+coord_fixed()
}

Transform=function(name,label=NULL){
  FUN=get(paste0("Transform.",name))()
  function(mat) {
    re=FUN(mat)
    if (!is.null(label)) attr(re,"label")=label
    re
  }
}

#' Transformations for PlotHeatmap
#'
#' Functions to perform transformations on the matrix used for \link{PlotHeatmap}.
#'
#' @param label label that is used for the heatmap legend
#' @param center perform centering when computing Z scores (see \link{scale})
#' @param scale perform scaling when computing Z scores (see \link{scale})
#' @param LFC.fun function to compute log fold changes (default: \link[lfc]{PsiLFC}, other viable option: \link[lfc]{NormLFC})
#' @param columns which columns (i.e. samples or cells) to use as reference when computing log fold changes (see details)
#' @param ... further parameters passed down to LFC.fun
#'
#' @details These functions should be used as transform parameter to \link{PlotHeatmap}. Available data transformations are
#' \itemize{
#'  \item{transform=Transform.Z(): compute z scores for each row; you can omit the usual centering or scaling by setting the respective parameters to false; see \link{scale}}
#'  \item{transform=Transform.VST(): do a variance stabilizing transformation using \link[DESeq2]{vst}}
#'  \item{transform=Transform.logFC(): compute log2 fold changes to one or several reference columns; see below how to define them; fold changes are computed using the lfc package)}
#'  \item{transform=Transform.no(): do not transform}
#' }
#'
#' @details The label to be used in the heatmap legend can be changed by specifying the label parameter.
#'
#' @details For Transform.logFC, columns can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#'
#' @return A function that transforms a matrix.
#'
#' @export
#'
#' @concept globalplot
Transform.no=function(label=" ") function(m) {re=m; attr(re,"label")=label; re}
#' @rdname Transform.no
#' @export
Transform.Z=function(label="z score",center=TRUE,scale=TRUE) function(m) {re=t(scale(t(m),center=center,scale=scale)); attr(re,"label")=label; re}
#' @rdname Transform.no
#' @export
Transform.VST=function(label="VST") function(m) {
  if (nrow(m)<1000) re = DESeq2::varianceStabilizingTransformation(cnt(m)) else re=DESeq2::vst(cnt(m))
  attr(re,"label")=label
  re
  }
#' @rdname Transform.no
#' @export
Transform.logFC=function(label="log2 FC",LFC.fun=NULL,columns=NULL,...) {
  function(m) {
    if (is.null(columns)) columns=1:ncol(m)
    ref=rowMeans(m[,columns,drop=F])
    if (is.null(LFC.fun)) LFC.fun=lfc::PsiLFC
    re=apply(m,2,function(v) LFC.fun(v,ref,normalizeFun=function(vv) vv))
    attr(re,"label")=label
    re
  }
}


make.continuous.colors=function(values,colors=NULL,breaks=NULL) {
  if (quantile(values,0.25,na.rm=TRUE)<0) {
    quant=c(50,95) #c(seq(0,1,length.out=nq+1)[c(-1,-nq-1)]*100,95)
    if (identical(breaks,"minmax")) {
      ll = max(c(values,-values))
      breaks = seq(-ll,ll,length.out = 5)
    } else if (length(breaks)==1) {
      quant=c(seq(0,1,length.out=breaks+1)[c(-1,-breaks-1)]*100,95)
      breaks=NULL
    }
    if (is.null(breaks)) {
      upper=quantile(values[values>0],quant/100,na.rm=TRUE)
      lower=quantile(-values[values<0],quant/100,na.rm=TRUE)
      breaks=c(-rev(pmax(upper,lower)),0,pmax(upper,lower))
    }
    if (is.null(colors)) colors="RdBu"
  } else {
    quant=c(5,25,50,75,95)
    if (identical(breaks,"minmax")) {
        breaks = seq(min(values),max(values),length.out = 5)
    } else if (length(breaks)==1) {
        quant=c(5,seq(0,1,length.out=breaks)[c(-1,-breaks)]*100,95)
        breaks=NULL
    }
    if (is.null(breaks)) {
      breaks=quantile(values,quant/100,na.rm=TRUE)
    }
    if (is.null(colors)) colors="YlOrRd"
  }

  if (length(colors)==1)  {
    checkPackages(c("RColorBrewer","viridisLite"))
    rev = FALSE
    if (substr(colors,1,3)=='rev') {
      rev = TRUE
      colors = substr(colors,4,nchar(colors))
    }
    if (colors %in% rownames(RColorBrewer::brewer.pal.info)) {
      colors = RColorBrewer::brewer.pal(length(breaks),colors)
    } else  {
      colors = viridisLite::viridis(length(breaks),option = colors)
    }
    if (rev) colors=rev(colors)
  }
  list(breaks = breaks,colors=colors)
}


#' Create heatmaps from grandR objects
#'
#' Convenience method to compare among more two variables (slot data or analyses results).
#'
#' @param data the grandR object that contains the data to plot
#' @param type Either a mode.slot (see details) or a regex to be matched against analysis names. Can also be a vector
#' @param columns a vector of columns (either condition/cell names if the type is a mode.slot, or names in the output table from an analysis; use \link{Columns}(data,<analysis>) to learn which columns are available); all condition/cell names if NULL
#' @param genes the genes to be included in the plot (default: all genes)
#' @param summarize Should replicates by summarized? Can only be specified if columns is NULL; either a summarization matrix (\link{GetSummarizeMatrix}) or TRUE (in which case \link{GetSummarizeMatrix}(data) is called)
#' @param transform apply a transformation to the selected data; can be a function, or a character (see details)
#' @param cluster.genes should genes be clustered?
#' @param cluster.columns should samples (or cells) be clustered?
#' @param label.genes should genes be labeled?
#' @param xlab The names to show at the x axis (only works if type is a single slot)
#' @param breaks vector of color breaks; can be NULL (see details)
#' @param colors an RColorBrewer palette name; can be NULL (see details)
#' @param title the title for the plot; can be NULL
#' @param return.matrix if TRUE, return a list containing the data matrix and the heatmap instead of the heatmap alone
#' @param na.to convert NA values in the matrix to this value immediately before computing the heatmap
#' @param ... additional parameters forwarded to \link[ComplexHeatmap]{Heatmap}
#'
#' @details This is just a convenience function which
#' \enumerate{
#' \item{Calls \link{GetTable} with the parameter \code{type,columns,summarize,genes}}
#' \item{Transforms the returned table using the \code{transform} parameter}
#' \item{Determines reasonable colors using \code{breaks} and \code{colors}}
#' \item{and then calls ComplexHeatmap::Heatmap}
#' }
#'
#' @details \code{type} and \code{columns} can refer to values from data slots values from analyses (and can be mixed).
#' If there are types from both data and analyses, columns must be NULL.
#' Otherwise columns must either be condition/cell names (if type refers to one or several data slots), or regular expressions
#' to match against the names in the analysis tables.
#'
#' @details Columns definitions for data slots can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently build a logical vector (e.g., columns=Condition=="x").
#'
#' @details To refer to data slots, the mode.slot syntax can be used: Each name is either a data slot, or one of (new,old,total)
#' followed by a dot followed by a slot. For new or old, the data slot value is multiplied by ntr or 1-ntr. This can be used e.g. to obtain the \emph{new counts}.
#'
#' @details The transform parameter either is a function that transforms a matrix (which can conveniently be done using the Transform.XXX functions described next), or
#' a character (which must be the XXX to find such a function). Available data transformations are
#' \itemize{
#'  \item{transform=Transform.Z() or transform="Z": compute z scores for each row (see \link{Transform.Z})}
#'  \item{transform=Transform.VST() or transform="VST": do a variance stabilizing transformation (see \link{Transform.VST})}
#'  \item{transform=Transform.logFC() or transform="logFC": compute log2 fold changes to one or several reference columns; which must be defined via parameters (see \link{Transform.logFC})}
#'  \item{transform=Transform.no() or transform="no": do not transform (see \link{Transform.no})}
#' }
#'
#' @details Reasonable coloring is chosen depending on the value distribution in the matrix. If the values are zero centered (e.g. z scores or most often log fold changes), then
#' by default the 50% and 95% quantiles of all positive and all negative values are determined. Let q95 be the 95% quantile with the larger absolute value, and q50 likewise the 50%
#' quantile with the larger value. The breaks are -q90,q50,0,q50,q90, and, by default, the red to blue "RdBu" palette from RColorBrewer is taken. If the values are not zero centered,
#' the 5%,25%,50%,75%, and 95% quantiles are used as breaks and the yellow-orange-red ("YlOrRd") palette is taken. Breaks can also be specified (as absolute values).
#'
#' @details xlab can be given as a character vector or an expression that evaluates into a character vector.
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables.
#'
#' @return a ComplexHeatmap object
#'
#' @seealso \link{GetTable},\link[ComplexHeatmap]{Heatmap}
#'
#' @export
#'
#' @concept globalplot
PlotHeatmap=function(data,
                     type=DefaultSlot(data),
                     columns=NULL,
                     genes=NULL,
                     summarize=NULL,
                     transform="Z",
                     cluster.genes=TRUE,
                     cluster.columns=FALSE,
                     label.genes=NULL,
                     xlab=NULL,
                     breaks=NULL,
                     colors=NULL,
                     title=NULL,return.matrix=FALSE,
                     na.to=NA,...) {

  checkPackages(c("ComplexHeatmap","circlize"))


  mode.slot=check.mode.slot(data,type)
  if (any(mode.slot)) {
    columns=substitute(columns)
    columns=if (is.null(columns)) colnames(data) else eval(columns,Coldata(data),parent.frame())
    columns=Columns(data,columns,reorder=TRUE)

    if (length(type)==1) {
      xlab=substitute(xlab)
      xlab=if (is.null(xlab)) NULL else rep(eval(xlab,Coldata(data),parent.frame()),length.out=length(columns))
    }
  }

  mat=as.matrix(GetTable(data,type=type,genes = genes,columns=columns,summarize = summarize,ntr.na = FALSE,reorder.columns=TRUE))
  if (is.character(transform)) transform=Transform(transform)
  mat=transform(mat)


  name=attr(mat,"label")
  col=make.continuous.colors(mat,colors = colors,breaks=breaks)
  col=circlize::colorRamp2(breaks=col$breaks,colors=col$colors)

  if (length(xlab)==ncol(mat)) colnames(mat)=xlab
  if (is.null(label.genes)) label.genes=nrow(mat)<=50

  mat[is.na(mat)]=na.to
  hm=ComplexHeatmap::Heatmap(mat,name=name,
                          cluster_rows = cluster.genes,
                          cluster_columns = cluster.columns,
                          show_row_names = label.genes,
                          col = col,
                          column_title=if (is.null(title)) "" else title,
                          row_title=sprintf("n=%d",nrow(mat)),
                          ...)

  if (return.matrix) return(list(Matrix=mat,Heatmap=hm))
  hm
}

#PlotTestOverlap=function(data,names=NULL,alpha=0.05,type=c("venn","euler")) {
#  # R CMD check guard for non-standard evaluation
#  name <- NULL#

#  mat=GetAnalysisTable(data,gene.info=FALSE,genes=names,columns='^Q$')
#	df=setNames(as.data.frame(mat<alpha & !is.na(mat)),gsub(".Q$","",names(mat)))
#	pl=switch(type[1],euler=eulerr::euler(df),venn=eulerr::venn(df))
#	plot(pl,main=name)
#}

#' Formatting function for correlations
#'
#' Returns a function that takes x and y and returns a formatted output to describe the correlation of x and y
#'
#' @param method how to compute correlation coefficients (can be pearson, spearman or kendall)
#' @param n.format format string for the number of data points (see \link{sprintf}); can be NULL (don't output the number of data points)
#' @param coeff.format format string for the correlation coefficient (see \link{sprintf}); can be NULL (don't output the correlation coefficient)
#' @param p.format  format string for the P value (see \link{sprintf}); can be NULL (don't output the P value)
#' @param slope.format format string for the slope (see \link{sprintf}); can be NULL (don't output the slope)
#' @param rmsd.format format string for the root mean square deviation (see \link{sprintf}); can be NULL (don't output the rmsd)
#' @param min.obs minimum number of observations (no output outerwise)
#'
#' @details Use this for the \code{correlation} parameter of \link{PlotScatter}
#'
#' @details The slope is computed via a principal component analysis and *not* by linear regression
#'
#' @return a function
#' @export
#'
#' @examples
#'
#' set.seed(42)
#' data <- data.frame(u=runif(500))  # generate some correlated data
#' data$x <- rnorm(500,mean=data$u)
#' data$y <- rnorm(500,mean=data$u)
#'
#' fun <- FormatCorrelation()
#' fun(data$x,data$y)
#'
#' fun <- FormatCorrelation(method="spearman",p.format="%.4g")
#' fun(data$x,data$y)
#'
#' @concept globalplot
FormatCorrelation=function(method="pearson",n.format=NULL,coeff.format="%.2f",p.format="%.2g",slope.format=NULL,rmsd.format=NULL,min.obs=5) {
  function(x,y) {
    if (length(x)!=length(y)) stop("Cannot compute correlation, unequal lengths!")
    use=is.finite(x)&is.finite(y)
    if (sum(use)<length(x)) warning(sprintf("Removed %d/%d non finite values while computing correlation!",sum(!use),length(x)))
    x=x[use]
    y=y[use]
    if (length(x)<min.obs) return(NULL)
    cc=cor.test(x,y,method=method)
    p.name=switch(method,pearson="R",spearman="\U03C1",kendall="\U03C4")
    formatted.n=if (!is.null(n.format)) sprintf(sprintf("n=%s",n.format),length(x))
    formatted.p=if (!is.null(p.format)) sprintf(sprintf("p%s%s",if (cc$p.value<2.2e-16) "<" else "=",p.format),if (cc$p.value<2.2e-16) 2.2e-16 else cc$p.value)
    formatted.coeff=if (!is.null(coeff.format)) sprintf(sprintf("%s=%s",p.name,coeff.format),cc$estimate)
    formatted.slope=if (!is.null(slope.format)) {
      pca=prcomp(cbind(x,y))$rotation[,1]
      sprintf(sprintf("s=%s",slope.format),pca[2]/pca[1])
    }
    formatted.rmsd=if (!is.null(rmsd.format)) {
      sprintf(sprintf("rmsd=%s",rmsd.format),sqrt(mean((x-y)^2)))
    }
    paste(c(formatted.n,formatted.coeff,formatted.p,formatted.slope,formatted.rmsd),collapse="\n")
  }
}


#' Make a scatter plot
#'
#' Convenience method to compare two variables (slot data or analyses results).
#'
#' @param data the grandR object (can also be a plain data frame)
#' @param x an expression to compute the x value or a character corresponding to a sample (or cell) name or a fully qualified analysis result name (see details)
#' @param y an expression to compute the y value or a character corresponding to a sample (or cell) name or a fully qualified analysis result name (see details)
#' @param analysis the name of an analysis table (can be NULL; see details)
#' @param mode.slot the mode.slot (only relevant if data is a dense grandR object and analysis=NULL)
#' @param xcol a character corresponding to a sample (or cell) name or a fully qualified analysis result name (see details)
#' @param ycol a character corresponding to a sample (or cell) name or a fully qualified analysis result name (see details)
#' @param xlab the label for x (can be NULL, then the x parameter is used)
#' @param ylab the label for y (can be NULL, then the y parameter is used)
#' @param log if TRUE, use log scales for x and y axis
#' @param log.x if TRUE, use log scale for the x axis
#' @param log.y if TRUE, use log scale for the y axis
#' @param axis if FALSE, don't show x and y axes
#' @param axis.x if FALSE, don't show the x axis
#' @param axis.y if FALSE, don't show the y axis
#' @param remove.outlier configure how outliers are selected (is the coef parameter to \link[grDevices]{boxplot.stats}); can be FALSE, in which case no points are considered outliers (see details)
#' @param show.outlier if TRUE, show outlier as gray points at the border of the plotting plane
#' @param lim define the both x and y axis limits (vector of length 2 defining the lower and upper bound, respectively)
#' @param xlim define the x axis limits (vector of length 2 defining the lower and upper bound, respectively)
#' @param ylim define the y axis limits (vector of length 2 defining the lower and upper bound, respectively)
#' @param size the point size to use
#' @param cross add horizontal and vertical lines through the origin?
#' @param diag if TRUE, add main diagonal; if numeric vector, add these diagonals
#' @param filter restrict to these rows; is evaluated for the data frame, and should result in a logical vector
#' @param genes restrict to these genes; can be either numeric indices, gene names, gene symbols or a logical vector
#' @param highlight highlight these genes; can be either numeric indices, gene names, gene symbols or a logical vector (see details)
#' @param label label these genes; can be either numeric indices, gene names, gene symbols or a logical vector (see details)
#' @param label.repel force to repel labels from points and each other (increase if labels overlap)
#' @param facet an expression (evaluated in the same environment as x and y); for each unique value a panel (facet) is created; can be NULL
#' @param color either NULL (use point density colors), or a name of the \link{GeneInfo} table (use scale_color_xxx to define colors), or a color for all points
#' @param colorpalette either NULL (use default colors), or a palette name from color brewer or viridis
#' @param colorbreaks either NULL (use default algorithm of using quantiles of the values), or "minmax" for 5 breaks in between the minimum and maximum of the values, or the actual color breaks to distribute the colors from the palette
#' @param color.label the label for the color legend
#' @param density.margin for density colors, one of 'n','x' or 'y'; should the density be computed along both axes ('n'), or along 'x' or 'y' axis only
#' @param density.n how many bins to use for density calculation (see \link[MASS]{kde2d})
#' @param rasterize use ggrastr to rasterize points? (can be NULL, see details)
#' @param correlation a function to format correlation statistics to be annotated (see details)
#' @param correlation.x x coordinate to put the correlation annotation in the plot (see details)
#' @param correlation.y y coordinate to put the correlation annotation in the plot (see details)
#' @param correlation.hjust x adjustment to put the correlation annotation in the plot (see details)
#' @param correlation.vjust y adjustment to put the correlation annotation in the plot (see details)
#' @param layers.below list of ggplot geoms to add before adding the layer containing the points
#'
#' @details Both the x and y parameter are either expressions or names. Names are either sample (or cell, in case of single cell experiments) names or
#' fully qualified analysis results (analysis name followed by a dot and the analysis result table column). If the analysis parameter is given, the analysis
#' name must be omitted from x and y. These names can be used within expressions using non-standard evaluation.
#' Defining by names only works with character literals like "kinetics.Synthesis", but if you give an expression (e.g. a variable name that contains a character),
#' the situation is more complicated, since PlotScatter will try to evaluate this for defining the values, not the name of the column. If the expression evaluates
#' into a single character string that is equal to a name (see above!), PlotScatter knows what to do. For more complicated situations that cannot be resolved by this,
#' you can use the xcol and ycol parameters instead of the x and y parameters!
#'
#' @details By default the limits of x and y axis are chosen after removing outliers (using the same algorithm used for \link{boxplot}). Thus, larger numbers filter
#' less stringently. remove.outlier can also be set to FALSE (no outlier filtering). If xlim or ylim are set, this overrides outlier filtering. Points outside of the limits
#' (i.e. outliers or points outside of xlim or ylim) are set to infinity (such that they are shown at the border of the plot in gray)
#'
#' @details By default, all genes are shown. This can be restricted using the \code{genes} parameter (see \link{ToIndex}). It is also possible to highlight a subset of the genes
#' using \code{highlight}. This parameter either describes a subset of the genes (either numeric indices, gene names, gene symbols or a logical vector), in which case these genes
#' are plotted in red and with larger points size, or it can be a list of such vectors. The names of this list must be valid colors. Genes can also be labeled (make sure that this
#' is really only a small subset of the genes).
#'
#' @details When rendering to vector based devices (such as svg or pds), a genome-wide scatterplot often is painfully big (and rendering therefore slow). The \code{rasterize}
#' parameter can be used to automatically rasterize the points only (via the ggrastr package). If this parameter is NULL, ggrastr is used if more than 1000 points are plotted!
#'
#' @details Often scatter plots show that x and y coordinates are correlated. Correlations can be annotated using the \link{FormatCorrelation} function. Most often you will use
#' \code{PlotScatter(data,x,y,correlation=FormatCorrelation())}. To use a different correlation measure, other formats for correlation coefficient and P values or omit one of these
#' statistics, parametrize \code{FormatCorrelation}. Use correlation.x and correlation.y to place the annotation in the plot, and correlation.hjust/correlation.vjust to align the
#' annotation at the given x,y coordinates. Infinite values for correlation.x/correlation.y will put the annotation at the border of the plot.
#'
#' @return a ggplot object with the data frame used as the df attribute
#' @export
#'
#' @concept globalplot
PlotScatter=function(data,
                     x=NULL, y=NULL, analysis=NULL,mode.slot=NULL,xcol=NULL,ycol=NULL, xlab=NULL, ylab=NULL,
                     log=FALSE, log.x=log, log.y=log,
                     axis=TRUE, axis.x=axis, axis.y=axis,
                     remove.outlier=1.5, show.outlier=TRUE,lim=NULL,xlim=lim, ylim=lim,
                     size=0.3,
                     cross=NULL,diag=NULL,
                     filter=NULL,
                     genes=NULL,highlight=NULL, label=NULL, label.repel=1,
                     facet=NULL,
                     color=NULL, colorpalette=NULL, colorbreaks=NULL, color.label=NULL,
                     density.margin = 'n', density.n = 100,
                     rasterize=NULL,
                     correlation=NULL,correlation.x=-Inf,correlation.y=Inf,correlation.hjust=0.5,correlation.vjust=0.5,
                     layers.below=NULL) {
  if (is.grandR(data)) {
    if (!is.null(analysis)) {
      df=cbind(GetAnalysisTable(data,analyses = analysis,regex=FALSE,prefix.by.analysis = FALSE,gene.info = FALSE),GeneInfo(data))
    }else if (IsSparse(data)) {
      df=GetAnalysisTable(data)
    }else{
      if (is.null(mode.slot)) mode.slot = DefaultSlot(data)
      df=cbind(GetAnalysisTable(data,gene.info = FALSE),GetTable(data,type=mode.slot),GeneInfo(data))
    }
  if (!is.null(genes)) df=df[ToIndex(data,genes),]
  } else {
    df=as.data.frame(data)
  }


  filter=substitute(filter)
  if (!is.null(filter)) {
    filter = eval(filter,df,parent.frame())
    df=df[filter,]
  }

  facet=substitute(facet)
  if (!is.null(facet)) {
    df$facet=eval(facet,df,parent.frame())
  }

  if (!is.data.frame(df)) stop("df must be a data frame (or at least coercable into a data frame)")
  adaptInf=function(df,rx,ry) {
    # workaround to also "brush" infinity points at the border of the plane
    if (log.x) rx=log10(rx)
    rx=c(rx[1]-0.04*(rx[2]-rx[1]),rx[2]+0.04*(rx[2]-rx[1]))
    if (log.x) rx=10^rx
    df$A[is.infinite(df$A) & df$A>0]=rx[2]
    df$A[is.infinite(df$A) & df$A<0]=rx[1]

    if (log.y) ry=log10(ry)
    ry=c(ry[1]-0.04*(ry[2]-ry[1]),ry[2]+0.04*(ry[2]-ry[1]))
    if (log.y) ry=10^ry
    df$B[is.infinite(df$B) & df$B>0]=ry[2]
    df$B[is.infinite(df$B) & df$B<0]=ry[1]

    df
  }


  rn=rownames(df)

  x=substitute(x)
  if (is.null(x) && is.null(xcol))  {
    A=df[[1]]
    x=1
  } else if (is.null(x)) {
    if (!xcol %in% colnames(df)) stop(sprintf("%s is not a column!",xcol))
    A=df[[xcol]]
    x=xcol
  } else if (is.null(xcol)) {
    A=if (is.character(x) || is.numeric(x)) df[[x]] else eval(x,df,parent.frame())
    if (length(A)==1 && is.character(A)) {
      if (A %in% names(df)) A=df[[A]] else stop("Cannot make heads or tails out of the x parameter. Try to use xcol to access a specific column!")
    }
    if (length(A)==1) stop("Cannot make heads or tails out of the x parameter. Try to use xcol to access a specific column!")
  } else {
    stop("You must not specify both x and xcol!")
  }

  y=substitute(y)
  if (is.null(y) && is.null(ycol))  {
    B=df[[2]]
    y=2
  } else if (is.null(y)) {
    if (!ycol %in% colnames(df)) stop(sprintf("%s is not a column!",ycol))
    B=df[[ycol]]
    y=ycol
  } else if (is.null(ycol)) {
    B=if (is.character(y) || is.numeric(y)) df[[y]] else eval(y,df,parent.frame())
    if (length(B)==1 && is.character(B)) {
      if (B %in% names(df)) B=df[[B]] else stop("Cannot make heads or tails out of the y parameter. Try to use ycol to access a specific column!")
    }
    if (length(B)!=nrow(df)) stop("Cannot make heads or tails out of the y parameter. Try to use xcol to access a specific column!")
  } else {
    stop("You must not specify both y and ycol!")
  }

  if (is.null(xlab)) { if (is.character(x)) xlab=x else if (is.numeric(x)) xlab=names(df)[x] else xlab=deparse(x)}
  if (is.null(ylab)) { if (is.character(y)) ylab=y else if (is.numeric(y)) ylab=names(df)[y] else ylab=deparse(y)}
  if (is.na(xlab[1]) || xlab=="") xlab=NULL
  if (is.na(ylab[1]) || ylab=="") ylab=NULL

  df$A=A
  df$B=B
  rownames(df)=rn


  df$A.trans=if(log.x) log10(df$A) else df$A
  df$B.trans=if(log.y) log10(df$B) else df$B
  df$A.trans.unclipped=df$A.trans
  df$B.trans.unclipped=df$B.trans

  set.coord=remove.outlier!=FALSE || !is.null(xlim) || !is.null(ylim)
  if (set.coord) {
    if (is.null(xlim) && remove.outlier) {
      xlim=grDevices::boxplot.stats(df$A.trans[is.finite(df$A.trans)],coef=remove.outlier)$stats[c(1,5)] #(quantile(df[,1]-xmean,pnorm(c(-2,2)))*1.5)+xmean
      xlim=c(df$A[which(df$A.trans==xlim[1])[1]],df$A[which(df$A.trans==xlim[2])[1]])
    }
    if (is.null(ylim) && remove.outlier) {
      ylim=grDevices::boxplot.stats(df$B.trans[is.finite(df$B.trans)],coef=remove.outlier)$stats[c(1,5)] #(quantile(df[,2]-ymean,pnorm(c(-2,2)))*1.5)+ymean
      ylim=c(df$B[which(df$B.trans==ylim[1])[1]],df$B[which(df$B.trans==ylim[2])[1]])
    }
    if (is.null(xlim)) xlim=range(df$A[is.finite(df$A)])
    if (is.null(ylim)) ylim=range(df$B[is.finite(df$B)])

    clip=function(v,ch,lim,minus) ifelse(!is.finite(v),v,ifelse(ch<lim[1],minus,ifelse(ch>lim[2],Inf,v)))
    df$A.trans=clip(df$A.trans,df$A,xlim,-Inf)
    df$B.trans=clip(df$B.trans,df$B,ylim,-Inf)
    df$A=clip(df$A,df$A,xlim,if (log.x) 0 else -Inf)
    df$B=clip(df$B,df$B,ylim,if (log.y) 0 else -Inf)
    if (!show.outlier) {
      use = is.finite(df$A) & is.finite(df$B)
      df=df[use,]
    }
  }
  else {
    xlim=range(df$A[!is.infinite(df$A)])
    ylim=range(df$B[!is.infinite(df$B)])
  }

  color=substitute(color)
  if (is.null(color)) {
    if (is.null(df$facet)) {
      df$color=density2d(df$A.trans, df$B.trans, n = density.n,margin = density.margin)
    } else {
      df$color=density2d(df$A.trans, df$B.trans, n = density.n,margin = density.margin,facet = df$facet)
    }
    if (is.null(colorpalette)) {
      colorscale=scale_color_viridis_c(name = "Density",guide="none")
    } else {
      col=make.continuous.colors(values=df$color,colors = colorpalette, breaks=colorbreaks)
      colorscale = scale_color_gradientn(name="Density",guide="none",colors=col$colors)
    }
#  } else if (length(color)==1 && as.character(color) %in% names(GeneInfo(data))) {
#    df$color=GeneInfo(data,color)[ToIndex(data,genes)]
#    colorscale=scale_color_discrete(deparse(color))
  } else {
    ccol=if (length(color)==1 && as.character(color) %in% names(df)) df[[as.character(color)]] else eval(color,df,parent.frame())
    if (length(ccol)==1 && is.character(ccol) && ccol %in% names(df)) {
      ccol=df[[ccol]]
    }
    df$color=ccol
    if (is.character(df$color)) {
      colorscale=scale_color_identity(guide="none")
    } else if (is.factor(df$color)) {
      colorscale=if (!is.null(colorpalette)) scale_color_manual(color.label,values = colorpalette, guide=guide_legend(override.aes = list(size = 2))) else scale_color_discrete(color.label, guide=guide_legend(override.aes = list(size = 2)))
    } else {
      col=make.continuous.colors(values = df$color,colors=colorpalette, breaks=colorbreaks)
      df$color=pmin(pmax(df$color,min(col$breaks)),max(col$breaks))
      colorscale = scale_color_gradientn(color.label,colors=col$colors,breaks=scales::breaks_pretty(n=5)(df$color),limits=c(min(col$breaks),max(col$breaks)))
    }
  }

  g=ggplot(df,aes(A,B,color=color))+cowplot::theme_cowplot()
  if (!is.null(cross)) g=g+geom_vline(xintercept = 0,linetype=2,color='gray')+geom_hline(yintercept = 0,linetype=2,color='gray')
  if (!is.null(diag)) {
    if (is.logical(diag)) diag=0
      g=g+geom_abline(linetype=2,color='gray',intercept = diag)
  }

  if (!is.null(layers.below)) for (e in layers.below) g=g+e

  if ((is.null(rasterize) && nrow(df)>1000) || rlang::is_true(rasterize)) {
    checkPackages("ggrastr")
    g=g+ggrastr::rasterize(geom_point(size=size))
  } else {
    g=g+geom_point(size=size)
  }

  g=g+xlab(xlab)+ylab(ylab)

  if (!is.null(df$facet)) g=g+facet_wrap(~facet)
  if (!is.null(colorscale)) g=g+colorscale

  if (!is.null(highlight)) {
    if (is.list(highlight)){
      for (col in names(highlight)) {
        g=g+geom_point(data=df[highlight[[col]],],color=col,size=size*3)
      }
    } else {
      g=g+geom_point(data=df[highlight,],color='red',size=size*3)
    }
  }
  if (!is.null(label)) {
    df2=df[if (is.grandR(data)) ToIndex(data,label) else label,]
    df2$label=if (is.character(label)) label else rownames(df2)[label]
    if (checkPackages("ggrepel",error = FALSE,warn = FALSE)) {
      g=g+ggrepel::geom_label_repel(data=df2,mapping=aes(label=label),show.legend = FALSE,max.overlaps = Inf,min.segment.length = 0,force=label.repel)
    } else {
      singleMessage("Install the ggrepel package to have nicer labels!")
      g=g+ggplot2::geom_label(data=df2,mapping=aes(label=label),show.legend = FALSE)
    }
  }
  if (!is.null(correlation)) {
    if (correlation.x==-Inf) correlation.hjust=-0.05
    if (correlation.x==Inf) correlation.hjust=1.05
    if (correlation.y==-Inf) correlation.vjust=-0.05
    if (correlation.y==Inf) correlation.vjust=1.05

    if(log.x && correlation.x==-Inf) correlation.x=0
    if(log.y && correlation.y==-Inf) correlation.y=0

    if (is.null(df$facet)) {
      cor.format=correlation(df$A.trans.unclipped,df$B.trans.unclipped)
      g=g+annotate("text",x=correlation.x,y=correlation.y,label=cor.format,hjust=correlation.hjust,vjust=correlation.vjust)
    } else {
      cor.format = plyr::ddply(df,plyr::.(facet),function(sub) data.frame(label=correlation(sub$A.trans.unclipped,sub$B.trans.unclipped)))
      cor.format$correlation.x=correlation.x
      cor.format$correlation.y=correlation.y
      g=g+geom_text(data=cor.format,aes(x=correlation.x,y=correlation.y,label=label),inherit.aes=FALSE, parse=FALSE, hjust=correlation.hjust,vjust=correlation.vjust)
    }
  }
  if (set.coord) g=g+coord_cartesian(ylim=ylim,xlim=xlim)
  if (log.x) g=g+scale_x_log10()
  if (log.y) g=g+scale_y_log10()

  if (!axis.x) {
    g=g+theme(
      axis.line.x = ggplot2::element_blank(),
      axis.text.x = ggplot2::element_blank(),
      axis.ticks.x = ggplot2::element_blank()
    )
  }

  if (!axis.y) {
    g=g+theme(
      axis.line.y = ggplot2::element_blank(),
      axis.text.y = ggplot2::element_blank(),
      axis.ticks.y = ggplot2::element_blank()
    )
  }

  attr(g, 'df') <- adaptInf(df,xlim,ylim)
  g
}

#PlotToxicityTest=function(data,w4sU,no4sU,ylim=c(-1,1),LFC.fun=PsiLFC,hl.quantile=0.8) {
#	w=GetData(data,"count",conditions=w4sU,table=T)[,1]
#	n=if (is.numeric(no4sU)) no4sU[data$gene.info$Gene] else GetData(data,"count",conditions=no4sU,table=T)[,1]
#	ntr=GetData(data,"ntr",conditions=w4sU,table=T)[,1]
#	use=!is.na(w+n+ntr)
#	w=w[use]
#	n=n[use]
#	ntr=ntr[use]
#
#	phl=comp.hl(ntr)
#	df=data.frame(lfc=LFC.fun(w,n),PHL=phl)[ntr<1,]
#	df=df[df$PHL<quantile(df$PHL[is.finite(df$PHL)],hl.quantile),]
#	ggplot(df,aes(PHL,lfc,color=density2d(PHL, lfc, n = 100)))+
#			scale_color_viridis_c(name = "Density",guide='none')+
#			geom_point(alpha=1)+
#			geom_hline(yintercept=0)+
##			geom_smooth(method="loess")+
#			xlab("RNA half-life")+ylab("log FC 4sU/no4sU")+
#			scale_x_continuous(breaks=c())+
#			coord_cartesian(ylim=ylim)
#}



PlotExpressionTest=function(data,w4sU,no4sU,ylim=c(-1,1),LFC.fun=lfc::PsiLFC,hl.quantile=0.8) {
  # R CMD check guard for non-standard evaluation
  M <- lfc <- NULL

  w=GetTable(data,type="count",columns=w4sU)[,1]
	nn=no4sU
	n=if (is.numeric(no4sU)) no4sU[data$gene.info$Gene] else GetTable(data,type="count",columns=nn)[,1]
	use=!is.na(w+n)
	w=w[use]
	n=n[use]

	df=data.frame(lfc=LFC.fun(w,n),M=(log10(w+1)+log10(n+1))/2)
	ggplot(df,aes(M,lfc,color=density2d(M, lfc, n = 100)))+
	  cowplot::theme_cowplot()+
	  scale_color_viridis_c(name = "Density",guide='none')+
			geom_point(alpha=1)+
			geom_hline(yintercept=0)+
#			geom_smooth(method="loess")+
			xlab("Mean expression")+ylab("log FC 4sU/no4sU")+
			coord_cartesian(ylim=ylim)
}

#' Convenience function to make the same type of plot for multple analyses.
#' @param data the grandR object that contains the data to be plotted
#' @param plot.fun the plottinf function to apply
#' @param analyses the analyses to plot (default: all)
#' @param add additional ggplot (e.g., geoms) objects to add
#' @param ... passed further to plot.fun
#' @return ggplot objects
#' @export
#' @concept globalplot
PlotAnalyses=function(data,plot.fun,analyses=Analyses(data),add=NULL,...) {
  lapply(analyses,function(analysis) {
    re=plot.fun(data,analysis=analysis,...)
    if (!is.null(add)) for (e in if (is.list(add)) add else list(add)) re=re+e
    re
  })
}

#' Make a Vulcano plot
#'
#' Plot log2 fold changes against -log10 multiple testing adjusted P values
#'
#' @param data the grandR object that contains the data to be plotted
#' @param analysis the analysis to plot (default: first analysis)
#' @param p.cutoff p-value cutoff (default: 0.05)
#' @param lfc.cutoff log fold change cutoff (default: 1)
#' @param annotate.numbers if TRUE, label the number of genes
#' @param ... further parameters passed to \link{PlotScatter}
#' @return a ggplot object
#' @export
#' @concept globalplot
VulcanoPlot=function(data,analysis=Analyses(data)[1],p.cutoff=0.05,lfc.cutoff=1,
                     annotate.numbers=TRUE,...) {
  # R CMD check guard for non-standard evaluation
  Q <- NULL

  df=GetAnalysisTable(data,analyses=analysis,regex=FALSE,columns=c("LFC|Q"),gene.info = FALSE)
  if (is.numeric(analysis)) analysis=Analyses(data)[analysis]
  names(df)=gsub(".*.Q","Q",gsub(".*.LFC","LFC",names(df)))
  g=PlotScatter(df,x=LFC,y=-log10(Q),remove.outlier = FALSE,...)+
    xlab(bquote(log[2]~FC))+
    ylab(bquote("-"~log[10]~FDR))+
    geom_hline(yintercept=-log10(p.cutoff),linetype=2)+
    geom_vline(xintercept=if (lfc.cutoff==0) 0 else c(-lfc.cutoff,lfc.cutoff),linetype=2)+
    ggtitle(analysis)

  if (annotate.numbers) {
    if (lfc.cutoff!=0) {
      n=table(cut(df$LFC,breaks=c(-Inf,-lfc.cutoff,lfc.cutoff,Inf)),factor(df$Q>p.cutoff,levels=c("FALSE","TRUE")))
      g=g+annotate("label",x=c(-Inf,0,Inf,-Inf,0,Inf),y=c(Inf,Inf,Inf,-Inf,-Inf,-Inf),label=paste0("n=",as.numeric(n)),hjust=c(-0.1,0.5,1.1,-0.1,0.5,1.1),vjust=c(1.1,1.1,1.1,-0.1,-0.1,-0.1))
    } else {
      n=table(cut(df$LFC,breaks=c(-Inf,lfc.cutoff,Inf)),factor(df$Q>p.cutoff,levels=c("FALSE","TRUE")))
      g=g+annotate("label",x=c(-Inf,Inf,-Inf,Inf),y=c(Inf,Inf,-Inf,-Inf),label=paste0("n=",as.numeric(n)),hjust=c(-0.1,1.1,-0.1,1.1),vjust=c(1.1,1.1,-0.1,-0.1))
    }
  }
  g
}


#' Make an MA plot
#'
#' Plot average expression vs. log2 fold changes
#'
#' @param data the grandR object that contains the data to be plotted
#' @param analysis the analysis to plot (default: first analysis)
#' @param p.cutoff p-value cutoff (default: 0.05)
#' @param lfc.cutoff log fold change cutoff (default: 1)
#' @param annotate.numbers if TRUE, label the number of genes
#' @param ... further parameters passed to \link{PlotScatter}
#' @return a ggplot object
#' @export
#' @concept globalplot
MAPlot=function(data,analysis=Analyses(data)[1],p.cutoff=0.05,lfc.cutoff=1,
                     annotate.numbers=TRUE,...) {
  # R CMD check guard for non-standard evaluation
  M <- Q <- NULL

  df=GetAnalysisTable(data,analyses=analysis,regex=FALSE,columns=c("M|LFC|Q"),gene.info = FALSE)
  if (is.numeric(analysis)) analysis=Analyses(data)[analysis]
  names(df)=gsub(".*.Q","Q",gsub(".*.LFC","LFC",gsub(".*.M","M",names(df))))
  if (is.null(df$Q)) df$Q=1
  df$Q[is.na(df$Q)]=1
  g=PlotScatter(df,x=M+1,y=LFC,color=ifelse(Q<p.cutoff,'black','gray50'),log.x=TRUE,remove.outlier = FALSE,...)+
    xlab("Total expression")+
    ylab(bquote(log[2]~FC))+
    geom_hline(yintercept=if (lfc.cutoff==0) 0 else c(-lfc.cutoff,lfc.cutoff),linetype=2)+
    ggtitle(analysis)

  if (annotate.numbers) {
    n=c(sum(df$LFC>lfc.cutoff & df$Q<p.cutoff,na.rm = TRUE),sum(df$LFC< -lfc.cutoff & df$Q<p.cutoff,na.rm = TRUE))
    g=g+annotate("label",x=c(Inf,Inf),y=c(Inf,-Inf),label=paste0("n=",n),hjust=c(1.1,1.1),vjust=c(1.1,-0.1))
  }

  g
}

#' Plot the distribution of gene types
#' @param data the grandR object to get the data to be plotted from
#' @param mode.slot which mode and slot to use
#' @param relative show percentage values?
#'
#' @return a ggplot object
#'
#' @export
#' @concept globalplot
PlotTypeDistribution=function(data,mode.slot=DefaultSlot(data),relative=FALSE) {
  # R CMD check guard for non-standard evaluation
  value <- Type <- NULL

  df=GetTable(data,type=mode.slot)
	df=sapply(levels(data$gene.info$Type),function(type) colSums(df[ data$gene.info$Type==type,]))
	df=df[,colSums(df)>0]
	if (relative) {
		df=df/rowSums(df)*100
		mode.slot=sprintf("%s [%%]",mode.slot)
	}
	df=reshape2::melt(df,varnames=c("Condition","Type"))
	ggplot(df,aes(Condition,value,fill=Type))+cowplot::theme_cowplot()+
	  geom_bar(stat="Identity")+scale_fill_brewer(palette="Dark2")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+ylab(mode.slot)+xlab(NULL)
}


setup.default.aes=function(data,aest) {
  # R CMD check guard for non-standard evaluation
  Replicate <- NULL

  if (is.null(aest)) aest=aes()
  if (!is.null(Condition(data)) && !any(c("color","colour") %in% names(aest))) aest=utils::modifyList(ggplot2::aes(color=Condition),aest)
  if (!is.null(Coldata(data)$Replicate)&& !any(c("shape") %in% names(aest))) aest=utils::modifyList(ggplot2::aes(shape=Replicate),aest)
  aest
}

#' Gene plot comparing old vs new RNA
#'
#' Plot the old vs new RNA values of a gene
#'
#' @param data the grandR object to get the data to be plotted from
#' @param gene the gene to plot
#' @param slot the slot of the grandR object to get the data from
#' @param columns which columns (i.e. samples or cells) to show (see details)
#' @param log show both axes in log scale
#' @param show.CI show confidence intervals; one of TRUE/FALSE (default: FALSE)
#' @param aest parameter to set the visual attributes of the plot
#' @param size the point size used for plotting; overridden if size is defined via aest
#'
#' @details The value of the aest parameter must be an \emph{Aesthetic mapping} as generated by \link[ggplot2]{aes}.
#'
#' @details The table used for plotting is the table returned by \link{GetData} with coldata set to TRUE, i.e. you can use all names from the \link{Coldata} table for aest.
#'
#' @details By default, aest is set to aes(color=Condition,shape=Replicate) (if both Condition and Replicate are names in the Coldata table).
#'
#' @details Columns can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently build a logical vector (e.g., columns=Condition=="x").
#'
#' @return a ggplot object.
#'
#' @seealso \link{GetData}, \link{PlotGeneTotalVsNtr},\link{PlotGeneGroupsPoints},\link{PlotGeneGroupsBars}
#'
#' @export
#' @concept geneplot
PlotGeneOldVsNew=function(data,gene,slot=DefaultSlot(data),columns=NULL,log=TRUE,show.CI=FALSE,
                          aest=NULL,size=2) {
  # R CMD check guard for non-standard evaluation
  lower <- upper <- NULL

  aest=setup.default.aes(data,aest)
  if (length(slot)!=1) stop("Provide a single slot name!")
  slot=get.mode.slot(data,slot,allow.ntr = FALSE)$slot
  new=paste0("new.",slot)
  old=paste0("old.",slot)

  columns=substitute(columns)
  columns=if (is.null(columns)) colnames(data) else eval(columns,Coldata(data),parent.frame())
  columns=Columns(data,columns)

  df=GetData(data,genes=gene,mode.slot=c(old,new),columns=columns,by.rows=FALSE,coldata=TRUE,ntr.na = FALSE)
  g=ggplot(df,utils::modifyList(aes(!!sym(old),!!sym(new)),aest))+cowplot::theme_cowplot()
  if (!is.null(aest$size)) g=g+geom_point() else g=g+geom_point(size=size)
  if (log) {
    g=g+scale_x_log10()+
      scale_y_log10()
  } else {
    g=g+scale_x_continuous()+
      scale_y_continuous()
  }
  g=g+xlab(paste0("Old RNA (",slot,")"))
  g=g+ylab(paste0("New RNA (",slot,")"))

  if (show.CI) {
    if (!all(c("lower","upper") %in% Slots(data))) stop("Compute lower and upper slots first! (ComputeNtrCI)")
    df=cbind(df,GetData(data,genes=gene,mode.slot=c("lower","upper",slot),by.rows=FALSE,coldata=T,ntr.na = FALSE)[,c("lower","upper",slot)])
    g=g+geom_errorbar(data=df,mapping=aes(ymin=lower*!!sym(slot),ymax=upper*!!sym(slot)))
    g=g+geom_errorbarh(data=df,mapping=aes(xmin=(1-upper)*!!sym(slot),xmax=(1-lower)*!!sym(slot)))
  }
  g
}

#' Gene plot comparing total RNA vs the NTR
#'
#' Plot the total RNA expression vs the new-to-total RNA ratio for a gene
#'
#' @param data the grandR object to get the data to be plotted from
#' @param gene the gene to plot
#' @param slot the slot of the grandR object to get the data from
#' @param columns which columns (i.e. samples or cells) to show (see details)
#' @param log show the x axis (total RNA) in log scale
#' @param show.CI show confidence intervals; one of TRUE/FALSE (default: FALSE)
#' @param aest parameter to set the visual attributes of the plot
#' @param size the point size used for plotting; overridden if size is defined via aest
#'
#' @details The value of the aest parameter must be an \emph{Aesthetic mapping} as generated by \link[ggplot2]{aes}.
#'
#' @details The table used for plotting is the table returned by \link{GetData} with coldata set to TRUE, i.e. you can use all names from the \link{Coldata} table for aest.
#'
#' @details By default, aest is set to aes(color=Condition,shape=Replicate) (if both Condition and Replicate are names in the Coldata table).
#'
#' @details Columns can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently build a logical vector (e.g., columns=Condition=="x").
#'
#' @return a ggplot object.
#'
#' @seealso \link{GetData}, \link{PlotGeneOldVsNew},\link{PlotGeneGroupsPoints},\link{PlotGeneGroupsBars}
#'
#' @export
#' @concept geneplot
PlotGeneTotalVsNtr=function(data,gene,slot=DefaultSlot(data),columns=NULL,log=TRUE,show.CI=FALSE,
                            aest=NULL,size=2) {
  # R CMD check guard for non-standard evaluation
  lower <- upper <- ntr <- NULL

  aest=setup.default.aes(data,aest)
  if (length(slot)!=1) stop("Provide a single slot name!")
  slot=get.mode.slot(data,slot,allow.ntr = FALSE)$slot

  columns=substitute(columns)
  columns=if (is.null(columns)) colnames(data) else eval(columns,Coldata(data),parent.frame())
  columns=Columns(data,columns)

  df=GetData(data,genes=gene,mode.slot=c("ntr",slot),columns=columns,by.rows=FALSE,coldata=T,ntr.na = FALSE)
  g=ggplot(df,utils::modifyList(aes(!!sym(slot),ntr),aest))+cowplot::theme_cowplot()
  if (!is.null(aest$size)) g=g+geom_point() else g=g+geom_point(size=size)
  if (log) {
    g=g+scale_x_log10()
  } else {
    g=g+scale_x_continuous()
  }
  g=g+scale_y_continuous("NTR")
  g=g+xlab(paste0("Total RNA (",slot,")"))

  if (show.CI) {
    if (!all(c("lower","upper") %in% Slots(data))) stop("Compute lower and upper slots first! (ComputeNtrCI)")
    df=cbind(df,GetData(data,genes=gene,mode.slot=c("lower","upper"),by.rows=FALSE,coldata=T,ntr.na = FALSE)[,c("lower","upper")])
    g=g+geom_errorbar(data=df,mapping=aes(ymin=lower,ymax=upper))
  }
  g
}

#' Plot gene groups as points
#'
#' Plot either old, new or total RNA of a gene in a row, per condition.
#'
#' @param data the grandR object to get the data to be plotted from
#' @param gene the gene to plot
#' @param group how to group the genes (default: Condition)
#' @param mode.slot the  mode.slot of the grandR object to get the data from
#' @param columns which columns (i.e. samples or cells) to show (see details)
#' @param log show the y axis in log scale
#' @param show.CI show confidence intervals; one of TRUE/FALSE (default: FALSE)
#' @param aest parameter to set the visual attributes of the plot
#' @param size the point size used for plotting; overridden if size is defined via aest
#' @param transform function that is called on the data frame directly before plotting (can be NULL)
#'
#' @details The value of the aest parameter must be an \emph{Aesthetic mapping} as generated by \link[ggplot2]{aes}.
#'
#' @details To refer to data slots, the mode.slot syntax can be used: Each name is either a data slot, or one of (new,old,total) followed by a
#' dot followed by a slot. For new or old, the data slot value is multiplied by ntr or 1-ntr. This can be used e.g. to obtain the \emph{new counts}.
#'
#' @details The table used for plotting is the table returned by \link{GetData} with coldata set to TRUE, i.e. you can use all names from the \link{Coldata} table for aest.
#'
#' @details By default, aest is set to aes(color=Condition,shape=Replicate) (if both Condition and Replicate are names in the Coldata table).
#'
#' @details Columns can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently build a logical vector (e.g., columns=Condition=="x").
#'
#' @return a ggplot object.
#'
#' @seealso \link{GetData}, \link{PlotGeneTotalVsNtr},\link{PlotGeneOldVsNew},\link{PlotGeneGroupsBars}
#'
#' @export
#' @concept geneplot
PlotGeneGroupsPoints=function(data,gene,group="Condition",mode.slot=DefaultSlot(data),
                              columns=NULL,
                              log=TRUE,
                              show.CI=FALSE,
                              aest=NULL,size=2,transform=NULL) {
  # R CMD check guard for non-standard evaluation
  lower <- upper <- value <- NULL

  aest=setup.default.aes(data,aest)

  if (length(group)!=1 && !group %in% names(Coldata(data))) stop("Group must be a name in the Coldata table!")

  if (length(mode.slot)!=1) stop("Provide a single slot name!")
  mode.slot=get.mode.slot(data,mode.slot)
  slot=mode.slot$slot
  mode=mode.slot$mode

  columns=substitute(columns)
  columns=if (is.null(columns)) colnames(data) else eval(columns,Coldata(data),parent.frame())
  columns=Columns(data,columns)

  if (slot=="ntr") {
    df=GetData(data,genes=gene,mode.slot="ntr",columns=columns,by.rows=FALSE,coldata=TRUE,ntr.na = FALSE)
    df$ntr=df$Value
    df$value=df[[slot]]
    log=FALSE
  } else {
    df=GetData(data,genes=gene,mode.slot=c(slot,"ntr"),columns=columns,by.rows=FALSE,coldata=TRUE,ntr.na = FALSE)
    df$value=switch(mode[1],total=df[[slot]],new=df[[slot]]*df[["ntr"]],old=df[[slot]]*(1-df[["ntr"]]),stop(paste0(mode," unknown!")))
  }

  if (!is.null(transform)) df=transform(df)

  g=ggplot(df,utils::modifyList(aes(!!sym(group),value),aest))+cowplot::theme_cowplot()
  if (!is.null(aest$size)) g=g+geom_point(position=if(show.CI) position_dodge(width=0.4) else "identity") else g=g+geom_point(size=size,position=if(show.CI) position_dodge(width=0.4) else "identity")
  g=g+xlab(NULL)+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
  if (log) g=g+scale_y_log10()

  if (slot=="ntr")g=g+ylab("NTR") else g=g+ylab(paste0(toupper(substr(mode,1,1)),substr(mode,2,nchar(mode))," RNA (",slot,")"))

  if (show.CI) {
    if (!all(c("lower","upper") %in% Slots(data))) stop("Compute lower and upper slots first! (ComputeNtrCI)")
    df=cbind(df,GetData(data,genes=gene,mode.slot=c("lower","upper"),by.rows=FALSE,coldata=FALSE,ntr.na = FALSE)[,c("lower","upper")])
    if (slot=="ntr") {
      g=g+geom_errorbar(data=df,mapping=aes(ymin=lower,ymax=upper),width=0,position=position_dodge(width=0.4))
    } else {
      g=switch(mode[1],
               total=g,
               new=g+geom_errorbar(data=df,mapping=aes(ymin=lower*!!sym(slot),ymax=upper*!!sym(slot)),width=0,position=position_dodge(width=0.4)),
               old=g+geom_errorbar(data=df,mapping=aes(ymin=(1-upper)*!!sym(slot),ymax=(1-lower)*!!sym(slot)),width=0,position=position_dodge(width=0.4)),
               stop(paste0(mode," unknown!")))
    }
  }
  g
}

#' Plot gene values as bars
#'
#' Plot old and new RNA of a gene in a row.
#'
#' @param data the grandR object to get the data to be plotted from
#' @param gene the gene to plot
#' @param slot the slot of the grandR object to get the data from
#' @param columns which columns (i.e. samples or cells) to show (see details)
#' @param show.CI show confidence intervals; one of TRUE/FALSE (default: FALSE)
#' @param xlab The names to show at the x axis;
#' @param transform function that is called on the data frame directly before plotting (can be NULL)
#'
#' @details xlab can be given as a character vector or an expression that evaluates into a character vector.
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently it.
#'
#' @details Columns can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently build a logical vector (e.g., columns=Condition=="x").
#'
#' @return a ggplot object.
#'
#' @seealso \link{GetData}, \link{PlotGeneTotalVsNtr},\link{PlotGeneOldVsNew},\link{PlotGeneGroupsBars}
#'
#' @export
#' @concept geneplot
PlotGeneGroupsBars=function(data,gene,slot=DefaultSlot(data),columns=NULL,show.CI=FALSE,xlab=NULL,transform=NULL) {
  # R CMD check guard for non-standard evaluation
  Name <- Value <- mode.slot <- upper <- lower <- NULL

  if (length(slot)!=1) stop("Provide a single slot name!")
  slot=get.mode.slot(data,slot,allow.ntr = FALSE)$slot

  columns=substitute(columns)
  columns=if (is.null(columns)) colnames(data) else eval(columns,Coldata(data),parent.frame())
  columns=Columns(data,columns)

  df=GetData(data,genes=gene,mode.slot=paste0(c("new.","old."),slot),columns=columns,by.rows=TRUE,coldata=TRUE,ntr.na = FALSE)

  xlab=substitute(xlab)
  xlab=if (is.null(xlab)) df$Name else eval(xlab,df,parent.frame())
  df$xlab=xlab

  if (!is.null(transform)) df=transform(df)

  g=ggplot(df,aes(Name,Value,fill=mode.slot))+
    cowplot::theme_cowplot()+
    geom_bar(stat="identity",position=position_stack())+
    scale_fill_manual(NULL,values = c('red','gray'),guide="none")+
    xlab(NULL)+
    scale_x_discrete(breaks=df$Name,labels=df$xlab)+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
  g=g+ylab(paste0("Total RNA (",slot,")"))
  if (show.CI) {
    if (!all(c("lower","upper") %in% Slots(data))) stop("Compute lower and upper slots first! (ComputeNtrCI)")
    df2=GetData(data,genes=gene,mode.slot=c("lower","upper",slot),by.rows=FALSE,coldata=TRUE,ntr.na = FALSE)
    g=g+geom_errorbar(data=df2,mapping=aes(y=!!sym(slot),fill=NULL,ymin=(1-upper)*!!sym(slot),ymax=(1-lower)*!!sym(slot)),width=0)
  }
  g
}

#' Gene plot for snapshot timecourse data
#'
#' Plot the total RNA expression vs the new-to-total RNA ratio for a gene
#'
#' @param data the grandR object to get the data to be plotted from
#' @param gene the gene to plot
#' @param time the times to show on the x axis (see details)
#' @param mode.slot the mode.slot of the grandR object to get the data from
#' @param columns which columns (i.e. samples or cells) to show (see details)
#' @param average.lines add average lines?
#' @param exact.tics use axis labels directly corresponding to the available temporal values?
#' @param log show the y axis in log scale
#' @param show.CI show confidence intervals; one of TRUE/FALSE (default: FALSE)
#' @param aest parameter to set the visual attributes of the plot
#' @param size the point size used for plotting; overridden if size is defined via aest
#'
#' @details The x axis of this plot will show a temporal dimension. The time parameter defines a name in the \link{Coldata} table containing the temporal values for each sample.
#'
#' @details The value of the aest parameter must be an \emph{Aesthetic mapping} as generated by \link[ggplot2]{aes}.
#'
#' @details The table used for plotting is the table returned by \link{GetData} with coldata set to TRUE, i.e. you can use all names from the \link{Coldata} table for aest.
#'
#' @details By default, aest is set to aes(color=Condition,shape=Replicate) (if both Condition and Replicate are names in the Coldata table).
#'
#' @details Columns can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently build a logical vector (e.g., columns=Condition=="x").
#'
#' @return a ggplot object.
#'
#' @seealso \link{GetData}, \link{PlotGeneOldVsNew},\link{PlotGeneGroupsPoints},\link{PlotGeneGroupsBars}
#'
#' @export
#' @concept geneplot
PlotGeneSnapshotTimecourse=function(data,gene,time=Design$dur.4sU,
                            mode.slot=DefaultSlot(data),
                            columns=NULL,
                            average.lines=TRUE,
                            exact.tics=TRUE,
                            log=TRUE,
                            show.CI=FALSE,aest=NULL,size=2) {
  # R CMD check guard for non-standard evaluation
  x <- colour <- group <- Value <- ntr <- lower <- upper <- linetype <- NULL

  aest=setup.default.aes(data,aest)
  if (length(slot)!=1) stop("Provide a single slot name!")

  columns=substitute(columns)
  columns=if (is.null(columns)) colnames(data) else eval(columns,Coldata(data),parent.frame())
  columns=Columns(data,columns)

  df=GetData(data,genes=gene,mode.slot=mode.slot,columns=columns,by.rows=FALSE,coldata=TRUE,ntr.na = FALSE)

  aes=utils::modifyList(aes(x=!!sym(time),y=Value),aest)
  if (!is.null(Condition(data))) aes=utils::modifyList(aes,aes(group=Condition))
  breaks=if (exact.tics) sort(unique(df[[time]])) else scales::breaks_extended(5)(df[[time]])

  oslot=mode.slot
  mode.slot=get.mode.slot(data,mode.slot)
  slot=mode.slot$slot
  mode=mode.slot$mode
  if (slot=="ntr") log=FALSE

  g=ggplot(df,mapping=aes)+cowplot::theme_cowplot()
  if (!is.null(aest$size)) g=g+geom_point(position=if(show.CI) position_dodge(width=0.4) else "identity") else g=g+geom_point(size=size,position=if(show.CI) position_dodge(width=0.4) else "identity")
  g=g+scale_x_continuous(NULL,labels = scales::number_format(accuracy = max(0.01,my.precision(breaks)),suffix="h"),breaks=breaks)
  if (slot=="ntr")g=g+ylab("NTR") else g=g+ylab(paste0(toupper(substr(mode,1,1)),substr(mode,2,nchar(mode))," RNA (",slot,")"))
  if (log) g=g+scale_y_log10()
  if (average.lines) {
    # compute average line:
    if (!is.null(Condition(data))) {
      ddf=as.data.frame(lapply(aes,function(col) rlang::eval_tidy(col,data=df)))
      ddf=plyr::ddply(ddf,intersect(names(ddf),c("x","colour","linetype","group")),function(s) c(Value=mean(s$y,na.rm=TRUE)))
      daes=aes(x,Value,group=group)
      if ("colour" %in% names(ddf)) daes=utils::modifyList(daes,aes(color=colour))
      if ("linetype" %in% names(ddf)) daes=utils::modifyList(daes,aes(linetype=linetype))
      g=g+geom_line(data=ddf,mapping=daes,inherit.aes=FALSE)
    } else {
      ddf=as.data.frame(lapply(aes,function(col) rlang::eval_tidy(col,data=df)))
      ddf=plyr::ddply(ddf,plyr::.(x),function(s) c(Value=mean(s$y,na.rm=TRUE)))
      g=g+geom_line(data=ddf,mapping=aes(x,Value),inherit.aes=F)
    }
  }
  if (show.CI) {
    if (!all(c("lower","upper") %in% Slots(data))) stop("Compute lower and upper slots first! (ComputeNtrCI)")
    df2=GetData(data,genes=gene,mode.slot=unique(c("lower","upper",slot,oslot)),by.rows=FALSE,coldata=TRUE,ntr.na = FALSE)
    if (slot=="ntr") {
      g=g+geom_errorbar(data=df2,mapping=aes(y=ntr,ymin=lower,ymax=upper),width=0,position=position_dodge(width=0.4))
    } else {
      g=switch(mode[1],
               total=g,
               new=g+geom_errorbar(data=df2,mapping=aes(y=!!sym(oslot),ymin=lower*!!sym(slot),ymax=upper*!!sym(slot)),width=0,position=position_dodge(width=0.4)),
               old=g+geom_errorbar(data=df2,mapping=aes(y=oslot,ymin=(1-upper)*!!sym(slot),ymax=(1-lower)*!!sym(slot)),width=0,position=position_dodge(width=0.4)),
               stop(paste0(mode," unknown!")))
    }
  }
  g
}

#' Rotate x axis labels
#'
#' Add this to a ggplot object to rotate the x axis labels
#'
#' @param angle the angle by which to rotate
#'
#' @return a ggplot theme object
#' @export
#'
#' @concept helper
RotatateAxisLabels=function(angle=90) {
  theme(axis.text.x = element_text(angle = angle, vjust = if (abs(angle-90)<10) 0.5 else 1, hjust=1))
}
Any scripts or data that you put into this service are public.
grandR documentation built on April 4, 2025, 2:27 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
grandR
Comprehensive Analysis of Nucleotide Conversion Sequencing Data

R/plot.R
In grandR: Comprehensive Analysis of Nucleotide Conversion Sequencing Data

Defines functions PlotScatter FormatCorrelation PlotHeatmap make.continuous.colors Transform.logFC Transform.VST Transform.Z Transform.no Transform PlotPCA density2d

Documented in density2d FormatCorrelation PlotHeatmap PlotPCA PlotScatter Transform.logFC Transform.no Transform.VST Transform.Z

Try the grandR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

grandR Comprehensive Analysis of Nucleotide Conversion Sequencing Data

R/plot.R In grandR: Comprehensive Analysis of Nucleotide Conversion Sequencing Data

Defines functions PlotScatter FormatCorrelation PlotHeatmap make.continuous.colors Transform.logFC Transform.VST Transform.Z Transform.no Transform PlotPCA density2d

Documented in density2d FormatCorrelation PlotHeatmap PlotPCA PlotScatter Transform.logFC Transform.no Transform.VST Transform.Z

Try the grandR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

grandR
Comprehensive Analysis of Nucleotide Conversion Sequencing Data

R/plot.R
In grandR: Comprehensive Analysis of Nucleotide Conversion Sequencing Data