R/plot.R

Defines functions PlotScatter FormatCorrelation PlotHeatmap make.continuous.colors Transform.logFC Transform.VST Transform.Z Transform.no Transform PlotPCA density2d

Documented in density2d FormatCorrelation PlotHeatmap PlotPCA PlotScatter Transform.logFC Transform.no Transform.VST Transform.Z

#' Density estimation in 2d
#'
#' Estimate point densities on a regular grid for.
#'
#' @param x x coordinates
#' @param y y coordinates
#' @param facet factor: estimate for each unique factor; can be NULL
#' @param n size of the grid
#' @param margin one of 'n','x' or 'y'; should the density be computed along both axes ('n'), or along 'x' or 'y' axis only
#'
#' @return a density value for each point
#' @export
#'
#' @concept helper
density2d=function(x, y, facet=NULL, n=100, margin='n') {
  bandwidth.nrd.ex=function (x)
  {
    r <- range(x)
    h <- (r[2L] - r[1L])/1.34
    4 * 1.06 * min(sqrt(var(x)), h) * length(x)^(-1/5)
  }

  if (is.null(facet)) {
        r=rep(NA,length(x))
        use=is.finite(x+y)
        if (sum(use)==0) return(r)
        if (min(x[use])==max(x[use])) {
          x[use]=0:1
        }
        if (min(y[use])==max(y[use])) {
          y[use]=0:1
        }
        bw.x=MASS::bandwidth.nrd(x[use])
        if (bw.x==0) bw.x=bandwidth.nrd.ex(x[use])
        bw.y=MASS::bandwidth.nrd(y[use])
        if (bw.y==0) bw.y=bandwidth.nrd.ex(y[use])
        d=MASS::kde2d(x[use], y[use], h=c(bw.x,bw.y), n=n)
        if (margin=='x') d$z=d$z/apply(d$z,1,max)
        else if (margin=='y') d$z=t(t(d$z)/apply(d$z,2,max))
        else d$z=d$z/max(d$z)
        r[use]=d$z[cbind(findInterval(x[use], d$x),findInterval(y[use], d$y))]
	r=r/max(r,na.rm=T)
        return(r)
    }
    re<-rep(NA,length(x))
    for (f in unique(facet)) re[f==facet]=density2d(x[f==facet],y[f==facet],n=n)
    re
}


#' Make a PCA plot
#' @param data the grandR object that contains the data to plot
#' @param mode.slot the mode and slot of data to plot; slot in the grandr object (eg "count")
#' @param ntop how many genes to use
#' @param aest parameter to set the visual attributes
#' @param x number of principal component to show on the x axis (numeric)
#' @param y number of principal component to show on the y axis (numeric)
#' @param columns which columns (i.e. samples or cells) to perform PCA on (see details)
#' @param do.vst perform a variance stabilizing transformation for count data?
#'
#' @details Columns can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently build a logical vector (e.g., columns=Condition=="x").
#'
#' @return a PCA plot
#' @export
#'
#' @concept globalplot
PlotPCA=function(data, mode.slot=DefaultSlot(data), ntop=500,aest=NULL,x=1,y=2,columns=NULL,do.vst=TRUE) {

  aest=setup.default.aes(data,aest)

  columns=substitute(columns)
  columns=if (is.null(columns)) colnames(data) else eval(columns,Coldata(data),parent.frame())
  columns=Columns(data,columns)

  mat=as.matrix(GetTable(data,type=mode.slot,columns=columns,ntr.na = FALSE))

	cd=do.call("rbind",lapply(1:length(mode.slot), function(i) cbind(data$coldata,data.frame(type=mode.slot[i]))))


	mat=mat[,columns]
	cd=cd[columns,]

	rm.na=!apply(is.na(mat),2,sum)==nrow(mat)
	mat=mat[,rm.na]
	cd=cd[rm.na,]
	if (do.vst) {
	  checkPackages("DESeq2")
	  mat <- DESeq2::vst(cnt(mat))
	}

	checkPackages("matrixStats")
	rv <- matrixStats::rowVars(mat)

  	select <- order(rv, decreasing=TRUE)[seq_len(min(ntop, length(rv)))]
  	pca <- prcomp(t(mat[select,]))
	percentVar <- pca$sdev^2 / sum( pca$sdev^2 )
	d <- as.data.frame(pca$x)
	names(d)=paste0("PC",1:dim(d)[2])
	d=cbind(d, cd)
	ggplot(d,utils::modifyList(aes(!!sym(paste0("PC",x)),!!sym(paste0("PC",y))),aest))+cowplot::theme_cowplot()+
	  geom_point(size=3)+xlab(paste0("PC",x,": ",round(percentVar[x] * 100),"% variance"))+ylab(paste0("PC",y,": ",round(percentVar[y] * 100),"% variance"))+coord_fixed()
}

Transform=function(name,label=NULL){
  FUN=get(paste0("Transform.",name))()
  function(mat) {
    re=FUN(mat)
    if (!is.null(label)) attr(re,"label")=label
    re
  }
}

#' Transformations for PlotHeatmap
#'
#' Functions to perform transformations on the matrix used for \link{PlotHeatmap}.
#'
#' @param label label that is used for the heatmap legend
#' @param center perform centering when computing Z scores (see \link{scale})
#' @param scale perform scaling when computing Z scores (see \link{scale})
#' @param LFC.fun function to compute log fold changes (default: \link[lfc]{PsiLFC}, other viable option: \link[lfc]{NormLFC})
#' @param columns which columns (i.e. samples or cells) to use as reference when computing log fold changes (see details)
#' @param ... further parameters passed down to LFC.fun
#'
#' @details These functions should be used as transform parameter to \link{PlotHeatmap}. Available data transformations are
#' \itemize{
#'  \item{transform=Transform.Z(): compute z scores for each row; you can omit the usual centering or scaling by setting the respective parameters to false; see \link{scale}}
#'  \item{transform=Transform.VST(): do a variance stabilizing transformation using \link[DESeq2]{vst}}
#'  \item{transform=Transform.logFC(): compute log2 fold changes to one or several reference columns; see below how to define them; fold changes are computed using the lfc package)}
#'  \item{transform=Transform.no(): do not transform}
#' }
#'
#' @details The label to be used in the heatmap legend can be changed by specifying the label parameter.
#'
#' @details For Transform.logFC, columns can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#'
#' @return A function that transforms a matrix.
#'
#' @export
#'
#' @concept globalplot
Transform.no=function(label=" ") function(m) {re=m; attr(re,"label")=label; re}
#' @rdname Transform.no
#' @export
Transform.Z=function(label="z score",center=TRUE,scale=TRUE) function(m) {re=t(scale(t(m),center=center,scale=scale)); attr(re,"label")=label; re}
#' @rdname Transform.no
#' @export
Transform.VST=function(label="VST") function(m) {
  if (nrow(m)<1000) re = DESeq2::varianceStabilizingTransformation(cnt(m)) else re=DESeq2::vst(cnt(m))
  attr(re,"label")=label
  re
  }
#' @rdname Transform.no
#' @export
Transform.logFC=function(label="log2 FC",LFC.fun=NULL,columns=NULL,...) {
  function(m) {
    if (is.null(columns)) columns=1:ncol(m)
    ref=rowMeans(m[,columns,drop=F])
    if (is.null(LFC.fun)) LFC.fun=lfc::PsiLFC
    re=apply(m,2,function(v) LFC.fun(v,ref,normalizeFun=function(vv) vv))
    attr(re,"label")=label
    re
  }
}


make.continuous.colors=function(values,colors=NULL,breaks=NULL) {
  if (quantile(values,0.25,na.rm=TRUE)<0) {
    quant=c(50,95) #c(seq(0,1,length.out=nq+1)[c(-1,-nq-1)]*100,95)
    if (identical(breaks,"minmax")) {
      ll = max(c(values,-values))
      breaks = seq(-ll,ll,length.out = 5)
    } else if (length(breaks)==1) {
      quant=c(seq(0,1,length.out=breaks+1)[c(-1,-breaks-1)]*100,95)
      breaks=NULL
    }
    if (is.null(breaks)) {
      upper=quantile(values[values>0],quant/100,na.rm=TRUE)
      lower=quantile(-values[values<0],quant/100,na.rm=TRUE)
      breaks=c(-rev(pmax(upper,lower)),0,pmax(upper,lower))
    }
    if (is.null(colors)) colors="RdBu"
  } else {
    quant=c(5,25,50,75,95)
    if (identical(breaks,"minmax")) {
        breaks = seq(min(values),max(values),length.out = 5)
    } else if (length(breaks)==1) {
        quant=c(5,seq(0,1,length.out=breaks)[c(-1,-breaks)]*100,95)
        breaks=NULL
    }
    if (is.null(breaks)) {
      breaks=quantile(values,quant/100,na.rm=TRUE)
    }
    if (is.null(colors)) colors="YlOrRd"
  }

  if (length(colors)==1)  {
    checkPackages(c("RColorBrewer","viridisLite"))
    rev = FALSE
    if (substr(colors,1,3)=='rev') {
      rev = TRUE
      colors = substr(colors,4,nchar(colors))
    }
    if (colors %in% rownames(RColorBrewer::brewer.pal.info)) {
      colors = RColorBrewer::brewer.pal(length(breaks),colors)
    } else  {
      colors = viridisLite::viridis(length(breaks),option = colors)
    }
    if (rev) colors=rev(colors)
  }
  list(breaks = breaks,colors=colors)
}


#' Create heatmaps from grandR objects
#'
#' Convenience method to compare among more two variables (slot data or analyses results).
#'
#' @param data the grandR object that contains the data to plot
#' @param type Either a mode.slot (see details) or a regex to be matched against analysis names. Can also be a vector
#' @param columns a vector of columns (either condition/cell names if the type is a mode.slot, or names in the output table from an analysis; use \link{Columns}(data,<analysis>) to learn which columns are available); all condition/cell names if NULL
#' @param genes the genes to be included in the plot (default: all genes)
#' @param summarize Should replicates by summarized? Can only be specified if columns is NULL; either a summarization matrix (\link{GetSummarizeMatrix}) or TRUE (in which case \link{GetSummarizeMatrix}(data) is called)
#' @param transform apply a transformation to the selected data; can be a function, or a character (see details)
#' @param cluster.genes should genes be clustered?
#' @param cluster.columns should samples (or cells) be clustered?
#' @param label.genes should genes be labeled?
#' @param xlab The names to show at the x axis (only works if type is a single slot)
#' @param breaks vector of color breaks; can be NULL (see details)
#' @param colors an RColorBrewer palette name; can be NULL (see details)
#' @param title the title for the plot; can be NULL
#' @param return.matrix if TRUE, return a list containing the data matrix and the heatmap instead of the heatmap alone
#' @param na.to convert NA values in the matrix to this value immediately before computing the heatmap
#' @param ... additional parameters forwarded to \link[ComplexHeatmap]{Heatmap}
#'
#' @details This is just a convenience function which
#' \enumerate{
#' \item{Calls \link{GetTable} with the parameter \code{type,columns,summarize,genes}}
#' \item{Transforms the returned table using the \code{transform} parameter}
#' \item{Determines reasonable colors using \code{breaks} and \code{colors}}
#' \item{and then calls ComplexHeatmap::Heatmap}
#' }
#'
#' @details \code{type} and \code{columns} can refer to values from data slots values from analyses (and can be mixed).
#' If there are types from both data and analyses, columns must be NULL.
#' Otherwise columns must either be condition/cell names (if type refers to one or several data slots), or regular expressions
#' to match against the names in the analysis tables.
#'
#' @details Columns definitions for data slots can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently build a logical vector (e.g., columns=Condition=="x").
#'
#' @details To refer to data slots, the mode.slot syntax can be used: Each name is either a data slot, or one of (new,old,total)
#' followed by a dot followed by a slot. For new or old, the data slot value is multiplied by ntr or 1-ntr. This can be used e.g. to obtain the \emph{new counts}.
#'
#' @details The transform parameter either is a function that transforms a matrix (which can conveniently be done using the Transform.XXX functions described next), or
#' a character (which must be the XXX to find such a function). Available data transformations are
#' \itemize{
#'  \item{transform=Transform.Z() or transform="Z": compute z scores for each row (see \link{Transform.Z})}
#'  \item{transform=Transform.VST() or transform="VST": do a variance stabilizing transformation (see \link{Transform.VST})}
#'  \item{transform=Transform.logFC() or transform="logFC": compute log2 fold changes to one or several reference columns; which must be defined via parameters (see \link{Transform.logFC})}
#'  \item{transform=Transform.no() or transform="no": do not transform (see \link{Transform.no})}
#' }
#'
#' @details Reasonable coloring is chosen depending on the value distribution in the matrix. If the values are zero centered (e.g. z scores or most often log fold changes), then
#' by default the 50% and 95% quantiles of all positive and all negative values are determined. Let q95 be the 95% quantile with the larger absolute value, and q50 likewise the 50%
#' quantile with the larger value. The breaks are -q90,q50,0,q50,q90, and, by default, the red to blue "RdBu" palette from RColorBrewer is taken. If the values are not zero centered,
#' the 5%,25%,50%,75%, and 95% quantiles are used as breaks and the yellow-orange-red ("YlOrRd") palette is taken. Breaks can also be specified (as absolute values).
#'
#' @details xlab can be given as a character vector or an expression that evaluates into a character vector.
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables.
#'
#' @return a ComplexHeatmap object
#'
#' @seealso \link{GetTable},\link[ComplexHeatmap]{Heatmap}
#'
#' @export
#'
#' @concept globalplot
PlotHeatmap=function(data,
                     type=DefaultSlot(data),
                     columns=NULL,
                     genes=NULL,
                     summarize=NULL,
                     transform="Z",
                     cluster.genes=TRUE,
                     cluster.columns=FALSE,
                     label.genes=NULL,
                     xlab=NULL,
                     breaks=NULL,
                     colors=NULL,
                     title=NULL,return.matrix=FALSE,
                     na.to=NA,...) {

  checkPackages(c("ComplexHeatmap","circlize"))


  mode.slot=check.mode.slot(data,type)
  if (any(mode.slot)) {
    columns=substitute(columns)
    columns=if (is.null(columns)) colnames(data) else eval(columns,Coldata(data),parent.frame())
    columns=Columns(data,columns,reorder=TRUE)

    if (length(type)==1) {
      xlab=substitute(xlab)
      xlab=if (is.null(xlab)) NULL else rep(eval(xlab,Coldata(data),parent.frame()),length.out=length(columns))
    }
  }

  mat=as.matrix(GetTable(data,type=type,genes = genes,columns=columns,summarize = summarize,ntr.na = FALSE,reorder.columns=TRUE))
  if (is.character(transform)) transform=Transform(transform)
  mat=transform(mat)


  name=attr(mat,"label")
  col=make.continuous.colors(mat,colors = colors,breaks=breaks)
  col=circlize::colorRamp2(breaks=col$breaks,colors=col$colors)

  if (length(xlab)==ncol(mat)) colnames(mat)=xlab
  if (is.null(label.genes)) label.genes=nrow(mat)<=50

  mat[is.na(mat)]=na.to
  hm=ComplexHeatmap::Heatmap(mat,name=name,
                          cluster_rows = cluster.genes,
                          cluster_columns = cluster.columns,
                          show_row_names = label.genes,
                          col = col,
                          column_title=if (is.null(title)) "" else title,
                          row_title=sprintf("n=%d",nrow(mat)),
                          ...)

  if (return.matrix) return(list(Matrix=mat,Heatmap=hm))
  hm
}

#PlotTestOverlap=function(data,names=NULL,alpha=0.05,type=c("venn","euler")) {
#  # R CMD check guard for non-standard evaluation
#  name <- NULL#

#  mat=GetAnalysisTable(data,gene.info=FALSE,genes=names,columns='^Q$')
#	df=setNames(as.data.frame(mat<alpha & !is.na(mat)),gsub(".Q$","",names(mat)))
#	pl=switch(type[1],euler=eulerr::euler(df),venn=eulerr::venn(df))
#	plot(pl,main=name)
#}

#' Formatting function for correlations
#'
#' Returns a function that takes x and y and returns a formatted output to describe the correlation of x and y
#'
#' @param method how to compute correlation coefficients (can be pearson, spearman or kendall)
#' @param n.format format string for the number of data points (see \link{sprintf}); can be NULL (don't output the number of data points)
#' @param coeff.format format string for the correlation coefficient (see \link{sprintf}); can be NULL (don't output the correlation coefficient)
#' @param p.format  format string for the P value (see \link{sprintf}); can be NULL (don't output the P value)
#' @param slope.format format string for the slope (see \link{sprintf}); can be NULL (don't output the slope)
#' @param rmsd.format format string for the root mean square deviation (see \link{sprintf}); can be NULL (don't output the rmsd)
#' @param min.obs minimum number of observations (no output outerwise)
#'
#' @details Use this for the \code{correlation} parameter of \link{PlotScatter}
#'
#' @details The slope is computed via a principal component analysis and *not* by linear regression
#'
#' @return a function
#' @export
#'
#' @examples
#'
#' set.seed(42)
#' data <- data.frame(u=runif(500))  # generate some correlated data
#' data$x <- rnorm(500,mean=data$u)
#' data$y <- rnorm(500,mean=data$u)
#'
#' fun <- FormatCorrelation()
#' fun(data$x,data$y)
#'
#' fun <- FormatCorrelation(method="spearman",p.format="%.4g")
#' fun(data$x,data$y)
#'
#' @concept globalplot
FormatCorrelation=function(method="pearson",n.format=NULL,coeff.format="%.2f",p.format="%.2g",slope.format=NULL,rmsd.format=NULL,min.obs=5) {
  function(x,y) {
    if (length(x)!=length(y)) stop("Cannot compute correlation, unequal lengths!")
    use=is.finite(x)&is.finite(y)
    if (sum(use)<length(x)) warning(sprintf("Removed %d/%d non finite values while computing correlation!",sum(!use),length(x)))
    x=x[use]
    y=y[use]
    if (length(x)<min.obs) return(NULL)
    cc=cor.test(x,y,method=method)
    p.name=switch(method,pearson="R",spearman="\U03C1",kendall="\U03C4")
    formatted.n=if (!is.null(n.format)) sprintf(sprintf("n=%s",n.format),length(x))
    formatted.p=if (!is.null(p.format)) sprintf(sprintf("p%s%s",if (cc$p.value<2.2e-16) "<" else "=",p.format),if (cc$p.value<2.2e-16) 2.2e-16 else cc$p.value)
    formatted.coeff=if (!is.null(coeff.format)) sprintf(sprintf("%s=%s",p.name,coeff.format),cc$estimate)
    formatted.slope=if (!is.null(slope.format)) {
      pca=prcomp(cbind(x,y))$rotation[,1]
      sprintf(sprintf("s=%s",slope.format),pca[2]/pca[1])
    }
    formatted.rmsd=if (!is.null(rmsd.format)) {
      sprintf(sprintf("rmsd=%s",rmsd.format),sqrt(mean((x-y)^2)))
    }
    paste(c(formatted.n,formatted.coeff,formatted.p,formatted.slope,formatted.rmsd),collapse="\n")
  }
}


#' Make a scatter plot
#'
#' Convenience method to compare two variables (slot data or analyses results).
#'
#' @param data the grandR object (can also be a plain data frame)
#' @param x an expression to compute the x value or a character corresponding to a sample (or cell) name or a fully qualified analysis result name (see details)
#' @param y an expression to compute the y value or a character corresponding to a sample (or cell) name or a fully qualified analysis result name (see details)
#' @param analysis the name of an analysis table (can be NULL; see details)
#' @param mode.slot the mode.slot (only relevant if data is a dense grandR object and analysis=NULL)
#' @param xcol a character corresponding to a sample (or cell) name or a fully qualified analysis result name (see details)
#' @param ycol a character corresponding to a sample (or cell) name or a fully qualified analysis result name (see details)
#' @param xlab the label for x (can be NULL, then the x parameter is used)
#' @param ylab the label for y (can be NULL, then the y parameter is used)
#' @param log if TRUE, use log scales for x and y axis
#' @param log.x if TRUE, use log scale for the x axis
#' @param log.y if TRUE, use log scale for the y axis
#' @param axis if FALSE, don't show x and y axes
#' @param axis.x if FALSE, don't show the x axis
#' @param axis.y if FALSE, don't show the y axis
#' @param remove.outlier configure how outliers are selected (is the coef parameter to \link[grDevices]{boxplot.stats}); can be FALSE, in which case no points are considered outliers (see details)
#' @param show.outlier if TRUE, show outlier as gray points at the border of the plotting plane
#' @param lim define the both x and y axis limits (vector of length 2 defining the lower and upper bound, respectively)
#' @param xlim define the x axis limits (vector of length 2 defining the lower and upper bound, respectively)
#' @param ylim define the y axis limits (vector of length 2 defining the lower and upper bound, respectively)
#' @param size the point size to use
#' @param cross add horizontal and vertical lines through the origin?
#' @param diag if TRUE, add main diagonal; if numeric vector, add these diagonals
#' @param filter restrict to these rows; is evaluated for the data frame, and should result in a logical vector
#' @param genes restrict to these genes; can be either numeric indices, gene names, gene symbols or a logical vector
#' @param highlight highlight these genes; can be either numeric indices, gene names, gene symbols or a logical vector (see details)
#' @param label label these genes; can be either numeric indices, gene names, gene symbols or a logical vector (see details)
#' @param label.repel force to repel labels from points and each other (increase if labels overlap)
#' @param facet an expression (evaluated in the same environment as x and y); for each unique value a panel (facet) is created; can be NULL
#' @param color either NULL (use point density colors), or a name of the \link{GeneInfo} table (use scale_color_xxx to define colors), or a color for all points
#' @param colorpalette either NULL (use default colors), or a palette name from color brewer or viridis
#' @param colorbreaks either NULL (use default algorithm of using quantiles of the values), or "minmax" for 5 breaks in between the minimum and maximum of the values, or the actual color breaks to distribute the colors from the palette
#' @param color.label the label for the color legend
#' @param density.margin for density colors, one of 'n','x' or 'y'; should the density be computed along both axes ('n'), or along 'x' or 'y' axis only
#' @param density.n how many bins to use for density calculation (see \link[MASS]{kde2d})
#' @param rasterize use ggrastr to rasterize points? (can be NULL, see details)
#' @param correlation a function to format correlation statistics to be annotated (see details)
#' @param correlation.x x coordinate to put the correlation annotation in the plot (see details)
#' @param correlation.y y coordinate to put the correlation annotation in the plot (see details)
#' @param correlation.hjust x adjustment to put the correlation annotation in the plot (see details)
#' @param correlation.vjust y adjustment to put the correlation annotation in the plot (see details)
#' @param layers.below list of ggplot geoms to add before adding the layer containing the points
#'
#' @details Both the x and y parameter are either expressions or names. Names are either sample (or cell, in case of single cell experiments) names or
#' fully qualified analysis results (analysis name followed by a dot and the analysis result table column). If the analysis parameter is given, the analysis
#' name must be omitted from x and y. These names can be used within expressions using non-standard evaluation.
#' Defining by names only works with character literals like "kinetics.Synthesis", but if you give an expression (e.g. a variable name that contains a character),
#' the situation is more complicated, since PlotScatter will try to evaluate this for defining the values, not the name of the column. If the expression evaluates
#' into a single character string that is equal to a name (see above!), PlotScatter knows what to do. For more complicated situations that cannot be resolved by this,
#' you can use the xcol and ycol parameters instead of the x and y parameters!
#'
#' @details By default the limits of x and y axis are chosen after removing outliers (using the same algorithm used for \link{boxplot}). Thus, larger numbers filter
#' less stringently. remove.outlier can also be set to FALSE (no outlier filtering). If xlim or ylim are set, this overrides outlier filtering. Points outside of the limits
#' (i.e. outliers or points outside of xlim or ylim) are set to infinity (such that they are shown at the border of the plot in gray)
#'
#' @details By default, all genes are shown. This can be restricted using the \code{genes} parameter (see \link{ToIndex}). It is also possible to highlight a subset of the genes
#' using \code{highlight}. This parameter either describes a subset of the genes (either numeric indices, gene names, gene symbols or a logical vector), in which case these genes
#' are plotted in red and with larger points size, or it can be a list of such vectors. The names of this list must be valid colors. Genes can also be labeled (make sure that this
#' is really only a small subset of the genes).
#'
#' @details When rendering to vector based devices (such as svg or pds), a genome-wide scatterplot often is painfully big (and rendering therefore slow). The \code{rasterize}
#' parameter can be used to automatically rasterize the points only (via the ggrastr package). If this parameter is NULL, ggrastr is used if more than 1000 points are plotted!
#'
#' @details Often scatter plots show that x and y coordinates are correlated. Correlations can be annotated using the \link{FormatCorrelation} function. Most often you will use
#' \code{PlotScatter(data,x,y,correlation=FormatCorrelation())}. To use a different correlation measure, other formats for correlation coefficient and P values or omit one of these
#' statistics, parametrize \code{FormatCorrelation}. Use correlation.x and correlation.y to place the annotation in the plot, and correlation.hjust/correlation.vjust to align the
#' annotation at the given x,y coordinates. Infinite values for correlation.x/correlation.y will put the annotation at the border of the plot.
#'
#' @return a ggplot object with the data frame used as the df attribute
#' @export
#'
#' @concept globalplot
PlotScatter=function(data,
                     x=NULL, y=NULL, analysis=NULL,mode.slot=NULL,xcol=NULL,ycol=NULL, xlab=NULL, ylab=NULL,
                     log=FALSE, log.x=log, log.y=log,
                     axis=TRUE, axis.x=axis, axis.y=axis,
                     remove.outlier=1.5, show.outlier=TRUE,lim=NULL,xlim=lim, ylim=lim,
                     size=0.3,
                     cross=NULL,diag=NULL,
                     filter=NULL,
                     genes=NULL,highlight=NULL, label=NULL, label.repel=1,
                     facet=NULL,
                     color=NULL, colorpalette=NULL, colorbreaks=NULL, color.label=NULL,
                     density.margin = 'n', density.n = 100,
                     rasterize=NULL,
                     correlation=NULL,correlation.x=-Inf,correlation.y=Inf,correlation.hjust=0.5,correlation.vjust=0.5,
                     layers.below=NULL) {
  if (is.grandR(data)) {
    if (!is.null(analysis)) {
      df=cbind(GetAnalysisTable(data,analyses = analysis,regex=FALSE,prefix.by.analysis = FALSE,gene.info = FALSE),GeneInfo(data))
    }else if (IsSparse(data)) {
      df=GetAnalysisTable(data)
    }else{
      if (is.null(mode.slot)) mode.slot = DefaultSlot(data)
      df=cbind(GetAnalysisTable(data,gene.info = FALSE),GetTable(data,type=mode.slot),GeneInfo(data))
    }
  if (!is.null(genes)) df=df[ToIndex(data,genes),]
  } else {
    df=as.data.frame(data)
  }


  filter=substitute(filter)
  if (!is.null(filter)) {
    filter = eval(filter,df,parent.frame())
    df=df[filter,]
  }

  facet=substitute(facet)
  if (!is.null(facet)) {
    df$facet=eval(facet,df,parent.frame())
  }

  if (!is.data.frame(df)) stop("df must be a data frame (or at least coercable into a data frame)")
  adaptInf=function(df,rx,ry) {
    # workaround to also "brush" infinity points at the border of the plane
    if (log.x) rx=log10(rx)
    rx=c(rx[1]-0.04*(rx[2]-rx[1]),rx[2]+0.04*(rx[2]-rx[1]))
    if (log.x) rx=10^rx
    df$A[is.infinite(df$A) & df$A>0]=rx[2]
    df$A[is.infinite(df$A) & df$A<0]=rx[1]

    if (log.y) ry=log10(ry)
    ry=c(ry[1]-0.04*(ry[2]-ry[1]),ry[2]+0.04*(ry[2]-ry[1]))
    if (log.y) ry=10^ry
    df$B[is.infinite(df$B) & df$B>0]=ry[2]
    df$B[is.infinite(df$B) & df$B<0]=ry[1]

    df
  }


  rn=rownames(df)

  x=substitute(x)
  if (is.null(x) && is.null(xcol))  {
    A=df[[1]]
    x=1
  } else if (is.null(x)) {
    if (!xcol %in% colnames(df)) stop(sprintf("%s is not a column!",xcol))
    A=df[[xcol]]
    x=xcol
  } else if (is.null(xcol)) {
    A=if (is.character(x) || is.numeric(x)) df[[x]] else eval(x,df,parent.frame())
    if (length(A)==1 && is.character(A)) {
      if (A %in% names(df)) A=df[[A]] else stop("Cannot make heads or tails out of the x parameter. Try to use xcol to access a specific column!")
    }
    if (length(A)==1) stop("Cannot make heads or tails out of the x parameter. Try to use xcol to access a specific column!")
  } else {
    stop("You must not specify both x and xcol!")
  }

  y=substitute(y)
  if (is.null(y) && is.null(ycol))  {
    B=df[[2]]
    y=2
  } else if (is.null(y)) {
    if (!ycol %in% colnames(df)) stop(sprintf("%s is not a column!",ycol))
    B=df[[ycol]]
    y=ycol
  } else if (is.null(ycol)) {
    B=if (is.character(y) || is.numeric(y)) df[[y]] else eval(y,df,parent.frame())
    if (length(B)==1 && is.character(B)) {
      if (B %in% names(df)) B=df[[B]] else stop("Cannot make heads or tails out of the y parameter. Try to use ycol to access a specific column!")
    }
    if (length(B)!=nrow(df)) stop("Cannot make heads or tails out of the y parameter. Try to use xcol to access a specific column!")
  } else {
    stop("You must not specify both y and ycol!")
  }

  if (is.null(xlab)) { if (is.character(x)) xlab=x else if (is.numeric(x)) xlab=names(df)[x] else xlab=deparse(x)}
  if (is.null(ylab)) { if (is.character(y)) ylab=y else if (is.numeric(y)) ylab=names(df)[y] else ylab=deparse(y)}
  if (is.na(xlab[1]) || xlab=="") xlab=NULL
  if (is.na(ylab[1]) || ylab=="") ylab=NULL

  df$A=A
  df$B=B
  rownames(df)=rn


  df$A.trans=if(log.x) log10(df$A) else df$A
  df$B.trans=if(log.y) log10(df$B) else df$B
  df$A.trans.unclipped=df$A.trans
  df$B.trans.unclipped=df$B.trans

  set.coord=remove.outlier!=FALSE || !is.null(xlim) || !is.null(ylim)
  if (set.coord) {
    if (is.null(xlim) && remove.outlier) {
      xlim=grDevices::boxplot.stats(df$A.trans[is.finite(df$A.trans)],coef=remove.outlier)$stats[c(1,5)] #(quantile(df[,1]-xmean,pnorm(c(-2,2)))*1.5)+xmean
      xlim=c(df$A[which(df$A.trans==xlim[1])[1]],df$A[which(df$A.trans==xlim[2])[1]])
    }
    if (is.null(ylim) && remove.outlier) {
      ylim=grDevices::boxplot.stats(df$B.trans[is.finite(df$B.trans)],coef=remove.outlier)$stats[c(1,5)] #(quantile(df[,2]-ymean,pnorm(c(-2,2)))*1.5)+ymean
      ylim=c(df$B[which(df$B.trans==ylim[1])[1]],df$B[which(df$B.trans==ylim[2])[1]])
    }
    if (is.null(xlim)) xlim=range(df$A[is.finite(df$A)])
    if (is.null(ylim)) ylim=range(df$B[is.finite(df$B)])

    clip=function(v,ch,lim,minus) ifelse(!is.finite(v),v,ifelse(ch<lim[1],minus,ifelse(ch>lim[2],Inf,v)))
    df$A.trans=clip(df$A.trans,df$A,xlim,-Inf)
    df$B.trans=clip(df$B.trans,df$B,ylim,-Inf)
    df$A=clip(df$A,df$A,xlim,if (log.x) 0 else -Inf)
    df$B=clip(df$B,df$B,ylim,if (log.y) 0 else -Inf)
    if (!show.outlier) {
      use = is.finite(df$A) & is.finite(df$B)
      df=df[use,]
    }
  }
  else {
    xlim=range(df$A[!is.infinite(df$A)])
    ylim=range(df$B[!is.infinite(df$B)])
  }

  color=substitute(color)
  if (is.null(color)) {
    if (is.null(df$facet)) {
      df$color=density2d(df$A.trans, df$B.trans, n = density.n,margin = density.margin)
    } else {
      df$color=density2d(df$A.trans, df$B.trans, n = density.n,margin = density.margin,facet = df$facet)
    }
    if (is.null(colorpalette)) {
      colorscale=scale_color_viridis_c(name = "Density",guide="none")
    } else {
      col=make.continuous.colors(values=df$color,colors = colorpalette, breaks=colorbreaks)
      colorscale = scale_color_gradientn(name="Density",guide="none",colors=col$colors)
    }
#  } else if (length(color)==1 && as.character(color) %in% names(GeneInfo(data))) {
#    df$color=GeneInfo(data,color)[ToIndex(data,genes)]
#    colorscale=scale_color_discrete(deparse(color))
  } else {
    ccol=if (length(color)==1 && as.character(color) %in% names(df)) df[[as.character(color)]] else eval(color,df,parent.frame())
    if (length(ccol)==1 && is.character(ccol) && ccol %in% names(df)) {
      ccol=df[[ccol]]
    }
    df$color=ccol
    if (is.character(df$color)) {
      colorscale=scale_color_identity(guide="none")
    } else if (is.factor(df$color)) {
      colorscale=if (!is.null(colorpalette)) scale_color_manual(color.label,values = colorpalette, guide=guide_legend(override.aes = list(size = 2))) else scale_color_discrete(color.label, guide=guide_legend(override.aes = list(size = 2)))
    } else {
      col=make.continuous.colors(values = df$color,colors=colorpalette, breaks=colorbreaks)
      df$color=pmin(pmax(df$color,min(col$breaks)),max(col$breaks))
      colorscale = scale_color_gradientn(color.label,colors=col$colors,breaks=scales::breaks_pretty(n=5)(df$color),limits=c(min(col$breaks),max(col$breaks)))
    }
  }

  g=ggplot(df,aes(A,B,color=color))+cowplot::theme_cowplot()
  if (!is.null(cross)) g=g+geom_vline(xintercept = 0,linetype=2,color='gray')+geom_hline(yintercept = 0,linetype=2,color='gray')
  if (!is.null(diag)) {
    if (is.logical(diag)) diag=0
      g=g+geom_abline(linetype=2,color='gray',intercept = diag)
  }

  if (!is.null(layers.below)) for (e in layers.below) g=g+e

  if ((is.null(rasterize) && nrow(df)>1000) || rlang::is_true(rasterize)) {
    checkPackages("ggrastr")
    g=g+ggrastr::rasterize(geom_point(size=size))
  } else {
    g=g+geom_point(size=size)
  }

  g=g+xlab(xlab)+ylab(ylab)

  if (!is.null(df$facet)) g=g+facet_wrap(~facet)
  if (!is.null(colorscale)) g=g+colorscale

  if (!is.null(highlight)) {
    if (is.list(highlight)){
      for (col in names(highlight)) {
        g=g+geom_point(data=df[highlight[[col]],],color=col,size=size*3)
      }
    } else {
      g=g+geom_point(data=df[highlight,],color='red',size=size*3)
    }
  }
  if (!is.null(label)) {
    df2=df[if (is.grandR(data)) ToIndex(data,label) else label,]
    df2$label=if (is.character(label)) label else rownames(df2)[label]
    if (checkPackages("ggrepel",error = FALSE,warn = FALSE)) {
      g=g+ggrepel::geom_label_repel(data=df2,mapping=aes(label=label),show.legend = FALSE,max.overlaps = Inf,min.segment.length = 0,force=label.repel)
    } else {
      singleMessage("Install the ggrepel package to have nicer labels!")
      g=g+ggplot2::geom_label(data=df2,mapping=aes(label=label),show.legend = FALSE)
    }
  }
  if (!is.null(correlation)) {
    if (correlation.x==-Inf) correlation.hjust=-0.05
    if (correlation.x==Inf) correlation.hjust=1.05
    if (correlation.y==-Inf) correlation.vjust=-0.05
    if (correlation.y==Inf) correlation.vjust=1.05

    if(log.x && correlation.x==-Inf) correlation.x=0
    if(log.y && correlation.y==-Inf) correlation.y=0

    if (is.null(df$facet)) {
      cor.format=correlation(df$A.trans.unclipped,df$B.trans.unclipped)
      g=g+annotate("text",x=correlation.x,y=correlation.y,label=cor.format,hjust=correlation.hjust,vjust=correlation.vjust)
    } else {
      cor.format = plyr::ddply(df,plyr::.(facet),function(sub) data.frame(label=correlation(sub$A.trans.unclipped,sub$B.trans.unclipped)))
      cor.format$correlation.x=correlation.x
      cor.format$correlation.y=correlation.y
      g=g+geom_text(data=cor.format,aes(x=correlation.x,y=correlation.y,label=label),inherit.aes=FALSE, parse=FALSE, hjust=correlation.hjust,vjust=correlation.vjust)
    }
  }
  if (set.coord) g=g+coord_cartesian(ylim=ylim,xlim=xlim)
  if (log.x) g=g+scale_x_log10()
  if (log.y) g=g+scale_y_log10()

  if (!axis.x) {
    g=g+theme(
      axis.line.x = ggplot2::element_blank(),
      axis.text.x = ggplot2::element_blank(),
      axis.ticks.x = ggplot2::element_blank()
    )
  }

  if (!axis.y) {
    g=g+theme(
      axis.line.y = ggplot2::element_blank(),
      axis.text.y = ggplot2::element_blank(),
      axis.ticks.y = ggplot2::element_blank()
    )
  }

  attr(g, 'df') <- adaptInf(df,xlim,ylim)
  g
}

#PlotToxicityTest=function(data,w4sU,no4sU,ylim=c(-1,1),LFC.fun=PsiLFC,hl.quantile=0.8) {
#	w=GetData(data,"count",conditions=w4sU,table=T)[,1]
#	n=if (is.numeric(no4sU)) no4sU[data$gene.info$Gene] else GetData(data,"count",conditions=no4sU,table=T)[,1]
#	ntr=GetData(data,"ntr",conditions=w4sU,table=T)[,1]
#	use=!is.na(w+n+ntr)
#	w=w[use]
#	n=n[use]
#	ntr=ntr[use]
#
#	phl=comp.hl(ntr)
#	df=data.frame(lfc=LFC.fun(w,n),PHL=phl)[ntr<1,]
#	df=df[df$PHL<quantile(df$PHL[is.finite(df$PHL)],hl.quantile),]
#	ggplot(df,aes(PHL,lfc,color=density2d(PHL, lfc, n = 100)))+
#			scale_color_viridis_c(name = "Density",guide='none')+
#			geom_point(alpha=1)+
#			geom_hline(yintercept=0)+
##			geom_smooth(method="loess")+
#			xlab("RNA half-life")+ylab("log FC 4sU/no4sU")+
#			scale_x_continuous(breaks=c())+
#			coord_cartesian(ylim=ylim)
#}



PlotExpressionTest=function(data,w4sU,no4sU,ylim=c(-1,1),LFC.fun=lfc::PsiLFC,hl.quantile=0.8) {
  # R CMD check guard for non-standard evaluation
  M <- lfc <- NULL

  w=GetTable(data,type="count",columns=w4sU)[,1]
	nn=no4sU
	n=if (is.numeric(no4sU)) no4sU[data$gene.info$Gene] else GetTable(data,type="count",columns=nn)[,1]
	use=!is.na(w+n)
	w=w[use]
	n=n[use]

	df=data.frame(lfc=LFC.fun(w,n),M=(log10(w+1)+log10(n+1))/2)
	ggplot(df,aes(M,lfc,color=density2d(M, lfc, n = 100)))+
	  cowplot::theme_cowplot()+
	  scale_color_viridis_c(name = "Density",guide='none')+
			geom_point(alpha=1)+
			geom_hline(yintercept=0)+
#			geom_smooth(method="loess")+
			xlab("Mean expression")+ylab("log FC 4sU/no4sU")+
			coord_cartesian(ylim=ylim)
}

#' Convenience function to make the same type of plot for multple analyses.
#' @param data the grandR object that contains the data to be plotted
#' @param plot.fun the plottinf function to apply
#' @param analyses the analyses to plot (default: all)
#' @param add additional ggplot (e.g., geoms) objects to add
#' @param ... passed further to plot.fun
#' @return ggplot objects
#' @export
#' @concept globalplot
PlotAnalyses=function(data,plot.fun,analyses=Analyses(data),add=NULL,...) {
  lapply(analyses,function(analysis) {
    re=plot.fun(data,analysis=analysis,...)
    if (!is.null(add)) for (e in if (is.list(add)) add else list(add)) re=re+e
    re
  })
}

#' Make a Vulcano plot
#'
#' Plot log2 fold changes against -log10 multiple testing adjusted P values
#'
#' @param data the grandR object that contains the data to be plotted
#' @param analysis the analysis to plot (default: first analysis)
#' @param p.cutoff p-value cutoff (default: 0.05)
#' @param lfc.cutoff log fold change cutoff (default: 1)
#' @param annotate.numbers if TRUE, label the number of genes
#' @param ... further parameters passed to \link{PlotScatter}
#' @return a ggplot object
#' @export
#' @concept globalplot
VulcanoPlot=function(data,analysis=Analyses(data)[1],p.cutoff=0.05,lfc.cutoff=1,
                     annotate.numbers=TRUE,...) {
  # R CMD check guard for non-standard evaluation
  Q <- NULL

  df=GetAnalysisTable(data,analyses=analysis,regex=FALSE,columns=c("LFC|Q"),gene.info = FALSE)
  if (is.numeric(analysis)) analysis=Analyses(data)[analysis]
  names(df)=gsub(".*.Q","Q",gsub(".*.LFC","LFC",names(df)))
  g=PlotScatter(df,x=LFC,y=-log10(Q),remove.outlier = FALSE,...)+
    xlab(bquote(log[2]~FC))+
    ylab(bquote("-"~log[10]~FDR))+
    geom_hline(yintercept=-log10(p.cutoff),linetype=2)+
    geom_vline(xintercept=if (lfc.cutoff==0) 0 else c(-lfc.cutoff,lfc.cutoff),linetype=2)+
    ggtitle(analysis)

  if (annotate.numbers) {
    if (lfc.cutoff!=0) {
      n=table(cut(df$LFC,breaks=c(-Inf,-lfc.cutoff,lfc.cutoff,Inf)),factor(df$Q>p.cutoff,levels=c("FALSE","TRUE")))
      g=g+annotate("label",x=c(-Inf,0,Inf,-Inf,0,Inf),y=c(Inf,Inf,Inf,-Inf,-Inf,-Inf),label=paste0("n=",as.numeric(n)),hjust=c(-0.1,0.5,1.1,-0.1,0.5,1.1),vjust=c(1.1,1.1,1.1,-0.1,-0.1,-0.1))
    } else {
      n=table(cut(df$LFC,breaks=c(-Inf,lfc.cutoff,Inf)),factor(df$Q>p.cutoff,levels=c("FALSE","TRUE")))
      g=g+annotate("label",x=c(-Inf,Inf,-Inf,Inf),y=c(Inf,Inf,-Inf,-Inf),label=paste0("n=",as.numeric(n)),hjust=c(-0.1,1.1,-0.1,1.1),vjust=c(1.1,1.1,-0.1,-0.1))
    }
  }
  g
}


#' Make an MA plot
#'
#' Plot average expression vs. log2 fold changes
#'
#' @param data the grandR object that contains the data to be plotted
#' @param analysis the analysis to plot (default: first analysis)
#' @param p.cutoff p-value cutoff (default: 0.05)
#' @param lfc.cutoff log fold change cutoff (default: 1)
#' @param annotate.numbers if TRUE, label the number of genes
#' @param ... further parameters passed to \link{PlotScatter}
#' @return a ggplot object
#' @export
#' @concept globalplot
MAPlot=function(data,analysis=Analyses(data)[1],p.cutoff=0.05,lfc.cutoff=1,
                     annotate.numbers=TRUE,...) {
  # R CMD check guard for non-standard evaluation
  M <- Q <- NULL

  df=GetAnalysisTable(data,analyses=analysis,regex=FALSE,columns=c("M|LFC|Q"),gene.info = FALSE)
  if (is.numeric(analysis)) analysis=Analyses(data)[analysis]
  names(df)=gsub(".*.Q","Q",gsub(".*.LFC","LFC",gsub(".*.M","M",names(df))))
  if (is.null(df$Q)) df$Q=1
  df$Q[is.na(df$Q)]=1
  g=PlotScatter(df,x=M+1,y=LFC,color=ifelse(Q<p.cutoff,'black','gray50'),log.x=TRUE,remove.outlier = FALSE,...)+
    xlab("Total expression")+
    ylab(bquote(log[2]~FC))+
    geom_hline(yintercept=if (lfc.cutoff==0) 0 else c(-lfc.cutoff,lfc.cutoff),linetype=2)+
    ggtitle(analysis)

  if (annotate.numbers) {
    n=c(sum(df$LFC>lfc.cutoff & df$Q<p.cutoff,na.rm = TRUE),sum(df$LFC< -lfc.cutoff & df$Q<p.cutoff,na.rm = TRUE))
    g=g+annotate("label",x=c(Inf,Inf),y=c(Inf,-Inf),label=paste0("n=",n),hjust=c(1.1,1.1),vjust=c(1.1,-0.1))
  }

  g
}

#' Plot the distribution of gene types
#' @param data the grandR object to get the data to be plotted from
#' @param mode.slot which mode and slot to use
#' @param relative show percentage values?
#'
#' @return a ggplot object
#'
#' @export
#' @concept globalplot
PlotTypeDistribution=function(data,mode.slot=DefaultSlot(data),relative=FALSE) {
  # R CMD check guard for non-standard evaluation
  value <- Type <- NULL

  df=GetTable(data,type=mode.slot)
	df=sapply(levels(data$gene.info$Type),function(type) colSums(df[ data$gene.info$Type==type,]))
	df=df[,colSums(df)>0]
	if (relative) {
		df=df/rowSums(df)*100
		mode.slot=sprintf("%s [%%]",mode.slot)
	}
	df=reshape2::melt(df,varnames=c("Condition","Type"))
	ggplot(df,aes(Condition,value,fill=Type))+cowplot::theme_cowplot()+
	  geom_bar(stat="Identity")+scale_fill_brewer(palette="Dark2")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+ylab(mode.slot)+xlab(NULL)
}


setup.default.aes=function(data,aest) {
  # R CMD check guard for non-standard evaluation
  Replicate <- NULL

  if (is.null(aest)) aest=aes()
  if (!is.null(Condition(data)) && !any(c("color","colour") %in% names(aest))) aest=utils::modifyList(ggplot2::aes(color=Condition),aest)
  if (!is.null(Coldata(data)$Replicate)&& !any(c("shape") %in% names(aest))) aest=utils::modifyList(ggplot2::aes(shape=Replicate),aest)
  aest
}

#' Gene plot comparing old vs new RNA
#'
#' Plot the old vs new RNA values of a gene
#'
#' @param data the grandR object to get the data to be plotted from
#' @param gene the gene to plot
#' @param slot the slot of the grandR object to get the data from
#' @param columns which columns (i.e. samples or cells) to show (see details)
#' @param log show both axes in log scale
#' @param show.CI show confidence intervals; one of TRUE/FALSE (default: FALSE)
#' @param aest parameter to set the visual attributes of the plot
#' @param size the point size used for plotting; overridden if size is defined via aest
#'
#' @details The value of the aest parameter must be an \emph{Aesthetic mapping} as generated by \link[ggplot2]{aes}.
#'
#' @details The table used for plotting is the table returned by \link{GetData} with coldata set to TRUE, i.e. you can use all names from the \link{Coldata} table for aest.
#'
#' @details By default, aest is set to aes(color=Condition,shape=Replicate) (if both Condition and Replicate are names in the Coldata table).
#'
#' @details Columns can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently build a logical vector (e.g., columns=Condition=="x").
#'
#' @return a ggplot object.
#'
#' @seealso \link{GetData}, \link{PlotGeneTotalVsNtr},\link{PlotGeneGroupsPoints},\link{PlotGeneGroupsBars}
#'
#' @export
#' @concept geneplot
PlotGeneOldVsNew=function(data,gene,slot=DefaultSlot(data),columns=NULL,log=TRUE,show.CI=FALSE,
                          aest=NULL,size=2) {
  # R CMD check guard for non-standard evaluation
  lower <- upper <- NULL

  aest=setup.default.aes(data,aest)
  if (length(slot)!=1) stop("Provide a single slot name!")
  slot=get.mode.slot(data,slot,allow.ntr = FALSE)$slot
  new=paste0("new.",slot)
  old=paste0("old.",slot)

  columns=substitute(columns)
  columns=if (is.null(columns)) colnames(data) else eval(columns,Coldata(data),parent.frame())
  columns=Columns(data,columns)

  df=GetData(data,genes=gene,mode.slot=c(old,new),columns=columns,by.rows=FALSE,coldata=TRUE,ntr.na = FALSE)
  g=ggplot(df,utils::modifyList(aes(!!sym(old),!!sym(new)),aest))+cowplot::theme_cowplot()
  if (!is.null(aest$size)) g=g+geom_point() else g=g+geom_point(size=size)
  if (log) {
    g=g+scale_x_log10()+
      scale_y_log10()
  } else {
    g=g+scale_x_continuous()+
      scale_y_continuous()
  }
  g=g+xlab(paste0("Old RNA (",slot,")"))
  g=g+ylab(paste0("New RNA (",slot,")"))

  if (show.CI) {
    if (!all(c("lower","upper") %in% Slots(data))) stop("Compute lower and upper slots first! (ComputeNtrCI)")
    df=cbind(df,GetData(data,genes=gene,mode.slot=c("lower","upper",slot),by.rows=FALSE,coldata=T,ntr.na = FALSE)[,c("lower","upper",slot)])
    g=g+geom_errorbar(data=df,mapping=aes(ymin=lower*!!sym(slot),ymax=upper*!!sym(slot)))
    g=g+geom_errorbarh(data=df,mapping=aes(xmin=(1-upper)*!!sym(slot),xmax=(1-lower)*!!sym(slot)))
  }
  g
}

#' Gene plot comparing total RNA vs the NTR
#'
#' Plot the total RNA expression vs the new-to-total RNA ratio for a gene
#'
#' @param data the grandR object to get the data to be plotted from
#' @param gene the gene to plot
#' @param slot the slot of the grandR object to get the data from
#' @param columns which columns (i.e. samples or cells) to show (see details)
#' @param log show the x axis (total RNA) in log scale
#' @param show.CI show confidence intervals; one of TRUE/FALSE (default: FALSE)
#' @param aest parameter to set the visual attributes of the plot
#' @param size the point size used for plotting; overridden if size is defined via aest
#'
#' @details The value of the aest parameter must be an \emph{Aesthetic mapping} as generated by \link[ggplot2]{aes}.
#'
#' @details The table used for plotting is the table returned by \link{GetData} with coldata set to TRUE, i.e. you can use all names from the \link{Coldata} table for aest.
#'
#' @details By default, aest is set to aes(color=Condition,shape=Replicate) (if both Condition and Replicate are names in the Coldata table).
#'
#' @details Columns can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently build a logical vector (e.g., columns=Condition=="x").
#'
#' @return a ggplot object.
#'
#' @seealso \link{GetData}, \link{PlotGeneOldVsNew},\link{PlotGeneGroupsPoints},\link{PlotGeneGroupsBars}
#'
#' @export
#' @concept geneplot
PlotGeneTotalVsNtr=function(data,gene,slot=DefaultSlot(data),columns=NULL,log=TRUE,show.CI=FALSE,
                            aest=NULL,size=2) {
  # R CMD check guard for non-standard evaluation
  lower <- upper <- ntr <- NULL

  aest=setup.default.aes(data,aest)
  if (length(slot)!=1) stop("Provide a single slot name!")
  slot=get.mode.slot(data,slot,allow.ntr = FALSE)$slot

  columns=substitute(columns)
  columns=if (is.null(columns)) colnames(data) else eval(columns,Coldata(data),parent.frame())
  columns=Columns(data,columns)

  df=GetData(data,genes=gene,mode.slot=c("ntr",slot),columns=columns,by.rows=FALSE,coldata=T,ntr.na = FALSE)
  g=ggplot(df,utils::modifyList(aes(!!sym(slot),ntr),aest))+cowplot::theme_cowplot()
  if (!is.null(aest$size)) g=g+geom_point() else g=g+geom_point(size=size)
  if (log) {
    g=g+scale_x_log10()
  } else {
    g=g+scale_x_continuous()
  }
  g=g+scale_y_continuous("NTR")
  g=g+xlab(paste0("Total RNA (",slot,")"))

  if (show.CI) {
    if (!all(c("lower","upper") %in% Slots(data))) stop("Compute lower and upper slots first! (ComputeNtrCI)")
    df=cbind(df,GetData(data,genes=gene,mode.slot=c("lower","upper"),by.rows=FALSE,coldata=T,ntr.na = FALSE)[,c("lower","upper")])
    g=g+geom_errorbar(data=df,mapping=aes(ymin=lower,ymax=upper))
  }
  g
}

#' Plot gene groups as points
#'
#' Plot either old, new or total RNA of a gene in a row, per condition.
#'
#' @param data the grandR object to get the data to be plotted from
#' @param gene the gene to plot
#' @param group how to group the genes (default: Condition)
#' @param mode.slot the  mode.slot of the grandR object to get the data from
#' @param columns which columns (i.e. samples or cells) to show (see details)
#' @param log show the y axis in log scale
#' @param show.CI show confidence intervals; one of TRUE/FALSE (default: FALSE)
#' @param aest parameter to set the visual attributes of the plot
#' @param size the point size used for plotting; overridden if size is defined via aest
#' @param transform function that is called on the data frame directly before plotting (can be NULL)
#'
#' @details The value of the aest parameter must be an \emph{Aesthetic mapping} as generated by \link[ggplot2]{aes}.
#'
#' @details To refer to data slots, the mode.slot syntax can be used: Each name is either a data slot, or one of (new,old,total) followed by a
#' dot followed by a slot. For new or old, the data slot value is multiplied by ntr or 1-ntr. This can be used e.g. to obtain the \emph{new counts}.
#'
#' @details The table used for plotting is the table returned by \link{GetData} with coldata set to TRUE, i.e. you can use all names from the \link{Coldata} table for aest.
#'
#' @details By default, aest is set to aes(color=Condition,shape=Replicate) (if both Condition and Replicate are names in the Coldata table).
#'
#' @details Columns can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently build a logical vector (e.g., columns=Condition=="x").
#'
#' @return a ggplot object.
#'
#' @seealso \link{GetData}, \link{PlotGeneTotalVsNtr},\link{PlotGeneOldVsNew},\link{PlotGeneGroupsBars}
#'
#' @export
#' @concept geneplot
PlotGeneGroupsPoints=function(data,gene,group="Condition",mode.slot=DefaultSlot(data),
                              columns=NULL,
                              log=TRUE,
                              show.CI=FALSE,
                              aest=NULL,size=2,transform=NULL) {
  # R CMD check guard for non-standard evaluation
  lower <- upper <- value <- NULL

  aest=setup.default.aes(data,aest)

  if (length(group)!=1 && !group %in% names(Coldata(data))) stop("Group must be a name in the Coldata table!")

  if (length(mode.slot)!=1) stop("Provide a single slot name!")
  mode.slot=get.mode.slot(data,mode.slot)
  slot=mode.slot$slot
  mode=mode.slot$mode

  columns=substitute(columns)
  columns=if (is.null(columns)) colnames(data) else eval(columns,Coldata(data),parent.frame())
  columns=Columns(data,columns)

  if (slot=="ntr") {
    df=GetData(data,genes=gene,mode.slot="ntr",columns=columns,by.rows=FALSE,coldata=TRUE,ntr.na = FALSE)
    df$ntr=df$Value
    df$value=df[[slot]]
    log=FALSE
  } else {
    df=GetData(data,genes=gene,mode.slot=c(slot,"ntr"),columns=columns,by.rows=FALSE,coldata=TRUE,ntr.na = FALSE)
    df$value=switch(mode[1],total=df[[slot]],new=df[[slot]]*df[["ntr"]],old=df[[slot]]*(1-df[["ntr"]]),stop(paste0(mode," unknown!")))
  }

  if (!is.null(transform)) df=transform(df)

  g=ggplot(df,utils::modifyList(aes(!!sym(group),value),aest))+cowplot::theme_cowplot()
  if (!is.null(aest$size)) g=g+geom_point(position=if(show.CI) position_dodge(width=0.4) else "identity") else g=g+geom_point(size=size,position=if(show.CI) position_dodge(width=0.4) else "identity")
  g=g+xlab(NULL)+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
  if (log) g=g+scale_y_log10()

  if (slot=="ntr")g=g+ylab("NTR") else g=g+ylab(paste0(toupper(substr(mode,1,1)),substr(mode,2,nchar(mode))," RNA (",slot,")"))

  if (show.CI) {
    if (!all(c("lower","upper") %in% Slots(data))) stop("Compute lower and upper slots first! (ComputeNtrCI)")
    df=cbind(df,GetData(data,genes=gene,mode.slot=c("lower","upper"),by.rows=FALSE,coldata=FALSE,ntr.na = FALSE)[,c("lower","upper")])
    if (slot=="ntr") {
      g=g+geom_errorbar(data=df,mapping=aes(ymin=lower,ymax=upper),width=0,position=position_dodge(width=0.4))
    } else {
      g=switch(mode[1],
               total=g,
               new=g+geom_errorbar(data=df,mapping=aes(ymin=lower*!!sym(slot),ymax=upper*!!sym(slot)),width=0,position=position_dodge(width=0.4)),
               old=g+geom_errorbar(data=df,mapping=aes(ymin=(1-upper)*!!sym(slot),ymax=(1-lower)*!!sym(slot)),width=0,position=position_dodge(width=0.4)),
               stop(paste0(mode," unknown!")))
    }
  }
  g
}

#' Plot gene values as bars
#'
#' Plot old and new RNA of a gene in a row.
#'
#' @param data the grandR object to get the data to be plotted from
#' @param gene the gene to plot
#' @param slot the slot of the grandR object to get the data from
#' @param columns which columns (i.e. samples or cells) to show (see details)
#' @param show.CI show confidence intervals; one of TRUE/FALSE (default: FALSE)
#' @param xlab The names to show at the x axis;
#' @param transform function that is called on the data frame directly before plotting (can be NULL)
#'
#' @details xlab can be given as a character vector or an expression that evaluates into a character vector.
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently it.
#'
#' @details Columns can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently build a logical vector (e.g., columns=Condition=="x").
#'
#' @return a ggplot object.
#'
#' @seealso \link{GetData}, \link{PlotGeneTotalVsNtr},\link{PlotGeneOldVsNew},\link{PlotGeneGroupsBars}
#'
#' @export
#' @concept geneplot
PlotGeneGroupsBars=function(data,gene,slot=DefaultSlot(data),columns=NULL,show.CI=FALSE,xlab=NULL,transform=NULL) {
  # R CMD check guard for non-standard evaluation
  Name <- Value <- mode.slot <- upper <- lower <- NULL

  if (length(slot)!=1) stop("Provide a single slot name!")
  slot=get.mode.slot(data,slot,allow.ntr = FALSE)$slot

  columns=substitute(columns)
  columns=if (is.null(columns)) colnames(data) else eval(columns,Coldata(data),parent.frame())
  columns=Columns(data,columns)

  df=GetData(data,genes=gene,mode.slot=paste0(c("new.","old."),slot),columns=columns,by.rows=TRUE,coldata=TRUE,ntr.na = FALSE)

  xlab=substitute(xlab)
  xlab=if (is.null(xlab)) df$Name else eval(xlab,df,parent.frame())
  df$xlab=xlab

  if (!is.null(transform)) df=transform(df)

  g=ggplot(df,aes(Name,Value,fill=mode.slot))+
    cowplot::theme_cowplot()+
    geom_bar(stat="identity",position=position_stack())+
    scale_fill_manual(NULL,values = c('red','gray'),guide="none")+
    xlab(NULL)+
    scale_x_discrete(breaks=df$Name,labels=df$xlab)+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
  g=g+ylab(paste0("Total RNA (",slot,")"))
  if (show.CI) {
    if (!all(c("lower","upper") %in% Slots(data))) stop("Compute lower and upper slots first! (ComputeNtrCI)")
    df2=GetData(data,genes=gene,mode.slot=c("lower","upper",slot),by.rows=FALSE,coldata=TRUE,ntr.na = FALSE)
    g=g+geom_errorbar(data=df2,mapping=aes(y=!!sym(slot),fill=NULL,ymin=(1-upper)*!!sym(slot),ymax=(1-lower)*!!sym(slot)),width=0)
  }
  g
}

#' Gene plot for snapshot timecourse data
#'
#' Plot the total RNA expression vs the new-to-total RNA ratio for a gene
#'
#' @param data the grandR object to get the data to be plotted from
#' @param gene the gene to plot
#' @param time the times to show on the x axis (see details)
#' @param mode.slot the mode.slot of the grandR object to get the data from
#' @param columns which columns (i.e. samples or cells) to show (see details)
#' @param average.lines add average lines?
#' @param exact.tics use axis labels directly corresponding to the available temporal values?
#' @param log show the y axis in log scale
#' @param show.CI show confidence intervals; one of TRUE/FALSE (default: FALSE)
#' @param aest parameter to set the visual attributes of the plot
#' @param size the point size used for plotting; overridden if size is defined via aest
#'
#' @details The x axis of this plot will show a temporal dimension. The time parameter defines a name in the \link{Coldata} table containing the temporal values for each sample.
#'
#' @details The value of the aest parameter must be an \emph{Aesthetic mapping} as generated by \link[ggplot2]{aes}.
#'
#' @details The table used for plotting is the table returned by \link{GetData} with coldata set to TRUE, i.e. you can use all names from the \link{Coldata} table for aest.
#'
#' @details By default, aest is set to aes(color=Condition,shape=Replicate) (if both Condition and Replicate are names in the Coldata table).
#'
#' @details Columns can be given as a logical, integer or character vector representing a selection of the columns (samples or cells).
#' The expression is evaluated in an environment having the \code{\link{Coldata}}, i.e. you can use names of \code{\link{Coldata}} as variables to
#' conveniently build a logical vector (e.g., columns=Condition=="x").
#'
#' @return a ggplot object.
#'
#' @seealso \link{GetData}, \link{PlotGeneOldVsNew},\link{PlotGeneGroupsPoints},\link{PlotGeneGroupsBars}
#'
#' @export
#' @concept geneplot
PlotGeneSnapshotTimecourse=function(data,gene,time=Design$dur.4sU,
                            mode.slot=DefaultSlot(data),
                            columns=NULL,
                            average.lines=TRUE,
                            exact.tics=TRUE,
                            log=TRUE,
                            show.CI=FALSE,aest=NULL,size=2) {
  # R CMD check guard for non-standard evaluation
  x <- colour <- group <- Value <- ntr <- lower <- upper <- linetype <- NULL

  aest=setup.default.aes(data,aest)
  if (length(slot)!=1) stop("Provide a single slot name!")

  columns=substitute(columns)
  columns=if (is.null(columns)) colnames(data) else eval(columns,Coldata(data),parent.frame())
  columns=Columns(data,columns)

  df=GetData(data,genes=gene,mode.slot=mode.slot,columns=columns,by.rows=FALSE,coldata=TRUE,ntr.na = FALSE)

  aes=utils::modifyList(aes(x=!!sym(time),y=Value),aest)
  if (!is.null(Condition(data))) aes=utils::modifyList(aes,aes(group=Condition))
  breaks=if (exact.tics) sort(unique(df[[time]])) else scales::breaks_extended(5)(df[[time]])

  oslot=mode.slot
  mode.slot=get.mode.slot(data,mode.slot)
  slot=mode.slot$slot
  mode=mode.slot$mode
  if (slot=="ntr") log=FALSE

  g=ggplot(df,mapping=aes)+cowplot::theme_cowplot()
  if (!is.null(aest$size)) g=g+geom_point(position=if(show.CI) position_dodge(width=0.4) else "identity") else g=g+geom_point(size=size,position=if(show.CI) position_dodge(width=0.4) else "identity")
  g=g+scale_x_continuous(NULL,labels = scales::number_format(accuracy = max(0.01,my.precision(breaks)),suffix="h"),breaks=breaks)
  if (slot=="ntr")g=g+ylab("NTR") else g=g+ylab(paste0(toupper(substr(mode,1,1)),substr(mode,2,nchar(mode))," RNA (",slot,")"))
  if (log) g=g+scale_y_log10()
  if (average.lines) {
    # compute average line:
    if (!is.null(Condition(data))) {
      ddf=as.data.frame(lapply(aes,function(col) rlang::eval_tidy(col,data=df)))
      ddf=plyr::ddply(ddf,intersect(names(ddf),c("x","colour","linetype","group")),function(s) c(Value=mean(s$y,na.rm=TRUE)))
      daes=aes(x,Value,group=group)
      if ("colour" %in% names(ddf)) daes=utils::modifyList(daes,aes(color=colour))
      if ("linetype" %in% names(ddf)) daes=utils::modifyList(daes,aes(linetype=linetype))
      g=g+geom_line(data=ddf,mapping=daes,inherit.aes=FALSE)
    } else {
      ddf=as.data.frame(lapply(aes,function(col) rlang::eval_tidy(col,data=df)))
      ddf=plyr::ddply(ddf,plyr::.(x),function(s) c(Value=mean(s$y,na.rm=TRUE)))
      g=g+geom_line(data=ddf,mapping=aes(x,Value),inherit.aes=F)
    }
  }
  if (show.CI) {
    if (!all(c("lower","upper") %in% Slots(data))) stop("Compute lower and upper slots first! (ComputeNtrCI)")
    df2=GetData(data,genes=gene,mode.slot=unique(c("lower","upper",slot,oslot)),by.rows=FALSE,coldata=TRUE,ntr.na = FALSE)
    if (slot=="ntr") {
      g=g+geom_errorbar(data=df2,mapping=aes(y=ntr,ymin=lower,ymax=upper),width=0,position=position_dodge(width=0.4))
    } else {
      g=switch(mode[1],
               total=g,
               new=g+geom_errorbar(data=df2,mapping=aes(y=!!sym(oslot),ymin=lower*!!sym(slot),ymax=upper*!!sym(slot)),width=0,position=position_dodge(width=0.4)),
               old=g+geom_errorbar(data=df2,mapping=aes(y=oslot,ymin=(1-upper)*!!sym(slot),ymax=(1-lower)*!!sym(slot)),width=0,position=position_dodge(width=0.4)),
               stop(paste0(mode," unknown!")))
    }
  }
  g
}

#' Rotate x axis labels
#'
#' Add this to a ggplot object to rotate the x axis labels
#'
#' @param angle the angle by which to rotate
#'
#' @return a ggplot theme object
#' @export
#'
#' @concept helper
RotatateAxisLabels=function(angle=90) {
  theme(axis.text.x = element_text(angle = angle, vjust = if (abs(angle-90)<10) 0.5 else 1, hjust=1))
}

Try the grandR package in your browser

Any scripts or data that you put into this service are public.

grandR documentation built on April 4, 2025, 2:27 a.m.