source("R/utils.R")
#' Correlation Coefficients Panel
#'
#' @author Jens Hooge
#'
#' @description
#' Plot absolute correlation coefficients (pearson) into panel
#' and adjust the text size according to the correlation
#'
#' @import graphics
#' @import stats
#'
#' @keywords internal
#'
#' @param x numeric vector
#' @param y numeric vector
#' @param digits Number of digits the correlation will be displayed with (Default: 2)
#' @param prefix Not yet documented
#' @param cex.cor Not yet documented
#' @param ... Additional parameters
#'
#' @return plot text element
panel_cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- abs(cor(x, y))
txt <- format(c(r, 0.123456789), digits = digits)[1]
txt <- paste0(prefix, txt)
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex = cex.cor * r)
}
#' Histogram Panel
#'
#' @author Jens Hooge
#'
#' @description
#' Plot histogram into diagonal panel of a numeric vector
#'
#' @import graphics
#'
#' @keywords internal
#'
#' @param x numeric vector
#' @param ... Additional parameters
#'
#' @return histogram with colored bars
panel_hist <- function(x, ...) {
usr <- par("usr"); on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks; nB <- length(breaks)
y <- h$counts; y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, col = "#67000D", ...)
}
#' Smooth Scatter Panel
#'
#' @author Jens Hooge
#'
#' @description
#' Plot a smoothScatter (color density of a scatter plot)
#' with a loess fit.
#'
#' @import graphics
#' @import grDevices
#'
#' @keywords internal
#'
#' @param x numeric vector
#' @param y numeric vector
#' @param bg Background (Default: NA)
#' @param cex Not yet documented
#' @param col.smooth Not yet documented
#' @param span Not yet documented
#' @param iter Not yet documented
#' @param ... Additional parameters
#'
#' @return smoothed scatter plot
panel_smoothScatter <- function (x, y, bg = NA,
cex = 1, col.smooth = "red",
span = 2/3, iter = 3, ...) {
# colors for the density
palette <- colorRampPalette(c("blue", "orange", "red"))
s <- smoothScatter(x, y, colramp = palette, bg = bg, cex = cex, add=T)
ok <- is.finite(x) & is.finite(y)
if (any(ok))
lines(stats::lowess(x[ok], y[ok], f = span, iter = iter),
col = col.smooth, ...)
}
#' Pairs Plot
#'
#' @author Jens Hooge
#'
#' @description
#' A matrix of plots is produced. The upper diagonal matrix
#' consists of the correlation between pairs of variables. The
#' lower diagonal matrix shows the scatter of each sample including
#' a density overlay and a loess fit. Finally the diagonal shows a histogram
#' of each of the input variables.
#'
#' @import graphics
#' @import grDevices
#'
#' @param mat A matrix or a dataframe with numerical columns
#' @param ... Additional parameters
#'
#' @seealso pairs
#'
#' @return Plots a pairs plot on screen
#'
#' @examples
#' \dontrun{
#' data(iris)
#' general.plot_pairs(iris[, 1:4])
#' }
#'
#' @export
general.plot_pairs <- function(mat, ...) {
pairs(mat,
lower.panel = panel_smoothScatter,
upper.panel = panel_cor,
diag.panel = panel_hist,
...)
}
#' Plots a volcano plot.
#'
#' @author Jens Hooge
#'
#' @description
#' The plot does not aim to show any details about which genes or
#' pathways are over/under represented, but is meant to show only
#' the overall directionality of the genes between groups. If there
#' is a certain perturbation in a system, the volcano plot may reveal
#' a response in which there is a dramatic increase or decreased in
#' transcription. In either case, we can get a better idea of the overall
#' system by showing a volcano plot. Each gene is represented with its p-value
#' as the x-axis and its (log2) fold change as the y-axis.
#'
#' @import ggplot2
#' @import ggrepel
#' @import dplyr
#'
#' @param data A data.frame which includes columns for -log10(p-values) and log2(foldchange)
#' @param title The figure title
#' @param groups A character vector with the treatment groups
#' @param top_labeled A numeric value that controls how many samples should be highlighted based on their pvalue
#' @param xlabel X-Axis Label
#' @param ylabel Y-Axis Label
#' @param xcutoff A numeric value for the -log10(p-value) cutoff
#' @param ycutoff A numeric value for the log2(foldchange) cutoff
#' @param log1p A flag to apply a log(1+x) transformation on the y axis to account for large values
#'
#' @return A ggplot2 figure
#' @export
#'
#' @examples
#' \dontrun{
#' data(iPSC)
#' ## Plot Simple Volcano Plot
#' general.volcano_plot(iPSC)
#'
#' ## Plot Simple Volcano Plot with Group Labels
#' general.volcano_plot(iPSC, groups=c("A", "B"))
#'
#' ## Plot Volcano Plot and add top10 most significant samples
#' general.volcano_plot(iPSC, top_labeled=10, xcutoff=c(-log2(2), log2(4)))
#'
#' ## Plot Volcano Plot and add Top10 most Significant Samples and Transform y-axis by log(1+x)
#' general.volcano_plot(iPSC, top_labeled=10, log1p=TRUE)
#' }
general.volcano_plot <- function(data, title="Volcano Plot",
groups=c("Untreated", "Treated"),
top_labeled=0,
xlabel=expression(log[2]("Foldchange")),
ylabel=expression(-log[10]("p-value")),
xcutoff=c(-log2(4), log2(4)),
ycutoff=-log10(.05),
log1p=FALSE) {
if(is.null(groups)) {
groups <- ""
}
if(length(groups)!=2) {
warning("Number of group labels should be 2. No group colors applied!")
}
data$fc <- log2(data$fc)
data$pvalue <- -log10(data$pvalue)
# Color corresponds to fold change directionality
fig <- ggplot() +
ggtitle(label = title) + # Add a title
xlab(xlabel) + # x-axis label
ylab(ylabel) + # y-axis label
geom_vline(xintercept = 0, colour = "black") + # add line at 0
geom_hline(yintercept = ycutoff, colour = "black") + # p(0.05) = 1.3
scale_x_continuous(limits = c(xcutoff[1]-2, xcutoff[2]+2)) +
theme_bw(base_size = 16) + # clean up theme
theme(legend.position = "none") # remove legend
if (length(groups)==2) {
## Set group labels
data <- data %>%
mutate(color = ifelse(data$fc > 0 & data$pvalue > ycutoff,
yes = groups[1],
no = ifelse(data$fc < 0 & data$pvalue > ycutoff,
yes = groups[2],
no = "none")))
data$color <- as.factor(data$color)
## This hack is needed for top_n to work properly during package build
data <- cbind(data$color, data)
data[,ncol(data)] <- NULL
colnames(data) <- c("color", "id", "fc", "pvalue")
## Color points by group
colors <- c("#E64B35", "#3182bd", "#636363")
names(colors) <- c(groups[1], groups[2], "none")
fig <- fig +
geom_point(data=data, aes_string(x = "fc", y = "pvalue", color = "color"),
size = 1.75, alpha = 0.8, na.rm = T) + # add gene points
annotate(geom = "text",
label = groups[1],
x = xcutoff[1], y = max(data$pvalue)+1,
size = 5, colour = "black") + # add Untreated text
annotate(geom = "text",
label = groups[2],
x = xcutoff[2], y = max(data$pvalue)+1,
size = 5, colour = "black") + # add Treated text
scale_color_manual(values = colors) # change colors
} else {
fig <- fig +
geom_point(data=data, aes_string(x = "fc", y = "pvalue"),
size = 1.75, alpha = 0.8, na.rm = T) + # Make dots bigger
scale_colour_gradient(low = "black", high = "black", guide = FALSE) # Color black
}
if(log1p) {
fig <- fig + scale_y_continuous(trans = "log1p") ## Apply log(1+x) transformation
}
## Label top pvalue points by their id
if (top_labeled > 0) {
# top_labeled <- top_n(data, n = top_labeled, wt = pvalue)
top_labeled <- top_n(data, n = top_labeled)
fig <- fig +
geom_text_repel(data = top_labeled,
mapping = aes_string(x = "fc", y = "pvalue", label = "id"),
size = 3,
fontface = 'bold',
color = 'black',
box.padding = unit(0.5, "lines"),
point.padding = unit(0.5, "lines"))
}
return(fig)
}
#' Function to create a barplot, with bars split by a second variable
#'
#' @author Sebastian Voss, Adam Skubala
#'
#' @param freq vector or matrix with the bar heights, e.g. a frequency table ('table()' output),
#' names will be used for bar labels if 'labels' is not specified, if 'freq' is a matrix,
#' entries of each column are represented by separate bars with no space in between
#' @param labels bar labels (default=NULL)
#' @param labels.ab additional labels, which are placed above the bars (default=NULL), should
#' have the same dimension as 'freq'
#' @param file.name name for the PNG output file (without '.png' ending)
#' @param col color for the bars (default='tomato3'). can be a single value or an object of
#' the same dimension than freq, specifying the color for each bar
#' @param cex.lab size of the bar labels (default=1)
#' @param cex.lab.ab size of the bar labels above the bars (default=1)
#' @param cex.x size of the x-labels (default=1)
#' @param xlab label for the x-axis (default='')
#' @param ylab label for the y-axis (default='Frequency')
#' @param add.legend TRUE/FALSE should a legend be added underneath the plot (default=TRUE),
#' only works if legend parameters are specified
#' @param leg.lab labels for the legend (default=NULL)
#' @param leg.col colors for the legend (default=NULL)
#' @param leg.title title for the legend (default=NULL)
#' @param mar plot margin (default=c(5,4,3,1))
#' @param png.width width of the PNG-file in inches (default=6)
#' @param png.height height of the PNG-file in inches (default=5)
#' @param png.out TRUE/FALSE if true, a png file is created as output, otherwise the plot
#' is created in the R graphic device
#'
#' @return PNG file with the barplot ('file.name.png') numeric vector with size of the PNG-file
#' in inches for rescaling in RTF
#' @export
#'
#' @examples
#' \dontrun{
#' set.seed(42)
#' x <- sample(1:15, size=500, replace=TRUE)
#' by <- sample(c('A','B'), size=500, replace=TRUE)
#' freq <- table(by, x)
#' col <- matrix(ncol=ncol(freq), nrow=nrow(freq))
#'
#' col1 <- colorRampPalette(c('tomato3','grey95'))
#' col[1,] <- col1(9)[1]
#' col[2,] <- col1(9)[7]
#'
#' col2 <- colorRampPalette(c('skyblue2','grey95'))
#' col[1,c(5,11)] <- col2(9)[1]
#' col[2,c(5,11)] <- col2(9)[7]
#'
#' cex.lab <- 0.7
#' cex.lab.ab <- 0.5
#' cex.x <- 0.9
#' labels <- NULL
#' labels.ab <- rep(paste0('X', 1:15), each=2)
#' labels.ab[which(1:length(labels.ab) %% 2 == 0)] <- ''
#' mar <- c(7,4,3,1)
#' xlab <- 'numbers'
#' ylab <- 'frequencies'
#' file.name <- 'test'
#' png.width <- 7
#' png.height <- 6
#' add.legend <- TRUE
#' leg.title <- 'color'
#' leg.col <- c(col1(9)[1],col1(9)[7],col2(9)[1],col2(9)[7])
#' leg.lab <- c('red','faded red','blue','faded blue')
#'
#' general.bar_plot_by(freq=freq, labels=NULL, labels.ab=labels.ab,
#' file.name='test1_barplot', col=col, cex.lab=0.7, cex.lab.ab=0.5, cex.x=0.8,
#' xlab='Number', ylab='Frequency', add.legend=TRUE,
#' leg.lab=leg.lab, leg.col=leg.col, leg.title='color',
#' mar=c(7,4,3,1), png.width=7, png.height=6)
#' general.bar_plot_by(freq=freq, labels=paste0('X', 1:15), labels.ab=NULL,
#' file.name='test2_barplot',
#' col=col, cex.lab=0.7, cex.lab.ab=0.5, cex.x=0.8,
#' xlab='Variable', ylab='Frequency', add.legend=TRUE,
#' leg.lab=leg.lab, leg.col=leg.col, leg.title='color',
#' mar=c(7,4,3,1), png.width=7, png.height=6)
#' general.bar_plot_by(freq=table(x), labels.ab=paste0('X', 1:15),
#' file.name='test3_barplot', col=col[2,5],
#' xlab='Number', ylab='Frequency')
#' general.bar_plot_by(freq=table(x), labels.ab=paste0('X', 1:15),
#' file.name='test4_barplot', col=col[2,5], mar=c(7,4,3,1),
#' xlab='Number', ylab='Frequency', add.legend=TRUE,
#' leg.title='color', leg.col=leg.col[4], leg.lab=leg.lab[4])
#' }
general.bar_plot_by <- function(freq, labels=NULL, labels.ab=NULL, file.name, col='tomato3', cex.lab=1, cex.lab.ab=1, cex.x=1,
xlab='', ylab='Frequency', add.legend=FALSE, leg.lab=NULL, leg.col=NULL, leg.title=NULL,
mar=c(5,4,3,1), png.width=6, png.height=5, png.out=TRUE){
#extract labels from 'freq' if 'labels' is not specified
if(is.null(labels)){
if(is.matrix(freq)){
labels <- colnames(freq)
}else{
labels <- names(freq)
}
}
#define length of y-axis
if(is.null(labels.ab)){
ymax <- max(freq)
}else{
ymax <- 1.05*max(freq)
}
#create bar plot
create.plot <- function(){
#middle points of the bars, which will be added to the plot
bar.mp <- barplot(freq, plot=FALSE, beside=TRUE)
##if 'freq' is a vector (no by variable specified), middle points are given as a matrix with one column,
##whereas they are given as a matrix with one row for each bylevel, when 'freq' is a matrix
if (ncol(bar.mp)==1){
bar.mp <- t(bar.mp)
}
bar.mp.mp <- colMeans(bar.mp)
#set plot margins
par(mar=mar)
#empty plot
plot(0,0, xlim=c(min(bar.mp)-0.5,max(bar.mp)+0.5), ylim=c(0,ymax),
type='n', xlab='', ylab=ylab, main='', axes=FALSE)
#color background of the plotting region
rect(par('usr')[1], par('usr')[3],par('usr')[2],par('usr')[4], col='grey95')
#add customized axes with 45 degree labels on the x-axis
box()
axis(1, at=bar.mp.mp, labels=rep('',length(bar.mp.mp)), tcl=-0.3)
axis(2)
#add labels to the x-axis (45-degree angle)
y.text <- grconvertY(-0.04, from = 'npc', to = 'user') #y-coordinate for the labels
text(bar.mp.mp, y.text, labels=labels, xpd=TRUE, srt=-45, cex=cex.lab, adj=c(0,0.5))
#add bars
barplot(freq, add=TRUE, axes=FALSE, axisnames=FALSE, border=NA, col=col, beside=TRUE)
#add labels above the bars
if(!is.null(labels.ab)){
text(bar.mp, freq+0.02*max(freq), labels=labels.ab, adj=c(0.5,0), cex=cex.lab.ab)
}
##add legend
if(add.legend==TRUE){
x.leg <- (par('usr')[1]+par('usr')[2])/2
y.leg <- grconvertY(0, from='ndc', to='user')
l <- legend(x.leg, y.leg, legend=leg.lab, fill=leg.col, border='white',
xjust=0.5, yjust=0, title=leg.title, xpd=TRUE, horiz=FALSE, ncol=2,
bty='n', cex=0.8)
}
#add x-label
if(add.legend==TRUE){
text((par('usr')[1]+par('usr')[2])/2, l$rect$top, paste(xlab,'\n\n',sep=''), xpd=TRUE, cex=cex.x)
}else{
mtext(xlab, cex=cex.x, side=1, line=par('mar')[1]-2)
}
}
#PNG or standard R graphic device
if (png.out==TRUE){
size <- c(png.height,png.width)
names(size) <- c('height','width')
#create png-file
png(gsub(' ','_',paste(file.name,'.png',sep='')), height=size[1], width=size[2], units='in', res=300)
create.plot()
dev.off()
#return PNG size (useful for rescaling)
return(size)
}else{
create.plot()
}
}
#' Function to create a simple barplot
#'
#' @author Sebastian Voss, Adam Skubala
#'
#' @param freq vector with the bar heights, e.g. a frequency table ('table()' output),
#' names will be used for bar labels if 'labels' is not specified
#' @param labels bar labels (default=NULL)
#' @param labels.ab additional labels, which are placed above the bars (default=NULL)
#' @param file.name name for the PNG output file (without '.png' ending)
#' @param col color for the bars (default='tomato3'). if more than one color is specified,
# each bar is filled with the repsective color
#' @param cex.lab size of the bar labels (default=1)
#' @param cex.lab.ab size of the bar labels above the bars (default=1)
#' @param cex.x size of the x-labels (default=1)
#' @param xlab label for the x-axis (default='')
#' @param ylab label for the y-axis (default='Frequency')
#' @param add.legend TRUE/FALSE should a legend be added underneath the plot
#' only works if legend parameters are specified (default=TRUE)
#' @param leg.lab labels for the legend (default=NULL)
#' @param leg.col colors for the legend (default=NULL)
#' @param leg.title title for the legend (default=NULL)
#' @param mar plot margin (default=c(5,4,3,1))
#' @param png.width width of the PNG-file in inches (default=6)
#' @param png.height height of the PNG-file in inches (default=5)
#'
#' @return PNG file with the barplot ('file.name.png') and a
#' numeric vector with size of the PNG-file in inches for rescaling in RTF
#' @export
#'
#' @examples
#' \dontrun{
#' set.seed(42)
#' x <- sample(1:30, size=500, replace=TRUE)
#' freq <- table(x)
#'
#' general.bar_plot_simple(freq, labels.ab=1:30, file.name="test",
#' col=rep(c('tomato3','tomato3','skyblue2'), 10),
#' cex.lab=.7, cex.lab.ab=.8, cex.x=.9,
#' ylab="Bar Height",
#' add.legend=TRUE, leg.title="Color",
#' leg.lab=c('red','blue'), leg.col=c('tomato3','skyblue2'),
#' mar=c(4,4,3,1))
#' }
general.bar_plot_simple <- function(freq, labels=NULL, labels.ab=NULL, file.name, col='tomato3', cex.lab=1, cex.lab.ab=1, cex.x=1,
xlab='', ylab='Frequency', add.legend=FALSE, leg.lab=NULL, leg.col=NULL, leg.title=NULL,
mar=c(5,4,3,1), png.width=6, png.height=5){
#extract labels from 'freq' if 'labels' is not specified
if(is.null(labels)){
labels <- names(freq)
}
#define length of y-axis
if(is.null(labels.ab)){
ymax <- max(freq)
}else{
ymax <- 1.05*max(freq)
}
#create bar plot
png(paste0(file.name,'.PNG'), width=png.width, height=png.height, units='in', res=300)
#middle points of the bars, which will be added to the plot
bar.mp <- barplot(freq, plot=FALSE)
#set plot margins
par(mar=mar)
#empty plot
plot(0,0, xlim=c(min(bar.mp)-0.5,max(bar.mp)+0.5), ylim=c(0,ymax),
type='n', xlab='', ylab=ylab, main='', axes=FALSE)
#color background of the plotting region
rect(par('usr')[1], par('usr')[3],par('usr')[2],par('usr')[4], col='grey95')
#add customized axes with 45 degree labels on the x-axis
box()
axis(1, at=bar.mp, labels=rep('',length(bar.mp)), tcl=-0.3)
axis(2)
#add labels to the x-axis (45-degree angle)
y.text <- grconvertY(-0.04, from = 'npc', to = 'user') #y-coordinate for the labels
text(bar.mp, y.text, labels=labels, xpd=TRUE, srt=-45, cex=cex.lab, adj=c(0,0.5))
#add bars
barplot(freq, add=TRUE, axes=FALSE, axisnames=FALSE, border=NA, col=col)
#add labels above the bars
if(!is.null(labels.ab)){
text(bar.mp, freq+0.02*max(freq), labels=labels.ab, adj=c(0.5,0), cex=cex.lab.ab)
}
##add legend
if(add.legend==TRUE){
x.leg <- (par('usr')[1]+par('usr')[2])/2
y.leg <- grconvertY(0, from='ndc', to='user')
l <- legend(x.leg, y.leg, legend=leg.lab, fill=leg.col, border='white',
xjust=0.5, yjust=0, title=leg.title, xpd=TRUE, horiz=FALSE,
bty='n', cex=0.8)
}
#add x-label
if(add.legend==TRUE){
text((par('usr')[1]+par('usr')[2])/2, l$rect$top, paste(xlab,'\n\n',sep=''), xpd=TRUE, cex=cex.x)
}else{
mtext(xlab, cex=cex.x, side=1, line=par('mar')[1]-2)
}
dev.off()
#return PNG size (useful for importing plot into an RTF document)
size <- c(png.height,png.width)
names(size) <- c('height','width')
return(size)
}
#' Function to create boxplots of a metric variable, grouped by two nominal variables
#'
#' @author Jens Hooge
#'
#' @import ggplot2
#'
#' @param data A data frame with group variables and a column with numeric values. (Required)
#' @param group_by1 First grouping variable for which the plot will be facetted by. (Required)
#' @param group_by2 Second grouping variable which defines the subgroup of group1 (Required)
#' @param col.jitter_by The variable name by which the jitter points will be colored by. (Default: NULL)
#' @param shape_jitter_by The variable name by which the jitter points will be shaped by. (Default: NULL)
#' @param title The title of the plot (Default: NULL)
#' @param xlab The x-axis label (Default: NULL)
#' @param ylab The y-axis label (Default: NULL)
#' @param legend Flag indicating whether the legend should be displayed (Default: FALSE)
#' @param rotate Flag indicating whether the x-axis labels should be rotated by 45° (Default: FALSE)
#'
#' @return A ggplot figure
#' @export
#'
#' @examples
#' \dontrun{
#' require(reshape2)
#' n <- 500
#' df <- data.frame(A=sample(c("a1", "a2", "a3"), n, replace=TRUE),
#' B=sample(c("b1", "b2"), n, replace=TRUE),
#' C=sample(c("c1", "c2", "c3"), n, replace=TRUE),
#' D=sample(c("d1", "d2", "d3"), n, replace=TRUE),
#' value=rnorm(n, 100, 1))
#'
#' general.box_plot_facetted(df, group_by1 = "A", group_by2 = "B",
#' col.jitter_by = "A", shape_jitter_by = "C",
#' rotate=TRUE)
#' }
general.box_plot_facetted <- function(data, group_by1, group_by2,
col.jitter_by=NULL, shape_jitter_by=NULL,
title=NULL, xlab=NULL, ylab=NULL, legend=FALSE,
rotate=FALSE) {
## Check Inputs
stopifnot(group_by1 %in% colnames(data),
group_by2 %in% colnames(data))
facet <- paste0("~", group_by2)
fig <- ggplot(df, aes_string(x=group_by1, y="value")) +
geom_boxplot(outlier.shape = NA) +
stat_boxplot(geom = "errorbar", width = 0.5, linetype="dashed") +
stat_summary(fun.y=mean, colour="black", geom="point",
shape=3, size=5, show.legend = FALSE) +
geom_jitter(aes_string(shape=shape_jitter_by,
col=col.jitter_by), alpha=.7) +
ggtitle(label = title) +
xlab(xlab) +
ylab(ylab) +
facet_grid(facet)
theme_bw()
if(!legend) {
fig <- fig + theme(legend.position = "none")
}
if(rotate) {
fig <- fig + theme(axis.text.x=element_text(angle=45, hjust=1))
}
return(fig)
}
#' Function to create a correlation plot.
#'
#' @author Sebastian Voss, Adam Skubala
#'
#' @import ellipse
#'
#' @TODO This function has to be refactored and the number of
#' input parameters have to be reduced.
#'
#' @description
#' The upper diagonale of the correlation matrix is visualized by colored ellipses,
#' with the color, shape and direction of the ellipsis showing the dependence of
#' the respective variable pair. the lower diagonale of the matrix-like plot
#' contains the corresponding correlation values.
#'
#' @param data data set in wide format (Required)
#' @param cor.type type of correlation coefficient, 'pearson', 'spearman' or 'kendall' (Default: 'spearman')
#' @param col vector of length two with the colors for negative and positive correlations (Default: c('tomato1','green3'))
#' @param plot.type plot type in the upper diagonale, 'ellipse' or 'points', i.e. scatterplots (Default='ellipse')
#' @param size.cor text size of the correlation numbers (Default: 1)
#' @param add.hist add histograms on the left hand side of the plot (Defaul=FALSE)
#' @param disp.sig.lev ellipses of correlations with a p-value above this threshold will not be displayed
#' and the corresponding correlation number will be displayed in grey (Default: 1)
#' @param disp.thresh ellipses of correlations with an absolute value below this threshold will not be displayed
# and the corresponding correlation number will be displayed in grey (Default: 0.1)
#' @param conf.level level for the confidence intervals (Does not work for 'kendall') (Default: Null)
#' @param n.boot number of bootstrap iterations, if confidence intervals for 'cor.type='spearman'' are calculated (Default: 1000)
#' @param pt.size size of the points, if plot.type='points' (Default: 0.4)
#' @param group.cut number of the last variable in the first group if variables can be split up into two groups,
# within group correlations will be faded out (Default: Null)
#' @param border color of the border around the ellipses (Default: NA)
#'
#' @param png.out output as PNG? (Default: FALSE)
#' @param png.width width of PNG in inches (Default: 5)
#' @param png.height height of PNG in inches (Default: 5)
#' @param file.name filename of the PNG without '.png'
#' @param ... Additional parameters for par()
#'
#' @return The correlation plot
#' @export
#'
#' @examples
#' \dontrun{
#' data(mtcars)
#' general.corplot(data=mtcars, cor.type='pearson', disp.sig.lev=0.05, group.cut=3)
#' general.corplot(data=mtcars, cor.type='pearson', disp.sig.lev=0.05, cex=0.95)
#' general.corplot(data=mtcars, cor.type='pearson', disp.sig.lev=0.05, group.cut=4,
#' conf.level=0.95, cex=0.95)
#' general.corplot(data=mtcars, cor.type='pearson', disp.sig.lev=0.05, group.cut=4,
#' plot.type='points', conf.level=0.95, cex=0.95)
#' general.corplot(data=mtcars, cor.type='pearson', disp.thresh=0.5, group.cut=4,
#' plot.type='points', conf.level=0.95, cex=0.95, add.hist=TRUE)
#' }
general.corplot <- function(data, cor.type=c('pearson','spearman','kendall'),
col=c('tomato1','green3'), plot.type=c('ellipse','points'),
size.cor=1, add.hist=FALSE, disp.sig.lev=1, disp.thresh=0.1,
conf.level=NULL, n.boot=1000, pt.size=0.4, group.cut=NULL, border=NA,
png.out=FALSE, png.width=5, png.height=5, file.name=NULL, ...) {
#check and recycle parameters
cor.type <- match.arg(cor.type)
plot.type <- match.arg(plot.type)
#calculate correlation matrix
corm <- cor(data, method=cor.type, use='pairwise.complete.obs')
#also in text format, rounded to 2 digits
corm.text <- format(round(corm,2), nsmall=2)
#create color gradient function for the specified colors
col.fun <- colorRamp(c(col[1],'white',col[2]), bias=1)
#plot function
create.plot <- function(){
#calculate margin size, based on par-settings and variable name lengths
par(mar=c(0.5,0.5,0.5,0.5), ...)
##string length
mar.var <- max(strwidth(colnames(data), units='inches'))
##transfrom from inches to lines
mar.var <- mar.var*(par('mar')/par('mai'))[1]
##set margins
par(mar=c(0.5,1+mar.var+0.5,1+mar.var+0.5,0.5), ...)
#empty plot
plot(0,0, xlim=c(0.5,ncol(corm)+ifelse(add.hist,1.5,0)+0.5), ylim=c(-(nrow(corm)+0.5),-0.5), type='n',
axes=FALSE, xlab='', ylab='')
#background rectangle and grid lines
rect(0.5, -(nrow(corm)+0.5), ncol(corm)+ifelse(add.hist,1.5,0)+0.5, -0.5, col='grey95', border=NA)
abline(v=seq(0.5,ncol(corm)+0.5,1), col='white')
abline(h=seq(-(nrow(corm)+0.5),-0.5,1), col='white')
abline(0,-1, col='white', lwd=6)
#add histograms
if (add.hist==TRUE){
#separate histogram column from the rest by white rectangle
rect(ncol(corm)+0.5, -(nrow(corm)+0.5), ncol(corm)+0.5+0.5, -0.5, col='white', border=NA)
for (i in 1:ncol(data)){
#calculate histogram
hist.i <- hist(data[,i], plot=FALSE)
x <- hist.i$breaks
y <- c(0,hist.i$counts)
#transform into panel coordinates
tr.i <- utils.trafo_pan(x=x, y=y, pan.row=i, pan.col=ncol(corm)+1.5)
#add histogram into panel
rect(xleft=tr.i$xt[-length(tr.i$xt)], ybottom=rep(tr.i$yt[1],length(tr.i$yt)-1),
xright=tr.i$xt[-1], ytop=tr.i$yt[-1], col='grey60', border='grey95')
}
}
#add row- and column names
text(x=rep(0, ncol(data)), y=-(1:ncol(data)), labels=colnames(data), adj=c(1,0.5),
xpd=TRUE, cex=1*par('cex'))
text(x=1:ncol(data), y=rep(0, ncol(data)), labels=colnames(data), adj=c(0,0.5),
srt=90, xpd=TRUE, cex=1*par('cex'))
#add ellipses and text for variables i and j
for (i in 1:(ncol(corm)-1)){
for (j in (i+1):nrow(corm)){
#calculate p-value
p.ij <- cor.test(data[,i], data[,j], method=cor.type, exact=ifelse(cor.type=='spearman',FALSE,TRUE))$p.value
#if correlation is significant and greater than the specified threshold, add ellipse in the upper triangle
if (p.ij<=disp.sig.lev & abs(corm[i,j])>=disp.thresh){
if(plot.type=='ellipse'){
ell <- ellipse(corm[i,j], t=0.4, npoints=1000)
ell[,1] <- ell[,1] + j
ell[,2] <- ell[,2] - i
polygon(ell, col=rgb(col.fun((corm[i,j]+1)/2), maxColorValue=255), border=border, lwd=0.5)
}else if(plot.type=='points'){
tr.ij <- utils.trafo_pan(x=data[,j], y=data[,i], pan.row=i, pan.col=j)
points(tr.ij$xt, tr.ij$yt, pch=20, cex=pt.size, col=rgb(col.fun((corm[i,j]+1)/2), maxColorValue=255))
}
}
#add correlation number (and confidence interval) in the lower triangle
if(!is.null(conf.level) & cor.type!='kendall'){
cor.height <- strheight(corm.text[i,j], cex=0.7*size.cor*par('cex'))
segments(i, -j-2.25*cor.height/2, i, -j+2.25*cor.height/2, col='grey66', lwd=0.75)
segments(i-0.1, -j-2.25*cor.height/2, i+0.1, -j-2.25*cor.height/2, col='grey66', lwd=0.75)
segments(i-0.1, -j+2.25*cor.height/2, i+0.1, -j+2.25*cor.height/2, col='grey66', lwd=0.75)
rect(i-0.45, -j-1.25*cor.height/2, i+0.45, -j+1.25*cor.height/2, col='grey95', border=NA)
if (cor.type=='spearman'){
set.seed(19121909)
cor.ci <- cor.ci(na.exclude(data[,c(i,j)]), n.iter=n.boot, p=1-conf.level, method=cor.type, plot=FALSE)
ci.l <- format(round(cor.ci$ci$low.e, 2), nsmall=2)
ci.u <- format(round(cor.ci$ci$up.e, 2), nsmall=2)
} else if (cor.type=='pearson'){
cor.ci <- cor.test(data[,i], data[,j], method=cor.type, exact=TRUE, conf.level=conf.level)$conf.int
ci.l <- format(round(cor.ci[1], 2), nsmall=2)
ci.u <- format(round(cor.ci[2], 2), nsmall=2)
}
text(i,-j-2*cor.height/2, ci.l, adj=c(0.5, 1.5), cex=0.4*size.cor*par('cex'), col='grey66')
text(i,-j+2*cor.height/2, ci.u, adj=c(0.5,-0.5), cex=0.4*size.cor*par('cex'), col='grey66')
} else if (!is.null(conf.level) & cor.type=='kendall'){
message('no CI for cor.type=\'kendall\'')
}
text(i,-j, corm.text[i,j], adj=c(0.5,0.5), cex=0.7*size.cor*par('cex'),
col=ifelse(p.ij<=disp.sig.lev & abs(corm[i,j])>=disp.thresh,'black','grey50'))
}
}
#if two groups are specified by a cutoff, the within group correlation matrix is faded out by a transparent rectangle
if (!is.null(group.cut)){
trans <- 0.75
rect(0.5, -(group.cut+0.5), group.cut+0.5, -0.5, col=adjustcolor('white', alpha.f=trans), border=NA)
rect(0.5+group.cut, -(nrow(corm)+0.5), ncol(corm)+0.5, -(0.5+group.cut), col=adjustcolor('white', alpha.f=trans), border=NA)
}
}
if (png.out==TRUE){
#output as PNG...
png(paste0(file.name,'.png'), height=png.height, width=png.width, units='in', res=600)
create.plot()
dev.off()
size <- c(png.height, png.width)
names(size) <- c('height','width')
invisible(size)
}else{
#...or in R device
create.plot()
}
}
#' Function to create histograms to check for the necessity of a log-transformation of metric data.
#'
#' @author Sebastian Voss, Adam Skubala
#'
#' @description
#' The function creates two histograms, one of the original data
#' and one of the log-tramsformed data. If desired, a second time point can be added,
#' so that a plot with four histograms is created.
#'
#' @param x Numeric vector (Required)
#' @param file.name begining of the name for the PNG output files (without '.png' ending) (Required)
#' @param by Character or factor vector of the same length than 'x' to
#' the time, if two time points should be analyzed. should
#' contain only two levels (Default: Null)
#' @param nbars number of bars for the histograms (Default: 15)
#' @param title Title of the plot (Default: '')
#' @param xlab Label for the x-axis (Default: '')
#' @param col.orig Color of the histogram bars of the original values (Default: 'lightblue')
#' @param col.log Color of the histogram bars of the log-transformed values (Default: 'lightyellow')
#'
#' @return A .png file with two or four histograms and a numeric vector
#' with the size of the .png
#' @export
#'
#' @examples
#' \dontrun{
#' x <- exp(rnorm(1000))
#' by <- factor(rep(c('baseline','day 30'), each=500),
#' level=c('baseline','day 30'))
#' general.hist_log(x=x, by=by, file.name='test_hist',
#' title='Test Historgram', xlab='Measurement')
#' }
general.hist_log <- function(x, by=NULL, file.name, nbars=15, title='', xlab='',
col.orig='lightblue', col.log='lightyellow'){
#define some graphical parameters for the case of just one time point
height.png <- 8
png.row <- 1
oma3 <- 5
mar1 <- 10
xlab.log <- 'log. values'
if (xlab==''){
xlab <- 'orig. values'
}
#rename data vector
x1 <- x
if (!is.null(by)){
#change graphical parameters if two time points have to be analyzed
height.png <- 14
png.row <- 2
oma3 <- 7
mar1 <- 12
#convert 'by' into factor
by <- factor(by)
#split data vector
x1 <- x[by==levels(by)[1]]
x2 <- x[by==levels(by)[2]]
}
#create PNG
png(paste0(file.name,'.png'), width=14, height=height.png, units='in', res=300)
#set graphical parameters
par(mfrow=c(png.row,2), oma=c(1,0,oma3,0), mar=c(mar1,7,3,2), cex.lab=2, cex.axis=2,
cex.main=2, mgp=c(4.5,1.5,0))
#create two histograms and add appropriate normal density
hist.data <- hist(x1, freq=TRUE, col=col.orig, xlab=xlab, ylab='frequency', main='',
breaks=utils.exBar(x1,nbars))
#normal density is scaled to fit the frequency distribution
curve(sum(hist.data$count)*diff(hist.data$breaks)[1]*dnorm(x, mean(x1), sd(x1)),
from=min(x1), to=max(x1), add=TRUE, lty=2, lwd=2)
hist.data <- hist(log(x1), freq=TRUE, col=col.log, xlab=xlab.log, ylab='frequency', main='',
breaks=utils.exBar(log(x1),nbars))
curve(sum(hist.data$count)*diff(hist.data$breaks)[1]*dnorm(x, mean(log(x1)), sd(log(x1))),
from=log(min(x1)), to=log(max(x1)), add=TRUE, lty=2, lwd=2)
#if a second time point is specified by 'by', add two more histograms + time labels
if (!is.null(by)){
hist.data <- hist(x2, freq=TRUE, col=col.orig, xlab=xlab, ylab='frequency', main='',
breaks=utils.exBar(x2,nbars))
curve(sum(hist.data$count)*diff(hist.data$breaks)[1]*dnorm(x, mean(x2), sd(x2)),
from=min(x2), to=max(x2), add=TRUE, lty=2, lwd=2)
hist.data <- hist(log(x2), freq=TRUE, col=col.log, xlab=xlab.log, ylab='frequency', main='',
breaks=utils.exBar(log(x2),nbars))
curve(sum(hist.data$count)*diff(hist.data$breaks)[1]*dnorm(x, mean(log(x2)), sd(log(x2))),
from=log(min(x2)), to=log(max(x2)), add=TRUE, lty=2, lwd=2)
mtext(levels(by)[1], side=3, line=1, outer=TRUE, cex=2)
mtext(levels(by)[2], side=3, line=-37, outer=TRUE, cex=2)
}
#add title and subtitle
mtext(title, side=3, line=oma3-3, outer=TRUE, font=2, cex=2.25)
mtext('----- rescaled density of normal distribution with appropriate mean and sd', side=1,
outer=TRUE, line=-2, cex=1.5)
dev.off()
#export size information to allow for convenient rescaling
size <- c(height.png,14)
names(size) <- c('height','width')
return(size)
}
#' Function to create scatterplots, color-coded by subgroups
#'
#' @author Sebastian Voss, Adam Skubala
#'
#' @param x numeric vector (Required)
#' @param y second numeric vector of the same length than 'x' (Required)
#' @param by character or factor vector of the same length than 'x' to specify
#' by-groups used for color-coding (Default: Null)
#' @param col.by colors for the 'by'-groups(Default: 1:length(unique(by)))
#' @param name.by character, name of the 'by'-variable, used as legend-title
#' @param mar margins of the plot as in 'par()' (Default: c(6,4,1,1))
#' @param xlab character, labels for the x-axis (Default: '')
#' @param ylab character, labels for the y-axis (Default: '')
#' @param xlim Range of the x-axis (Default: Null)
#' @param ylim Range of the y-axis (Default: Null)
#' @param mark TODO: This parameter was not documented in original code (Default: Null)
#' @param xlog TRUE/FALSE is x log-transformed? if yes, axis annotation will account for that (Default: FALSE)
#' @param ylog TRUE/FALSE is y log-transformed? if yes, axis annotation will account for that (Default: FALSE)
#' @param pch symbol for the jitter points (default=1, i.e. the same symbol for all points),
#' if at least as many symbols as the number of levels of the 'by.col' variable are given,
#' symbol coding is used in addition to color coding. (Default: 1)
#' @param cex.pch size of the points (Default: 0.75)
#' @param add.reg TRUE/FALSE if true, adds a regression line (Default: FALSE)
#' @param add.mean TODO: This parameter was not documented in original code (Default: FALSE)
#' @param add.leg TRUE/FALSE if true and 'by' is specified, a legend is added to the plot (Default: TRUE)
#' @param cex.mean TODO: This parameter was not documented in original code (Default: 1)
#' @param file.name name for the PNG output file (without '.png' ending) (Default: NULL)
#' @param png.out TRUE/FALSE if true, a png file is created as output, otherwise
#' the plot is created in the R graphic device (Default: TRUE)
#' @param png.width width of PNG file in inches (Default: 6)
#' @param png.height height of PNG file in inches (Default: 5)
#' @param ... Additional parameters for par()
#'
#' @return PNG files with the scatterplot ('file.name.png')
#' numeric vector with size of the PNG-file in inches for rescaling in RTF
#'
#' @export
#'
#' @examples
#' \dontrun{
#' set.seed(42)
#' x <- rnorm(500)
#' y <- x^2+rnorm(500)
#' by <- factor(c(rep('male', 250),
#' rep('female',250)),
#' levels=c('male','female'))
#'
#' general.scatter_by(x, y, by=by, col.by=c('skyblue','tomato'), name.by="Sex",
#' xlab="Sex", ylab="Biomarker", file.name="test", add.reg=TRUE,
#' mar=c(7,4,4,2), xlim=range(x), ylim=range(y),
#' pch <- c(1, 4), xlog=TRUE, add.mean=TRUE)
#' }
general.scatter_by <- function(x, y, by=NULL, col.by=1:length(unique(by)), name.by='', mar=c(6,4,2,1),
xlab='', ylab='', xlim=NULL, ylim=NULL, mark=NULL,
xlog=FALSE, ylog=FALSE, pch=1, cex.pch=0.75, add.reg=FALSE, add.mean=FALSE, add.leg=TRUE,
cex.mean=1, file.name=NULL, png.out=TRUE, png.width=6, png.height=5, ...){
#delete all observations with at least one NA
data <- cbind.data.frame(x, y)
if (!is.null(by)){
data <- cbind.data.frame(data, factor(by))
}
data <- na.exclude(data)
x <- data[,1]
y <- data[,2]
if (!is.null(by)){
by <- data[,3]
}
#create scatterplot
##plot function
create.plot <- function(){
#if a by-variable is specified, a legend is added to the plot
#-> bottom margin has to be bigger
if (!is.null(by) & add.leg==TRUE){
mar[1] <- mar[1]+0.5+0.25*length(levels(by))
}
par(mar=mar, ...)
#empty plot
plot(0,0, type='n', xlim=xlim, ylim=ylim, main='', ylab='', xlab='', axes=FALSE)
#color background of the plotting region
rect(par('usr')[1], par('usr')[3],par('usr')[2],par('usr')[4], col='grey95')
#add axis and box
if (xlog==FALSE){
axis(1)
}else{
axis(1, at=axTicks(1), labels=format(round(exp(axTicks(1)),2),nsmall=2))
}
if (ylog==FALSE){
axis(2)
}else{
axis(2, at=axTicks(2), labels=format(round(exp(axTicks(2)),2),nsmall=2))
}
box()
#color for points
if (!is.null(by)){
col.points <- col.by[as.numeric(by)]
}else{
col.points <- col.by[1]
}
#if at least as many symbols are given in 'pch' than the number of levels of the color variable,
#use symbol coding in addition to color coding
if (!is.null(by) & length(pch)>=length(levels(by))){
pch.by <- pch[as.numeric(by)]
#else use just the first symbol in 'pch'
}else{
pch.by <- pch[1] #for plot
pch <- rep(pch[1], length(levels(by))) #for legend
}
#add vertical and horizontal line if mean for overall mean
if (add.mean==TRUE & is.null(mark)){
abline(v=mean(x), lty=2, col='grey66')
abline(h=mean(y), lty=2, col='grey66')
}else if(!is.null(mark)){
abline(v=ifelse(xlog==TRUE, log(mark[1]), mark[1]), lty=2, col='grey66')
abline(h=ifelse(ylog==TRUE, log(mark[2]), mark[2]), lty=2, col='grey66')
}
#add points
points(x,y, col=col.points, cex=cex.pch, pch=pch.by)
#add means (crosses)
if (add.mean==TRUE){
#points(mean(x), mean(y), col='grey66', cex=6.5*cex.mean, pch=3, lwd=3)
if(!is.null(by)){
mby.x <- as.vector(by(x, INDICES=by, FUN=mean))
mby.y <- as.vector(by(y, INDICES=by, FUN=mean))
points(mby.x, mby.y, col=col.by, cex=5*cex.mean, pch=3, lwd=2.5)
}
}
#add regression line
if (add.reg==TRUE){
fit <- lm(y~x)
x.str <- ifelse(xlog==TRUE, 'log(x)','x')
y.str <- ifelse(ylog==TRUE, 'log(y)','y')
reg.label1 <- paste0('Regression line: ',y.str,' = ',format(round(summary(fit)$coef[1,1],2), nsmall=2))
reg.label2 <- ifelse(summary(fit)$coef[2,1]>=0,paste0(' + ',format(round(summary(fit)$coef[2,1],2), nsmall=2),' ',x.str),
paste0(' - ',format(round(-summary(fit)$coef[2,1],2), nsmall=2),' ',x.str))
reg.label3 <- paste0('\n(Pearson cor. = ',format(round(cor(x,y,method='p'),2),nsmall=2),', Spearman cor. = ',format(round(cor(x,y, method='s'),2),nsmall=2),', n = ',nrow(data),')')
reg.label <- paste0(reg.label1, reg.label2)
mtext(reg.label, side=3, line=par('mar')[3]-1.5, cex=0.8*par('cex'))
mtext(reg.label3, side=3, line=par('mar')[3]-2.5, cex=0.8*par('cex'))
abline(fit, col='grey40', lwd=2.5, lty=3)
}
#add legend, if 'by' is specified
if (!is.null(by) & add.leg==TRUE){
x.leg <- (par('usr')[1]+par('usr')[2])/2
y.leg <- grconvertY(0, from='ndc', to='user')
l <- legend(x.leg, y.leg, legend=levels(as.factor(by)), pch=pch[1:length(by)],
col=col.by, xjust=0.5, yjust=0,
title=name.by, xpd=TRUE, horiz=FALSE, bty='n', cex=0.8*par('cex'), ncol=2)
#add x-label
text((par('usr')[1]+par('usr')[2])/2, l$rect$top, paste(xlab,'\n\n',sep=''), xpd=TRUE, cex=1*par('cex'))
}else{
#add x-label
mtext(xlab, cex=1*par('cex'), side=1, line=par('mar')[1]-2)
}
#add y-label
mtext(ylab, cex=1*par('cex'), side=2, line=par('mar')[2]-1)
}
#PNG or standard R graphic device
if (png.out==TRUE){
png(paste0(file.name,'.png'), width=png.width, height=png.height, units='in', res=300)
create.plot()
dev.off()
#return PNG size (useful for importing plot into an RTF document)
size <- c(png.height, png.width)
names(size) <- c('height','width')
return(size)
}else{
create.plot()
}
}
#' Custom geom for general.lineplot
#'
#' @keywords internal
#'
#' @author Jens Hooge
#'
#' @seealso ggplot2::layer
#'
#' @param mapping Set of aesthetic mappings created by aes or aes_.
#' If specified and inherit.aes = TRUE (the default),
#' it is combined with the default mapping at the top
#' level of the plot. You must supply mapping if there
#' is no plot mapping.
#' @param data The data to be displayed in this layer. There are three options:
#' If NULL, the default, the data is inherited from the plot data as
#' specified in the call to ggplot.
#' A data.frame, or other object, will override the plot data.
#' All objects will be fortified to produce a data frame. See fortify
#' for which variables will be created.
#' A function will be called with a single argument, the plot data.
#' The return value must be a data.frame., and will be used as the layer data.
#' @param var.type The way the variability region is displayed (e.g. 'errorbars' or 'ribbon')
#' @param stat The statistical transformation to use on the data for this layer, as a string.
#' @param position Position adjustment, either as a string, or the result of a call to a position
#' adjustment function.
#' @param na.rm If FALSE, the default, missing values are removed with a warning.
#' If TRUE, missing values are silently removed.
#' @param show.legend logical. Should this layer be included in the legends?
#' NA, the default, includes if any aesthetics are mapped.
#' FALSE never includes, and TRUE always includes.
#' @param inherit.aes If FALSE, overrides the default aesthetics, rather than combining with them.
#' This is most useful for helper functions that define both data and aesthetics
#' and shouldn't inherit behaviour from the default plot specification, e.g. borders.
#' @param ... other arguments passed on to layer. These are often aesthetics, used to set an
#' aesthetic to a fixed value, like color = "red" or size = 3. They may also be parameters to the
#' paired geom/stat.
#'
#' @return ggplot2 layer
geom_variability <- function(mapping = NULL, data = NULL, var.type=c("errorbar", "ribbon"), stat = "identity", position = "identity",
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ...) {
geom <- geom_errorbar(mapping, data = data,
stat = stat, position = position,
na.rm = na.rm, show.legend = show.legend, inherit.aes = inherit.aes,
width=0.2, ...)
if (var.type=="ribbon") {
geom <- geom_ribbon(mapping, data = data,
stat = stat, position = position,
na.rm = na.rm, show.legend = show.legend, inherit.aes = inherit.aes,
alpha=0.3, ...)
}
return(geom)
}
#' Function generate (non-)facetted lineplot
#'
#' @author Jens Hooge
#'
#' @param data A data frame in long format (Required)
#' @param by Variable the plot should be facetted by (Default: NULL)
#' @param facet.rows Number of rows in which the facets should be displayed. (Default: 1)
#' @param upper.err Upper bound width on errorbars or ribbon variability region (Default: 1)
#' @param lower.err Lower bound width on errorbars or ribbon variability region (Default: 1)
#' @param var.type Type of how to display variability ("errorbar", "ribbon") (Default: "errorbar")
#' @param title Title of the plot (Default: NULL)
#' @param xlab x-axis label (Default: NULL)
#' @param ylab y-axis label (Default: NULL)
#' @param xlim Upper and lower limit (e.g. c(0, 10)) of x-axis (Default: NULL)
#' @param ylim Upper and lower limit (e.g. c(0, 10)) of y-axis (Default: NULL)
#' @param legend.position Position of the legend ("none", "left", "right", "top", "bottom") (Default: "left")
#' @param size Point Size (Default: 3)
#' @param x.log10trans Logical flag for log10 x-axis trasformation
#' @param y.log10trans Logical flag for log10 y-axis trasformation
#'
#' @return a ggplot2 plot
#' @export
#'
#' @examples
#' \dontrun{
#' library(reshape2)
#' library(ggplot2)
#' n=7
#' df <- data.frame(Dose1=exp((1/2)*1:n),
#' Dose2=exp((1/3)*1:n),
#' Dose3=exp((1/4)*1:n),
#' Dose4=exp((1/5)*1:n),
#' Dose5=exp((1/6)*1:n),
#' Dose6=exp((1/7)*1:n),
#' Visit=1:n)
#' df <- melt(df, id=c("Visit"))
#' colnames(df) <- c("Visit", "Dose", "Measure")
#'
#' ## Lineplot with Errorbars
#' general.lineplot(df, upper.err=1, lower.err=1, var.type="errorbar")
#'
#' ## Lineplot with Errorbars and log transformed y axis
#' general.lineplot(df, upper.err=1, lower.err=1, var.type="errorbar", y.log10trans = T)
#'
#' ## Facetted Lineplot
#' general.lineplot(df, by="Dose", facet.row=3, upper.err=1, lower.err=1,
#' var.type="errorbar", y.log10trans = T)
#'
#' ## Lineplot with Error Ribbon
#' general.lineplot(df, by="Dose", upper.err=1, lower.err=1, var.type="ribbon")
#'
#' ## Lineplot with Error Ribbon and log transformed y-axis
#' general.lineplot(df, by="Dose", upper.err=1, lower.err=1, var.type="ribbon", y.log10trans = T)
#' }
general.lineplot <- function(data, by=NULL, facet.rows=1,
upper.err, lower.err,
var.type=c("errorbar", "ribbon"),
title=NULL, xlab=NULL, ylab=NULL,
xlim=NULL, ylim=NULL,
legend.position="right", size=3,
x.log10trans=FALSE, y.log10trans=FALSE) {
# Define the top and bottom of the errorbars
fig <- ggplot(data, aes(data$Visit, data$Measure)) +
geom_point(aes(col=data$Dose), size=size) +
geom_line(aes(col=data$Dose)) +
geom_variability(aes(ymax = data$Measure+upper.err, ymin=data$Measure-lower.err,
col=data$Dose, fill=data$Dose), var.type = var.type) +
ggtitle(label = title) +
xlab(xlab) +
ylab(ylab) +
theme_bw() +
theme(legend.position=legend.position)
if(!is.null(by)) {
stopifnot(by %in% colnames(data))
facet <- as.formula(paste0("~", by))
fig <- fig + facet_wrap(facet, scales = "free", nrow=facet.rows)
}
if(!is.null(xlim)) {
fig <- fig + xlim(xlim)
}
if(!is.null(ylim)) {
fig <- fig + ylim(ylim)
}
if(x.log10trans) {
fig <- fig + scale_x_log10()
}
if(y.log10trans) {
fig <- fig + scale_y_log10()
}
return(fig)
}
#' Function to generate a facetted scatter plot
#'
#' @author Jens Hooge
#'
#' @keywords internal
#'
#' @param data A dataframe with at least three columns (x,y and a facetting variable) (Required)
#' @param by Grouping variable (Required)
#' @param fun Function for statistical aggregation function (Default: "mean")
#' @param smooth.fun Smoothing method (function) to use, eg. "lm", "glm", "gam", "loess", "rlm". (Default: "auto")
#' For method = "auto" the smoothing method is chosen based on the size of the largest group
#' (across all panels). loess is used for less than 1,000 observations; otherwise gam is used with
#' formula = y ~ s(x, bs = "cs"). Somewhat anecdotally, loess gives a better appearance,
#' but is O(n^2) in memory, so does not work for larger datasets.
#' @param smooth.formula Formula to use in smoothing function, eg. y ~ x, y ~ poly(x, 2), y ~ log(x) (Default: y ~ x)
#' @param density Display density contour (Default: FALSE)
#' @param title Plot Title (Default: NULL)
#' @param xlab x-axis label (Default: NULL)
#' @param ylab y-axis label (Default: NULL)
#' @param legend.pos (Default: "none")
#'
#' @return ggplot2 figure
general.scatter.facetted <- function(data, by, fun="mean", smooth.fun="auto", smooth.formula=y ~ x,
density=FALSE,
title=NULL, xlab=NULL, ylab=NULL,
legend.pos="none") {
xagg <- aggregate(cbind(data$x, data$y), by=list(data[, by]), fun)
colnames(xagg) <- c(by, paste0("x.", fun), paste0("y.", fun))
data <- left_join(data, xagg, by)
facet <- paste0("~", by)
fig <- ggplot(data, aes_string(x=data$x, y=data$y, color=by)) +
geom_point() +
geom_point(aes_string(x=paste0("x.", fun), y=paste0("y.", fun)),
shape=3, size=18, show.legend = FALSE) +
geom_smooth(method=smooth.fun, formula = smooth.formula) +
facet_grid(facet) +
ggtitle(label = title) +
xlab(xlab) +
ylab(ylab) +
theme_bw() +
theme(legend.position = legend.pos)
if(density) {
fig <- fig + stat_density2d(aes(fill=..level..,alpha=..level..),
geom='polygon',colour='black') +
scale_fill_continuous(low="green", high="red") +
guides(alpha="none")
}
return(fig)
}
#' Function to generate a simple scatter plot
#'
#' @author Jens Hooge
#'
#' @keywords internal
#'
#' @param data A dataframe with at least three columns (x,y and a facetting variable) (Required)
#' @param by Grouping variable (Default: NULL)
#' @param fun Function for statistical aggregation function (Default: "mean")
#' @param smooth.fun Smoothing method (function) to use, eg. "lm", "glm", "gam", "loess", "rlm". (Default: "auto")
#' For method = "auto" the smoothing method is chosen based on the size of the largest group
#' (across all panels). loess is used for less than 1,000 observations; otherwise gam is used with
#' formula = y ~ s(x, bs = "cs"). Somewhat anecdotally, loess gives a better appearance,
#' but is O(n^2) in memory, so does not work for larger datasets.
#' @param smooth.formula Formula to use in smoothing function, eg. y ~ x, y ~ poly(x, 2), y ~ log(x) (Default: y ~ x)
#' @param density Display density contour (Default: FALSE)
#' @param title Plot Title (Default: NULL)
#' @param xlab x-axis label (Default: NULL)
#' @param ylab y-axis label (Default: NULL)
#' @param legend.pos (Default: "none")
#'
#' @return ggplot2 figure
general.scatter.simple <- function(data, by=NULL, fun="mean", smooth.fun="auto", smooth.formula=y ~ x,
density=FALSE,
title=NULL, xlab=NULL, ylab=NULL,
legend.pos="none") {
xagg <- apply(data[, c("x", "y")], 2, fun, na.rm=T)
xagg <- data.frame(rep(xagg[1], nrow(data)),
rep(xagg[2], nrow(data)))
colnames(xagg) <- c(paste0("x.", fun), paste0("y.", fun))
data <- cbind(data, xagg)
fig <- ggplot(data, aes_string(x=data$x, y=data$y)) +
geom_point() +
geom_point(aes_string(x=paste0("x.", fun), y=paste0("y.", fun)),
shape=3, size=18, show.legend = FALSE) +
geom_smooth(method=smooth.fun, formula = smooth.formula) +
ggtitle(label = title) +
xlab(xlab) +
ylab(ylab) +
theme_bw() +
theme(legend.position = legend.pos)
if(density) {
fig <- fig + stat_density2d(aes(fill=..level..,alpha=..level..),
geom='polygon',colour='black') +
scale_fill_continuous(low="green", high="red") +
guides(alpha="none")
}
return(fig)
}
#' Function factory to generate a scatter plots
#'
#' @author Jens Hooge
#'
#' @description
#' Depending on the grouping variable 'by', this function factory generates
#' either an ungrouped (by=NULL) or a grouped scatterplot (by=<grouping_variable>).
#' This function provides the option for different regression fits, based on an inbuilt
#' smooth function or a custom model, defined by smooth.formula.
#'
#' @param data A dataframe with at least three columns (x,y and a facetting variable) (Required)
#' @param by Grouping variable (Required)
#' @param fun Function for statistical aggregation function (Default: "mean")
#' @param smooth.fun Smoothing method (function) to use, eg. "lm", "glm", "gam", "loess", "rlm". (Default: "auto")
#' For method = "auto" the smoothing method is chosen based on the size of the largest group
#' (across all panels). loess is used for less than 1,000 observations; otherwise gam is used with
#' formula = y ~ s(x, bs = "cs"). Somewhat anecdotally, loess gives a better appearance,
#' but is O(n^2) in memory, so does not work for larger datasets.
#' @param smooth.formula Formula to use in smoothing function, eg. y ~ x, y ~ poly(x, 2), y ~ log(x) (Default: y ~ x)
#' @param density Display density contour (Default: FALSE)
#' @param title Plot Title (Default: NULL)
#' @param xlab x-axis label (Default: NULL)
#' @param ylab y-axis label (Default: NULL)
#' @param legend.pos (Default: "none")
#'
#' @return ggplot2 figure
#' @export
#'
#' @examples
#' \dontrun{
#' x <- rnorm(500)
#' y <- rnorm(500)+x^2
#' df <- data.frame(x=x, y=y,
#' A=sample(c("a1", "a2"), 500, replace = TRUE),
#' B=sample(c("b1", "b2"), 500, replace = TRUE))
#'
#' ## A simple ungrouped scatter plot with a loess fit
#' general.scatter(df)
#'
#' ## A grouped scatter plot with a loess fit
#' general.scatter(df, by="A")
#'
#' ## An ungrouped scatter plot and a marker indicating the data variance
#' general.scatter(df, fun="var")
#'
#' ## Scatterplots with different regression fits
#' # Linear Fit
#' general.scatter(df, by="A", smooth.fun="lm")
#'
#' ## Robust Linear Fit
#' library(MASS)
#' general.scatter(df, by="A", smooth.fun="rlm")
#'
#' ## User defined polynomial fit
#' # Degree = 1 resulting in a linear fit
#' general.scatter(df, smooth.fun="glm", smooth.formula=y ~ poly(x, 1))
#' # Degree = 2 resulting in a quadratic fit
#' general.scatter(df, smooth.fun="glm", smooth.formula=y ~ poly(x, 2))
#'
#' ## Equivalently for grouped scatterplots
#' # Degree = 1 resulting in a linear fit
#' general.scatter(df, by="A", smooth.fun="glm", smooth.formula=y ~ poly(x, 1))
#' # Degree = 2 resulting in a quadratic fit
#' general.scatter(df, by="B", smooth.fun="glm", smooth.formula=y ~ poly(x, 2))
#'
#' ## Including a density contour
#' general.scatter(df, by="B", smooth.fun="glm", smooth.formula=y ~ poly(x, 2),
#' density=T, legend.pos="")
#' }
general.scatter <- function(data, by=NULL, fun="mean", smooth.fun="auto", smooth.formula=y ~ x,
density=FALSE,
title=NULL, xlab=NULL, ylab=NULL,
legend.pos="none") {
args <- list(data=data, by=by, fun=fun,
density=density,
smooth.fun=smooth.fun, smooth.formula=smooth.formula,
title=title, xlab=xlab, ylab=ylab,
legend.pos=legend.pos)
fig <- NULL
if(is.null(by)) {
fig <- do.call(general.scatter.simple, args)
} else {
fig <- do.call(general.scatter.facetted, args)
}
return(fig)
}
#' Function to plot the skewness of of the input data
#'
#' @import e1071
#'
#' @param data Input data in form of a numeric vector or a data.frame
#'
#' @return A ggplot figure
#' @export
#'
#' @examples
#' \dontrun{
#' ## Bar plot for data with small number of variables (<=50)
#' set.seed(42)
#' n <- 100 ## number of samples
#' m <- 10 ## number of variables
#' alpha <- runif(m, 0, 100)
#' beta <- runif(m, 0, 100)
#' data <- as.data.frame(mapply(rbeta, n1, alpha, beta))
#' general.skewness(data)
#'
#' ## Density plot for data with large naumber of variables (>50)
#' set.seed(42)
#' n <- 100 ## number of samples
#' m <- 50 ## number of variables
#' alpha <- runif(m, .1, 100)
#' beta <- runif(m, .1, 100)
#' data <- as.data.frame(mapply(rbeta, n, alpha, beta))
#' general.skewness(data)
#' }
general.skewness <- function(data) {
X <- as.data.frame(data) ## in case it is just a numeric vector
skewValues <- as.data.frame(sapply(X, skewness, na.rm=TRUE, type=1))
skewValues$variable <- rownames(skewValues)
rownames(skewValues) <- NULL
colnames(skewValues) <- c("skewnessValue", "variable")
skewValues$variable <- as.factor(skewValues$variable)
if (ncol(X) < 50){
fig <- ggplot(skewValues, aes(x=variable, y=skewnessValue)) +
geom_bar(stat="identity", position="dodge") +
geom_hline(yintercept = 2, linetype = "dashed", color="red") +
geom_hline(yintercept = -2, linetype = "dashed", color="red") +
ylab(NULL) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.5))
} else {
fig <- ggplot(skewValues, aes(x=skewnessValue)) +
geom_density() +
geom_point(aes(x=skewnessValue, y = 0.0005),
alpha = 0.25, size=4, colour="darkgray") +
geom_vline(xintercept = 2, linetype = "dashed", color="red") +
geom_vline(xintercept = -2, linetype = "dashed", color="red") +
annotate("text", x = Inf, y = Inf, label = sprintf("n=%i", ncol(X)),
vjust=1.8, hjust=1.2) +
xlab(NULL) +
ylab(NULL) +
theme_bw()
}
return(fig)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.