#' Calculate average gene expression between replicates
#'
#' @param counts Data.frame of counts (rows=genes, columns=samples)
#' @param cond character vector of length ncol(counts) assigning each column to a condition
#' @param method character of either "mean", "median", or "geoMean" of how to calculate average expression. Default is "mean"
#' @return Data frame with rows=genes and columns=condition with indicated average of gene expression beween replicates
#' @export
avgExp<-function(counts, cond, method=c("mean","median","geoMean")){
levs<-unique(cond)
x<-c()
if(method[1] == "mean"){
for(i in 1:length(levs)){
x<-cbind(x, rowMeans(counts[,which(cond %in% levs[i])]))
}
}
if(method[1] == "median"){
for(i in 1:length(levs)){
x<-cbind(x, Biobase::rowMedian(counts[,which(cond %in% levs[i])]))
}
}
if(method[1] == "geoMean"){
for(i in 1:length(levs)){
x<-cbind(x, BinfTools:::rowGeoMean(counts[,which(cond %in% levs[i])]))
}
}
rownames(x)<-rownames(counts)
colnames(x)<-levs
return(as.data.frame(x))
}
#' Sample correlation heatmap
#'
#' @param counts data.frame object with rows=genes, and columns=samples
#' @param method character indicating correlation method, default is "spearman"
#' @param title character indicating plot title
#' @param hmcol Color ramp palette to use for heatmap colours. Defaults to 'RdYlBu' colours
#' @param showNum Boolean indicating if correlation coefficients should be displayed on heatmap. Default=TRUE.
#' @param showTree Boolean indicating if hierarchical clustering trees should be shown. Default=FALSE
#' @param annodf Data frame with rownames=sample names and columns of annotation data to be plotted. Leave NULL if none.
#' @param annoCols Colour palette for annotations
#' @return Heatmap with clustered rows/columns of correlation between samples
#' @export
corHeat<-function(counts, method="spearman", title="Spearman Correlation", hmcol=NULL, showNum=TRUE, showTree=FALSE, annodf=NULL, annoCols="Dark2"){
M<-cor(counts, method=method)
if (is.null(hmcol)) {
hmcol <- colorRampPalette(rev(colPal("RdYlBu")))(100)
}
treeheight=50
if(isFALSE(showTree)){
treeheight=0
}
if(is.null(annodf)){
annoCols=NULL
} else {
cols<-colPal(annoCols)
annoCols<-list()
for(i in 1:ncol(annodf)){
annodf[,i]<-factor(annodf[,i])
annoCols[[i]]<-cols[1:length(levels(annodf[,i]))]
names(annoCols[[i]])<-levels(annodf[,i])
names(annoCols)[i]<-colnames(annodf)[i]
}
}
lim<-NULL
if(min(M)<0){
lim<-seq(from=-1, to=1, length.out=100)
}
pheatmap::pheatmap(M, color=hmcol, main=title, display_numbers=showNum, treeheight_col = treeheight, treeheight_row = treeheight,
annotation_row=annodf, annotation_col=annodf, annotation_colors = annoCols, breaks=lim)
}
## colInds is indices of columns in data.frame
.custPairs <- function(colInds, data=iris, title="test", m="spearman") {
n <- length(colInds)
cols <- expand.grid(names(data)[colInds], names(data)[colInds])
cInds <- unlist(mapply(function(a, b, c) a*n+b:c, 0:max(0,n-2), 2:n, rep(n, n-1)))
cols <- cols[cInds,] # indices will be in column major order
#print(cols)
## These parameters are applied to each plot we create
#pars <- list(geom_point(alpha=0.8, color="black"),
# geom_smooth(method=m, color="red", lwd=1))
## Create the plots (dont need the lower plots in the ggpairs call)
#plots <- apply(cols, 1, function(cols)
# ggplot(data[,cols], aes_string(x=cols[2], y=cols[1])) + pars)
plots<-apply(cols, 1, function(cols)
ggpubr::ggscatter(data, x=cols[1], y=cols[2],
add="reg.line", conf.int=TRUE,
cor.coef=FALSE, cor.method=m,
add.params=list(color="red", fill="lightpink")))
gg <- GGally::ggpairs(data[, colInds],
diag=list(continuous="blankDiag"),
upper=list(continuous=GGally::wrap("cor", method=m)),
title=title)
rowFromTop <- unlist(mapply(`:`, 2:n, rep(n, n-1)))
colFromLeft <- rep(1:(n-1), times=(n-1):1)
for (i in seq_along(plots))
gg <- GGally::putPlot(gg, plots[[i]], rowFromTop[i], colFromLeft[i])
return( gg )
}
.corPlots<-function(avgmat, title="Correlation", method="spearman", transform="none", printInd=TRUE){
spPlot<-function(dat, x, y, xlab, ylab, title, method){
g<-ggpubr::ggscatter(dat, x=x, y=y,
add="reg.line", conf.int=TRUE,
cor.coef=TRUE, cor.method=method,
add.params=list(color="red", fill="red"),
xlab=xlab, ylab=ylab, title=title)
print(g)
}
pre<-"Mean Normalized Expression -"
if(transform == "log10"){
avgmat<-log10(1+avgmat)
pre<-paste("Log10",pre)
}
if(transform == "log2"){
avgmat<-log2(1+avgmat)
pre<-paste("Log2",pre)
}
print(.custPairs(colInds=c(1:ncol(avgmat)), data=avgmat, title=title, m=method))
if(isTRUE(printInd)){
for(i in 1:ncol(avgmat)){
if(i<ncol(avgmat)){
for(j in (i+1):ncol(avgmat)){
x<-colnames(avgmat)[i]
y<-colnames(avgmat)[j]
message("Plotting",x,"vs",y,"...")
spPlot(avgmat, x, y, xlab=paste(pre,x), ylab=paste(pre,y), title, method)
}
}
}
}
}
#'Expresion correlation plots of all gene expressions
#'
#'@param counts data.frame object with rows=genes and columns=samples of gene expression
#'@param cond character vector of length ncol(counts) assigning each sample to a condition
#'@param title character indicating the title of the plot(s)
#'@param method character indicating correlation method. Default is 'spearman'
#'@param transform character indicating if gene expression should be transformed. Default is "none", other options are "log2" and "log10"
#'@param printInd Boolean indicating if individual correlation plots should be printed in addition to paired plots. Default is FALSE.
#'@return correlation plots
#'@export
expCor<-function(counts, cond, title, method="spearman", transform="none", printInd=FALSE){
avgmat<-avgExp(counts, cond)
.corPlots(avgmat, title, method, transform, printInd)
}
#Plot a correlation between two columns from two different or the same data frame or two named vectors
#Great for comparing gene expression changes or values, or two gene rankings for GSEA
#' Correlation Plotting
#' Plot a correlation bewteen any two columns from any data frame(s) or named vectors
#'
#' @param x Data frame with rownames or named vector containing data to be plotted on the x-axis
#' @param y Data frame with rownames or named vector containing data to be plotted on the y-axis
#' @param xCol Character indicating column name in x of data to be plotted on the x-axis. Leave FALSE if x is a named vector.
#' @param yCol Character indicating column name in y of data to be plotted on the y-axis. Leave FALSE if y is a named vector.
#' @param xlab Character indicating x-axis label
#' @param ylab Character indicating y-axis label
#' @param title Character indicating title of plot
#' @param scale Should the data be z-score scaled? Default is FALSE.
#' @param showP Boolean indicating if the p-value should also be displayed (Default = FALSE)
#' @return Scatter plot of data with line of best fit and R-value of correlation. Note that x and y data are paired based on the rownames of the data frame or names of the named vector. Non-paired values will not be plotted.
#' @export
plotCor<-function(x,y, xCol = FALSE, yCol = FALSE, xlab = "", ylab = "", title = "", scale = FALSE, showP = FALSE){
if(isFALSE(xCol)){
if(isFALSE(yCol)){
x<-x[which(names(x) %in% names(y))]
y<-y[which(names(y) %in% names(x))]
} else {
x<-x[which(names(x) %in% rownames(y))]
tmpNames<-rownames(y)[which(rownames(y) %in% names(x))]
y<-y[which(rownames(y) %in% names(x)),which(colnames(y) %in% yCol)]
names(y)<-tmpNames
}
} else {
if(isFALSE(yCol)){
tmpNames<-rownames(x)[which(rownames(x) %in% names(y))]
x<-x[which(rownames(x) %in% names(y)),which(colnames(x) %in% xCol)]
names(x)<-tmpNames
y<-y[which(names(y) %in% names(x))]
} else {
tmpNames<-rownames(x)[which(rownames(x) %in% rownames(y))]
x<-x[which(rownames(x) %in% rownames(y)), which(colnames(x) %in% xCol)]
names(x)<-tmpNames
tmpNames<-rownames(y)[which(rownames(y) %in% names(x))]
y<-y[which(rownames(y) %in% names(x)), which(colnames(y) %in% yCol)]
names(y)<-tmpNames
}
}
x<-x[order(names(x))]
y<-y[order(names(y))]
#print(length(x))
#print(length(y))
if(isTRUE(scale)){
message("z-score...")
x<-scale(x)
y<-scale(y)
}
fit<-lm(y~x)
plot(x,y, main=title, pch=16, xlab=xlab, ylab=ylab)
abline(fit, lty=2, col="red")
if(isFALSE(showP)){
legend("topleft", legend=paste0("R=",round(sqrt(summary(fit)$adj.r.squared),digits=4)))
} else {
legend("topleft", legend=c(paste0("R=",round(sqrt(summary(fit)$adj.r.squared),digits=4)),
paste0("p=",signif(summary(fit)$coefficients[4], digits=4))))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.