#' @title PCA Plot with batch information
#'
#' @description Plots the first two principal components for all the samples of
#' a gene expression dataset along with the batch information
#'
#' @param exprData A matrix containing gene expression data. Row names should be
#' samples and column names should be genes.
#' @param batch.info A data frame containing the samples names and details of the
#' batch they belong to.
#' @param batch Title of the batch the data is being corrected for.
#' @param NameString string that will be appear in all output filenames. Default="" (empty string)
#' @param cond The column name in the *batch.info* data which denotes biological condition,
#' if there are any to be considered for PCA plots, i.e. do you want different shapes for cancer
#' and normal samples? If you don't need the conditions or have the dataset with one
#' kind of samples, no need to add this argument.
#' @param when String indicating for which dataset is the PCA plot being created
#' @param return.plot Should the plot be returned as an object to the environment?
#' If FALSE, plot is saved to a pdf file, if TRUE, plot is returned to the environment.
#' Default = FALSE
#'
#' @import grDevices
#' @import stats
#' @import graphics
#'
#' @export
pca_batch <- function(exprData, batch.info, batch, NameString = "", cond="", when = "", return.plot=FALSE){
print("===========================Plotting PCs along with batch=====================")
#matching sample IDs with batches
if (is.character(batch.info[,1])){
id <- as.character(rownames(exprData))
s<- match (id, batch.info[,1])
} else if (is.numeric(batch.info[,1])){
id <-as.numeric(rownames(exprData))
s<- match (id, batch.info[,1])
}
#removing genes with zero variance
std_genes <- apply(exprData, MARGIN =2, sd)
genes_sd_0 <- length(which (std_genes ==0))
remgenes <- length (std_genes) - genes_sd_0
exprData_sd0<- exprData[,which (std_genes > 0)]
print(paste0("Removed ", genes_sd_0, " genes with zero variance for PCA..."))
print (paste0(remgenes, " genes are being used for PCA..."))
#calculating the principal components and adding the data to pca_data
print("Calculating Principal Components...")
pca.dat <- prcomp(exprData_sd0, center = TRUE, scale. = TRUE)
print("PCs calculated")
if(cond==""){
pca_data <- cbind.data.frame(pca.dat$x[, 1:2], batch.info[s,2])
colnames(pca_data)[3] <- "Batch"
pca_data[,3] <- as.factor(pca_data[,3])
#plot PCA biplot
when_str <- unlist(strsplit(when, split="_"))[1]
pca_plot <- ggplot2::ggplot(data = pca_data) +
ggplot2::geom_point(size =6,
alpha= 0.6,
ggplot2::aes(x=PC1, y=PC2, colour=Batch))+
ggplot2::labs(title=paste("PCA with", batch, when_str, "correction", sep = " "),
x = "PC1",
y = "PC2",
colour = "Batch")+
ggplot2::theme(
#Adjusting axis titles, lines and text
axis.title = ggplot2::element_text(size = 15),
axis.line = ggplot2::element_line(size=0.75),
axis.text = ggplot2::element_text(size=15, colour ="black"),
#Center align the title
plot.title = ggplot2::element_text(face = "bold", hjust =0.5, size =20),
#Adjust legend title and text
legend.title = ggplot2::element_text(size = 15, face = "bold"),
legend.text = ggplot2::element_text(size = 15),
# Adjust panel border
panel.border = ggplot2::element_rect(fill=NA, size= 0.75),
# Remove panel grid lines
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
# Remove panel background
panel.background = ggplot2::element_blank())+
ggplot2::scale_colour_manual(values=c("#fcba03", "#19e01c", "#ff470f",
"#0fdbca", "#ff217e","#405ce6",
"#6b6769","#b264ed"))
} else if (cond!=""){
pca_data <- cbind.data.frame(pca.dat$x[, 1:2], batch.info[s,2], batch.info[s,3])
colnames(pca_data)[3:4] <- c("Batch", "Condition")
pca_data[,3] <- as.factor(pca_data[,3])
pca_data[,4] <- as.factor(pca_data[,4])
#plot PCA biplot
when_str <- unlist(strsplit(when, split="_"))[1]
pca_plot <- ggplot2::ggplot(data = pca_data) +
ggplot2::geom_point(size =6,
alpha= 0.6,
ggplot2::aes(x=PC1, y=PC2, colour=Batch, shape=Condition))+
ggplot2::labs(title=paste("PCA with", batch, when_str, "correction", sep = " "),
x = "PC1",
y = "PC2",
colour = "Batch",
shape=cond)+
ggplot2::theme(
#Adjusting axis titles, lines and text
axis.title = ggplot2::element_text(size = 15),
axis.line = ggplot2::element_line(size=0.75),
axis.text = ggplot2::element_text(size=15, colour ="black"),
#Center align the title
plot.title = ggplot2::element_text(face = "bold", hjust =0.5, size =20),
#Adjust legend title and text
legend.title = ggplot2::element_text(size = 15, face = "bold"),
legend.text = ggplot2::element_text(size = 15),
# Adjust panel border
panel.border = ggplot2::element_rect(fill=NA, size= 0.75),
# Remove panel grid lines
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
# Remove panel background
panel.background = ggplot2::element_blank())+
ggplot2::scale_colour_manual(values=c("#fcba03", "#19e01c", "#ff470f",
"#0fdbca", "#ff217e","#405ce6",
"#6b6769","#b264ed"))
}
if(return.plot==FALSE){
date <- as.character(format(Sys.Date(), "%Y%m%d"))
plotFile <- ifelse (NameString =="",
paste0(date, "_plot_", batch, "_pca_", when, ".pdf"),
paste0(date, "_plot_", NameString, "_", batch, "_pca_", when, ".pdf"))
pdf (plotFile)
plot(pca_plot)
dev.off()
print(paste0("Plotted PC1 & PC2 with batch to ", plotFile))
} else if(return.plot==TRUE){
#returning pca plot
return(pca_plot)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.