R/pca_batch.R

Defines functions pca_batch

Documented in pca_batch

#' @title PCA Plot with batch information
#'
#' @description Plots the first two principal components for all the samples of
#'  a gene expression dataset along with the batch information
#'
#' @param exprData A matrix containing gene expression data. Row names should be
#' samples and column names should be genes.
#' @param batch.info A data frame containing the samples names and details of the
#' batch they belong to.
#' @param batch Title of the batch the data is being corrected for.
#' @param NameString  string that will be appear in all output filenames. Default="" (empty string)
#' @param cond The column name in the *batch.info* data which denotes biological condition,
#' if there are any to be considered for PCA plots, i.e. do you want different shapes for cancer
#' and normal samples? If you don't need the conditions or have the dataset with one
#' kind of samples, no need to add this argument.
#' @param when String indicating for which dataset is the PCA plot being created
#' @param return.plot Should the plot be returned as an object to the environment?
#' If FALSE, plot is saved to a pdf file, if TRUE, plot is returned to the environment.
#' Default = FALSE
#'
#'  @import grDevices
#'  @import stats
#'  @import graphics
#'
#' @export
pca_batch <- function(exprData, batch.info, batch, NameString = "", cond="", when = "", return.plot=FALSE){

  print("===========================Plotting PCs along with batch=====================")

  #matching sample IDs with batches
  if (is.character(batch.info[,1])){
    id <- as.character(rownames(exprData))
    s<- match (id, batch.info[,1])

  } else if (is.numeric(batch.info[,1])){
    id <-as.numeric(rownames(exprData))
    s<- match (id, batch.info[,1])
  }

  #removing genes with zero variance
  std_genes <- apply(exprData, MARGIN =2, sd)
  genes_sd_0 <- length(which (std_genes ==0))
  remgenes <- length (std_genes) - genes_sd_0
  exprData_sd0<- exprData[,which (std_genes > 0)]
  print(paste0("Removed ", genes_sd_0, " genes with zero variance for PCA..."))
  print (paste0(remgenes, " genes are being used for PCA..."))

  #calculating the principal components and adding the data to pca_data
  print("Calculating Principal Components...")
  pca.dat <- prcomp(exprData_sd0, center = TRUE, scale. = TRUE)
  print("PCs calculated")

  if(cond==""){
  pca_data <- cbind.data.frame(pca.dat$x[, 1:2], batch.info[s,2])
  colnames(pca_data)[3] <- "Batch"
  pca_data[,3] <- as.factor(pca_data[,3])


  #plot PCA biplot
  when_str <- unlist(strsplit(when, split="_"))[1]
  pca_plot <- ggplot2::ggplot(data = pca_data) +

    ggplot2::geom_point(size =6,
                        alpha= 0.6,
                        ggplot2::aes(x=PC1, y=PC2, colour=Batch))+

    ggplot2::labs(title=paste("PCA with", batch, when_str, "correction", sep = " "),
                  x = "PC1",
                  y = "PC2",
                  colour = "Batch")+

    ggplot2::theme(

      #Adjusting axis titles, lines and text
      axis.title = ggplot2::element_text(size = 15),
      axis.line = ggplot2::element_line(size=0.75),
      axis.text = ggplot2::element_text(size=15, colour ="black"),
      #Center align the title
      plot.title = ggplot2::element_text(face = "bold", hjust =0.5, size =20),

      #Adjust legend title and text
      legend.title = ggplot2::element_text(size = 15, face = "bold"),
      legend.text = ggplot2::element_text(size = 15),

      # Adjust panel border
      panel.border = ggplot2::element_rect(fill=NA, size= 0.75),

      # Remove panel grid lines
      panel.grid.major = ggplot2::element_blank(),
      panel.grid.minor = ggplot2::element_blank(),

      # Remove panel background
      panel.background = ggplot2::element_blank())+

     ggplot2::scale_colour_manual(values=c("#fcba03",  "#19e01c", "#ff470f",
                                           "#0fdbca", "#ff217e","#405ce6",
                                           "#6b6769","#b264ed"))
  } else if (cond!=""){

    pca_data <- cbind.data.frame(pca.dat$x[, 1:2], batch.info[s,2], batch.info[s,3])
    colnames(pca_data)[3:4] <- c("Batch", "Condition")
    pca_data[,3] <- as.factor(pca_data[,3])
    pca_data[,4] <- as.factor(pca_data[,4])


    #plot PCA biplot
    when_str <- unlist(strsplit(when, split="_"))[1]
    pca_plot <- ggplot2::ggplot(data = pca_data) +

      ggplot2::geom_point(size =6,
                          alpha= 0.6,
                          ggplot2::aes(x=PC1, y=PC2, colour=Batch, shape=Condition))+

      ggplot2::labs(title=paste("PCA with", batch, when_str, "correction", sep = " "),
                    x = "PC1",
                    y = "PC2",
                    colour = "Batch",
                    shape=cond)+

      ggplot2::theme(

        #Adjusting axis titles, lines and text
        axis.title = ggplot2::element_text(size = 15),
        axis.line = ggplot2::element_line(size=0.75),
        axis.text = ggplot2::element_text(size=15, colour ="black"),
        #Center align the title
        plot.title = ggplot2::element_text(face = "bold", hjust =0.5, size =20),

        #Adjust legend title and text
        legend.title = ggplot2::element_text(size = 15, face = "bold"),
        legend.text = ggplot2::element_text(size = 15),

        # Adjust panel border
        panel.border = ggplot2::element_rect(fill=NA, size= 0.75),

        # Remove panel grid lines
        panel.grid.major = ggplot2::element_blank(),
        panel.grid.minor = ggplot2::element_blank(),

        # Remove panel background
        panel.background = ggplot2::element_blank())+

      ggplot2::scale_colour_manual(values=c("#fcba03",  "#19e01c", "#ff470f",
                                            "#0fdbca", "#ff217e","#405ce6",
                                            "#6b6769","#b264ed"))
  }


  if(return.plot==FALSE){
  date <- as.character(format(Sys.Date(), "%Y%m%d"))
  plotFile <- ifelse (NameString =="",
                      paste0(date, "_plot_", batch, "_pca_", when, ".pdf"),
                      paste0(date, "_plot_", NameString, "_", batch, "_pca_", when, ".pdf"))
  pdf (plotFile)
  plot(pca_plot)
  dev.off()
  print(paste0("Plotted PC1 & PC2 with batch to ", plotFile))
  } else if(return.plot==TRUE){
    #returning pca plot
    return(pca_plot)
  }

}
jankinsan/BatchEC documentation built on Sept. 9, 2021, 8:12 p.m.