R/GeneExpressionFigures.R

#' Prepare a volcanoPlot of gene expression data using ggplot2
#'
#' Volcano plot presents a scatter plot of the -log10(p-value) / log2 gene
#' expression from a differential gene expression study
#'
#' @param deSeqObj DESeq2 differential expression object
#' @param lfcThreshold a log2 threshold required for definition of differential expression
#' @param adjPValueThreshold minimal threshold of FDR corrected p-value for differential expression
#'
#' @return None
#'
#' @author Oxford Nanopore Bioinformatics, \email{support@@nanoporetech.com}
#' @references \url{https://en.wikipedia.org/wiki/Volcano_plot_(statistics)}
#' @keywords volcano
#'
#' @export
volcanoPlot <- function(deSeqObj, lfcThreshold, adjPValueThreshold) {
  volcanoSubstr <- results(deSeqObj)
  logUp <- which(volcanoSubstr$log2FoldChange >= lfcThreshold)
  logDown <- which(volcanoSubstr$log2FoldChange <= -lfcThreshold)
  withStat <- which(volcanoSubstr$padj <= adjPValueThreshold)
  colours <- c(noDifference="gray", upRegulated="red", downRegulated="green")
  gene <- rep("noDifference", nrow(volcanoSubstr))
  gene[logUp[logUp %in% withStat]] <- "upRegulated"
  gene[logDown[logDown %in% withStat]] <- "downRegulated"

  plot <- ggplot(data.frame(volcanoSubstr), aes(x=log2FoldChange, y=-log10(padj))) +
    geom_point(size=1.2) +
    geom_hline(yintercept=-log10(adjPValueThreshold), color="orange") +
    geom_vline(xintercept=-lfcThreshold, color="green") +
    geom_vline(xintercept=lfcThreshold, color="red") +
    aes(colour=gene) +
    scale_colour_manual(values=colours) +
    ggtitle("Volcano plot showing distribution of lfc vs adjp for expressed genes")

  print(plot)
}



#' Prepare a boxplot of gene expression profile for a candidate gene
#'
#' Using available gene expression context prepare a boxplot of gene expression for each sample; grouped by
#' experimental conditions.
#'
#' @param geneOfInterestNormalExpression dataframe of preprepared data
#'
#' @return None
#'
#' @author Oxford Nanopore Bioinformatics, \email{support@@nanoporetech.com}
#' @references \url{https://en.wikipedia.org/wiki/Volcano_plot_(statistics)}
#' @keywords volcano
#'
#' @export
candidateGeneExpressionBoxPlot <- function(geneOfInterestNormalExpression) {
  plot <- ggplot(geneOfInterestNormalExpression, aes(x=group, y=count, colour=group)) +
    geom_point(position=position_jitter(w=0.1,h=0), size=2) +
    scale_y_log10() +
    ggtitle("Distribution of normalised read counts for experimental conditions") +
    xlab("Experimental Condition") +
    ylab("normalised read count - log10 scale") +
    scale_colour_brewer(palette="Paired")
  print(plot)
}





#' Prepare a scatter of plot of PCA analysis for gene expression profiles
#'
#' PCA analysis will seek the facets of the experimental data that can define the
#' greatest sources of systematic difference within the experimental data
#'
#' @param deSeqObj DESeq2 differential expression object
#'
#' @return None
#'
#' @author Oxford Nanopore Bioinformatics, \email{support@@nanoporetech.com}
#' @keywords PCA
#'
#' @export
#' @importFrom DESeq2 counts
#' @importFrom pcaMethods prep pca
pcaPlot <- function() {
  pcaMatrix <- counts(deSeqObj)
  md <- prep(t(pcaMatrix), scale="none", center=TRUE)
  pca <- pca(md, method="svd", center=TRUE, nPcs=3)
  xdata <- as.data.frame(pca@scores)
  xdata <- cbind(xdata, group=studyDesign$group[match(rownames(xdata), rownames(studyDesign))])
  x <- 1
  y <- 2
  xpercent <- round(pca@R2[x]*100, digits=1)
  ypercent <- round(pca@R2[y]*100, digits=1)
  xlab <- paste("Prin.Comp. ",x," (",xpercent,"%)",sep="")
  ylab <- paste("Prin.Comp. ",y," (",ypercent,"%)",sep="")

  plot <- ggplot(xdata, aes(x=PC1, y=PC2, colour=group)) +
    geom_point(size=5, shape=18) +
    scale_colour_brewer(palette="Paired") +
    geom_vline(xintercept=0, color="darkgray") +
    geom_hline(yintercept=0, color="darkgray") +
    ggtitle("PCA analysis of experimental samples") +
    ylab(paste("PC2 (",ypercent,"%)",sep=""))  +
    xlab(paste("PC1 (",xpercent,"%)",sep=""))

  print(plot)
}
sagrudd/cDNA_tutorial documentation built on May 30, 2019, 2:13 p.m.