R/cross_plot.R

Defines functions cross_plot_prep cross_plot

Documented in cross_plot

#' Create a cross plot comparing differential expression (DE) results
#' @description This function creates a cross plot visualising the differences
#' in log2(fold-change) between two DE analyses.
#' @inheritParams volcano_enhance
#' @param DEtable1,DEtable2,DEtable1Subset,DEtable2Subset tables of DE results,
#' usually generated by \code{\link{DEanalysis_edger}}; the first two should
#' contain all genes, while the second two should only contain DE genes
#' @param mask whether to hide genes that were not called DE in either
#' comparison; default is FALSE
#' @param df Optionally, pre-computed cross plot table, from cross_plot_prep
#' @param lfc.threshold the log2(fold-change)
#' threshold to determine whether a gene is DE
#' @param labnames,cols.chosen the legend labels and colours for the 4
#' categories of genes ("not DE", "DE both", "DE comparison 1", "DE comparison 2")
#' @param labels.per.region how many labels to show in each region of the plot;
#' the plot is split in 8 regions using the axes and major diagonals, and the
#' points closest to the origin in each region are labelled; default is
#' 5, set to 0 for no labels
#' @param fix.axis.ratio whether to ensure the x and y axes have the same
#' units, resulting in a square plot; default is TRUE
#' @return The cross plot as a ggplot object.
#' @export
#' @examples
#' expression.matrix.preproc <- as.matrix(read.csv(
#'   system.file("extdata", "expression_matrix_preprocessed.csv", package = "bulkAnalyseR"), 
#'   row.names = 1
#' ))[1:500, 1:4]
#' 
#' anno <- AnnotationDbi::select(
#'   getExportedValue('org.Mm.eg.db', 'org.Mm.eg.db'),
#'   keys = rownames(expression.matrix.preproc),
#'   keytype = 'ENSEMBL',
#'   columns = 'SYMBOL'
#' ) %>%
#'   dplyr::distinct(ENSEMBL, .keep_all = TRUE) %>%
#'   dplyr::mutate(NAME = ifelse(is.na(SYMBOL), ENSEMBL, SYMBOL))
#'   
#' edger <- DEanalysis_edger(
#'   expression.matrix = expression.matrix.preproc,
#'   condition = rep(c("0h", "12h"), each = 2),
#'   var1 = "0h",
#'   var2 = "12h",
#'   anno = anno
#' )
#' deseq <- DEanalysis_edger(
#'   expression.matrix = expression.matrix.preproc,
#'   condition = rep(c("0h", "12h"), each = 2),
#'   var1 = "0h",
#'   var2 = "12h",
#'   anno = anno
#' )
#' cross_plot(
#'   DEtable1 = edger, 
#'   DEtable2 = deseq,
#'   DEtable1Subset = dplyr::filter(edger, abs(log2FC) > 1, pvalAdj < 0.05),
#'   DEtable2Subset = dplyr::filter(deseq, abs(log2FC) > 1, pvalAdj < 0.05),
#'   labels.per.region = 0
#' )
cross_plot = function(
  DEtable1,
  DEtable2,
  DEtable1Subset,
  DEtable2Subset,
  df = NULL,
  lfc.threshold = NULL,
  raster = FALSE,
  mask = FALSE,
  labnames = c("not DE", "DE both", "DE comparison 1", "DE comparison 2"),
  cols.chosen = c("grey", "purple", "dodgerblue", "lightcoral"),
  labels.per.region = 5,
  fix.axis.ratio = TRUE,
  add.guide.lines = TRUE,
  add.labels.custom = FALSE,
  genes.to.label = NULL,
  seed = 0,
  label.force = 1
){
  if(is.null(lfc.threshold)){
    lfc.threshold <- min(abs(DEtable1Subset$log2FC), abs(DEtable2Subset$log2FC), na.rm = TRUE)
  }
  
  if (is.null(df)){
    df <- cross_plot_prep(
      DEtable1 = DEtable1,
      DEtable2 = DEtable2,
      DEtable1Subset = DEtable1Subset,
      DEtable2Subset = DEtable2Subset,
      lfc.threshold = lfc.threshold,
      mask = mask,
      labnames = labnames
    )
  }
  names(cols.chosen) <- labnames
  lfc1 = NULL
  lfc2 = NULL
  if(raster){
    cp <- ggplot(df) +
      theme_minimal() +
      ggrastr::rasterise(geom_point(aes(x = lfc1, y = lfc2, colour = colours), alpha = 0.5)) +
      scale_colour_manual(values = cols.chosen)
  }else{
    cp <- ggplot(df) +
      theme_minimal() +
      geom_point(aes(x = lfc1, y = lfc2, colour = colours), alpha = 0.5) +
      scale_colour_manual(values = cols.chosen)
  }
  
  if(fix.axis.ratio){
    max.lfc <- max(abs(df$lfc1), abs(df$lfc2), na.rm=TRUE)
    cp <- cp + xlim(-max.lfc, max.lfc) + ylim(-max.lfc, max.lfc)
  }
  
  if(add.guide.lines){
    cp <- cp +
      geom_hline(yintercept = 0, color = "black") +
      geom_vline(xintercept = 0, color = "black") +
      geom_abline(slope = 1, intercept = 0, color = "black") +
      geom_hline(yintercept =      lfc.threshold, color = "green") +
      geom_hline(yintercept =     -lfc.threshold, color = "green") +
      geom_vline(xintercept =      lfc.threshold, color = "green") +
      geom_vline(xintercept =     -lfc.threshold, color = "green") +
      geom_hline(yintercept =  2 * lfc.threshold, color = "blue") +
      geom_hline(yintercept = -2 * lfc.threshold, color = "blue") +
      geom_vline(xintercept =  2 * lfc.threshold, color = "blue") +
      geom_vline(xintercept = -2 * lfc.threshold, color = "blue")
    
  }
  
  if(add.labels.custom){
    genes.to.rename <- genes.to.label[names(genes.to.label) != ""]
#    genes.to.label <- df$gene_name[(match(genes.to.label, c(df$gene_name, df$gene_id)) - 1) %% nrow(df) + 1]
    genes.to.label <- unique(genes.to.label[!is.na(genes.to.label)])
    genes.to.rename <- genes.to.rename[genes.to.rename %in% genes.to.label]
    df.label <- dplyr::filter(df, .data$gene_name %in% genes.to.label)
    df.label$name[match(genes.to.rename, df.label$gene_name)] <- names(genes.to.rename)
    if(nrow(df.label) == 0){
      message(paste0("add.labels.custom was TRUE but no genes specified; ",
                     "did you forget to supply genes.to.label or annotation?"))
    }
    cp <- cp +
      ggrepel::geom_label_repel(data = df.label,
                                mapping = aes(x = lfc1, y = lfc2, label = .data$gene_name),
                                max.overlaps = Inf,
                                force = label.force,
                                point.size = NA)
  }
  
  if(labels.per.region > 0){
    colours <- NULL
    df.lab <- df %>%
      dplyr::mutate(
        dist = sqrt(lfc1 ^ 2 + lfc2 ^ 2),
        quad = ifelse(colours == labnames[2],
                    ifelse(lfc1 * lfc2 > 0,
                           ifelse(lfc1 > 0, 1, 5),
                           ifelse(lfc1 > 0, 7, 3)),
                    ifelse(colours == labnames[3],
                           ifelse(lfc1 > 0, 8, 4),
                           ifelse(lfc2 > 0, 2, 6))))
    df.top.distances <- data.frame()
    for(region in 1:8){
      df.lab.sub <- dplyr::filter(df.lab, .data$quad == region) %>%
        dplyr::arrange(dplyr::desc(.data$dist))
      df.top.dist <- utils::head(df.lab.sub, labels.per.region)
      df.top.distances <- rbind(df.top.distances, df.top.dist)
    }
    set.seed(seed = seed)
    cp <- cp +
      ggrepel::geom_label_repel(data = df.top.distances,
                                mapping = aes(x = lfc1, y = lfc2, label = .data$gene_name),
                                max.overlaps = Inf,
                                force = label.force,
                                point.size = NA)
  }
  
  cp
}

cross_plot_prep <- function(  
    DEtable1,
    DEtable2,
    DEtable1Subset,
    DEtable2Subset,
    lfc.threshold,
    mask = FALSE,
    labnames = c("not DE", "DE both", "DE comparison 1", "DE comparison 2")
){
  de1 <- DEtable1Subset$gene_id
  de2 <- DEtable2Subset$gene_id
  
  if(mask){
    all.genes <- unique(c(de1, de2))
  }else{
    all.genes <- unique(c(DEtable1$gene_id, DEtable2$gene_id))
  }
  
  lfc1 <- DEtable1$log2FC[match(all.genes, DEtable1$gene_id)]
  pval1 <- DEtable1$pvalAdj[match(all.genes, DEtable1$gene_id)]
  lfc2 <- DEtable2$log2FC[match(all.genes, DEtable2$gene_id)]
  pval2 <- DEtable2$pvalAdj[match(all.genes, DEtable2$gene_id)]
  
  colours = vector(length=length(all.genes))
  colours[] = labnames[1]
  colours[all.genes %in% de1] = labnames[3]
  colours[all.genes %in% de2] = labnames[4]
  colours[all.genes %in% intersect(de1, de2)] = labnames[2]
  
  df <- data.frame(
    gene_id = all.genes,
    gene_name = c(DEtable1$gene_name, DEtable2$gene_name)[match(all.genes, c(DEtable1$gene_id, DEtable2$gene_id))],
    lfc1 = lfc1, 
    pval1 = pval1,
    lfc2 = lfc2, 
    pval2 = pval2,
    colours = colours
  )
  return(df)
}

Try the bulkAnalyseR package in your browser

Any scripts or data that you put into this service are public.

bulkAnalyseR documentation built on Dec. 28, 2022, 2:04 a.m.