AndrewsBCellLymphoma: One-liner RNA-seq and ChIP-seq Analyses

Documented in PlotChIPAnnos PlotChIPHeatmaps PlotChIPPCAs

#' Plot ChIP PCAs
#'
#' \code{PlotChIPPCAs} generates PCA plots for all contrasts found in 
#' \code{results}. If \code{consensus = TRUE}, it will also generate one with
#' all samples and all peaks.
#'
#' @param results \code{DBA} object as returned by 
#'   \code{\link[DiffBind]{dba.analyze}}.
#' @param outpath Path to directory to be used for output. 
#' @param method Method used for \code{DiffBind} analyses 
#'   (e.g. \code{DBA_DESEQ2}).
#' @param fdr.thresh Number indicating FDR threshold. Peaks greater than
#'   \code{fdr.thresh} will not be used for PCAs.
#' @param fc.thresh Number indicating absolute log fold-change that peaks must 
#'   meet to be used for the PCA.
#' @param consensus Boolean indicating whether PCAs are for consensus peaks.
#'
#' @importFrom grDevices pdf dev.off
#' @importFrom DiffBind dba.plotPCA dba.report
#'
#' @export
#'
#' @author Jared Andrews
#'
PlotChIPPCAs <- function(results, outpath, method, fdr.thresh = 1, 
  fc.thresh = 0, consensus = TRUE) {

  if (consensus) {
    lab <- "Consensus"
    out <- "/ConsensusFigures/PCAPlots/Consensus.PCAs.pdf"
  } else {
    lab <- "DB"
    out <- paste0("/DBRFigures/PCAPlots/DBRs.fdr.", fdr.thresh, ".log2fc.", 
      fc.thresh, ".PCA.pdf")
  }

  pdf(paste0(outpath, out), height = 5, width = 5)

  if (consensus) {
    dba.plotPCA(results, th = 1, sub = paste0(lab, " Peaks"), method = method)
  
    for (i in seq_along(results$contrasts)) {
      dba.plotPCA(results, contrast = i, th = 1, method = method, 
        sub = paste0(lab, " Peaks\n", results$contrasts[[i]]$name1, " vs ", 
          results$contrasts[[i]]$name2))
    }

  } else {
    for (i in seq_along(results$contrasts)) {
      rep <- dba.report(results, th = fdr.thresh, contrast = i, 
        fold = fc.thresh, method = method)

      if (length(rep) > 1) {

        dba.plotPCA(results, report = rep, contrast = i, sub = paste0(lab, 
         " Peaks\n",
          results$contrasts[[i]]$name1, " vs ", results$contrasts[[i]]$name2))
      } else {
        message(paste0("No DB sites for ", results$contrasts[[i]]$name1, 
          " vs ", results$contrasts[[i]]$name2, ", skipping PCA."))
        next
      }
    }
  }

  dev.off()
}


#' Plot ChIP annotation breakdowns
#'
#' \code{PlotChIPAnnos} generates various plots for all annotated peaksets 
#' (as generated by \code{\link[ChIPseeker]{annotatePeak}}) found in 
#' \code{peak.anno}. 
#'
#' @param peak.anno A named list of \linkS4class{csAnno} objects with annotated
#'   peaks as returned by \code{\link[ChIPseeker]{annotatePeak}}. If 
#'   \code{consensus = TRUE}, only one is expected, otherwise there should be
#'   three (All DBRs, group1 up, group2 up).
#' @param outpath Path to directory to be used for output. 
#' @param consensus Boolean indicating if consensus peaks are provided.
#' @param comp Name of comparison, for file naming.
#' @param fc Fold change threshold, for file naming.
#' @param fdr FDR threshold, for file naming.
#'
#' @importFrom grDevices pdf dev.off
#' @importFrom ChIPseeker plotAnnoPie plotAnnoBar upsetplot plotDistToTSS
#'
#' @export
#'
#' @author Jared Andrews
#'
PlotChIPAnnos <- function(peak.anno, outpath, consensus = TRUE, comp = NULL,
  fc = NULL, fdr = NULL) {

  if (consensus) {
    if (length(peak.anno) > 1) {
      stop("'peak.anno' should only have 1 element when 'consensus' is TRUE")
    }

    pdf(paste0(outpath, 
      "/ConsensusFigures/PeakAnnotations/PeakAnnotations.pdf"), height = 4)

    plotAnnoPie(peak.anno[[1]])
    plotAnnoBar(peak.anno[[1]])
    upsetplot(peak.anno[[1]])
    plotDistToTSS(peak.anno[[1]], 
      title = "Distribution of Peaks Relative to TSS")

    dev.off()
  } else {
    if (length(peak.anno) > 3) {
      stop("'peak.anno' should only have 3 elements when 'consensus' is FALSE")
    }
    pdf(paste0(outpath, "/DBRFigures/PeakAnnotations/", comp, ".fdr.", fdr, 
      ".log2fc.", fc, ".PeakAnnotations.pdf"), height = 4)

    for (i in seq_along(peak.anno)) {
      plotAnnoPie(peak.anno[[i]], main = names(peak.anno)[i])
      upsetplot(peak.anno[[i]])
    }

    plotAnnoBar(peak.anno)
    plotDistToTSS(peak.anno, 
      title = paste0("Distribution of Peaks Relative to TSS"))
    
    dev.off()
  }
}


#' Plot ChIP heatmaps
#'
#' \code{PlotChIPHetmaps} generates signal and correlation heatmaps for a 
#' \code{DBA} object. 
#'
#' This function will generate heatmaps for each contrast individually as well
#' as all differentially bound peaks in all samples if \code{consensus = FALSE}.
#' A table will be saved of all peaks in the combined heatmap, with row order
#' retained if row clustering is performed.
#'
#' @param results \code{DBA} object as returned by 
#'   \code{\link[DiffBind]{dba.analyze}}.
#' @param outpath Path to directory to be used for output. 
#' @param method Method used for \code{DiffBind} analyses 
#'   (e.g. \code{DBA_DESEQ2}).
#' @param breaks Scalar vector of breaks to use for color mapping.
#' @param colors Vectors of colors to use for color mapping. 
#' @param fdr.thresh Number indicating FDR threshold. Peaks greater than
#'   \code{fdr.thresh} will not be included in the heatmap or used for sample
#'   correlations.
#' @param fc.thresh Number indicating absolute log fold-change that peaks must 
#'   meet to be included in the heatmaps or used for sample correlations.
#' @param consensus Boolean indicating whether creating heatmaps for consensus 
#'   peaks.
#'
#' @importFrom grDevices pdf dev.off
#' @importFrom DiffBind dba.plotHeatmap dba.report
#' @importFrom pheatmap pheatmap
#' @importFrom utils flush.console
#'
#' @export
#'
#' @author Jared Andrews
#'
PlotChIPHeatmaps <- function(results, outpath, method, breaks, colors, 
  fdr.thresh = 1, fc.thresh = 0, consensus = TRUE) {

  if (consensus) {
    lab <- "Consensus"
    out <- "/ConsensusFigures/Heatmaps/Consensus.Heatmaps.pdf"
  } else {
    lab <- "DB"
    out <- paste0("/DBRFigures/Heatmaps/DBRs.fdr.", fdr.thresh, ".log2fc.", 
      fc.thresh, ".Heatmaps.pdf")
    out.tab <- paste0("/DBRFigures/Heatmaps/DBRs.fdr.", fdr.thresh, ".log2fc.", 
      fc.thresh, ".Heatmaps.txt")
  }

  pdf(paste0(outpath, out), height = 8, width = 7)

  # maxSites can't be more than number of total peaks.
  if (consensus) {
    if (nrow(results$binding) > 10000) {
      max.sites <- 10000
    } else {
      max.sites <- nrow(results$binding)
    }

    dba.plotHeatmap(results, th = 1, margin = 6, correlations = FALSE, 
      scale = "row", density.info = "none", colScheme = colors, breaks = breaks, 
      maxSites = max.sites , 
      main = paste0(lab, " - Top 10000 Variable Peaks"))
    dba.plotHeatmap(results, th = 1, margin = 6, correlations = FALSE, 
      scale = "row", density.info = "none", colScheme = colors, breaks = breaks, 
      maxSites = max.sites , Colv = NULL, 
      main = paste0(lab, " - Top 10000 Variable Peaks"))
    dba.plotHeatmap(results, th = 1, margin = 6, density.info = "none", 
      main = paste0(lab, " Peaks - Correlation"))

  } else {
    sig.peaks <- list()

    for (i in seq_along(results$contrasts)) {
      rep <- dba.report(results, th = fdr.thresh, contrast = i, 
        fold = fc.thresh, method = method)

      # Skip contrast if no DB sites.
      if (length(rep) > 1) {
        # Get positions for each peak.
        df <- as.data.frame(rep)
        sig.peaks[[i]] <- paste0(df$seqnames, ":", df$start, "-", df$end)

        rowv <- TRUE
        if (length(rep) > 30000) {
          rowv <- NULL
          message(paste0("Too many DB sites to cluster rows for ", 
          results$contrasts[[i]]$name1, " vs ", results$contrasts[[i]]$name2),
          ", no row clustering will be performed.")
        }

        dba.plotHeatmap(results, margin = 6, contrast = i, report = rep,
          correlations = FALSE, scale = "row", density.info = "none", 
          Rowv = rowv, colScheme = colors, breaks = breaks, 
          maxSites = nrow(results$binding),  
          main = paste0("All ", lab, " Peaks\n", results$contrasts[[i]]$name1, 
            " vs ", results$contrasts[[i]]$name2))
        dba.plotHeatmap(results, margin = 6, contrast = i, report = rep,
          correlations = FALSE, scale = "row", density.info = "none", 
          colScheme = colors, breaks = breaks, maxSites = nrow(results$binding), 
          Colv = NULL, Rowv = rowv, 
          main = paste0("All ", lab, " Peaks\n", results$contrasts[[i]]$name1, 
            " vs ", results$contrasts[[i]]$name2))
        dba.plotHeatmap(results, report = rep, contrast = i, 
          margin = 6, density.info = "none", main = paste0(lab, 
            " Peaks - Correlation",
            "\n", results$contrasts[[i]]$name1, " vs ", 
            results$contrasts[[i]]$name2))
      } else {
        message(paste0("No DB sites for ", results$contrasts[[i]]$name1, " vs ",
          results$contrasts[[i]]$name2, ", skipping contrast heatmaps."))
        next
      }
    }

    # Get counts for all unique DEGs.
    sig.peaks <- unique(unlist(sig.peaks))
    rep <- dba.peakset(results, consensus = TRUE, bRetrieve = TRUE)
    df <- as.data.frame(rep)
    rownames(df) <- paste0(df$seqnames, ":", df$start, "-", df$end)
    df <- df[sig.peaks,]

    message(paste0("Total of ", nrow(df), " DB regions in combined heatmap ",
      "across all comparisons."))
    flush.console()

    m <- as.matrix(df[, 6:ncol(df)])

    rowv <- TRUE
    if (nrow(df) > 30000) {
      rowv <- FALSE
      message(paste0("Too many DB sites to cluster rows for,",
        " no row clustering will be performed."))
    }

    p <- pheatmap(m, cluster_rows = rowv, show_rownames = FALSE,
      cluster_cols = FALSE, scale = "row", fontsize_col = 6, color = colors,
      breaks = breaks, main = "All DB Peaks")
    print(p)

    p <- pheatmap(m, cluster_rows = rowv, show_rownames = FALSE,
      cluster_cols = TRUE, scale = "row", fontsize_col = 6, color = colors,
      breaks = breaks, main = "All DB Peaks")
    print(p)

    if (rowv) {
      df <- df[p$tree_row[["order"]],]
    }

    write.table(df, file = paste0(outpath, out.tab), sep = "\t", quote = FALSE,
      row.names = FALSE)
  }

  dev.off()
}


#' @importFrom grDevices colorRampPalette
.heatmap_colors <- function(breaks, preset = NULL, custom.colors = NULL, 
  reverse = FALSE) {

  preset <- match.arg(preset, c("BuRd", "OrPu", "BrTe", "PuGr", "BuOr"))
  preset.colors <- list(
    "BuRd" = c("#053061", "#2166ac", "#f5f5f5", "#b2182b", "#67001f"),
    "OrPu" = c("#b35806", "#e08214", "#f5f5f5", "#8073ac", "#542788"), 
    "BrTe" = c("#8c510a", "#d8b365", "#f5f5f5", "#5ab4ac", "#01665e"),
    "PuGr" = c("#542788", "#8073ac", "#f5f5f5", "#5aae61", "#00441b"),
    "BuOr" = c("#3F7F93", "#7BA7B5", "#f5f5f5", "#D58A76", "#C25539")
    )

  if (is.null(custom.colors)) {
    custom.colors <- preset.colors[[preset]]
  }

  if (reverse) {
    custom.colors <- rev(custom.colors)
  }

  colors <- colorRampPalette(custom.colors)(n = length(breaks) - 1)

  return(colors)
}