R/GroupHeatSpot.R

Defines functions GroupHeatSpot

Documented in GroupHeatSpot

#' @title GroupHeatSpot
#' @name GroupHeatSpot
#'
#' @importFrom GenomicRanges reduce
#' @importFrom BiocGenerics start
#' @importFrom BiocGenerics end
#' @importFrom Rsamtools BamFile
#' @importFrom GenomicAlignments readGAlignments
#' @importFrom SummarizedExperiment colData
#' @importFrom parallel mclapply
#' @importFrom data.table as.data.table
#' @importFrom reshape2 melt
#' @import ggplot2
#' @importFrom ggbio autoplot
#' @import patchwork
#'
#' @param object an ICASDataSet
#' @param SJ a charactor of SJ
#' @param flank the distance of flanking extension
#' @param cells which cells are output, NULL for all cells
#' @param BamFiles a file list of bamfiles
#' @param BamPostfix the postfix of bam files, remove the postfix the base name of bam must consistent with cells name
#' @param logTrans whether to perform log1p for the depth
#' @param ignore.strand the strandedness of bam file
#' @param ignore.SEorPE ignore single end paired of bamfiles (if your bam files have single end and paired end at the same time, you need to ignore the SEorPE)
#' @param singleEnd singleEnd or paired end of bam
#' @param antisense strandedness for singleEnd
#' @param strandMode strandMode for readGAlignments
#' @param mapqFilter A non-negative integer(1) specifying the minimum mapping quality to include. BAM records with mapping qualities less than mapqFilter are discarded.
#' @param geneModul A TxDb object made from GTF used by BAM aligner or a GRangesList object
#' @param shrinkage the shrinkage for intron region (must be a positive >= 1). default 1 for no shrinkage
#' @param high.col the color for high expressed base
#' @param low.col the color for low expressed base
#' @param curvature the curvature for splice junction
#' @param reduce To collapse all gene structure annotation features or not
#' @param rel_heights The relative heights of each row in the grid (must be a vector with 4 numbers).
#' @param highlight.region the region to highlight
#' @param highlight.color the color of highlight.region
#' @param highlight.label the label of highlight.region
#' @param highlight.label.size the size of highlight region label
#' @param removeTxid logical value, indicating whether to remove the transcript of y axis lab
#' @param MinimumFractionForP The minimum fraction of scaled expression to calculate base pair waldtest P value
#' @param NT the cores for parallel
#'
#' @author Tang Chao
#' @export

GroupHeatSpot <- function(object,
                          SJ,
                          flank = 200,
                          cells = NULL,
                          BamFiles,
                          BamPostfix,
                          logTrans = FALSE,
                          ignore.strand = TRUE,
                          ignore.SEorPE = FALSE,
                          singleEnd = FALSE,
                          antisense = TRUE,
                          strandMode = 2,
                          mapqFilter = 0,
                          geneModul,
                          shrinkage = 1,
                          high.col = "#0000FF80",
                          low.col =  "#FFFF0080",
                          curvature = -0.1,
                          reduce = FALSE,
                          rel_heights = c(1, 1, 1, 1),
                          highlight.region = NULL,
                          highlight.color = "#0000FF80",
                          highlight.label = NULL,
                          highlight.label.size = 4,
                          removeTxid = FALSE,
                          MinimumFractionForP = 0.05,
                          NT = 1,
                          ...) {
  if(!is(object, "ICASDataSet"))
    stop("The object must be a ICASDataSet data")

  if(length(SJ) != 1) {
    stop("SJ must be one SJ")
  }
  if(!SJ %in% row.names(psi(object)))
    stop("SJ must be in row.names of psi(object)")

  if(is.null(cells)) {
    cells <- row.names(colData(object))
  } else {
    cells <- cells[cells %in% row.names(colData(object))]
    if(length(cells) == 0) {
      stop("cells are not in row.names(colData(object))")
    }
  }

  if(!all(file.exists(BamFiles))) {
    stop("all BamFiles file exists are FALSE")
  }

  if(!identical(sort(cells), sort(gsub(BamPostfix, "", basename(BamFiles))))) {
    stop("Bamfiles are not cover your cells or maybe your BamPostfix are wrong")
  }

  if(!is.logical(ignore.strand)) {
    stop("ignore.strand must be logical value")
  }

  if(!is.logical(removeTxid)) {
    stop("removeTxid must be logical value")
  }

  if(!is.logical(singleEnd)) {
    stop("singleEnd must be logical value")
  }

  if(!is.logical(ignore.SEorPE)) {
    stop("ignore.SEorPE must be logical value")
  }

  if(!is.logical(logTrans)) {
    stop("logTrans must be logical value")
  }

  if(!is.logical(antisense)) {
    stop("antisense must be logical value")
  }

  if(length(strandMode) != 1) {
    stop("strandMode must be 1 or 2")
  }

  if(!is.element(strandMode, c(1, 2))) {
    stop("strandMode must be 1 or 2")
  }

  if(!is.numeric(mapqFilter)) {
    stop("mapqFilter must be a positive value")
  }

  if(mapqFilter < 0) {
    stop("mapqFilter must be a positive value")
  }

  if(!(is(geneModul, "TxDb") | is(geneModul, "GRangesList"))) {
    stop("geneModul must be a TxDb form GenomicFeatures or a GRangesList")
  }

  if(!is.numeric(flank)) {
    stop("flank must be a positive value")
  }

  if(flank < 0) {
    stop("flank must be a positive value")
  }

  if(!is.numeric(curvature)) {
    stop("curvature must be numeric value")
  }

  if(!is.numeric(NT)) {
    stop("NT must be a positive value")
  }

  if(!is.logical(reduce)) {
    stop("reduce must be logical value")
  }

  if(length(rel_heights) != 4) {
    stop("length of rel_heights must be 4")
  }

  if(any(!is.numeric(rel_heights))) {
    stop("rel_heights must be 4 positive numeric values")
  }

  if(any(rel_heights < 0)) {
    stop("rel_heights must be 4 positive numeric values")
  }

  if(NT <= 0) {
    stop("NT must be a positive value")
  }

  NT <- as.integer(NT)

  if(highlight.label.size <= 0) {
    stop("highlight.label.size must be a positive value")
  }

  highlight.label.size <- as.integer(highlight.label.size)


  if(!is.null(highlight.region)) {
    tryCatch(hlgr <- as(highlight.region, "GRanges"), error = function(e) stop("the highlight.region character vector must contain strings of the form 'chr:start-end' or 'chr:start-end:strand'"))

    if(any(width(hlgr) < 3)) {
      width(hlgr[width(hlgr) < 3]) <- 3
    }
  }

  rankcell <- row.names(colData(object)[order(colData(object)[[object@design]]), ])
  rankcell <- rankcell[rankcell %in% cells]

  ## AS event phase
  event <- SJEvent(object = object, SJ = SJ)
  region <- GenomicRanges::reduce(as(strsplit(event$ID, "@")[[1]], "GRanges"))
  BiocGenerics::start(region) <- BiocGenerics::start(region) - min(BiocGenerics::start(region), flank)
  BiocGenerics::end(region) <- BiocGenerics::end(region) + flank



  ## Fig4 gene structure

  p4 <- plotgene(object = object,
                 SJ = SJ,
                 flank = flank,
                 geneModul = geneModul,
                 shrinkage = shrinkage,
                 curvature = curvature,
                 reduce = reduce,
                 highlight.region = highlight.region,
                 highlight.color = highlight.color,
                 highlight.label = highlight.label,
                 highlight.label.size = highlight.label.size,
                 returnData = TRUE,
                 removeTxid = removeTxid)

  if(is.list(p4) & length(p4) == 2) {
    plast <- p4[[1]]
  } else {
    plast <- p4
  }

  ## BAM reads processing

  event <- SJEvent(object = object, SJ = SJ)
  region <- GenomicRanges::reduce(as(strsplit(event$ID, "@")[[1]], "GRanges"))
  BiocGenerics::start(region) <- BiocGenerics::start(region) - min(BiocGenerics::start(region), flank)
  BiocGenerics::end(region) <- BiocGenerics::end(region) + flank

  parallel::mclapply(BamFiles, function(x) {
    if(ignore.SEorPE) {
      suppressWarnings(as.numeric(ReadBam(BamFile = x,
                                          region = region,
                                          ignore.strand = TRUE,
                                          singleEnd = TRUE,
                                          antisense = antisense,
                                          strandMode = strandMode,
                                          mapqFilter = mapqFilter)))
    } else {
      suppressWarnings(as.numeric(ReadBam(BamFile = x,
                                          region = region,
                                          ignore.strand = ignore.strand,
                                          singleEnd = singleEnd,
                                          antisense = antisense,
                                          strandMode = strandMode,
                                          mapqFilter = mapqFilter,
                                          ...)))
    }
  }, mc.cores = NT) -> base_reads

  ## BAM reads normalization

  if(logTrans) {
    base_reads_norm <- lapply(base_reads, function(x) log1p(x)/log1p(max(x)))
  } else {
    base_reads_norm <- lapply(base_reads, function(x) x/max(x))
  }

  base_reads_norm <- do.call(rbind, base_reads_norm)
  colnames(base_reads_norm) <- BiocGenerics::start(region):BiocGenerics::end(region)
  row.names(base_reads_norm) <- gsub(BamPostfix, "", basename(BamFiles))
  base_reads_norm <- base_reads_norm[rankcell, ]

  if(is.list(p4) & length(p4) == 2) {
    base_reads_norm <- base_reads_norm[, as.character(p4[[2]]$x)]
    colnames(base_reads_norm) <- p4[[2]]$x2
  }

  ## Fig1 Heat map

  annoMat <- data.frame(cell = factor(rankcell, levels = rankcell),
                        group = SummarizedExperiment::colData(object)[rankcell, object@design],
                        pos = min(as.numeric(colnames(base_reads_norm))))

  lapply(seq_len(min(c(round(flank * 0.2), round(ncol(base_reads_norm)*0.01)))), function(i) {
    data.frame(cell = annoMat$cell, group = annoMat$group, pos = annoMat$pos + i)
  }) -> annoMat2
  annoMat2 <- do.call(rbind, annoMat2)

  base_reads_norm <- data.table::as.data.table(reshape2::melt(base_reads_norm))
  base_reads_norm[, Var1 := factor(Var1, levels = rankcell)]
  Mat1 <- base_reads_norm
  ggplot(Mat1, aes(x = Var2, y = as.numeric(Var1))) +
    geom_tile(aes(fill = value), show.legend = FALSE) +
    theme_classic() +
    scale_fill_gradient(high = high.col, low = low.col) +
    geom_point(data = annoMat2, aes(x = pos, y = as.numeric(cell), colour = group), shape = 15, size = 1) +
    theme(legend.position = "none",
          axis.line.x = element_blank(),
          axis.line.y = element_line(size = 0.1, color = "white"),
          axis.text.x = element_blank(),
          axis.text.y = element_text(size = 14),
          axis.title = element_blank(),
          axis.ticks = element_blank()) +
    guides(colour = guide_legend(override.aes = list(size = 3))) +
    scale_y_continuous(breaks = CumPos(table(annoMat$group)), labels = levels(annoMat$group))+
    scale_x_continuous(expand = c(0, 0)) -> p1

  ## Fig2 density plot

  base_reads_norm <- merge(annoMat[, c("cell", "group")], base_reads_norm, by.x = "cell", by.y = "Var1")
  base_reads_norm <- data.table::as.data.table(base_reads_norm)
  Mat2 <- base_reads_norm[, .(me = mean(value), v = sd(value)), by = list(group, Var2)]

  ggplot(Mat2, aes(x = Var2, y = me)) +
    geom_line(aes(colour = group)) +
    geom_ribbon(aes(x = Var2, ymin = me - v, ymax = me + v, fill = group), alpha = 0.2) +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          axis.text = element_text(size = 12),
          axis.title.y = element_text(size = 16),
          axis.text.x = element_blank(),
          axis.title.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.line.y = element_line(),
          legend.position = "none") +
    labs(y = "Scaled\nexpression") +
    scale_x_continuous(expand = c(0, 0)) -> p2

  ## Fig3 P value

  pos_rm <- base_reads_norm[, mean(value), by = c("Var2", "group")][, max(V1), by = "Var2"][V1 < MinimumFractionForP, Var2]

  Mat3 <- na.omit(base_reads_norm[, PosReadTest(depth = value, group = group), by = Var2])

  Mat3[Var2 %in% pos_rm, V1 := 1]

  ggplot(Mat3, aes(Var2, y = -log10(V1))) +
    geom_step() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          axis.text = element_text(size = 12),
          axis.text.x = element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_text(size = 16),
          axis.ticks.x = element_blank(),
          axis.line.y = element_line(),
          legend.position = "none") +
    labs(y = "-log10(P)") +
    scale_x_continuous(expand = c(0, 0)) +
    geom_vline(xintercept = Mat3[V1 == 0, Var2], color = "white", size = 2) +
    geom_hline(yintercept = -log10(0.01), lty = 2) -> p3

  if(any(rel_heights == 0)) {
    ps <- list(p1, p2, p3, plast)
    ps <- ps[rel_heights != 0]

    if(length(ps) == 1) {
      print(ps[[1]])
    } else {
      if(length(ps) == 2) {
        ps[[1]] / ps[[2]] + plot_layout(heights = rel_heights[rel_heights != 0])
      } else {
        ps[[1]] / ps[[2]] / ps[[3]] + plot_layout(heights = rel_heights[rel_heights != 0])
      }
    }
  } else {
    (p1 / p2 / p3 / plast) + plot_layout(heights = rel_heights)
  }
}
tangchao7498/ICAS documentation built on Jan. 28, 2021, 3:56 p.m.