R/plotgene.R

Defines functions plotgene

Documented in plotgene

#' @title plotgene
#' @name plotgene
#' @description plotgene can be used to plot the gene structure and AS event
#' @importFrom ggbio autoplot
#' @importFrom ggbio theme_null
#' @importFrom ggbio theme_clear
#' @import GenomicFeatures
#' @importFrom GenomicRanges reduce
#' @param object an ICASDataSet
#' @param SJ a charactor of SJ
#' @param flank the distance of flanking extension
#' @param geneModul A TxDb object made from GTF used by BAM aligner or a GRangesList object
#' @param shrinkage the shrinkage for intron region (must be a positive >= 1). default 1 for no shrinkage
#' @param curvature the curvature for splice junction
#' @param reduce To collapse all gene structure annotation features or not
#' @param highlight.region the region to highlight
#' @param highlight.color the color of highlight.region
#' @param highlight.label the label of highlight.region
#' @param highlight.label.size the size of highlight region label
#' @param returnData logical value, indicating return data or plot
#' @param removeTxid logical value, indicating whether to remove the transcript of y axis lab
#'
#' @export

plotgene <- function(object,
                     SJ,
                     flank = 200,
                     geneModul,
                     shrinkage = 1,
                     curvature = -0.1,
                     reduce = FALSE,
                     highlight.region = NULL,
                     highlight.color = "#0000FF80",
                     highlight.label = NULL,
                     highlight.label.size = 4,
                     returnData = FALSE,
                     removeTxid = FALSE,
                     ...) {

  if(!is(object, "ICASDataSet"))
    stop("The object must be a ICASDataSet data")

  if(length(SJ) != 1) {
    stop("SJ must be one SJ")
  }
  if(!SJ %in% row.names(psi(object)))
    stop("SJ must be in row.names of psi(object)")

  if(!is.numeric(flank)) {
    stop("flank must be a positive value")
  }

  if(!(is(geneModul, "TxDb") | is(geneModul, "GRangesList"))) {
    stop("geneModul must be a TxDb form GenomicFeatures or a GRangesList")
  }

  if(shrinkage < 1) {
    stop("shrinkage must be >= 1")
  }

  if(flank < 0) {
    stop("flank must be a positive value")
  }

  if(!is.numeric(curvature)) {
    stop("curvature must be numeric value")
  }

  if(!is.logical(reduce)) {
    stop("reduce must be logical value")
  }

  if(is.null(highlight.label)) highlight.label <- NA

  if(highlight.label.size <= 0) {
    stop("highlight.label.size must be a positive value")
  }

  if(!is.logical(returnData)) {
    stop("returnData must be a logical value")
  }

  if(!is.logical(removeTxid)) {
    stop("removeTxid must be a logical value")
  }

  highlight.label.size <- as.integer(highlight.label.size)

  if(!is.null(highlight.region)) {
    tryCatch(hlgr <- as(highlight.region, "GRanges"), error = function(e) stop("the highlight.region character vector must contain strings of the form 'chr:start-end' or 'chr:start-end:strand'"))

    if(any(width(hlgr) < 3)) {
      width(hlgr[width(hlgr) < 3]) <- 3
    }
  }

  event <- SJEvent(object = object, SJ = SJ)
  region <- GenomicRanges::reduce(as(strsplit(event$ID, "@")[[1]], "GRanges"))
  BiocGenerics::start(region) <- BiocGenerics::start(region) - min(BiocGenerics::start(region), flank)
  BiocGenerics::end(region) <- BiocGenerics::end(region) + flank

  Mat5 <- data.table::as.data.table(as(strsplit(event$ID, "@")[[1]], "GRanges"))
  Mat6 <- data.table::as.data.table(as(SJ, "GRanges"))

  if(shrinkage == 1) { # original size
    if(is(geneModul, "TxDb")) { # From TxDb
      if(reduce) {
        p4 <- tryCatch(ggbio::autoplot(geneModul, which = region, stat = "reduce"), error = function(e) stop("maybe your TxDb external pointer is not valid"))
      } else {
        p4 <- tryCatch(ggbio::autoplot(geneModul, which = region), error = function(e) stop("maybe your TxDb external pointer is not valid"))
      }

      p4 <- p4 + ggbio::theme_null()
      p4 <- p4@ggplot

      ypos <- lapply(ggplot_build(p4)$data, function(x) suppressWarnings(max(x$y)))
      ypos <- max(unlist(ypos[!mapply(is.infinite, ypos)]))

      if(is.null(highlight.region)) { # No highlight.region

        p4 +
          geom_curve(data = Mat5, aes(x = start, xend = end, y = ypos, yend = ypos), curvature = curvature) +
          geom_curve(data = Mat6, aes(x = start, xend = end, y = ypos, yend = ypos), curvature = curvature, color = 2) +
          lims(y = c(0, ypos * 1.2)) +
          scale_x_continuous(expand = c(0, 0)) -> plast

      } else { # With highlight.region

        ypos_min <- lapply(ggplot_build(p4)$data, function(x) suppressWarnings(min(x$ymax)))
        ypos_min <- min(unlist(ypos_min[!mapply(is.infinite, ypos_min)]))

        p4 +
          geom_curve(data = Mat5, aes(x = start, xend = end, y = ypos, yend = ypos), curvature = curvature) +
          geom_curve(data = Mat6, aes(x = start, xend = end, y = ypos, yend = ypos), curvature = curvature, color = 2) +
          lims(y = c(0, ypos * 1.2)) +
          scale_x_continuous(expand = c(0, 0)) +
          geom_rect(data = as.data.frame(hlgr), aes(xmin = start, xmax = end, ymin = ypos_min, ymax = ypos), fill = highlight.color) +
          annotate(geom = "text", x = start(hlgr) + 0.5 * width(hlgr), y = ypos_min * 0.5, label = highlight.label, size = highlight.label.size) -> plast
      }

    } else { # From GRangesList

      if(reduce) geneModul <- GRangesList(reduce(unlist(geneModul)))

      lapply(geneModul, function(x) {
        GRanges(seqnames = unique(as.character(seqnames(x))),
                ranges = IRanges(start = min(start(x)), end = max(end(x))),
                strand = unique(as.character(strand(x))))
      }) -> tx_range
      if(is.null(names(tx_range))) names(tx_range) <- seq_along(tx_range)
      tx_range <- mapply(function(x) length(findOverlaps(query = region, subject = x, type = "within")), tx_range)
      tx_cover <- names(tx_range)[tx_range == 1]

      geneModul <- lapply(geneModul, function(x) x[subjectHits(findOverlaps(region, x))])
      geneModul <- geneModul[mapply(length, geneModul) > 0]

      if(length(geneModul) == 0) { # No exon in this region

        if(length(tx_cover) == 0) tx_cover <- "Ref"

        ggplot() +
          geom_hline(yintercept = seq_along(tx_cover)) +
          scale_y_continuous(breaks = seq_along(tx_cover), labels = tx_cover) +
          ggbio::theme_clear() +
          theme(axis.title = element_blank(), axis.ticks.y = element_blank()) -> p4

        if(is.null(highlight.region)) { # No highlight.region

          p4 +
            geom_curve(data = Mat5, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature) +
            geom_curve(data = Mat6, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature, color = 2) +
            scale_x_continuous(limits = c(start(region), end(region)), expand = c(0, 0)) +
            scale_y_continuous(limits = c(0, length(tx_cover) * 1.2), breaks = seq_along(tx_cover), labels = if(reduce) NULL else tx_cover) -> plast

        } else { # Add highlight.region

          p4 +
            geom_curve(data = Mat5, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature) +
            geom_curve(data = Mat6, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature, color = 2) +
            scale_x_continuous(limits = c(start(region), end(region)), expand = c(0, 0)) +
            geom_rect(data = as.data.frame(hlgr), aes(xmin = start, xmax = end, ymin = 0.9, ymax = length(tx_cover) + 0.1), fill = highlight.color) +
            annotate(geom = "text", x = start(hlgr) + 0.5 * width(hlgr), y = 0.5, label = highlight.label, size = highlight.label.size) +
            scale_y_continuous(limits = c(0, length(tx_cover) * 1.2), breaks = seq_along(tx_cover), labels = if(reduce) NULL else tx_cover) -> plast
        }

      } else { # With exon in this region

        geneModul <- lapply(geneModul, function(x) {
          start(x) <- MinMax(start(x), start(region), end(region))
          end(x) <- MinMax(end(x), start(region), end(region))
          return(x)
        })
        geneModul <- GRangesList(geneModul)

        p4 <- tryCatch(ggbio::autoplot(geneModul, group.selfish = removeTxid)@ggplot, error = function(e) ggplot())

        ypos <- lapply(ggplot_build(p4)$data, function(x) suppressWarnings(max(x$y)))
        ypos <- max(unlist(ypos[!mapply(is.infinite, ypos)]))

        if(is.null(highlight.region)) { # No highlight.region
          p4 +
            geom_curve(data = Mat5, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature) +
            geom_curve(data = Mat6, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature, color = 2) +
            scale_x_continuous(expand = c(0, 0)) +
            ggbio::theme_clear() +
            theme(axis.title = element_blank(), axis.line.y = element_blank(), axis.ticks.y = element_blank()) +
            scale_y_continuous(breaks = seq_along(geneModul), labels = names(geneModul), limits = c(0, ypos * 1.2)) -> plast

        } else { # Add highlight.region

          ypos_min <- lapply(ggplot_build(p4)$data, function(x) suppressWarnings(min(x$ymax)))
          ypos_min <- min(unlist(ypos_min[!mapply(is.infinite, ypos_min)]))

          p4 +
            geom_curve(data = Mat5, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature) +
            geom_curve(data = Mat6, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature, color = 2) +
            ggbio::theme_clear() +
            theme(axis.title = element_blank(), axis.line.y = element_blank(), axis.ticks.y = element_blank()) +
            geom_rect(data = as.data.frame(hlgr), aes(xmin = start, xmax = end, ymin = ypos_min * 0.9, ymax = ypos * 1.02), fill = highlight.color) +
            annotate(geom = "text", x = start(hlgr) + 0.5 * width(hlgr), y = ypos_min * 0.6, label = highlight.label, size = highlight.label.size) +
            scale_y_continuous(expand = c(0, 0), breaks = seq_along(geneModul), labels = names(geneModul), limits = c(0, ypos * 1.2)) -> plast
        }
      }
    }

  } else { # zoom out the intron
    stopifnot(shrinkage > 1)

    if(is(geneModul, "TxDb")) { # From TxDb
      ebt <- exonsBy(geneModul, by = "tx", use.names = T)
      if(is.element("gene_id", colnames(event))) {
        if(!is.na(event$gene_id)) {
          tbg <- transcriptsBy(x = geneModul, by = "gene")
          tx_tu <- tbg[event$gene_id][[1]]$tx_name
          geneModul <- ebt[tx_tu]
        } else {
          tx_tu <- GenomicFeatures::transcriptsByOverlaps(x = geneModul, ranges = region)$tx_name
          geneModul <- ebt[tx_tu]
        }
      } else {
        tx_tu <- GenomicFeatures::transcriptsByOverlaps(x = geneModul, ranges = region)$tx_name
        geneModul <- ebt[tx_tu]
      }
    } else { # From GRangesList
      geneModul <- geneModul
    }

    if(reduce) geneModul <- GRangesList(reduce(unlist(geneModul)))

    lapply(geneModul, function(x) {
      GRanges(seqnames = unique(as.character(seqnames(x))),
              ranges = IRanges(start = min(start(x)), end = max(end(x))),
              strand = unique(as.character(strand(x))))
    }) -> tx_range
    if(is.null(names(tx_range))) names(tx_range) <- seq_along(tx_range)
    tx_range <- mapply(function(x) length(findOverlaps(query = region, subject = x, type = "within")), tx_range)
    tx_cover <- names(tx_range)[tx_range == 1]

    geneModul <- lapply(geneModul, function(x) x[subjectHits(findOverlaps(region, x))])
    geneModul <- geneModul[mapply(length, geneModul) > 0]

    if(length(geneModul) == 0) { # No exon in this region

      if(length(tx_cover) == 0) tx_cover <- "Ref"

      ggplot() +
        geom_hline(yintercept = seq_along(tx_cover)) +
        scale_y_continuous(breaks = seq_along(tx_cover), labels = tx_cover) +
        ggbio::theme_clear() +
        theme(axis.title = element_blank(), axis.ticks.y = element_blank()) -> p4

      if(is.null(highlight.region)) { # No highlight.region

        p4 +
          geom_curve(data = Mat5, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature) +
          geom_curve(data = Mat6, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature, color = 2) +
          scale_x_continuous(limits = c(start(region), end(region)), expand = c(0, 0)) +
          scale_y_continuous(limits = c(0, length(tx_cover) * 1.2), breaks = seq_along(tx_cover), labels = if(reduce) NULL else tx_cover) -> plast

      } else { # Add highlight.region

        p4 +
          geom_curve(data = Mat5, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature) +
          geom_curve(data = Mat6, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature, color = 2) +
          scale_x_continuous(limits = c(start(region), end(region)), expand = c(0, 0)) +
          geom_rect(data = as.data.frame(hlgr), aes(xmin = start, xmax = end, ymin = 0.9, ymax = length(tx_cover) + 0.1), fill = highlight.color) +
          annotate(geom = "text", x = start(hlgr) + 0.5 * width(hlgr), y = 0.5, label = highlight.label, size = highlight.label.size) +
          scale_y_continuous(limits = c(0, length(tx_cover) * 1.2), breaks = seq_along(tx_cover), labels = if(reduce) NULL else tx_cover) -> plast
      }

    } else { # With exon in this region

      ## trim overhang exon region from here
      geneModul <- lapply(geneModul, function(x) {
        start(x) <- MinMax(start(x), start(region), end(region))
        end(x) <- MinMax(end(x), start(region), end(region))
        return(x)
      })
      geneModul <- GRangesList(geneModul)
      ## trim overhang exon region end

      ## down sampling for shrinkage from here
      ex_base <- coverage(reduce(unlist(geneModul)))[[unique(as.character(seqnames(reduce(unlist(geneModul)))))]]
      ex_base <- tryCatch(ex_base[start(region):end(region)], error = function(e) ex_base[start(region):length(ex_base)])
      if(length(ex_base) < width(region)) ex_base <- c(ex_base, rep(0, width(region) - length(ex_base)))

      ex_base_tab <- data.table::data.table(x = start(region):end(region),
                                            isExon = as.numeric(ex_base),
                                            bin = rep(seq_along(runValue(ex_base)), runLength(ex_base)))

      set.seed(547498)
      ex_base_tab_sub <- tryCatch(rbind(ex_base_tab[isExon == 0, .SD[sample(.N, .N/shrinkage, replace = FALSE), ], by = bin], ex_base_tab[isExon == 1, ]), error = function(e) {
        rbind(ex_base_tab[isExon == 0, .SD[sample(.N, .N/shrinkage, replace = TRUE), ], by = bin], ex_base_tab[isExon == 1, ])
      })

      sites_gtf <- c(start(reduce(unlist(geneModul))) - 1, end(reduce(unlist(geneModul))) + 1) # remaining splice site for SJ
      sites_sj <- c(start(as(strsplit(event$ID, "@")[[1]], "GRanges")), end(as(strsplit(event$ID, "@")[[1]], "GRanges")))
      sites <- union(sites_gtf, sites_sj)

      if(!is.null(highlight.region)) {
        sites <- union(sites, c(start(hlgr), end(hlgr)))
      }

      ex_base_tab_sub <- unique(rbind(ex_base_tab_sub, ex_base_tab[x %in% sites, ]))
      data.table::setkey(ex_base_tab_sub, x)
      ex_base_tab_sub$x2 <- min(ex_base_tab_sub$x):(min(ex_base_tab_sub$x) + nrow(ex_base_tab_sub) - 1)
      data.table::setkey(ex_base_tab_sub, x)

      for(i in seq_along(geneModul)) {
        start(geneModul[[i]]) <- ex_base_tab_sub[match(start(geneModul[[i]]), x), x2]
        end(geneModul[[i]]) <- ex_base_tab_sub[match(end(geneModul[[i]]), x), x2]
      }
      ## down sampling for shrinkage end

      p4 <- tryCatch(ggbio::autoplot(geneModul, group.selfish = removeTxid)@ggplot, error = function(e) ggplot())

      ypos <- lapply(ggplot_build(p4)$data, function(x) suppressWarnings(max(x$y)))
      ypos <- max(unlist(ypos[!mapply(is.infinite, ypos)]))

      Mat5$start <- ex_base_tab_sub[match(Mat5$start, x), x2]
      Mat6$start <- ex_base_tab_sub[match(Mat6$start, x), x2]
      Mat5$end <- ex_base_tab_sub[match(Mat5$end, x), x2]
      Mat6$end <- ex_base_tab_sub[match(Mat6$end, x), x2]

      x_wrong <- c(ex_base_tab_sub[bin == min(bin), round(mean(x2))], ex_base_tab_sub[bin == max(bin), round(mean(x2))])
      x_corect <- ex_base_tab_sub[match(x_wrong, x2), x]

      if(is.null(highlight.region)) { # No highlight.region
        p4 +
          geom_curve(data = Mat5, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature) +
          geom_curve(data = Mat6, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature, color = 2) +
          scale_x_continuous(expand = c(0, 0), breaks = x_wrong, labels = x_corect) +
          ggbio::theme_clear() +
          theme(axis.title = element_blank(),
                axis.line.y = element_blank(),
                axis.ticks.y = element_blank(),
                axis.line.x = element_blank(),
                axis.ticks.x = element_blank(),
                axis.text.x = element_blank()) +
          scale_y_continuous(breaks = seq_along(geneModul), labels = names(geneModul), limits = c(0, ypos * 1.2)) -> plast

      } else { # Add highlight.region

        start(hlgr) <- ex_base_tab_sub[match(start(hlgr), x), x2]
        end(hlgr) <- ex_base_tab_sub[match(end(hlgr), x), x2]

        ypos_min <- lapply(ggplot_build(p4)$data, function(x) suppressWarnings(min(x$ymax)))
        ypos_min <- min(unlist(ypos_min[!mapply(is.infinite, ypos_min)]))

        p4 +
          geom_curve(data = Mat5, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature) +
          geom_curve(data = Mat6, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature, color = 2) +
          ggbio::theme_clear() +
          theme(axis.title = element_blank(),
                axis.line.y = element_blank(),
                axis.ticks.y = element_blank(),
                axis.line.x = element_blank(),
                axis.ticks.x = element_blank(),
                axis.text.x = element_blank()) +
          geom_rect(data = as.data.frame(hlgr), aes(xmin = start, xmax = end, ymin = ypos_min * 0.9, ymax = ypos * 1.02), fill = highlight.color) +
          annotate(geom = "text", x = start(hlgr) + 0.5 * width(hlgr), y = ypos_min * 0.6, label = highlight.label, size = highlight.label.size) +
          scale_y_continuous(expand = c(0, 0), breaks = seq_along(geneModul), labels = names(geneModul), limits = c(0, ypos * 1.2)) +
          scale_x_continuous(expand = c(0, 0), breaks = x_wrong, labels = x_corect) -> plast
      }
    }
  }

  if(removeTxid) {
    plast <- plast + theme(axis.text.y = element_blank())
  }

  if(returnData) {
    res <- tryCatch(list(plast, ex_base_tab_sub), error = function(e) plast)
  } else {
    res <- plast
  }
  return(res)
}
tangchao7498/ICAS documentation built on Jan. 28, 2021, 3:56 p.m.