R/sashimi_plot.R

Defines functions plot_sashimi .get_exons_juncs_to_plot .plot_gene_track .plot_junc .plot_mutation

Documented in .get_exons_juncs_to_plot plot_sashimi

#' Visualise RNA-seq data in a the form of a sashimi plot
#'
#' \code{plot_sashimi} plots the splicing events and coverage over a specific
#' transcript and/or genomic region of interest. Centres around ggplot
#' functions.
#'
#' @param gene_tx_to_plot gr. Currently only accepts an ensembl transcript id
#'   that is found in the ref input. In future, this may be extended to plot
#'   genes, however need to think about how to plot the gene_track
#' @param region_to_plot gr. Only plot exons and juncs that overlap this region
#'   and are connected to \code{gene_tx_to_plot}
#' @param junc_metadata gr. Junction details. Essentially must contain the
#'   co-ordinates of the junctions to plot and annotation of junctions by
#'   acceptor/donor using \code{annotate_junc_ref}
#' @param junc_counts_case num. Vector containing the counts of cases with same
#'   order and length as \code{junc_metadata}
#' @param junc_counts_ctrl num. Vector containing the counts of ctrls same order
#'   and length as \code{junc_metadata}
#' @param coverage_case chr. Path to the bw object of case. Leave as NULL if you
#'   don't want this to be plotted.
#' @param ref gr. gtf annotation imported through \code{rtracklayer}.
#'
#' @return ggplot(s) displaying the splicing and coverage surrounding the
#'   transcript/region of interest.
#'
#' @export
plot_sashimi <- function(gene_tx_to_plot, region_to_plot = NULL, junc_metadata, junc_counts_case, mutation = NULL, junc_counts_ctrl = NULL,
                         coverage_case = NULL, coverage_ctrl = NULL, ref, add_junc_label = F, binwidth = 50){

  ##### Obtain the co-ordinates to plot #####

  exons_juncs_to_plot <- .get_exons_juncs_to_plot(gene_tx_to_plot, region_to_plot, junc_metadata, junc_counts_case, junc_counts_ctrl, ref)
  junc_metadata_to_plot <- junc_metadata[exons_juncs_to_plot$junc_indexes]

  chromosome_to_plot <- exons_juncs_to_plot$ref_exons %>% GenomicRanges::seqnames() %>% as.vector() %>% unique()
  strand_to_plot <- exons_juncs_to_plot$ref_exons %>% GenomicRanges::strand() %>% as.vector() %>% unique()
  gene_name_to_plot <- exons_juncs_to_plot$gene_name_to_plot

  min_start_coord <- c(GenomicRanges::start(exons_juncs_to_plot$ref_exons), GenomicRanges::start(junc_metadata_to_plot)) %>% min()
  max_end_coord <- c(GenomicRanges::end(exons_juncs_to_plot$ref_exons), GenomicRanges::end(junc_metadata_to_plot)) %>% max()
  range_start_end <- max_end_coord - min_start_coord
  xmin <- min_start_coord - range_start_end/30
  xmax <- max_end_coord + range_start_end/30

  ##### Plot exons and junctions #####

  gene_track_plot <- .plot_gene_track(ref_exons_to_plot = exons_juncs_to_plot$ref_exons,
                                      gene_tx_start = exons_juncs_to_plot$gene_tx_start,
                                      gene_tx_end = exons_juncs_to_plot$gene_tx_end,
                                      chromosome = chromosome_to_plot,
                                      xmin, xmax,
                                      add_strand = strand_to_plot)

  sashimi_plot_list <- list()

  # plot junctions for case
  sashimi_plot_list[[1]] <- .plot_junc(junc_metadata_to_plot = junc_metadata_to_plot,
                                       counts = junc_counts_case[exons_juncs_to_plot$junc_indexes],
                                       plot = gene_track_plot,
                                       colour = ggpubr::get_palette("jco", 1),
                                       add_junc_label = add_junc_label)

  # add mutation annotation for case
  if(!is.null(mutation)){

    sashimi_plot_list[[1]] <- .plot_mutation(plot = sashimi_plot_list[[1]], mutation = mutation)

  }

  # plot junctions for ctrl
  if(!is.null(junc_counts_ctrl)){

    sashimi_plot_list[[1]] <- sashimi_plot_list[[1]] +
      ylab("Case") +
      theme(axis.title.y = element_text(colour = "black", face = "bold", angle = 90))

    sashimi_plot_list[[2]] <- .plot_junc(junc_metadata_to_plot = junc_metadata_to_plot,
                                         counts = junc_counts_ctrl[exons_juncs_to_plot$junc_indexes],
                                         plot = gene_track_plot,
                                         colour = "#DC0000",
                                         add_junc_label = add_junc_label) +
      ylab("Control") +
      theme(axis.title.y = element_text(colour = "black", face = "bold", angle = 90))

  }

  # arrange plots into panels
  sashimi_plot <- ggpubr::ggarrange(plotlist = sashimi_plot_list,
                                    ncol = 1,
                                    nrow = length(sashimi_plot_list),
                                    common.legend = T,
                                    align = "v",
                                    legend = "bottom")

  ##### Plot coverage #####

  if(!is.null(coverage_case)){

    coverage_plot <-
    plot_coverage(coverage_case = coverage_case,
                  coverage_ctrl = coverage_ctrl,
                  region_to_plot = GenomicRanges::GRanges(stringr::str_c(chromosome_to_plot, ":", xmin, "-", xmax)),
                  binwidth = binwidth) +
      theme(axis.title.x = element_blank())

    coverage_plot_leg <- ggpubr::get_legend(coverage_plot)
    sashimi_plot_plot_leg <- ggpubr::get_legend(sashimi_plot_list[[1]])

    # arrange plots into panels
    sashimi_plot <- ggpubr::ggarrange(plotlist = c(list(coverage_plot), sashimi_plot_list),
                                      ncol = 1,
                                      nrow = length(c(list(coverage_plot), sashimi_plot_list)),
                                      align = "v",
                                      legend = "none",
                                      heights = c(1, rep(1.25, length(sashimi_plot_list))))

    # add legends
    sashimi_plot <- ggpubr::ggarrange(plotlist = list(coverage_plot_leg, sashimi_plot, sashimi_plot_plot_leg),
                                      ncol = 1,
                                      nrow = 3,
                                      heights = c(0.75, 10, 0.75))

  }

  # Add annotation
  sashimi_plot <- sashimi_plot %>%
    ggpubr::annotate_figure(top = ggpubr::text_grob(stringr::str_c("Chromosome ", chromosome_to_plot, ", ",
                                                                   ifelse(gene_name_to_plot == gene_tx_to_plot,
                                                                          gene_name_to_plot,
                                                                          stringr::str_c(gene_name_to_plot, ", ", gene_tx_to_plot)),
                                                                   ", strand: ", strand_to_plot),
                                                    x = 0.98, face = "italic", size = 10, just = "right"))

  return(sashimi_plot)

}

#' Obtain the details of the exons/junctions to plot
#'
#' \code{.get_exons_juncs_to_plot} plots the splicing events and coverage over a specific
#' transcript and/or genomic region of interest. Centres around ggplot
#' functions.
#'
#' @inheritParams plot_sashimi
#'
#' @return
#'
#' @export
.get_exons_juncs_to_plot <- function(gene_tx_to_plot, region_to_plot, junc_metadata, junc_counts_case, junc_counts_ctrl, ref){

  # obtain the exons and juncs connected to that transcript
  ref_exons_to_plot <- ref[ref$type == "exon"]

  if(stringr::str_detect(gene_tx_to_plot, "ENST")){

    col_to_filter <- "transcript_id"

  }else if(stringr::str_detect(gene_tx_to_plot, "ENSG")){

    col_to_filter <- "gene_id"

  }else{

    col_to_filter <- "gene_name"

  }

  ref_exons_to_plot <- ref[ref$type == "exon" & GenomicRanges::mcols(ref)[[col_to_filter]] == gene_tx_to_plot]
  gene_name_to_plot <- ref_exons_to_plot$gene_name %>% unique()
  ref_exons_to_plot <- ref_exons_to_plot %>%  GenomicRanges::disjoin()

  # currently the any() used here may be too liberal, especially for overlapping genes
  # but may be okay, since juncs need to precisely match the exon boundary
  junc_indexes_to_plot <-
    which(any(GenomicRanges::mcols(junc_metadata)[[stringr::str_c(col_to_filter, "_start")]] == gene_tx_to_plot) |
            any(GenomicRanges::mcols(junc_metadata)[[stringr::str_c(col_to_filter, "_end")]] == gene_tx_to_plot))

  gene_tx_start <- ref_exons_to_plot %>% GenomicRanges::start() %>% min()
  gene_tx_end <- ref_exons_to_plot %>% GenomicRanges::end() %>% max()

  # if a region inputted, find only juncs and exons overlapping that region
  if(!is.null(region_to_plot)){

    if(length(region_to_plot) != 1) stop("region_to_plot must be 1 detail genomic range...")

    overlapping_exons <- GenomicRanges::findOverlaps(query = ref_exons_to_plot, subject = region_to_plot)
    ref_exons_to_plot <- ref_exons_to_plot[S4Vectors::queryHits(overlapping_exons)]

    overlapping_juncs <- GenomicRanges::findOverlaps(query = junc_metadata, subject = region_to_plot)
    junc_indexes_to_plot <- junc_indexes_to_plot[junc_indexes_to_plot %in% S4Vectors::queryHits(overlapping_juncs)]

  }

  # only taking the junc that are expressed with at least > 0 in either the case OR ctrl
  if(!is.null(junc_counts_ctrl)){

    junc_indexes_count_filter <- which(round(junc_counts_case, 2) > 0 | round(junc_counts_ctrl, 2) > 0)

  }else{

    junc_indexes_count_filter <- which(round(junc_counts_case, 2) > 0)

  }

  junc_indexes_to_plot <-
    junc_indexes_to_plot[junc_indexes_to_plot %in% junc_indexes_count_filter]

  if(length(ref_exons_to_plot) == 0) stop("no exons found to plot...")
  if(length(junc_indexes_to_plot) == 0) stop("no junctions found to plot...")

  exons_juncs_to_plot <-
    list(ref_exons = ref_exons_to_plot,
         gene_name_to_plot = gene_name_to_plot,
         gene_tx_start = gene_tx_start,
         gene_tx_end = gene_tx_end,
         junc_indexes = junc_indexes_to_plot)

  return(exons_juncs_to_plot)

}

.plot_gene_track <- function(ref_exons_to_plot, gene_tx_start, gene_tx_end, chromosome, xmin, xmax, add_strand = NULL){

  ref_exons_to_plot_df <-
    ref_exons_to_plot %>%
    GenomicRanges::ranges() %>%
    as.data.frame()

  # extend line to the end of the plot if transcript end falls outside xmin/xmax
  segment_x_start <- ifelse(gene_tx_start < xmin, xmin, gene_tx_start)
  segment_x_end <- ifelse(gene_tx_end > xmax, xmax, gene_tx_end)

  gene_track <-
    ggplot() +
    geom_segment(aes(x = segment_x_start, xend = segment_x_end,
                     y = 0, yend = 0),
                 size = 2) +
    geom_rect(data = ref_exons_to_plot_df,
              aes(xmin = start, xmax = end,
                  ymin = -0.25, ymax = 0.25),
              colour = "black", , fill = ggpubr::get_palette("Greys", 10)[4]) +
    scale_y_continuous(limits = c(-1, 1)) +
    scale_x_continuous(name = stringr::str_c("Chromosome ", chromosome),
                       limits = c(xmin, xmax)) +
    ggpubr::theme_pubclean(flip = T) +
    theme(axis.line.y = element_blank(),
          axis.ticks = element_blank(),
          axis.text.y = element_blank(),
          axis.title.y = element_blank())

  if(!is.null(add_strand)){

    gene_track <- gene_track +
      geom_segment(aes(x = ifelse(add_strand == "+",
                                  segment_x_start,
                                  segment_x_end),
                       xend = ifelse(add_strand == "+",
                                     segment_x_start + (segment_x_end - segment_x_start)/30,
                                     segment_x_end - (segment_x_end - segment_x_start)/30),
                       y = 0.65, yend = 0.65),
                   size = 0.75, arrow = arrow(length = unit(0.3, units = "cm")))

  }

  return(gene_track)

}

.plot_junc <- function(junc_metadata_to_plot, counts, plot, colour, ymin = -0.5, ymax = 0.5, ncp = 25,
                       add_junc_label = T){

  # filter for juncs with count > 1, otherwise these have a geom_label but with no geom_path
  # add a junc_id to group juncs for plotting
  junc_metadata_to_plot <- junc_metadata_to_plot %>%
    as.data.frame() %>%
    dplyr::mutate(count = round(counts, 2), # for better vis, does mean e.g. 0.004 will be rounded to 0 and filtered
                  junc_id = dplyr::row_number())

  # using the function from grid, we calculate the points to plot the curve for each junction
  # without this (using geom_curve), there's no way of knowing the midpoint of y to add a label
  junc_ctrl_points <- grid:::calcControlPoints(x1 = junc_metadata_to_plot$start, x2 = junc_metadata_to_plot$end,
                                               y1 = (ymax + ymin)/2, y2 = (ymax + ymin)/2,
                                               angle = 90, curvature = -0.5,
                                               ncp = ncp) # how many ctrl points per junc?

  # format ctrl points, they come in an unnamed vector of x and y points
  # so let's add a junc_id to these
  junc_ctrl_points <-
    dplyr::tibble(pos = junc_ctrl_points$x,
                  y = junc_ctrl_points$y,
                  junc_id = junc_metadata_to_plot$junc_id %>%
                    rep(times = ncp) %>% # repeat these indexes the same number of ctrl points
                    sort())

  # add the intial start/end points, since these are not return from the ctrl points
  junc_ctrl_points <-
    dplyr::tibble(pos = c(junc_metadata_to_plot$start, junc_metadata_to_plot$end),
                  y = (ymax + ymin)/2, # start from mid of exons
                  junc_id = rep(junc_metadata_to_plot$junc_id, 2)) %>%
    dplyr::bind_rows(junc_ctrl_points)

  # mark the midpoints to label with the count
  .mark_mid <- function(x){

    mid <- min(x) + (max(x) - min(x))/2

    # the point with the lowest absolute diff from the mid
    mid_index <- which.min(abs(x - mid))

    mid_marked <- logical(length = length(x))
    mid_marked[mid_index] <- TRUE

    return(mid_marked)

  }

  junc_ctrl_points <-
    junc_ctrl_points %>%
    dplyr::group_by(junc_id) %>%
    dplyr::mutate(mid = .mark_mid(pos)) %>%
    dplyr::ungroup() %>%
    mutate(y = y/max(y), # normalise y so values should always sit between -1 and 1
           y = ifelse(junc_id %% 2 == 0, -y, y)) # neg even juncs to be plotted on bottom

  # also join with the the counts for label and size
  # use annotation of acceptor/donor for colour if it's available
  use_annot_ref <- "annot_ref" %in% colnames(junc_metadata_to_plot)

  junc_ctrl_points <- junc_ctrl_points %>%
    dplyr::left_join(junc_metadata_to_plot %>%
                       dplyr::select(one_of(c("junc_id", "count", if(use_annot_ref) "annot_ref" else NULL))),
                     by = "junc_id") %>%
    dplyr::mutate(linetype = ifelse(count == 0, 2, 1)) %>% # make the 0 count lines dashed
    arrange(junc_id, pos)

  if(use_annot_ref){

    junc_ctrl_points <- junc_ctrl_points %>%
      dplyr::mutate(annot_ref = annot_ref %>% factor(levels = c("annotated", "exon_skip", "novel_acceptor", "novel_donor", "none")))

  }

  sashimi_plot <- plot +
    geom_path(data = junc_ctrl_points,
              aes(x = pos, y = y,
                  group = as.factor(junc_id),
                  size = count,
                  colour = if(use_annot_ref) junc_ctrl_points$annot_ref else "arb_colour_cat"),
              lineend = "round",
              linetype = junc_ctrl_points$linetype) +
    scale_size_continuous(range = c(0.1, 2), # not starting from 0 so even 0 count plots will be plotted
                          limits = c(0, 1),
                          guide = "none") +
    scale_colour_manual(name = "Acceptor/donor annotation",
                        values = if(use_annot_ref) c("#0073C2", "#ffa500", "#DC0000", "#00A087", "grey") else colour,
                        drop = F) +
    theme(legend.key = element_rect(colour = "black", fill = "white"),
          axis.title.x = element_blank())

  if(add_junc_label){

    sashimi_plot <- sashimi_plot +
      ggrepel::geom_label_repel(data = junc_ctrl_points %>% dplyr::filter(mid),
                                aes(x = pos, y = y,
                                    label = count,
                                    colour = if(use_annot_ref) junc_metadata_to_plot$annot_ref else "arb_colour_cat"),
                                min.segment.length = 0, # always show line between label and junction
                                seed = 32,
                                show.legend = F,
                                size = 3.5)

  }

  return(sashimi_plot)

}

.plot_mutation <- function(plot, mutation){

  plot <- plot +
    geom_point(aes(x = GenomicRanges::start(mutation), y = 0),
               colour = "red", shape = 4, size = 4)
    # geom_segment(aes(x = GenomicRanges::start(mutation),
    #                  xend = GenomicRanges::start(mutation),
    #                  y = 1,
    #                  yend = 0),
    #              colour = "red", linetype = 2)

  return(plot)

}
dzhang32/RNAdiagnosR documentation built on Dec. 5, 2019, 2 a.m.