#' Visualise RNA-seq data in a the form of a sashimi plot
#'
#' \code{plot_sashimi} plots the splicing events and coverage over a specific
#' transcript and/or genomic region of interest. Centres around ggplot
#' functions.
#'
#' @param gene_tx_to_plot gr. Currently only accepts an ensembl transcript id
#' that is found in the ref input. In future, this may be extended to plot
#' genes, however need to think about how to plot the gene_track
#' @param region_to_plot gr. Only plot exons and juncs that overlap this region
#' and are connected to \code{gene_tx_to_plot}
#' @param junc_metadata gr. Junction details. Essentially must contain the
#' co-ordinates of the junctions to plot and annotation of junctions by
#' acceptor/donor using \code{annotate_junc_ref}
#' @param junc_counts_case num. Vector containing the counts of cases with same
#' order and length as \code{junc_metadata}
#' @param junc_counts_ctrl num. Vector containing the counts of ctrls same order
#' and length as \code{junc_metadata}
#' @param coverage_case chr. Path to the bw object of case. Leave as NULL if you
#' don't want this to be plotted.
#' @param ref gr. gtf annotation imported through \code{rtracklayer}.
#'
#' @return ggplot(s) displaying the splicing and coverage surrounding the
#' transcript/region of interest.
#'
#' @export
plot_sashimi <- function(gene_tx_to_plot, region_to_plot = NULL, junc_metadata, junc_counts_case, mutation = NULL, junc_counts_ctrl = NULL,
coverage_case = NULL, coverage_ctrl = NULL, ref, add_junc_label = F, binwidth = 50){
##### Obtain the co-ordinates to plot #####
exons_juncs_to_plot <- .get_exons_juncs_to_plot(gene_tx_to_plot, region_to_plot, junc_metadata, junc_counts_case, junc_counts_ctrl, ref)
junc_metadata_to_plot <- junc_metadata[exons_juncs_to_plot$junc_indexes]
chromosome_to_plot <- exons_juncs_to_plot$ref_exons %>% GenomicRanges::seqnames() %>% as.vector() %>% unique()
strand_to_plot <- exons_juncs_to_plot$ref_exons %>% GenomicRanges::strand() %>% as.vector() %>% unique()
gene_name_to_plot <- exons_juncs_to_plot$gene_name_to_plot
min_start_coord <- c(GenomicRanges::start(exons_juncs_to_plot$ref_exons), GenomicRanges::start(junc_metadata_to_plot)) %>% min()
max_end_coord <- c(GenomicRanges::end(exons_juncs_to_plot$ref_exons), GenomicRanges::end(junc_metadata_to_plot)) %>% max()
range_start_end <- max_end_coord - min_start_coord
xmin <- min_start_coord - range_start_end/30
xmax <- max_end_coord + range_start_end/30
##### Plot exons and junctions #####
gene_track_plot <- .plot_gene_track(ref_exons_to_plot = exons_juncs_to_plot$ref_exons,
gene_tx_start = exons_juncs_to_plot$gene_tx_start,
gene_tx_end = exons_juncs_to_plot$gene_tx_end,
chromosome = chromosome_to_plot,
xmin, xmax,
add_strand = strand_to_plot)
sashimi_plot_list <- list()
# plot junctions for case
sashimi_plot_list[[1]] <- .plot_junc(junc_metadata_to_plot = junc_metadata_to_plot,
counts = junc_counts_case[exons_juncs_to_plot$junc_indexes],
plot = gene_track_plot,
colour = ggpubr::get_palette("jco", 1),
add_junc_label = add_junc_label)
# add mutation annotation for case
if(!is.null(mutation)){
sashimi_plot_list[[1]] <- .plot_mutation(plot = sashimi_plot_list[[1]], mutation = mutation)
}
# plot junctions for ctrl
if(!is.null(junc_counts_ctrl)){
sashimi_plot_list[[1]] <- sashimi_plot_list[[1]] +
ylab("Case") +
theme(axis.title.y = element_text(colour = "black", face = "bold", angle = 90))
sashimi_plot_list[[2]] <- .plot_junc(junc_metadata_to_plot = junc_metadata_to_plot,
counts = junc_counts_ctrl[exons_juncs_to_plot$junc_indexes],
plot = gene_track_plot,
colour = "#DC0000",
add_junc_label = add_junc_label) +
ylab("Control") +
theme(axis.title.y = element_text(colour = "black", face = "bold", angle = 90))
}
# arrange plots into panels
sashimi_plot <- ggpubr::ggarrange(plotlist = sashimi_plot_list,
ncol = 1,
nrow = length(sashimi_plot_list),
common.legend = T,
align = "v",
legend = "bottom")
##### Plot coverage #####
if(!is.null(coverage_case)){
coverage_plot <-
plot_coverage(coverage_case = coverage_case,
coverage_ctrl = coverage_ctrl,
region_to_plot = GenomicRanges::GRanges(stringr::str_c(chromosome_to_plot, ":", xmin, "-", xmax)),
binwidth = binwidth) +
theme(axis.title.x = element_blank())
coverage_plot_leg <- ggpubr::get_legend(coverage_plot)
sashimi_plot_plot_leg <- ggpubr::get_legend(sashimi_plot_list[[1]])
# arrange plots into panels
sashimi_plot <- ggpubr::ggarrange(plotlist = c(list(coverage_plot), sashimi_plot_list),
ncol = 1,
nrow = length(c(list(coverage_plot), sashimi_plot_list)),
align = "v",
legend = "none",
heights = c(1, rep(1.25, length(sashimi_plot_list))))
# add legends
sashimi_plot <- ggpubr::ggarrange(plotlist = list(coverage_plot_leg, sashimi_plot, sashimi_plot_plot_leg),
ncol = 1,
nrow = 3,
heights = c(0.75, 10, 0.75))
}
# Add annotation
sashimi_plot <- sashimi_plot %>%
ggpubr::annotate_figure(top = ggpubr::text_grob(stringr::str_c("Chromosome ", chromosome_to_plot, ", ",
ifelse(gene_name_to_plot == gene_tx_to_plot,
gene_name_to_plot,
stringr::str_c(gene_name_to_plot, ", ", gene_tx_to_plot)),
", strand: ", strand_to_plot),
x = 0.98, face = "italic", size = 10, just = "right"))
return(sashimi_plot)
}
#' Obtain the details of the exons/junctions to plot
#'
#' \code{.get_exons_juncs_to_plot} plots the splicing events and coverage over a specific
#' transcript and/or genomic region of interest. Centres around ggplot
#' functions.
#'
#' @inheritParams plot_sashimi
#'
#' @return
#'
#' @export
.get_exons_juncs_to_plot <- function(gene_tx_to_plot, region_to_plot, junc_metadata, junc_counts_case, junc_counts_ctrl, ref){
# obtain the exons and juncs connected to that transcript
ref_exons_to_plot <- ref[ref$type == "exon"]
if(stringr::str_detect(gene_tx_to_plot, "ENST")){
col_to_filter <- "transcript_id"
}else if(stringr::str_detect(gene_tx_to_plot, "ENSG")){
col_to_filter <- "gene_id"
}else{
col_to_filter <- "gene_name"
}
ref_exons_to_plot <- ref[ref$type == "exon" & GenomicRanges::mcols(ref)[[col_to_filter]] == gene_tx_to_plot]
gene_name_to_plot <- ref_exons_to_plot$gene_name %>% unique()
ref_exons_to_plot <- ref_exons_to_plot %>% GenomicRanges::disjoin()
# currently the any() used here may be too liberal, especially for overlapping genes
# but may be okay, since juncs need to precisely match the exon boundary
junc_indexes_to_plot <-
which(any(GenomicRanges::mcols(junc_metadata)[[stringr::str_c(col_to_filter, "_start")]] == gene_tx_to_plot) |
any(GenomicRanges::mcols(junc_metadata)[[stringr::str_c(col_to_filter, "_end")]] == gene_tx_to_plot))
gene_tx_start <- ref_exons_to_plot %>% GenomicRanges::start() %>% min()
gene_tx_end <- ref_exons_to_plot %>% GenomicRanges::end() %>% max()
# if a region inputted, find only juncs and exons overlapping that region
if(!is.null(region_to_plot)){
if(length(region_to_plot) != 1) stop("region_to_plot must be 1 detail genomic range...")
overlapping_exons <- GenomicRanges::findOverlaps(query = ref_exons_to_plot, subject = region_to_plot)
ref_exons_to_plot <- ref_exons_to_plot[S4Vectors::queryHits(overlapping_exons)]
overlapping_juncs <- GenomicRanges::findOverlaps(query = junc_metadata, subject = region_to_plot)
junc_indexes_to_plot <- junc_indexes_to_plot[junc_indexes_to_plot %in% S4Vectors::queryHits(overlapping_juncs)]
}
# only taking the junc that are expressed with at least > 0 in either the case OR ctrl
if(!is.null(junc_counts_ctrl)){
junc_indexes_count_filter <- which(round(junc_counts_case, 2) > 0 | round(junc_counts_ctrl, 2) > 0)
}else{
junc_indexes_count_filter <- which(round(junc_counts_case, 2) > 0)
}
junc_indexes_to_plot <-
junc_indexes_to_plot[junc_indexes_to_plot %in% junc_indexes_count_filter]
if(length(ref_exons_to_plot) == 0) stop("no exons found to plot...")
if(length(junc_indexes_to_plot) == 0) stop("no junctions found to plot...")
exons_juncs_to_plot <-
list(ref_exons = ref_exons_to_plot,
gene_name_to_plot = gene_name_to_plot,
gene_tx_start = gene_tx_start,
gene_tx_end = gene_tx_end,
junc_indexes = junc_indexes_to_plot)
return(exons_juncs_to_plot)
}
.plot_gene_track <- function(ref_exons_to_plot, gene_tx_start, gene_tx_end, chromosome, xmin, xmax, add_strand = NULL){
ref_exons_to_plot_df <-
ref_exons_to_plot %>%
GenomicRanges::ranges() %>%
as.data.frame()
# extend line to the end of the plot if transcript end falls outside xmin/xmax
segment_x_start <- ifelse(gene_tx_start < xmin, xmin, gene_tx_start)
segment_x_end <- ifelse(gene_tx_end > xmax, xmax, gene_tx_end)
gene_track <-
ggplot() +
geom_segment(aes(x = segment_x_start, xend = segment_x_end,
y = 0, yend = 0),
size = 2) +
geom_rect(data = ref_exons_to_plot_df,
aes(xmin = start, xmax = end,
ymin = -0.25, ymax = 0.25),
colour = "black", , fill = ggpubr::get_palette("Greys", 10)[4]) +
scale_y_continuous(limits = c(-1, 1)) +
scale_x_continuous(name = stringr::str_c("Chromosome ", chromosome),
limits = c(xmin, xmax)) +
ggpubr::theme_pubclean(flip = T) +
theme(axis.line.y = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank(),
axis.title.y = element_blank())
if(!is.null(add_strand)){
gene_track <- gene_track +
geom_segment(aes(x = ifelse(add_strand == "+",
segment_x_start,
segment_x_end),
xend = ifelse(add_strand == "+",
segment_x_start + (segment_x_end - segment_x_start)/30,
segment_x_end - (segment_x_end - segment_x_start)/30),
y = 0.65, yend = 0.65),
size = 0.75, arrow = arrow(length = unit(0.3, units = "cm")))
}
return(gene_track)
}
.plot_junc <- function(junc_metadata_to_plot, counts, plot, colour, ymin = -0.5, ymax = 0.5, ncp = 25,
add_junc_label = T){
# filter for juncs with count > 1, otherwise these have a geom_label but with no geom_path
# add a junc_id to group juncs for plotting
junc_metadata_to_plot <- junc_metadata_to_plot %>%
as.data.frame() %>%
dplyr::mutate(count = round(counts, 2), # for better vis, does mean e.g. 0.004 will be rounded to 0 and filtered
junc_id = dplyr::row_number())
# using the function from grid, we calculate the points to plot the curve for each junction
# without this (using geom_curve), there's no way of knowing the midpoint of y to add a label
junc_ctrl_points <- grid:::calcControlPoints(x1 = junc_metadata_to_plot$start, x2 = junc_metadata_to_plot$end,
y1 = (ymax + ymin)/2, y2 = (ymax + ymin)/2,
angle = 90, curvature = -0.5,
ncp = ncp) # how many ctrl points per junc?
# format ctrl points, they come in an unnamed vector of x and y points
# so let's add a junc_id to these
junc_ctrl_points <-
dplyr::tibble(pos = junc_ctrl_points$x,
y = junc_ctrl_points$y,
junc_id = junc_metadata_to_plot$junc_id %>%
rep(times = ncp) %>% # repeat these indexes the same number of ctrl points
sort())
# add the intial start/end points, since these are not return from the ctrl points
junc_ctrl_points <-
dplyr::tibble(pos = c(junc_metadata_to_plot$start, junc_metadata_to_plot$end),
y = (ymax + ymin)/2, # start from mid of exons
junc_id = rep(junc_metadata_to_plot$junc_id, 2)) %>%
dplyr::bind_rows(junc_ctrl_points)
# mark the midpoints to label with the count
.mark_mid <- function(x){
mid <- min(x) + (max(x) - min(x))/2
# the point with the lowest absolute diff from the mid
mid_index <- which.min(abs(x - mid))
mid_marked <- logical(length = length(x))
mid_marked[mid_index] <- TRUE
return(mid_marked)
}
junc_ctrl_points <-
junc_ctrl_points %>%
dplyr::group_by(junc_id) %>%
dplyr::mutate(mid = .mark_mid(pos)) %>%
dplyr::ungroup() %>%
mutate(y = y/max(y), # normalise y so values should always sit between -1 and 1
y = ifelse(junc_id %% 2 == 0, -y, y)) # neg even juncs to be plotted on bottom
# also join with the the counts for label and size
# use annotation of acceptor/donor for colour if it's available
use_annot_ref <- "annot_ref" %in% colnames(junc_metadata_to_plot)
junc_ctrl_points <- junc_ctrl_points %>%
dplyr::left_join(junc_metadata_to_plot %>%
dplyr::select(one_of(c("junc_id", "count", if(use_annot_ref) "annot_ref" else NULL))),
by = "junc_id") %>%
dplyr::mutate(linetype = ifelse(count == 0, 2, 1)) %>% # make the 0 count lines dashed
arrange(junc_id, pos)
if(use_annot_ref){
junc_ctrl_points <- junc_ctrl_points %>%
dplyr::mutate(annot_ref = annot_ref %>% factor(levels = c("annotated", "exon_skip", "novel_acceptor", "novel_donor", "none")))
}
sashimi_plot <- plot +
geom_path(data = junc_ctrl_points,
aes(x = pos, y = y,
group = as.factor(junc_id),
size = count,
colour = if(use_annot_ref) junc_ctrl_points$annot_ref else "arb_colour_cat"),
lineend = "round",
linetype = junc_ctrl_points$linetype) +
scale_size_continuous(range = c(0.1, 2), # not starting from 0 so even 0 count plots will be plotted
limits = c(0, 1),
guide = "none") +
scale_colour_manual(name = "Acceptor/donor annotation",
values = if(use_annot_ref) c("#0073C2", "#ffa500", "#DC0000", "#00A087", "grey") else colour,
drop = F) +
theme(legend.key = element_rect(colour = "black", fill = "white"),
axis.title.x = element_blank())
if(add_junc_label){
sashimi_plot <- sashimi_plot +
ggrepel::geom_label_repel(data = junc_ctrl_points %>% dplyr::filter(mid),
aes(x = pos, y = y,
label = count,
colour = if(use_annot_ref) junc_metadata_to_plot$annot_ref else "arb_colour_cat"),
min.segment.length = 0, # always show line between label and junction
seed = 32,
show.legend = F,
size = 3.5)
}
return(sashimi_plot)
}
.plot_mutation <- function(plot, mutation){
plot <- plot +
geom_point(aes(x = GenomicRanges::start(mutation), y = 0),
colour = "red", shape = 4, size = 4)
# geom_segment(aes(x = GenomicRanges::start(mutation),
# xend = GenomicRanges::start(mutation),
# y = 1,
# yend = 0),
# colour = "red", linetype = 2)
return(plot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.