# Exported wrapper functions to plot coverage plots from command line:
#' RNA-seq Coverage Plots and Genome Tracks
#'
#' Generate plotly / ggplot RNA-seq genome and coverage plots from command line.
#' Note that these are legacy functions. More expansive functionality is
#' available using [getCoverageData] / [getPlotObject] / [plotView] functions.
#'
#' @details
#' In RNA sequencing, alignments to spliced transcripts will "skip" over genomic
#' regions of introns. This can be illustrated in a plot using a horizontal
#' genomic axis, with the vertical axis representing the number of alignments
#' covering each nucleotide. As a result, the coverage "hills" represent the
#' expression of exons, and "valleys" to introns.
#'
#' Different alternatively-spliced isoforms thus produce different coverage
#' patterns. The change in the coverage across an alternate exon relative to its
#' constitutively-included flanking exons, for example, represents its
#' alternative inclusion or skipping. Similarly, elevation of intron
#' valleys represent increased intron retention.
#'
#' With multiple replicates per sample, coverage is dependent on
#' library size and gene expression. To compare
#' alternative splicing ratios, normalisation of the coverage of the alternate
#' exon (or alternatively retained intron) relative to their constitutive
#' flanking exons, is required. There is no established method for this
#' normalisation, and can be confounded in situations where flanking elements
#' are themselves alternatively spliced.
#'
#' SpliceWiz performs this coverage normalisation using the same method as its
#' estimate of spliced / intronic transcript abundance using the `SpliceOver`
#' method (see details section in [collateData]). This normalisation can be
#' applied to correct for library size and gene expression differences between
#' samples of the same experimental condition. After normalisation, mean and
#' variance of coverage can be computed as ratios relative to total transcript
#' abundance. This method can visualise alternatively included genomic regions
#' including casette exons, alternate splice site usage, and intron retention.
#'
#' `plotCoverage` generates plots showing depth of alignments to
#' the genomic axis. Plots can be generated for individual samples or samples
#' grouped by experimental conditions. In the latter, mean and 95% confidence
#' intervals are shown.
#'
#' `plotGenome` generates genome transcript tracks only. Protein-coding
#' regions are denoted by thick rectangles, whereas non-protein coding
#' transcripts or untranslated regions are denoted with thin rectangles.
#' Introns are denoted as lines.
#'
#' @param se A \linkS4class{NxtSE} object, created by [makeSE].
#' COV files must be linked to the NxtSE object. To do this, see the example
#' in [makeSE]. Required by `plotCoverage`. Not required by `plotGenome` if
#' `reference_path` is supplied.
#' @param reference_path The path of the reference generated by
#' [Build-Reference-methods]. Required by `plotGenome` if a
#' \linkS4class{NxtSE} object is not specified.
#' @param Event The `EventName` of the IR / alternative splicing event to be
#' displayed. Use `rownames(se)` to display a list of valid events.
#' @param Gene Whether to use the range for the given `Gene`. If given,
#' overrides `Event` (but `Event` or `norm_event` will be used to normalise by
#' condition). Valid `Gene` entries include gene_id (Ensembl ID) or gene_name
#' (Gene Symbol).
#' @param seqname,start,end The chromosome (string) and genomic `start/end`
#' coordinates (numeric) of the region to display. If present, overrides both
#' `Event` and `Gene`. E.g. for a given region of chr1:10000-11000,
#' use the parameters: `seqname = "chr1", start = 10000, end = 11000`
#' @param coordinates A string specifying genomic coordinates can be given
#' instead of `seqname,start,end`. Must be of the format "chr:start-end", e.g.
#' "chr1:10000-11000"
#' @param strand Whether to show coverage of both strands "*" (default), or
#' from the "+" or "-" strand only.
#' @param zoom_factor Zoom out from event. Each level of zoom zooms out by a
#' factor of 3. E.g. for a query region of chr1:10000-11000, if a
#' `zoom_factor` of 1.0 is given, chr1:99000-12000 will be displayed.
#' @param bases_flanking (Default = `100`) How many bases flanking the zoomed
#' window. Useful when
#' used in conjunction with zoom_factor == 0. E.g. for a given region of
#' chr1:10000-11000, if `zoom_factor = 0` and `bases_flanking = 100`, the
#' region chr1:9900-11100 will be displayed.
#' @param tracks The names of individual samples,
#' or the names of the different conditions to be plotted. For the latter, set
#' `condition` to the specified condition category.
#' @param track_names The names of the tracks to be displayed. If omitted, the
#' track_names will default to the input in `tracks`
#' @param condition To display normalised coverage per condition, set this to
#' the condition category. If omitted, `tracks` are assumed to refer to the
#' names of individual samples.
#' @param ribbon_mode (default `"sd"`) Whether coverage ribbons signify
#' standard deviation `"sd"`, 95% confidence interval `"ci"`,
#' standard error of the mean `"sem"`,
#' or none `"none"`. Only applicable when `condition` is set.
#' @param reverseGenomeCoords (default `FALSE`) Whether to reverse the genomic
#' coordinates - helpful for intuitive plotting of negative-strand genes
#' @param selected_transcripts (Optional) A vector containing transcript ID
#' or transcript names of transcripts
#' to be displayed on the gene annotation track. Useful to remove minor
#' isoforms that are not relevant to the samples being displayed.
#' @param plotJunctions (default `FALSE`) If `TRUE`, sashimi plot junction arcs
#' are plotted. Currently only implemented for plots of individual samples.
#' @param junctionThreshold (default `0.01`) The threshold expression of
#' junction reads below which junction arcs will be omitted. This removes
#' cluttering of junction arcs from lowly-expressed (rare) junctions.
#' For individual
#' tracks, this is the fraction of coverage height. For by-condition
#' tracks, this is a PSI threshold.
#' @param plot_key_isoforms (default `FALSE`) If `TRUE`, only
#' transcripts involved in the selected `Event` or pair of `Event`s will be
#' displayed.
#' @param condense_tracks (default `FALSE`) Whether to collapse the
#' transcript track annotations by gene.
#' @param stack_tracks (default `FALSE`) Whether to graph all the conditions on
#' a single coverage track. If set to `TRUE`, each condition will be displayed
#' in a different colour on the same track. Ignored if `condition` is not set.
#' @param t_test (default `FALSE`) Whether to perform a pair-wise T-test.
#' Only used if there are TWO condition tracks.
#' @param norm_event Whether to normalise by an event different to that given
#' in "Event". The difference between this and Event is that the genomic
#' coordinates can be centered around a different `Event`, `Gene` or region
#' as given in `seqname/start/end`. If `norm_event` is different to
#' `Event`, `norm_event` will be used for normalisation and `Event` will be
#' used to define the genomic coordinates of the viewing window. `norm_event`
#' is required if `Event` is not set and `condition` is set.
#' @param usePlotly If `TRUE`, returns a `covPlotly` object containing the
#' plotly-based interactive plot. If `FALSE`, returns a ggplot object.
#'
#' @return
#' For `plotCoverage` and `plotGenome`:
#'
#' * If `usePlotly = FALSE` returns a patchwork-assembled static plot
#' * If `usePlotly = TRUE` returns a \linkS4class{covPlotly} object,
#' which generates a plotly interactive plot when shown using `show()`
#'
#' @examples
#' se <- SpliceWiz_example_NxtSE(novelSplicing = TRUE)
#'
#' # Assign annotation of the experimental conditions
#' colData(se)$treatment <- rep(c("A", "B"), each = 3)
#'
#' # Verify that the COV files are linked to the NxtSE object:
#' covfile(se)
#'
#' # Plot the genome track only, with specified gene:
#' plotGenome(se, Gene = "SRSF3")
#'
#' # View the genome track, specifying a genomic region via coordinates:
#' plotGenome(se, coordinates = "chrZ:10000-20000")
#'
#' # Return a list of ggplot and plotly objects, also plotting junction counts
#' plotCoverage(
#' se = se,
#' Event = "SE:SRSF3-203-exon4;SRSF3-202-int3",
#' tracks = colnames(se)[1:4], plotJunctions = TRUE
#' )
#'
#' # Plot the same, but as a plotly interactive plot
#'
#' if(interactive()) {
#' p <- plotCoverage(
#' se = se,
#' Event = "SE:SRSF3-203-exon4;SRSF3-202-int3",
#' tracks = colnames(se)[1:4], plotJunctions = TRUE,
#' usePlotly = TRUE
#' )
#' show(p)
#' }
#'
#' # Plot by condition "treatment", including provisional PSIs
#' plotCoverage(
#' se = se,
#' Event = "SE:SRSF3-203-exon4;SRSF3-202-int3",
#' tracks = c("A", "B"), condition = "treatment", plotJunctions = TRUE
#' )
#'
#' # As above, but stack all traces into the same track
#' # - NB: plotJunctions is disabled when `stack_tracks = TRUE`
#' plotCoverage(
#' se = se,
#' Event = "SE:SRSF3-203-exon4;SRSF3-202-int3",
#' tracks = c("A", "B"), condition = "treatment", stack_tracks = TRUE
#' )
#'
#' # Plot the above, but unstancked, and with t-test track
#' # - NB: plotJunctions is disabled when `stack_tracks = TRUE`
#' plotCoverage(
#' se = se,
#' Event = "SE:SRSF3-203-exon4;SRSF3-202-int3",
#' tracks = c("A", "B"), condition = "treatment", t_test = TRUE
#' )
#'
#' # Select only transcripts involved in the selected alternative splicing event
#' plotCoverage(
#' se = se,
#' Event = "SE:SRSF3-203-exon4;SRSF3-202-int3",
#' tracks = colnames(se)[1:4],
#' plot_key_isoforms = TRUE
#' )
#' @name plotCoverage
#' @md
NULL
#' @describeIn plotCoverage Legacy function - works by internally calling
#' getCoverageData(), getPlotObject(), then plotView()
#' @export
plotCoverage <- function(
se,
Event, Gene,
seqname, start, end, # Optional
coordinates,
strand = c("*", "+", "-"),
zoom_factor = 0.2, bases_flanking = 100,
tracks,
track_names = tracks,
condition,
ribbon_mode = c("sd", "ci", "sem", "none"),
selected_transcripts = "",
reverseGenomeCoords = FALSE,
plotJunctions = FALSE,
junctionThreshold = 0.01,
plot_key_isoforms = FALSE,
condense_tracks = FALSE,
stack_tracks = FALSE,
t_test = FALSE,
norm_event,
usePlotly = FALSE
) {
args <- as.list(match.call())
args[[1]] <- NULL
args[["se"]] <- NULL
for(argname in names(args)) {
args[[argname]] <- eval(args[[argname]], parent.frame())
}
args[["se"]] <- se
strand <- match.arg(strand)
if(!is_valid(strand)) strand <- "*"
ribbon_mode <- match.arg(ribbon_mode)
if(!is_valid(ribbon_mode)) ribbon_mode <- "none"
args[["strand"]] <- strand
args[["zoom_factor"]] <- zoom_factor
args[["bases_flanking"]] <- bases_flanking
dataObj <- do.call(getCoverageData, args)
if(!missing(norm_event)) args[["Event"]] <- norm_event
args[["object"]] <- dataObj
plotObj <- do.call(getPlotObject, args)
args[["centerByEvent"]] <- FALSE
args[["EventZoomFactor"]] <- 0.2
args[["EventBasesFlanking"]] <- 0.2
if(stack_tracks) {
trackList <- list(tracks)
} else {
trackList <- as.list(tracks)
if(!missing(track_names)) names(trackList) <- track_names
}
if(length(tracks) > 1 && t_test) {
diffList <- list(c(tracks[1], tracks[2]))
diff_stat <- "t-test"
} else {
diffList <- list()
diff_stat <- "none"
}
args[["trackList"]] <- trackList
args[["diffList"]] <- diffList
args[["diff_stat"]] <- diff_stat
args[["ribbon_mode"]] <- ribbon_mode
if(!missing(selected_transcripts))
args[["filterByTranscripts"]] <- selected_transcripts
if(!missing(plot_key_isoforms))
args[["filterByEventTranscripts"]] <- plot_key_isoforms
if(!missing(condense_tracks))
args[["condenseTranscripts"]] <- condense_tracks
args[["filterByExpressedTranscripts"]] <- TRUE
args[["plotJuncPSI"]] <- TRUE
args[["object"]] <- plotObj
p <- do.call(plotView, args)
return(p)
}
#' @describeIn plotCoverage Legacy function - works by internally calling
#' getGenomeData(), followed by plotAnnoTrack()
#' @export
plotGenome <- function(
se, reference_path,
Event, Gene, seqname, start, end,
coordinates,
zoom_factor = 0.2, bases_flanking = 100,
reverseGenomeCoords = FALSE,
condense_tracks = FALSE,
selected_transcripts = "",
plot_key_isoforms = FALSE,
usePlotly = FALSE
) {
if(!missing(Event) && missing(se)) .log(paste(
"In plotGenome(),",
"Plotting ASE from a SpliceWiz reference is currently not supported"
))
args <- as.list(match.call())
args[[1]] <- NULL
args[["se"]] <- NULL
for(argname in names(args)) {
args[[argname]] <- eval(args[[argname]], parent.frame())
}
if(!missing(se)) {
args[["se"]] <- se
args[["cov_data"]] <- ref(se)
} else if(!missing(reference_path)) {
if(!file.exists(file.path(reference_path, "cov_data.Rds"))) .log(paste(
"In plotGenome(),",
"`reference_path` is not a valid SpliceWiz reference"
))
args[["cov_data"]] <-
readRDS(file.path(reference_path, "cov_data.Rds"))
} else {
.log(paste(
"In plotGenome(),",
"at least 1 of `se` or `reference_path` must be supplied"
))
}
args[["zoom_factor"]] <- zoom_factor
args[["bases_flanking"]] <- bases_flanking
args <- .gCD_validate_args_coords(args)
args <- .gCD_validate_args(args)
annotation <- .gCD_retrieve_annotations(args)
args[["cov_data"]] <- NULL
args[["object"]] <- covDataObject(
args = args,
annotation = annotation
)
args[["reverseGenomeCoords"]] <- reverseGenomeCoords
args[["condensed"]] <- condense_tracks
args[["selected_transcripts"]] <- selected_transcripts
args[["plot_key_isoforms"]] <- plot_key_isoforms
args[["usePlotly"]] <- usePlotly
p <- do.call(plotAnnoTrack, args)
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.