plotFunctions: Visualization functions for FRASER
In c-mertes/FRASER: Find RAre Splicing Events in RNA-Seq Data

plotManhattan

R Documentation

Visualization functions for FRASER

Description

The FRASER package provides mutliple functions to visualize the data and the results of a full data set analysis.

Plots the p values over the delta psi values, known as volcano plot. Visualizes per sample the outliers. By type and aggregate by gene if requested.

Plot the number of aberrant events per samples

Plots the observed split reads of the junction of interest over all reads coming from the given donor/acceptor.

Plots the observed values of the splice metric across samples for a junction of interest.

Plots the expected psi value over the observed psi value of the given junction.

Plots the quantile-quantile plot

Histogram of the geometric mean per junction based on the filter status

Histogram of minimal delta psi per junction

Count correlation heatmap function

Usage

plotManhattan(object, ...)

## S4 method for signature 'FraserDataSet'
plotVolcano(
  object,
  sampleID,
  type = fitMetrics(object),
  basePlot = TRUE,
  aggregate = FALSE,
  main = NULL,
  label = NULL,
  deltaPsiCutoff = 0.1,
  padjCutoff = 0.1,
  subsetName = NULL,
  ...
)

## S4 method for signature 'FraserDataSet'
plotAberrantPerSample(
  object,
  main,
  type = fitMetrics(object),
  padjCutoff = 0.1,
  deltaPsiCutoff = 0.1,
  aggregate = TRUE,
  subsetName = NULL,
  BPPARAM = bpparam(),
  ...
)

plotExpression(
  fds,
  type = fitMetrics(fds),
  idx = NULL,
  result = NULL,
  colGroup = NULL,
  basePlot = TRUE,
  main = NULL,
  label = "aberrant",
  subsetName = NULL,
  ...
)

plotSpliceMetricRank(
  fds,
  type = fitMetrics(fds),
  idx = NULL,
  result = NULL,
  colGroup = NULL,
  basePlot = TRUE,
  main = NULL,
  label = "aberrant",
  subsetName = NULL,
  ...
)

plotExpectedVsObservedPsi(
  fds,
  type = fitMetrics(fds),
  idx = NULL,
  result = NULL,
  colGroup = NULL,
  main = NULL,
  basePlot = TRUE,
  label = "aberrant",
  subsetName = NULL,
  ...
)

## S4 method for signature 'FraserDataSet'
plotQQ(
  object,
  type = NULL,
  idx = NULL,
  result = NULL,
  aggregate = FALSE,
  global = FALSE,
  main = NULL,
  conf.alpha = 0.05,
  samplingPrecision = 3,
  basePlot = TRUE,
  label = "aberrant",
  Ncpus = min(3, getDTthreads()),
  subsetName = NULL,
  ...
)

## S4 method for signature 'FraserDataSet'
plotEncDimSearch(object, type = psiTypes, plotType = c("auc", "loss"))

plotFilterExpression(
  fds,
  bins = 200,
  legend.position = c(0.8, 0.8),
  onlyVariableIntrons = FALSE
)

plotFilterVariability(
  fds,
  bins = 200,
  legend.position = c(0.8, 0.8),
  onlyExpressedIntrons = FALSE
)

## S4 method for signature 'FraserDataSet'
plotCountCorHeatmap(
  object,
  type = psiTypes,
  logit = FALSE,
  topN = 50000,
  topJ = 5000,
  minMedian = 1,
  minCount = 10,
  main = NULL,
  normalized = FALSE,
  show_rownames = FALSE,
  show_colnames = FALSE,
  minDeltaPsi = 0.1,
  annotation_col = NA,
  annotation_row = NA,
  border_color = NA,
  nClust = 5,
  plotType = c("sampleCorrelation", "junctionSample"),
  sampleClustering = NULL,
  plotMeanPsi = TRUE,
  plotCov = TRUE,
  ...
)

plotBamCoverage(
  fds,
  gr,
  sampleID,
  control_samples = sample(samples(fds[, which(samples(fds) != sampleID)]), min(3,
    ncol(fds) - length(sampleID))),
  txdb = NULL,
  min_junction_count = 20,
  highlight_range = NULL,
  highlight_range_color = "firebrick",
  color_annotated = "gray",
  color_novel = "goldenrod3",
  color_sample_interest = "firebrick",
  color_control_samples = "dodgerblue4",
  toscale = c("exon", "gene", "none"),
  mar = c(2, 10, 0.1, 5),
  curvature_splicegraph = 1,
  curvature_coverage = 1,
  cex = 1,
  splicegraph_labels = c("genomic_range", "id", "name", "none"),
  splicegraph_position = c("top", "bottom"),
  ...
)

plotBamCoverageFromResultTable(
  fds,
  result,
  show_full_gene = FALSE,
  txdb = NULL,
  orgDb = NULL,
  res_gene_col = "hgncSymbol",
  res_geneid_type = "SYMBOL",
  txdb_geneid_type = "ENTREZID",
  left_extension = 1000,
  right_extension = 1000,
  ...
)

## S4 method for signature 'FraserDataSet'
plotManhattan(
  object,
  sampleID,
  value = "pvalue",
  type = fitMetrics(object),
  chr = NULL,
  main = paste0("sample: ", sampleID),
  chrColor = c("black", "darkgrey"),
  subsetName = NULL,
  ...
)

Arguments

`object`, `fds`	An `FraserDataSet` object.
`...`	Additional parameters passed to plot() or plot_ly() if not stated otherwise in the details for each plot function
`sampleID`	A sample ID which should be plotted. Can also be a vector. Integers are treated as indices.
`type`	The psi type: either psi5, psi3 or theta (for SE).
`basePlot`	if `TRUE` (default), use the R base plot version, else use the plotly framework.
`aggregate`	If TRUE, the pvalues are aggregated by gene (default), otherwise junction level pvalues are used (default for Q-Q plot).
`main`	Title for the plot, if missing a default title will be used.
`label`	Indicates the genes or samples that will be labelled in the plot (only for `basePlot=TRUE`). Setting `label="aberrant"` will label all aberrant genes or samples. Labelling can be turned off by setting `label=NULL`. The user can also provide a custom list of gene symbols or sampleIDs.
`padjCutoff`, `deltaPsiCutoff`	Significance or delta psi cutoff to mark outliers
`subsetName`	The name of a subset of genes of interest for which FDR corrected pvalues were previously computed. Those FDR values on the subset will then be used to determine aberrant status. Default is NULL (using transcriptome-wide FDR corrected pvalues).
`BPPARAM`	BiocParallel parameter to use.
`idx`	A junction site ID or gene ID or one of both, which should be plotted. Can also be a vector. Integers are treated as indices.
`result`	The result table to be used by the method.
`colGroup`	Group of samples that should be colored.
`global`	Flag to plot a global Q-Q plot, default FALSE
`conf.alpha`	If set, a confidence interval is plotted, defaults to 0.05
`samplingPrecision`	Plot only non overlapping points in Q-Q plot to reduce number of points to plot. Defines the digits to round to.
`Ncpus`	Number of cores to use.
`plotType`	The type of plot that should be shown as character string. For plotEncDimSearch, it has to be either `"auc"` for a plot of the area under the curve (AUC) or `"loss"` for the model loss. For the correlation heatmap, it can be either `"sampleCorrelation"` for a sample-sample correlation heatmap or `"junctionSample"` for a junction-sample correlation heatmap.
`bins`	Set the number of bins to be used in the histogram.
`legend.position`	Set legend position (x and y coordinate), defaults to the top right corner.
`onlyVariableIntrons`	Logical value indicating whether to show only introns that also pass the variability filter. Defaults to FALSE.
`onlyExpressedIntrons`	Logical value indicating whether to show only introns that also pass the expression filter. Defaults to FALSE.
`logit`	If TRUE, the default, psi values are plotted in logit space.
`topN`	Top x most variable junctions that should be used for the calculation of sample x sample correlations.
`topJ`	Top x most variable junctions that should be displayed in the junction-sample correlation heatmap. Only applies if plotType is "junctionSample".
`minMedian`, `minCount`, `minDeltaPsi`	Minimal median (`m \ge 1`), delta psi (`\|\Delta\psi\| > 0.1`), read count (`n \ge 10`) value of a junction to be considered for the correlation heatmap.
`normalized`	If TRUE, the normalized psi values are used, the default, otherwise the raw psi values
`show_rownames`, `show_colnames`	Logical value indicating whether to show row or column names on the heatmap axes.
`annotation_col`, `annotation_row`	Row or column annotations that should be plotted on the heatmap.
`border_color`	Sets the border color of the heatmap
`nClust`	Number of clusters to show in the row and column dendrograms.
`sampleClustering`	A clustering of the samples that should be used as an annotation of the heatmap.
`plotMeanPsi`, `plotCov`	If `TRUE`, then the heatmap is annotated with the mean psi values or the junction coverage.
`gr`	A GRanges object indicating the genomic range that should be shown in `plotBamCoverage`.
`control_samples`	The sampleIDs of the samples used as control in `plotBamCoverage`.
`txdb`	A TxDb object giving the gene/transcript annotation to use.
`min_junction_count`	The minimal junction count across samples required for a junction to appear in the splicegraph and coverage tracks of `plotBamCoverage`.
`highlight_range`	A `GenomicRanges` or `GenomicRangesList` object of ranges to be highlighted in the splicegraph of `plotBamCoverage`.
`highlight_range_color`	The color of highlighted ranges in the splicegraph of `plotBamCoverage`.
`color_annotated`	The color for exons and junctions present in the given annotation (in the splicegraph of `plotBamCoverage`).
`color_novel`	The color for novel exons and junctions not present in the given annotation (in the splicegraph of `plotBamCoverage`).
`color_sample_interest`	The color in `plotBamCoverage` for the sample of interest.
`color_control_samples`	The color in `plotBamCoverage` for the samples used as controls.
`toscale`	In `plotBamCoverage`, indicates which part of the plotted region should be drawn to scale. Possible values are 'exon' (exonic regions are drawn to scale), 'gene' (both exonic and intronic regions are drawn to scale) or 'none' (exonic and intronic regions have constant length) (see SGSeq package).
`mar`	The margin of the plot area for `plotBamCoverage` (b,l,t,r).
`curvature_splicegraph`	The curvature of the junction arcs in the splicegraph in `plotBamCoverage`. Decrease this value for flatter arcs and increase it for steeper arcs.
`curvature_coverage`	The curvature of the junction arcs in the coverage tracks of `plotBamCoverage`. Decrease this value for flatter arcs and increase it for steeper arcs.
`cex`	For controlling the size of text and numbers in `plotBamCoverage`.
`splicegraph_labels`	Indicated the format of exon/splice junction labels in the splicegraph of `plotBamCoverage`. Possible values are 'genomic_range' (gives the start position of the first exon and the end position of the last exon that are shown), 'id' (format E1,... J1,...), 'name' (format type:chromosome:start-end:strand for each feature), 'none' for no labels (see SGSeq package).
`splicegraph_position`	The position of the splicegraph relative to the coverage tracks in `plotBamCoverage`. Possible values are 'top' (default) and 'bottom'.
`show_full_gene`	Should the full genomic range of the gene be shown in `plotBamCoverageFromResultTable` (default: FALSE)? If FALSE, only a certain region (see parameters left_extension and right_extension) around the outlier junction is shown.
`orgDb`	A OrgDb object giving the mapping of gene ids and symbols.
`res_gene_col`	The column name in the given results table that contains the gene annotation.
`res_geneid_type`	The type of gene annotation in the results table in `res_gene_col` (e.g. SYMBOL or ENTREZID etc.). This information is needed for mapping between the results table and the provided annotation in the txdb object.
`txdb_geneid_type`	The type of gene_id present in `genes(txdb)` (e.g. ENTREZID). This information is needed for mapping between the results table and the provided annotation in the txdb object.
`left_extension`	Indicating how far the plotted range around the outlier junction should be extended to the left in `plotBamCoverageFromResultTable`.
`right_extension`	Indicating how far the plotted range around the outlier junction should be extended to the right in `plotBamCoverageFromResultTable`.
`value`	Indicates which assay is shown in the manhattan plot. Defaults to 'pvalue'. Other options are 'deltaPsi' and 'zScore'.
`chr`	Vector of chromosome names to show in `plotManhattan`. The default is to show all chromosomes.
`chrColor`	Interchanging colors by chromosome for `plotManhattan`.

Details

This is the list of all plotting function provided by FRASER:

plotAberrantPerSample()
plotVolcano()
plotExpression()
plotQQ()
plotExpectedVsObservedPsi()
plotCountCorHeatmap()
plotFilterExpression()
plotFilterVariability()
plotEncDimSearch()
plotBamCoverage()
plotBamCoverageFromResultTable()
plotManhattan()
plotSpliceMetricRank()

For a detailed description of each plot function please see the details. Most of the functions share the same parameters.

plotAberrantPerSample: The number of aberrant events per sample are plotted sorted by rank. The ... parameters are passed on to the aberrant function.

plotVolcano: the volcano plot is sample-centric. It plots for a given sample and psi type the negative log10 nominal P-values against the delta psi values for all splice sites or aggregates by gene if requested.

plotExpression: This function plots for a given site the read count at this site (i.e. K) against the total coverage (i.e. N) for the given psi type (\psi_5, \psi_3, or \theta (SE)) for all samples.

plotQQ: the quantile-quantile plot for a given gene or if global is set to TRUE over the full data set. Here the observed P-values are plotted against the expected ones in the negative log10 space.

plotExpectedVsObservedPsi: A scatter plot of the observed psi against the predicted psi for a given site.

plotSpliceMetricRank: This function plots for a given intron the observed values of the selected splice metrix against the sample rank.

plotCountCorHeatmap: The correlation heatmap of the count data either of the full data set (i.e. sample-sample correlations) or of the top x most variable junctions (i.e. junction-sample correlations). By default the values are log transformed and row centered. The ... arguments are passed to the pheatmap function.

plotFilterExpression: The distribution of FPKM values. If the FraserDataSet object contains the passedFilter column, it will plot both FPKM distributions for the expressed introns and for the filtered introns.

plotFilterVariability: The distribution of maximal delta Psi values. If the FraserDataSet object contains the passedFilter column, it will plot both maximal delta Psi distributions for the variable introns and for the filtered (i.e. non-variable) introns.

plotEncDimSearch: Visualization of the hyperparameter optimization. It plots the encoding dimension against the achieved loss (area under the precision-recall curve). From this plot the optimum should be choosen for the q in fitting process.

plotManhattan: A Manhattan plot showing the junction pvalues by genomic position. Useful to identify if outliers cluster by genomic position.

plotBamCoverage: A sashimi plot showing the read coverage from the underlying bam files for a given genomic range and sampleIDs.

plotBamCoverageFromResultTable: A sashimi plot showing the read coverage from the underlying bam files for a row in the results table. Can either show the full range of the gene with the outlier junction or only a certain region around the outlier.

Value

If base R graphics are used nothing is returned else the plotly or the gplot object is returned.

Examples


# create full FRASER object 
fds <- makeSimulatedFraserDataSet(m=40, j=200)
fds <- calculatePSIValues(fds)
fds <- filterExpressionAndVariability(fds, filter=FALSE)
# this step should be done for more dimensions in practice
fds <- optimHyperParams(fds, "jaccard", q_param=c(2,5,10,25))

# assign gene names to show functionality on test dataset
# use fds <- annotateRanges(fds) on real data
mcols(fds, type="j")$hgnc_symbol <- 
    paste0("gene", sample(1:25, nrow(fds), replace=TRUE))

# fit and calculate pvalues
genesOfInterest <- rep(list(paste0("gene", sample(1:25, 10))), 4)
names(genesOfInterest) <- c("sample1", "sample6", "sample15", "sample23")
fds <- FRASER(fds, subsets=list("testSet"=genesOfInterest))

# QC plotting
plotFilterExpression(fds)
plotFilterVariability(fds)
plotCountCorHeatmap(fds, "jaccard")
plotCountCorHeatmap(fds, "jaccard", normalized=TRUE)
plotEncDimSearch(fds, type="jaccard")

# extract results 
plotAberrantPerSample(fds, aggregate=FALSE)
plotAberrantPerSample(fds, aggregate=TRUE, subsetName="testSet")
plotVolcano(fds, "sample2", "jaccard", label="aberrant")
plotVolcano(fds, "sample1", "jaccard", aggregate=TRUE, subsetName="testSet")

# dive into gene/sample level results
res <- as.data.table(results(fds))
res
plotExpression(fds, result=res[1])
plotQQ(fds, result=res[1])
plotExpectedVsObservedPsi(fds, res=res[1])
plotSpliceMetricRank(fds, res=res[1])

# other ways to call these plotting functions
plotExpression(fds, idx=10, sampleID="sample1", type="jaccard")
plotExpression(fds, result=res[1], subsetName="testSet")
plotQQ(fds, idx=10, sampleID="sample1", type="jaccard")
plotQQ(fds, result=res[1], subsetName="testSet")
plotExpectedVsObservedPsi(fds, idx=10, sampleID="sample1", type="jaccard")
plotExpectedVsObservedPsi(fds, result=res[1], subsetName="testSet")
plotSpliceMetricRank(fds, idx=10, sampleID="sample1", type="jaccard")
plotSpliceMetricRank(fds, result=res[1], subsetName="testSet")

# create manhattan plot of pvalues by genomic position
if(require(ggbio)){
    plotManhattan(fds, type="jaccard", sampleID="sample10")
}

# plot splice graph and coverage from bam files in a given region
if(require(SGSeq)){
    fds <- createTestFraserSettings()
    gr <- GRanges(seqnames="chr19", 
        IRanges(start=7587496, end=7598895), 
        strand="+")
    plotBamCoverage(fds, gr=gr, sampleID="sample3", 
        control_samples="sample2", min_junction_count=5,
        curvature_splicegraph=1, curvature_coverage=1, 
        mar=c(1, 7, 0.1, 3))

    # plot coverage from bam file for a row in the result table
    fds <- createTestFraserDataSet()
    require(TxDb.Hsapiens.UCSC.hg19.knownGene)
    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
    require(org.Hs.eg.db)
    orgDb <- org.Hs.eg.db
 
    res <- results(fds, padjCutoff=NA, deltaPsiCutoff=NA)
    res_dt <- as.data.table(res)
    res_dt <- res_dt[sampleID == "sample2",]
    
    # plot full range of gene containing outlier junction
    plotBamCoverageFromResultTable(fds, result=res_dt[1,], show_full_gene=TRUE,
        txdb=txdb, orgDb=orgDb, control_samples="sample3")
    
    # plot only certain range around outlier junction
    plotBamCoverageFromResultTable(fds, result=res_dt[1,], show_full_gene=FALSE, 
        control_samples="sample3", curvature_splicegraph=0.5, txdb=txdb,
        curvature_coverage=0.5, right_extension=5000, left_extension=5000,
        splicegraph_labels="id")
}

c-mertes/FRASER documentation built on Feb. 22, 2025, 1:24 a.m.