plotFunctions: Visualization functions for FRASER

plotManhattanR Documentation

Visualization functions for FRASER

Description

The FRASER package provides mutliple functions to visualize the data and the results of a full data set analysis.

Plots the p values over the delta psi values, known as volcano plot. Visualizes per sample the outliers. By type and aggregate by gene if requested.

Plot the number of aberrant events per samples

Plots the observed split reads of the junction of interest over all reads coming from the given donor/acceptor.

Plots the observed values of the splice metric across samples for a junction of interest.

Plots the expected psi value over the observed psi value of the given junction.

Plots the quantile-quantile plot

Histogram of the geometric mean per junction based on the filter status

Histogram of minimal delta psi per junction

Count correlation heatmap function

Usage

plotManhattan(object, ...)

## S4 method for signature 'FraserDataSet'
plotVolcano(
  object,
  sampleID,
  type = fitMetrics(object),
  basePlot = TRUE,
  aggregate = FALSE,
  main = NULL,
  label = NULL,
  deltaPsiCutoff = 0.1,
  padjCutoff = 0.1,
  subsetName = NULL,
  ...
)

## S4 method for signature 'FraserDataSet'
plotAberrantPerSample(
  object,
  main,
  type = fitMetrics(object),
  padjCutoff = 0.1,
  deltaPsiCutoff = 0.1,
  aggregate = TRUE,
  subsetName = NULL,
  BPPARAM = bpparam(),
  ...
)

plotExpression(
  fds,
  type = fitMetrics(fds),
  idx = NULL,
  result = NULL,
  colGroup = NULL,
  basePlot = TRUE,
  main = NULL,
  label = "aberrant",
  subsetName = NULL,
  ...
)

plotSpliceMetricRank(
  fds,
  type = fitMetrics(fds),
  idx = NULL,
  result = NULL,
  colGroup = NULL,
  basePlot = TRUE,
  main = NULL,
  label = "aberrant",
  subsetName = NULL,
  ...
)

plotExpectedVsObservedPsi(
  fds,
  type = fitMetrics(fds),
  idx = NULL,
  result = NULL,
  colGroup = NULL,
  main = NULL,
  basePlot = TRUE,
  label = "aberrant",
  subsetName = NULL,
  ...
)

## S4 method for signature 'FraserDataSet'
plotQQ(
  object,
  type = NULL,
  idx = NULL,
  result = NULL,
  aggregate = FALSE,
  global = FALSE,
  main = NULL,
  conf.alpha = 0.05,
  samplingPrecision = 3,
  basePlot = TRUE,
  label = "aberrant",
  Ncpus = min(3, getDTthreads()),
  subsetName = NULL,
  ...
)

## S4 method for signature 'FraserDataSet'
plotEncDimSearch(object, type = psiTypes, plotType = c("auc", "loss"))

plotFilterExpression(
  fds,
  bins = 200,
  legend.position = c(0.8, 0.8),
  onlyVariableIntrons = FALSE
)

plotFilterVariability(
  fds,
  bins = 200,
  legend.position = c(0.8, 0.8),
  onlyExpressedIntrons = FALSE
)

## S4 method for signature 'FraserDataSet'
plotCountCorHeatmap(
  object,
  type = psiTypes,
  logit = FALSE,
  topN = 50000,
  topJ = 5000,
  minMedian = 1,
  minCount = 10,
  main = NULL,
  normalized = FALSE,
  show_rownames = FALSE,
  show_colnames = FALSE,
  minDeltaPsi = 0.1,
  annotation_col = NA,
  annotation_row = NA,
  border_color = NA,
  nClust = 5,
  plotType = c("sampleCorrelation", "junctionSample"),
  sampleClustering = NULL,
  plotMeanPsi = TRUE,
  plotCov = TRUE,
  ...
)

plotBamCoverage(
  fds,
  gr,
  sampleID,
  control_samples = sample(samples(fds[, which(samples(fds) != sampleID)]), min(3,
    ncol(fds) - length(sampleID))),
  txdb = NULL,
  min_junction_count = 20,
  highlight_range = NULL,
  highlight_range_color = "firebrick",
  color_annotated = "gray",
  color_novel = "goldenrod3",
  color_sample_interest = "firebrick",
  color_control_samples = "dodgerblue4",
  toscale = c("exon", "gene", "none"),
  mar = c(2, 10, 0.1, 5),
  curvature_splicegraph = 1,
  curvature_coverage = 1,
  cex = 1,
  splicegraph_labels = c("genomic_range", "id", "name", "none"),
  splicegraph_position = c("top", "bottom"),
  ...
)

plotBamCoverageFromResultTable(
  fds,
  result,
  show_full_gene = FALSE,
  txdb = NULL,
  orgDb = NULL,
  res_gene_col = "hgncSymbol",
  res_geneid_type = "SYMBOL",
  txdb_geneid_type = "ENTREZID",
  left_extension = 1000,
  right_extension = 1000,
  ...
)

## S4 method for signature 'FraserDataSet'
plotManhattan(
  object,
  sampleID,
  value = "pvalue",
  type = fitMetrics(object),
  chr = NULL,
  main = paste0("sample: ", sampleID),
  chrColor = c("black", "darkgrey"),
  subsetName = NULL,
  ...
)

Arguments

object, fds

An FraserDataSet object.

...

Additional parameters passed to plot() or plot_ly() if not stated otherwise in the details for each plot function

sampleID

A sample ID which should be plotted. Can also be a vector. Integers are treated as indices.

type

The psi type: either psi5, psi3 or theta (for SE).

basePlot

if TRUE (default), use the R base plot version, else use the plotly framework.

aggregate

If TRUE, the pvalues are aggregated by gene (default), otherwise junction level pvalues are used (default for Q-Q plot).

main

Title for the plot, if missing a default title will be used.

label

Indicates the genes or samples that will be labelled in the plot (only for basePlot=TRUE). Setting label="aberrant" will label all aberrant genes or samples. Labelling can be turned off by setting label=NULL. The user can also provide a custom list of gene symbols or sampleIDs.

padjCutoff, deltaPsiCutoff

Significance or delta psi cutoff to mark outliers

subsetName

The name of a subset of genes of interest for which FDR corrected pvalues were previously computed. Those FDR values on the subset will then be used to determine aberrant status. Default is NULL (using transcriptome-wide FDR corrected pvalues).

BPPARAM

BiocParallel parameter to use.

idx

A junction site ID or gene ID or one of both, which should be plotted. Can also be a vector. Integers are treated as indices.

result

The result table to be used by the method.

colGroup

Group of samples that should be colored.

global

Flag to plot a global Q-Q plot, default FALSE

conf.alpha

If set, a confidence interval is plotted, defaults to 0.05

samplingPrecision

Plot only non overlapping points in Q-Q plot to reduce number of points to plot. Defines the digits to round to.

Ncpus

Number of cores to use.

plotType

The type of plot that should be shown as character string. For plotEncDimSearch, it has to be either "auc" for a plot of the area under the curve (AUC) or "loss" for the model loss. For the correlation heatmap, it can be either "sampleCorrelation" for a sample-sample correlation heatmap or "junctionSample" for a junction-sample correlation heatmap.

bins

Set the number of bins to be used in the histogram.

legend.position

Set legend position (x and y coordinate), defaults to the top right corner.

onlyVariableIntrons

Logical value indicating whether to show only introns that also pass the variability filter. Defaults to FALSE.

onlyExpressedIntrons

Logical value indicating whether to show only introns that also pass the expression filter. Defaults to FALSE.

logit

If TRUE, the default, psi values are plotted in logit space.

topN

Top x most variable junctions that should be used for the calculation of sample x sample correlations.

topJ

Top x most variable junctions that should be displayed in the junction-sample correlation heatmap. Only applies if plotType is "junctionSample".

minMedian, minCount, minDeltaPsi

Minimal median (m \ge 1), delta psi (|\Delta\psi| > 0.1), read count (n \ge 10) value of a junction to be considered for the correlation heatmap.

normalized

If TRUE, the normalized psi values are used, the default, otherwise the raw psi values

show_rownames, show_colnames

Logical value indicating whether to show row or column names on the heatmap axes.

annotation_col, annotation_row

Row or column annotations that should be plotted on the heatmap.

border_color

Sets the border color of the heatmap

nClust

Number of clusters to show in the row and column dendrograms.

sampleClustering

A clustering of the samples that should be used as an annotation of the heatmap.

plotMeanPsi, plotCov

If TRUE, then the heatmap is annotated with the mean psi values or the junction coverage.

gr

A GRanges object indicating the genomic range that should be shown in plotBamCoverage.

control_samples

The sampleIDs of the samples used as control in plotBamCoverage.

txdb

A TxDb object giving the gene/transcript annotation to use.

min_junction_count

The minimal junction count across samples required for a junction to appear in the splicegraph and coverage tracks of plotBamCoverage.

highlight_range

A GenomicRanges or GenomicRangesList object of ranges to be highlighted in the splicegraph of plotBamCoverage.

highlight_range_color

The color of highlighted ranges in the splicegraph of plotBamCoverage.

color_annotated

The color for exons and junctions present in the given annotation (in the splicegraph of plotBamCoverage).

color_novel

The color for novel exons and junctions not present in the given annotation (in the splicegraph of plotBamCoverage).

color_sample_interest

The color in plotBamCoverage for the sample of interest.

color_control_samples

The color in plotBamCoverage for the samples used as controls.

toscale

In plotBamCoverage, indicates which part of the plotted region should be drawn to scale. Possible values are 'exon' (exonic regions are drawn to scale), 'gene' (both exonic and intronic regions are drawn to scale) or 'none' (exonic and intronic regions have constant length) (see SGSeq package).

mar

The margin of the plot area for plotBamCoverage (b,l,t,r).

curvature_splicegraph

The curvature of the junction arcs in the splicegraph in plotBamCoverage. Decrease this value for flatter arcs and increase it for steeper arcs.

curvature_coverage

The curvature of the junction arcs in the coverage tracks of plotBamCoverage. Decrease this value for flatter arcs and increase it for steeper arcs.

cex

For controlling the size of text and numbers in plotBamCoverage.

splicegraph_labels

Indicated the format of exon/splice junction labels in the splicegraph of plotBamCoverage. Possible values are 'genomic_range' (gives the start position of the first exon and the end position of the last exon that are shown), 'id' (format E1,... J1,...), 'name' (format type:chromosome:start-end:strand for each feature), 'none' for no labels (see SGSeq package).

splicegraph_position

The position of the splicegraph relative to the coverage tracks in plotBamCoverage. Possible values are 'top' (default) and 'bottom'.

show_full_gene

Should the full genomic range of the gene be shown in plotBamCoverageFromResultTable (default: FALSE)? If FALSE, only a certain region (see parameters left_extension and right_extension) around the outlier junction is shown.

orgDb

A OrgDb object giving the mapping of gene ids and symbols.

res_gene_col

The column name in the given results table that contains the gene annotation.

res_geneid_type

The type of gene annotation in the results table in res_gene_col (e.g. SYMBOL or ENTREZID etc.). This information is needed for mapping between the results table and the provided annotation in the txdb object.

txdb_geneid_type

The type of gene_id present in genes(txdb) (e.g. ENTREZID). This information is needed for mapping between the results table and the provided annotation in the txdb object.

left_extension

Indicating how far the plotted range around the outlier junction should be extended to the left in plotBamCoverageFromResultTable.

right_extension

Indicating how far the plotted range around the outlier junction should be extended to the right in plotBamCoverageFromResultTable.

value

Indicates which assay is shown in the manhattan plot. Defaults to 'pvalue'. Other options are 'deltaPsi' and 'zScore'.

chr

Vector of chromosome names to show in plotManhattan. The default is to show all chromosomes.

chrColor

Interchanging colors by chromosome for plotManhattan.

Details

This is the list of all plotting function provided by FRASER:

  • plotAberrantPerSample()

  • plotVolcano()

  • plotExpression()

  • plotQQ()

  • plotExpectedVsObservedPsi()

  • plotCountCorHeatmap()

  • plotFilterExpression()

  • plotFilterVariability()

  • plotEncDimSearch()

  • plotBamCoverage()

  • plotBamCoverageFromResultTable()

  • plotManhattan()

  • plotSpliceMetricRank()

For a detailed description of each plot function please see the details. Most of the functions share the same parameters.

plotAberrantPerSample: The number of aberrant events per sample are plotted sorted by rank. The ... parameters are passed on to the aberrant function.

plotVolcano: the volcano plot is sample-centric. It plots for a given sample and psi type the negative log10 nominal P-values against the delta psi values for all splice sites or aggregates by gene if requested.

plotExpression: This function plots for a given site the read count at this site (i.e. K) against the total coverage (i.e. N) for the given psi type (\psi_5, \psi_3, or \theta (SE)) for all samples.

plotQQ: the quantile-quantile plot for a given gene or if global is set to TRUE over the full data set. Here the observed P-values are plotted against the expected ones in the negative log10 space.

plotExpectedVsObservedPsi: A scatter plot of the observed psi against the predicted psi for a given site.

plotSpliceMetricRank: This function plots for a given intron the observed values of the selected splice metrix against the sample rank.

plotCountCorHeatmap: The correlation heatmap of the count data either of the full data set (i.e. sample-sample correlations) or of the top x most variable junctions (i.e. junction-sample correlations). By default the values are log transformed and row centered. The ... arguments are passed to the pheatmap function.

plotFilterExpression: The distribution of FPKM values. If the FraserDataSet object contains the passedFilter column, it will plot both FPKM distributions for the expressed introns and for the filtered introns.

plotFilterVariability: The distribution of maximal delta Psi values. If the FraserDataSet object contains the passedFilter column, it will plot both maximal delta Psi distributions for the variable introns and for the filtered (i.e. non-variable) introns.

plotEncDimSearch: Visualization of the hyperparameter optimization. It plots the encoding dimension against the achieved loss (area under the precision-recall curve). From this plot the optimum should be choosen for the q in fitting process.

plotManhattan: A Manhattan plot showing the junction pvalues by genomic position. Useful to identify if outliers cluster by genomic position.

plotBamCoverage: A sashimi plot showing the read coverage from the underlying bam files for a given genomic range and sampleIDs.

plotBamCoverageFromResultTable: A sashimi plot showing the read coverage from the underlying bam files for a row in the results table. Can either show the full range of the gene with the outlier junction or only a certain region around the outlier.

Value

If base R graphics are used nothing is returned else the plotly or the gplot object is returned.

Examples


# create full FRASER object 
fds <- makeSimulatedFraserDataSet(m=40, j=200)
fds <- calculatePSIValues(fds)
fds <- filterExpressionAndVariability(fds, filter=FALSE)
# this step should be done for more dimensions in practice
fds <- optimHyperParams(fds, "jaccard", q_param=c(2,5,10,25))

# assign gene names to show functionality on test dataset
# use fds <- annotateRanges(fds) on real data
mcols(fds, type="j")$hgnc_symbol <- 
    paste0("gene", sample(1:25, nrow(fds), replace=TRUE))

# fit and calculate pvalues
genesOfInterest <- rep(list(paste0("gene", sample(1:25, 10))), 4)
names(genesOfInterest) <- c("sample1", "sample6", "sample15", "sample23")
fds <- FRASER(fds, subsets=list("testSet"=genesOfInterest))

# QC plotting
plotFilterExpression(fds)
plotFilterVariability(fds)
plotCountCorHeatmap(fds, "jaccard")
plotCountCorHeatmap(fds, "jaccard", normalized=TRUE)
plotEncDimSearch(fds, type="jaccard")

# extract results 
plotAberrantPerSample(fds, aggregate=FALSE)
plotAberrantPerSample(fds, aggregate=TRUE, subsetName="testSet")
plotVolcano(fds, "sample2", "jaccard", label="aberrant")
plotVolcano(fds, "sample1", "jaccard", aggregate=TRUE, subsetName="testSet")

# dive into gene/sample level results
res <- as.data.table(results(fds))
res
plotExpression(fds, result=res[1])
plotQQ(fds, result=res[1])
plotExpectedVsObservedPsi(fds, res=res[1])
plotSpliceMetricRank(fds, res=res[1])

# other ways to call these plotting functions
plotExpression(fds, idx=10, sampleID="sample1", type="jaccard")
plotExpression(fds, result=res[FDR_set == "testSet",][1], 
                 subsetName="testSet")
plotQQ(fds, idx=10, sampleID="sample1", type="jaccard")
plotQQ(fds, result=res[FDR_set == "testSet",][1], subsetName="testSet")
plotExpectedVsObservedPsi(fds, idx=10, sampleID="sample1", type="jaccard")
plotExpectedVsObservedPsi(fds, result=res[FDR_set == "testSet",][1], 
                 subsetName="testSet")
plotSpliceMetricRank(fds, idx=10, sampleID="sample1", type="jaccard")
plotSpliceMetricRank(fds, result=res[FDR_set == "testSet",][1], 
                 subsetName="testSet")

# create manhattan plot of pvalues by genomic position
if(require(ggbio)){
    plotManhattan(fds, type="jaccard", sampleID="sample10")
}

# plot splice graph and coverage from bam files in a given region
if(require(SGSeq)){
    fds <- createTestFraserSettings()
    gr <- GRanges(seqnames="chr19", 
        IRanges(start=7587496, end=7598895), 
        strand="+")
    plotBamCoverage(fds, gr=gr, sampleID="sample3", 
        control_samples="sample2", min_junction_count=5,
        curvature_splicegraph=1, curvature_coverage=1, 
        mar=c(1, 7, 0.1, 3))

    # plot coverage from bam file for a row in the result table
    fds <- createTestFraserDataSet()
    require(TxDb.Hsapiens.UCSC.hg19.knownGene)
    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
    require(org.Hs.eg.db)
    orgDb <- org.Hs.eg.db
 
    res <- results(fds, padjCutoff=NA, deltaPsiCutoff=NA)
    res_dt <- as.data.table(res)
    res_dt <- res_dt[sampleID == "sample2",]
    
    # plot full range of gene containing outlier junction
    plotBamCoverageFromResultTable(fds, result=res_dt[1,], show_full_gene=TRUE,
        txdb=txdb, orgDb=orgDb, control_samples="sample3")
    
    # plot only certain range around outlier junction
    plotBamCoverageFromResultTable(fds, result=res_dt[1,], show_full_gene=FALSE, 
        control_samples="sample3", curvature_splicegraph=0.5, txdb=txdb,
        curvature_coverage=0.5, right_extension=5000, left_extension=5000,
        splicegraph_labels="id")
}


c-mertes/FRASER documentation built on Feb. 13, 2024, 12:49 a.m.