Nothing
## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
BiocStyle::latex()
## ----parallel, echo=FALSE, cache=FALSE, results="hide"---------------------
library(BiocParallel)
# for speed we run everything in serial mode
register(SerialParam())
## ----knitr, echo=FALSE, cache=FALSE, results="hide"------------------------
library(knitr)
opts_chunk$set(
tidy=FALSE,
dev="png",
fig.width=7,
fig.height=7,
dpi=300,
message=FALSE,
warning=FALSE,
cache=FALSE
)
## ----include=FALSE---------------------------------------------------------
opts_chunk$set(concordance=TRUE)
## ----loadFraser, echo=FALSE------------------------------------------------
suppressPackageStartupMessages({
library(FRASER)
})
## ----quick_fraser_guide, echo=TRUE-----------------------------------------
# load FRASER library
library(FRASER)
# count data
fds <- createTestFraserSettings()
fds <- countRNAData(fds)
fds
# compute stats
fds <- calculatePSIValues(fds)
# filtering junction with low expression
fds <- filterExpressionAndVariability(fds, minExpressionInOneSample=20,
minDeltaPsi=0.0, filter=TRUE)
# fit the splicing model for each metric
# with a specific latentsapce dimension
fds <- FRASER(fds, q=c(psi5=2, psi3=3, theta=3))
# we provide two ways to anntoate introns with the corresponding gene symbols:
# the first way uses TxDb-objects provided by the user as shown here
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(org.Hs.eg.db)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
orgDb <- org.Hs.eg.db
fds <- annotateRangesWithTxDb(fds, txdb=txdb, orgDb=orgDb)
# alternatively, we also provide a way to use biomart for the annotation:
# fds <- annotateRanges(fds)
# get results: we recommend to use an FDR cutoff 0.05, but due to the small
# dataset size we extract all events and their associated values
# eg: res <- results(fds, zScoreCutoff=NA, padjCutoff=0.05, deltaPsiCutoff=0.3)
res <- results(fds, zScoreCutoff=NA, padjCutoff=NA, deltaPsiCutoff=NA)
res
# result visualization
plotVolcano(fds, sampleID="sample1", type="psi5", aggregate=TRUE)
## ----sampleData Table, echo=TRUE-------------------------------------------
sampleTable <- fread(system.file(
"extdata", "sampleTable.tsv", package="FRASER", mustWork=TRUE))
head(sampleTable)
## ----FRASER setting example1, echo=TRUE------------------------------------
# convert it to a bamFile list
bamFiles <- system.file(sampleTable[,bamFile], package="FRASER", mustWork=TRUE)
sampleTable[,bamFile:=bamFiles]
# create FRASER object
settings <- FraserDataSet(colData=sampleTable,
workingDir=tempdir())
# show the FraserSettings object
settings
## ----FRASER setting example2, echo=TRUE------------------------------------
settings <- createTestFraserSettings()
settings
## ----parallelize example, eval=FALSE---------------------------------------
# # example of how to use parallelization: use 10 cores or the maximal number of
# # available cores if fewer than 10 are available and use Snow if on Windows
# if(.Platform$OS.type == "unix") {
# register(MulticoreParam(workers=min(10, multicoreWorkers())))
# } else {
# register(SnowParam(workers=min(10, multicoreWorkers())))
# }
## ----counting reads, echo=TRUE---------------------------------------------
# count reads
fds <- countRNAData(settings)
fds
## ----create fds with counts, echo=TRUE-------------------------------------
# example sample annoation for precalculated count matrices
sampleTable <- fread(system.file("extdata", "sampleTable_countTable.tsv",
package="FRASER", mustWork=TRUE))
head(sampleTable)
# get raw counts
junctionCts <- fread(system.file("extdata", "raw_junction_counts.tsv.gz",
package="FRASER", mustWork=TRUE))
head(junctionCts)
spliceSiteCts <- fread(system.file("extdata", "raw_site_counts.tsv.gz",
package="FRASER", mustWork=TRUE))
head(spliceSiteCts)
# create FRASER object
fds <- FraserDataSet(colData=sampleTable, junctions=junctionCts,
spliceSites=spliceSiteCts, workingDir=tempdir())
fds
## ----calculate psi/zscore values, echo=TRUE--------------------------------
fds <- calculatePSIValues(fds)
fds
## ----filter_junctions, echo=TRUE-------------------------------------------
fds <- filterExpressionAndVariability(fds, minDeltaPsi=0.0, filter=FALSE)
plotFilterExpression(fds, bins=100)
## ----subset to filtered junctions, echo=TRUE-------------------------------
fds_filtered <- fds[mcols(fds, type="j")[,"passed"],]
fds_filtered
# filtered_fds not further used for this tutorial because the example dataset
# is otherwise too small
## ----sample_covariation, echo=TRUE-----------------------------------------
# Heatmap of the sample correlation
plotCountCorHeatmap(fds, type="psi5", logit=TRUE, normalized=FALSE)
## ----intron_sample_correlation, echo=TRUE, eval=FALSE----------------------
# # Heatmap of the intron/sample expression
# plotCountCorHeatmap(fds, type="psi5", logit=TRUE, normalized=FALSE,
# plotType="junctionSample", topJ=100, minDeltaPsi = 0.01)
## ----model fitting, echo=TRUE----------------------------------------------
# This is computational heavy on real size datasets and can take awhile
fds <- FRASER(fds, q=c(psi5=3, psi3=5, theta=2))
## ----covariation_after_fitting, echo=TRUE----------------------------------
plotCountCorHeatmap(fds, type="psi5", normalized=TRUE, logit=TRUE)
## ----result table, echo=TRUE-----------------------------------------------
# annotate introns with the HGNC symbols of the corresponding gene
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(org.Hs.eg.db)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
orgDb <- org.Hs.eg.db
fds <- annotateRangesWithTxDb(fds, txdb=txdb, orgDb=orgDb)
# fds <- annotateRanges(fds) # alternative way using biomaRt
# retrieve results with default and recommended cutoffs (padj <= 0.05 and
# |deltaPsi| >= 0.3)
res <- results(fds)
## ----result_table, echo=TRUE-----------------------------------------------
# to show result visualization functions for this tuturial, zScore cutoff used
res <- results(fds, zScoreCutoff=2, padjCutoff=NA, deltaPsiCutoff=0.1)
res
## ----finding_candidates, echo=TRUE-----------------------------------------
plotVolcano(fds, type="psi5", "sample10")
## ----sample result, echo=TRUE----------------------------------------------
sampleRes <- res[res$sampleID == "sample10"]
sampleRes
## ----plot_expression, echo=TRUE, eval=FALSE--------------------------------
# plotExpression(fds, type="psi5", result=sampleRes[1])
# plotExpectedVsObservedPsi(fds, result=sampleRes[1])
## ----save_load, echo=TRUE--------------------------------------------------
# saving a fds
workingDir(fds) <- tempdir()
name(fds) <- "ExampleAnalysis"
saveFraserDataSet(fds, dir=workingDir(fds), name=name(fds))
# two ways of loading a fds by either specifying the directory and anaysis name
# or directly giving the path the to fds-object.RDS file
fds <- loadFraserDataSet(dir=workingDir(fds), name=name(fds))
fds <- loadFraserDataSet(file=file.path(workingDir(fds),
"savedObjects", "ExampleAnalysis", "fds-object.RDS"))
## ----control confounders, echo=TRUE----------------------------------------
# Using an alternative way to correct splicing ratios
# here: only 2 iteration to speed the calculation up
# for the vignette, the default is 15 iterations
fds <- fit(fds, q=3, type="psi5", implementation="PCA-BB-Decoder",
iterations=2)
## ----findBestQ, echo=TRUE--------------------------------------------------
set.seed(42)
# hyperparameter opimization
fds <- optimHyperParams(fds, type="psi5", plot=FALSE)
# retrieve the estimated optimal dimension of the latent space
bestQ(fds, type="psi5")
## ----figure_findBestQ, echo=TRUE-------------------------------------------
plotEncDimSearch(fds, type="psi5")
## ----p-value calculation, echo=TRUE----------------------------------------
fds <- calculatePvalues(fds, type="psi5")
head(pVals(fds, type="psi5"))
## ----p-adj calculation, echo=TRUE------------------------------------------
fds <- calculatePadjValues(fds, type="psi5", method="BY")
head(padjVals(fds,type="psi5"))
## ----z-score calculation, echo=TRUE----------------------------------------
fds <- calculateZscore(fds, type="psi5")
head(zScores(fds, type="psi5"))
## ----result_visualization, echo=TRUE---------------------------------------
plotAberrantPerSample(fds)
# qq-plot for single junction
plotQQ(fds, result=res[1])
# global qq-plot (on gene level since aggregate=TRUE)
plotQQ(fds, aggregate=TRUE, global=TRUE)
## ----sessionInfo, echo=FALSE-----------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.