DEXSeq.R
In DEXSeq: Inference of differential exon usage in RNA-Seq

## ----echo=FALSE, include=FALSE------------------------------------------------
knitr::opts_chunk$set(tidy = FALSE,
                      cache = TRUE,
                      dev = "png",
                      message = FALSE,
                      error = FALSE,
                      warning = TRUE)

## ----systemFile---------------------------------------------------------------
pythonScriptsDir = system.file( "python_scripts", package="DEXSeq" )
list.files(pythonScriptsDir)
system.file( "python_scripts", package="DEXSeq", mustWork=TRUE )

## ----loadDEXSeq---------------------------------------------------------------
inDir = system.file("extdata", package="pasilla")
countFiles = list.files(inDir, pattern="fb.txt$", full.names=TRUE)
basename(countFiles)
flattenedFile = list.files(inDir, pattern="gff$", full.names=TRUE)
basename(flattenedFile)

## ----sampleTable--------------------------------------------------------------
sampleTable = data.frame(
   row.names = c( "treated1", "treated2", "treated3", 
      "untreated1", "untreated2", "untreated3", "untreated4" ),
   condition = c("knockdown", "knockdown", "knockdown",  
      "control", "control", "control", "control" ),
   libType = c( "single-end", "paired-end", "paired-end", 
      "single-end", "single-end", "paired-end", "paired-end" ) )

## ----check--------------------------------------------------------------------
sampleTable

## ----message=FALSE------------------------------------------------------------
library( "DEXSeq" )

dxd = DEXSeqDataSetFromHTSeq(
   countFiles,
   sampleData=sampleTable,
   design= ~ sample + exon + condition:exon,
   flattenedfile=flattenedFile )

## ----startVignette------------------------------------------------------------
genesForSubset = read.table( 
  file.path(inDir, "geneIDsinsubset.txt"), 
  stringsAsFactors=FALSE)[[1]]

dxd = dxd[geneIDs( dxd ) %in% genesForSubset,]

## -----------------------------------------------------------------------------
colData(dxd)

## -----------------------------------------------------------------------------
head( counts(dxd), 5 )

## -----------------------------------------------------------------------------
split( seq_len(ncol(dxd)), colData(dxd)$exon )

## -----------------------------------------------------------------------------
head( featureCounts(dxd), 5 )

## -----------------------------------------------------------------------------
head( rowRanges(dxd), 3 )

## -----------------------------------------------------------------------------
sampleAnnotation( dxd )

## -----------------------------------------------------------------------------
dxd = estimateSizeFactors( dxd )

## -----------------------------------------------------------------------------
dxd = estimateDispersions( dxd )

## ----fitdiagnostics, fig.small=TRUE, fig.cap="Fit Diagnostics. The initial per-exon dispersion estimates (shown by black points), the fitted mean-dispersion values function (red line), and the shrinked values in blue"----
plotDispEsts( dxd )

## ----testForDEU1--------------------------------------------------------------
dxd = testForDEU( dxd )

## ----estFC--------------------------------------------------------------------
dxd = estimateExonFoldChanges( dxd, fitExpToVar="condition")

## ----results1-----------------------------------------------------------------
dxr1 = DEXSeqResults( dxd )
dxr1

## ----results2-----------------------------------------------------------------
mcols(dxr1)$description

## ----tallyExons---------------------------------------------------------------
table ( dxr1$padj < 0.1 )

## ----tallyGenes---------------------------------------------------------------
table ( tapply( dxr1$padj < 0.1, dxr1$groupID, any ) )

## ----MvsA, fig.small=TRUE, fig.cap="MA plot. Mean expression versus $log{2}$ fold change plot. Significant hits at an $FDR=0.1$ are coloured in red."----
plotMA( dxr1, cex=0.8 )

## ----design-------------------------------------------------------------------
sampleAnnotation(dxd)

## ----formulas2----------------------------------------------------------------
formulaFullModel    =  ~ sample + exon + libType:exon + condition:exon
formulaReducedModel =  ~ sample + exon + libType:exon 

## ----estDisps_again-----------------------------------------------------------
dxd = estimateDispersions( dxd, formula = formulaFullModel )

## ----test_again---------------------------------------------------------------
dxd = testForDEU( dxd, 
	reducedModel = formulaReducedModel, 
        fullModel = formulaFullModel )

## ----res_again----------------------------------------------------------------
dxr2 = DEXSeqResults( dxd )

## ----table2-------------------------------------------------------------------
table( dxr2$padj < 0.1 )

## ----table3-------------------------------------------------------------------
table( before = dxr1$padj < 0.1, now = dxr2$padj < 0.1 )

## ----plot1, fig.height=8, fig.width=12, fig.cap="Fitted expression. The plot represents the expression estimates from a call to `testForDEU()`. Shown in red is the exon that showed significant differential exon usage."----
plotDEXSeq( dxr2, "FBgn0010909", legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 )

## ----checkClaim---------------------------------------------------------------
wh = (dxr2$groupID=="FBgn0010909")
stopifnot(sum(dxr2$padj[wh] < formals(plotDEXSeq)$FDR)==1)

## ----plot2, fig.height=8, fig.width=12, fig.cap="Transcripts. As in the previous plots, but including the annotated transcript models"----
plotDEXSeq( dxr2, "FBgn0010909", displayTranscripts=TRUE, legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 )

## ----plot3, fig.height=8, fig.width=12, fig.cap="Normalized counts. As in the previous plots, with normalized count values of each exon in each of the samples."----
plotDEXSeq( dxr2, "FBgn0010909", expression=FALSE, norCounts=TRUE,
   legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 )

## ----plot4, fig.height=8, fig.width=12, fig.cap="Fitted splicing. As in the previous plots, but after subtraction of overall changes in gene expression."----
plotDEXSeq( dxr2, "FBgn0010909", expression=FALSE, splicing=TRUE,
   legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 )

## ----DEXSeqHTML, cache=TRUE, eval=FALSE---------------------------------------
#  DEXSeqHTML( dxr2, FDR=0.1, color=c("#FF000080", "#0000FF80") )

## ----para1,cache=TRUE,results='hide', eval=FALSE------------------------------
#  BPPARAM = MultiCoreParam(4)
#  dxd = estimateSizeFactors( dxd )
#  dxd = estimateDispersions( dxd, BPPARAM=BPPARAM)
#  dxd = testForDEU( dxd, BPPARAM=BPPARAM)
#  dxd = estimateExonFoldChanges(dxd, BPPARAM=BPPARAM)

## ----alldeu-------------------------------------------------------------------
dxr = DEXSeq(dxd)
class(dxr)

## ----pergeneqval--------------------------------------------------------------
numbOfGenes <- sum( perGeneQValue(dxr) < 0.1)
numbOfGenes

## ----buildExonCountSetLoadPacks, eval=FALSE-----------------------------------
#  library(GenomicRanges)
#  library(GenomicFeatures)
#  library(GenomicAlignments)

## ----buildExonCountSetDownloadAnno, eval=FALSE--------------------------------
#  hse = makeTxDbFromBiomart( biomart="ensembl", dataset="hsapiens_gene_ensembl", host="grch37.ensembl.org" )

## ----buildExonCountSetDisjoin,cache=TRUE, eval=FALSE--------------------------
#  exonicParts = exonicParts( hse, linked.to.single.gene.only = TRUE )

## ----buildExonCountSet2FindBAMs, cache=TRUE, eval=FALSE-----------------------
#  bamDir = system.file( "extdata", package="parathyroidSE", mustWork=TRUE )
#  fls = list.files( bamDir, pattern="bam$", full=TRUE )

## ----buildExonCountSet2FindBAMs2,  eval=FALSE---------------------------------
#  bamlst = BamFileList( fls, index=character(), yieldSize=100000, obeyQname=TRUE )
#  SE = summarizeOverlaps( exonicParts, bamlst, mode="Union", singleEnd=FALSE, ignore.strand=TRUE, inter.feature=FALSE, fragments=TRUE )

## ----buildExonCountSet3, eval=FALSE-------------------------------------------
#  colData(SE)$condition = c("A", "A", "B")
#  DEXSeqDataSetFromSE( SE, design= ~ sample + exon + condition:exon )

## ----acc----------------------------------------------------------------------
head( geneIDs(dxd) )
head( exonIDs(dxd) )

## ----grmethods----------------------------------------------------------------
interestingRegion = GRanges( "chr2L", 
   IRanges(start=3872658, end=3875302) )
subsetByOverlaps( x=dxr, ranges=interestingRegion )
findOverlaps( query=dxr, subject=interestingRegion )

## ----sessionInfo--------------------------------------------------------------
sessionInfo()