inst/doc/HTSFilter.R

## ---- fig.align = "center", echo=FALSE----------------------------------------
options(rmarkdown.html_vignette.check_title = FALSE)

## ----loadingPackages, message=FALSE, fig.cap="Histogram of log transformed counts from the Sultan data. This illustrates the large number of genes with very small counts as well as the large heterogeneity in counts observed."----
library(HTSFilter)
library(edgeR)
library(DESeq2)
data("sultan")
hist(log(exprs(sultan)+1), col="grey", breaks=25, main="", 
  xlab="Log(counts+1)")
pData(sultan)
dim(sultan)

## ----matrix, fig.cap="Global Jaccard index for the Sultan data. The index is calculated for a variety of threshold values after TMM normalization, with a loess curve (blue line) superposed and data-based threshold values (red cross and red dotted line) equal to 11.764."----
mat <- exprs(sultan)
conds <- as.character(pData(sultan)$cell.line)

## Only 25 tested thresholds to reduce computation time
filter <- HTSFilter(mat, conds, s.min=1, s.max=200, s.len=25)
mat <- filter$filteredData
dim(mat)
dim(filter$removedData)

## ----refilter, fig.cap="HTSFilter on pre-filtered Sultan data. (left) The global Jaccard index on the pre-filtered data is calculated for a variety of threshold values after TMM normalization, with a loess curve (blue line) superposed and data-based threshold values (red cross and red dotted line) equal to 1.64. (right) Histogram of the re-filtered data after applying HTSFilter."----
par(mfrow = c(1,2), mar = c(4,4,2,2))
filter.2 <- HTSFilter(mat, conds, s.len=25)
dim(filter.2$removedData)
hist(log(filter.2$filteredData+1), col="grey", breaks=25, main="", 
  xlab="Log(counts+1)")

## ----dge----------------------------------------------------------------------
dge <- DGEList(counts=exprs(sultan), group=conds)
dge <- calcNormFactors(dge)
dge <- estimateDisp(dge)
et <- exactTest(dge)
et <- HTSFilter(et, DGEList=dge, s.len=25, plot=FALSE)$filteredData
dim(et)
class(et)
topTags(et)

## ----dgetoptags---------------------------------------------------------------
topTags(et)

## ----dge2---------------------------------------------------------------------
design <- model.matrix(~conds)
dge <- DGEList(counts=exprs(sultan), group=conds)
dge <- calcNormFactors(dge)
dge <- estimateDisp(dge, design)
fit <- glmFit(dge,design)
lrt <- glmLRT(fit,coef=2)
lrt <- HTSFilter(lrt, DGEGLM=fit, s.len=25, plot=FALSE)$filteredData
dim(lrt)
class(lrt)

## ----dge2toptags--------------------------------------------------------------
topTags(lrt)

## ----cds2---------------------------------------------------------------------
conds <- gsub(" ", ".", conds)
dds <- DESeqDataSetFromMatrix(countData = exprs(sultan),
                              colData = data.frame(cell.line = factor(conds)),
                              design = ~ cell.line)
dds <- DESeq(dds)
filter <- HTSFilter(dds, s.len=25, plot=FALSE)$filteredData
class(filter)
dim(filter)
res <- results(filter, independentFiltering=FALSE)
head(res)

## ----ses2, eval=FALSE---------------------------------------------------------
#  library(EDASeq)
#  ses <- newSeqExpressionSet(exprs(sultan),
#         phenoData=pData(sultan))
#  ses.norm <- betweenLaneNormalization(ses, which="full")

## ----ses3, eval=FALSE---------------------------------------------------------
#  filter <- HTSFilter(counts(ses.norm), conds, s.len=25, norm="none",
#            plot=FALSE)
#  head(filter$on)
#  table(filter$on)

## ----sessionInfo--------------------------------------------------------------
sessionInfo()

Try the HTSFilter package in your browser

Any scripts or data that you put into this service are public.

HTSFilter documentation built on Dec. 18, 2020, 2 a.m.