scran.R
In scran: Methods for Single-Cell RNA-Seq Data Analysis

## ---- echo=FALSE, results="hide", message=FALSE-------------------------------
require(knitr)
opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE)

## ----setup, echo=FALSE, message=FALSE-----------------------------------------
library(scran)
library(BiocParallel)
register(SerialParam()) # avoid problems with fastMNN parallelization.
set.seed(100)

## -----------------------------------------------------------------------------
library(scRNAseq)
sce <- GrunPancreasData()
sce

## -----------------------------------------------------------------------------
library(scuttle)
qcstats <- perCellQCMetrics(sce)
qcfilter <- quickPerCellQC(qcstats, percent_subsets="altexps_ERCC_percent")
sce <- sce[,!qcfilter$discard]
summary(qcfilter$discard)

## -----------------------------------------------------------------------------
library(scran)
clusters <- quickCluster(sce)
sce <- computeSumFactors(sce, clusters=clusters)
summary(sizeFactors(sce))

## -----------------------------------------------------------------------------
sce2 <- computeSpikeFactors(sce, "ERCC")
summary(sizeFactors(sce2))

## -----------------------------------------------------------------------------
sce <- logNormCounts(sce)

## -----------------------------------------------------------------------------
dec <- modelGeneVar(sce)
plot(dec$mean, dec$total, xlab="Mean log-expression", ylab="Variance")
curve(metadata(dec)$trend(x), col="blue", add=TRUE)

## -----------------------------------------------------------------------------
dec2 <- modelGeneVarWithSpikes(sce, 'ERCC')
plot(dec2$mean, dec2$total, xlab="Mean log-expression", ylab="Variance")
points(metadata(dec2)$mean, metadata(dec2)$var, col="red")
curve(metadata(dec2)$trend(x), col="blue", add=TRUE)

## ---- fig.wide=TRUE, fig.asp=1.5----------------------------------------------
dec3 <- modelGeneVar(sce, block=sce$donor)
per.block <- dec3$per.block
par(mfrow=c(3, 2))
for (i in seq_along(per.block)) {
    decX <- per.block[[i]]
    plot(decX$mean, decX$total, xlab="Mean log-expression", 
        ylab="Variance", main=names(per.block)[i])
    curve(metadata(decX)$trend(x), col="blue", add=TRUE)
}

## -----------------------------------------------------------------------------
# Get the top 10% of genes.
top.hvgs <- getTopHVGs(dec, prop=0.1)

# Get the top 2000 genes.
top.hvgs2 <- getTopHVGs(dec, n=2000)

# Get all genes with positive biological components.
top.hvgs3 <- getTopHVGs(dec, var.threshold=0)

# Get all genes with FDR below 5%.
top.hvgs4 <- getTopHVGs(dec, fdr.threshold=0.05)

## -----------------------------------------------------------------------------
# Running the PCA with the 10% of HVGs.
library(scater)
sce <- runPCA(sce, subset_row=top.hvgs)
reducedDimNames(sce)

## -----------------------------------------------------------------------------
sced <- denoisePCA(sce, dec2, subset.row=getTopHVGs(dec2, prop=0.1))
ncol(reducedDim(sced, "PCA"))

## -----------------------------------------------------------------------------
output <- getClusteredPCs(reducedDim(sce))
npcs <- metadata(output)$chosen
reducedDim(sce, "PCAsub") <- reducedDim(sce, "PCA")[,1:npcs,drop=FALSE]
npcs

## -----------------------------------------------------------------------------
# In this case, using the PCs that we chose from getClusteredPCs().
g <- buildSNNGraph(sce, use.dimred="PCAsub")
cluster <- igraph::cluster_walktrap(g)$membership

# Assigning to the 'colLabels' of the 'sce'.
colLabels(sce) <- factor(cluster)
table(colLabels(sce))

## -----------------------------------------------------------------------------
sce <- runTSNE(sce, dimred="PCAsub")
plotTSNE(sce, colour_by="label", text_by="label")

## -----------------------------------------------------------------------------
ratio <- clusterModularity(g, cluster, as.ratio=TRUE)

library(pheatmap)
pheatmap(log10(ratio+1), cluster_cols=FALSE, cluster_rows=FALSE,
    col=rev(heat.colors(100)))

## -----------------------------------------------------------------------------
ass.prob <- bootstrapCluster(sce, FUN=function(x) {
    g <- buildSNNGraph(x, use.dimred="PCAsub")
    igraph::cluster_walktrap(g)$membership
}, clusters=sce$cluster)

pheatmap(ass.prob, cluster_cols=FALSE, cluster_rows=FALSE,
    col=colorRampPalette(c("white", "blue"))(100))

## -----------------------------------------------------------------------------
subout <- quickSubCluster(sce, groups=colLabels(sce))
table(metadata(subout)$subcluster) # formatted as '<parent>.<subcluster>'

## -----------------------------------------------------------------------------
# Uses clustering information from 'colLabels(sce)' by default:
markers <- findMarkers(sce)
markers[[1]][,1:3]

## -----------------------------------------------------------------------------
wmarkers <- findMarkers(sce, test.type="wilcox", direction="up", lfc=1)
wmarkers[[1]][,1:3]

## -----------------------------------------------------------------------------
markers <- findMarkers(sce, pval.type="all")
markers[[1]][,1:2]

## -----------------------------------------------------------------------------
# Using the first 200 HVs, which are the most interesting anyway.
# Also turning down the number of iterations for speed.
of.interest <- top.hvgs[1:200]
cor.pairs <- correlatePairs(sce, subset.row=of.interest, iters=1e5)
cor.pairs

## -----------------------------------------------------------------------------
cor.pairs2 <- correlatePairs(sce, subset.row=of.interest, 
    block=sce$donor, iters=1e5)

## -----------------------------------------------------------------------------
cor.genes <- correlateGenes(cor.pairs)
cor.genes

## -----------------------------------------------------------------------------
y <- convertTo(sce, type="edgeR")

## -----------------------------------------------------------------------------
sessionInfo()