inst/doc/swish.R

## ----setup, echo=FALSE, results="hide"----------------------------------------
knitr::opts_chunk$set(tidy=FALSE, cache=FALSE, dev="png",
                      message=FALSE, error=FALSE, warning=FALSE)

## ----eval=FALSE---------------------------------------------------------------
#  # 'coldata.csv': sample information table
#  coldata <- read.csv("coldata.csv")
#  library(tximeta)
#  y <- tximeta(coldata) # reads in counts
#  library(swish)
#  y <- scaleInfReps(y) # scales counts
#  y <- labelKeep(y) # labels genes to keep
#  set.seed(1)
#  y <- swish(y, x="condition") # simplest Swish case

## ----eval=FALSE---------------------------------------------------------------
#  table(mcols(y)$qvalue < .05)

## ----eval=FALSE---------------------------------------------------------------
#  y <- y[mcols(y)$keep,]

## -----------------------------------------------------------------------------
library(macrophage)
dir <- system.file("extdata", package="macrophage")

## -----------------------------------------------------------------------------
coldata <- read.csv(file.path(dir, "coldata.csv"))
head(coldata)

## -----------------------------------------------------------------------------
coldata <- coldata[,c(1,2,3,5)]
names(coldata) <- c("names","id","line","condition")

## -----------------------------------------------------------------------------
coldata$files <- file.path(dir, "quants", coldata$names, "quant.sf.gz")
all(file.exists(coldata$files))

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(library(SummarizedExperiment))

## ----include=FALSE------------------------------------------------------------
# This hidden code chunk is only needed for Bioc build machines,
# so that 'fishpond' will build regardless of whether
# the machine can connect to ftp.ebi.ac.uk.
# Using linkedTxomes to point to a GTF that lives in the macrophage pkg.
# The chunk can be skipped if you have internet connection,
# as tximeta will automatically ID the transcriptome and DL the GTF.
library(tximeta)
makeLinkedTxome(
  indexDir=file.path(dir, "gencode.v29_salmon_0.12.0"),
  source="Gencode",
  organism="Homo sapiens",
  release="29",
  genome="GRCh38",
  fasta="ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_29/gencode.v29.transcripts.fa.gz",
  gtf=file.path(dir, "gencode.v29.annotation.gtf.gz"), # local version
  write=FALSE
)

## -----------------------------------------------------------------------------
library(tximeta)
se <- tximeta(coldata)

## -----------------------------------------------------------------------------
assayNames(se)

## -----------------------------------------------------------------------------
head(rownames(se))

## -----------------------------------------------------------------------------
y <- se
y <- y[seqnames(y) == "chr1",]

## -----------------------------------------------------------------------------
y <- y[,y$condition %in% c("naive","IFNg")]
y$condition <- factor(y$condition, c("naive","IFNg"))

## ----results="hide", message=FALSE--------------------------------------------
library(fishpond)
y <- scaleInfReps(y)
y <- labelKeep(y)
y <- y[mcols(y)$keep,]
set.seed(1)
y <- swish(y, x="condition", pair="line", nperms=64)

## -----------------------------------------------------------------------------
table(mcols(y)$qvalue < .05)

## -----------------------------------------------------------------------------
hist(mcols(y)$pvalue, col="grey")

## -----------------------------------------------------------------------------
with(mcols(y),
     table(sig=qvalue < .05, sign.lfc=sign(log2FC))
     )
sig <- mcols(y)$qvalue < .05
lo <- order(mcols(y)$log2FC * sig)
hi <- order(-mcols(y)$log2FC * sig)

## -----------------------------------------------------------------------------
top.up <- mcols(y)[head(hi),]
names(top.up)
cols <- c("log10mean","log2FC","pvalue","qvalue")
print(as.data.frame(top.up)[,cols], digits=3)

## -----------------------------------------------------------------------------
top.down <- mcols(y)[head(lo),]
print(as.data.frame(top.down)[,cols], digits=3)

## -----------------------------------------------------------------------------
plotInfReps(y, idx=hi[100], x="condition", cov="line")

## -----------------------------------------------------------------------------
plotMASwish(y, alpha=.05)

## -----------------------------------------------------------------------------
library(org.Hs.eg.db)
y <- addIds(y, "SYMBOL", gene=TRUE)

## -----------------------------------------------------------------------------
plotMASwish(y, alpha=.05, xlim=c(.5,5.5))
with(
  subset(mcols(y), qvalue < .05 & abs(log2FC) > 4),
     text(log10mean, log2FC, SYMBOL,
          col="blue", pos=4, cex=.7)
)

## -----------------------------------------------------------------------------
gse <- summarizeToGene(se)
gy <- gse
gy <- gy[seqnames(gy) == "chr1",]

## -----------------------------------------------------------------------------
gy <- gy[,gy$condition %in% c("naive","IFNg")]
gy$condition <- factor(gy$condition, c("naive","IFNg"))

## ----results="hide", message=FALSE--------------------------------------------
gy <- scaleInfReps(gy)
gy <- labelKeep(gy)
gy <- gy[mcols(gy)$keep,]
set.seed(1)
gy <- swish(gy, x="condition", pair="line", nperms=64)

## -----------------------------------------------------------------------------
table(mcols(gy)$qvalue < .05)

## -----------------------------------------------------------------------------
hist(mcols(y)$pvalue, col="grey")

## -----------------------------------------------------------------------------
with(mcols(gy),
     table(sig=qvalue < .05, sign.lfc=sign(log2FC))
     )     
sig <- mcols(gy)$qvalue < .05
glo <- order(mcols(gy)$log2FC * sig)
ghi <- order(-mcols(gy)$log2FC * sig)

## -----------------------------------------------------------------------------
gtop.up <- mcols(gy)[head(ghi),]
print(as.data.frame(gtop.up)[,cols], digits=3)
gtop.down <- mcols(gy)[head(glo),]
print(as.data.frame(gtop.down)[,cols], digits=3)

## -----------------------------------------------------------------------------
plotInfReps(gy, idx=ghi[100], x="condition", cov="line")

## -----------------------------------------------------------------------------
plotMASwish(gy, alpha=.05)

## -----------------------------------------------------------------------------
library(org.Hs.eg.db)
gy <- addIds(gy, "SYMBOL", gene=TRUE)

## -----------------------------------------------------------------------------
plotMASwish(gy, alpha=.05, xlim=c(.5,5.5))
with(
  subset(mcols(gy), qvalue < .05 & abs(log2FC) > 3),
     text(log10mean, log2FC, SYMBOL,
          col="blue", pos=4, cex=.7)
)

## -----------------------------------------------------------------------------
# run on the transcript-level dataset
iso <- isoformProportions(y)
iso <- swish(iso, x="condition", pair="line", nperms=64)

## ----eval=FALSE, echo=FALSE---------------------------------------------------
#  # some unevaluated code for looking into DTE within non-DGE gene
#  # (DTE vs DGE plot)
#  fisherP <- function(p) {
#    pchisq(-2 * sum(log(p)), 2*length(p), lower.tail=FALSE)
#  }
#  stopifnot(all(lengths(mcols(y)$gene_id) == 1))
#  dat <- as.data.frame(mcols(y)[,c("gene_id","pvalue")])
#  dat$gene_id <- unlist(dat$gene_id)
#  pvals <- tapply(dat$pvalue, dat$gene_id, fisherP)
#  dte <- data.frame(gene_id=names(pvals), pvalue=pvals)
#  dte <- dte[rownames(gy),]
#  plot(-log10(mcols(gy)$pvalue), -log10(dte$pvalue))
#  #identify(-log10(mcols(gy)$pvalue), -log10(dte$pvalue))
#  idx <- 193
#  idx2 <- which(unlist(mcols(y)$gene_id) == rownames(gy)[idx])
#  plotInfReps(gy, idx, x="condition", cov="line", xaxis=FALSE)
#  par(mfrow=c(1,3))
#  for (i in 1:3) {
#    plotInfReps(y, idx2[i], x="condition", cov="line", xaxis=FALSE)
#  }

## -----------------------------------------------------------------------------
se$ifng <- factor(ifelse(
  grepl("IFNg",se$condition),
  "treated","control"))
se$salmonella <- factor(ifelse(
  grepl("SL1344",se$condition),
  "infected","control"))
with(colData(se),
     table(ifng, salmonella)
     )

## -----------------------------------------------------------------------------
y2 <- se
y2 <- y2[seqnames(y2) == "chr1",]

## -----------------------------------------------------------------------------
y2$pair <- as.numeric(factor(y2$line))
y2$pair[y2$ifng == "control"]
y2$pair[y2$ifng == "treated"]
y2$pair[y2$ifng == "treated"] <- rep(7:12,each=2)
y2$pair <- factor(y2$pair)
table(y2$pair, y2$salmonella)

## ----results="hide", message=FALSE--------------------------------------------
y2 <- scaleInfReps(y2)
y2 <- labelKeep(y2)
y2 <- y2[mcols(y2)$keep,]
set.seed(1)
y2 <- swish(y2, x="salmonella", cov="ifng", pair="pair", interaction=TRUE)

## -----------------------------------------------------------------------------
hist(mcols(y2)$pvalue, col="grey")

## -----------------------------------------------------------------------------
plotMASwish(y2, alpha=.05)

## -----------------------------------------------------------------------------
idx <- with(mcols(y2), which(qvalue < .05 & log2FC > 5))
plotInfReps(y2, idx[1], x="ifng", cov="salmonella")
plotInfReps(y2, idx[2], x="ifng", cov="salmonella")

## -----------------------------------------------------------------------------
dir <- system.file("extdata", package="tximportData")
files <- file.path(dir,"alevin/neurons_900_v014/alevin/quants_mat.gz")
neurons <- tximeta(files, type="alevin",
                   skipMeta=TRUE, # just for vignette
                   dropInfReps=TRUE,
                   alevinArgs=list(filterBarcodes=TRUE))

## -----------------------------------------------------------------------------
library(SingleCellExperiment)
sce <- as(neurons, "SingleCellExperiment")
sce$cluster <- factor(paste0("cl",sample(1:6,ncol(sce),TRUE)))
par(mfrow=c(2,1), mar=c(2,4,2,1))
plotInfReps(sce, "ENSMUSG00000072235.6", x="cluster",
            legend=TRUE)
plotInfReps(sce, "ENSMUSG00000072235.6", x="cluster",
            reorder=FALSE)

## ----echo=FALSE---------------------------------------------------------------
par(mfrow=c(2,1), mar=c(2,4,2,1))
plotInfReps(sce[,1:50], "ENSMUSG00000072235.6", x="cluster")
plotInfReps(sce[,1:150], "ENSMUSG00000072235.6", x="cluster")

## ----eval=FALSE---------------------------------------------------------------
#  y <- labelKeep(y, minCount=3, minN=10)
#  y <- y[mcols(y)$keep,] # subset genes

## ----eval=FALSE---------------------------------------------------------------
#  assays(y) <- lapply(assays(y), as.matrix) # make dense matrices
#  y <- scaleInfReps(y, lengthCorrect=FALSE, sfFun=sfFun)
#  y <- swish(y, x="condition")

## ---- eval=FALSE--------------------------------------------------------------
#  y <- makeInfReps(y, 20)

## ---- eval=FALSE--------------------------------------------------------------
#  library(SingleCellExperiment)
#  y <- as(y, "SingleCellExperiment")
#  # then, after filtering genes and cells...
#  
#  # compute and store sizeFactors calculated over all genes
#  library(scran)
#  y <- computeSumFactors(y)
#  
#  # split swish objects into 8 RDS files:
#  splitSwish(y, nsplits=8, prefix="swish", snakefile="Snakefile")
#  
#  # now, run snakemake from command line
#  
#  # after finished, results back into R:
#  y <- addStatsFromCSV(y, "summary.csv")

## -----------------------------------------------------------------------------
gres <- mcols(gy)[mcols(gy)$keep,]
min(gres$qvalue, na.rm=TRUE) # min nominal FDR is not 0
with(gres, plot(stat, -log10(qvalue)))
with(gres, plot(log2FC, -log10(qvalue)))
abline(v=0, col="red")
with(gres, plot(log2FC, -log10(qvalue),
                xlim=c(-1.5,1.5), ylim=c(0,1.5)))
abline(v=0, col="red")

## -----------------------------------------------------------------------------
y3 <- se
y3 <- y3[seqnames(y3) == "chr1",]
y3 <- y3[,y3$condition %in% c("naive","IFNg")]
y3 <- labelKeep(y3)
y3 <- y3[mcols(y3)$keep,]
y3 <- computeInfRV(y3)
mcols(y3)$meanCts <- rowMeans(assays(y3)[["counts"]])
with(mcols(y3), plot(meanCts, meanInfRV, log="xy"))
hist(log10(mcols(y3)$meanInfRV),
     col="grey50", border="white", breaks=20,
     xlab="mean InfRV", main="Txp-level inferential uncertainty")

## ----echo=FALSE---------------------------------------------------------------
n <- 8
condition <- rep(1:2,length=2*n)
group <- rep(1:2,each=n)
pair <- rep(c(1:n),each=2)
cols <- c("dodgerblue","goldenrod4")
plot(1:(2*n), rep(0,2*n), ylim=c(-.5,3.5),
     type="n", xaxt="n", yaxt="n",
     xlab="samples", ylab="permutation")
abline(v=8.5, lty=2)
axis(2, 0:3, c("orig",1:3), las=2)
text(1:(2*n), rep(0,2*n), pair, col=cols[condition], cex=2)
set.seed(1)
for (i in 1:3) {
  perms <- rep(2*sample(n,n),each=2) - rep(1:0,length=2*n)
  text(1:(2*n), rep(i,2*n), pair[perms], col=cols[condition[perms]], cex=2)
}

## ----echo=FALSE---------------------------------------------------------------
n <- 8
condition <- rep(c(1:2,1:2),each=n/2)
group <- rep(1:2,each=n)
id <- LETTERS[1:(2*n)]
cols <- c("dodgerblue","goldenrod4")
plot(1:(2*n), rep(0,2*n), ylim=c(-.5,3.5),
     type="n", xaxt="n", yaxt="n",
     xlab="samples", ylab="permutation")
abline(v=8.5, lty=2)
axis(2, 0:3, c("orig",1:3), las=2)
text(1:(2*n), rep(0,2*n), id, col=cols[condition], cex=2)
set.seed(3)
for (i in 1:3) {
  id.perms <- character(2*n)
  grp1 <- id[group==1]
  grp2 <- id[group==2]
  id.perms[c(1:4,9:12)] <- sample(id[condition==1],n)
  idx1 <- id.perms[c(1:4,9:12)] %in% grp1
  id.perms[c(5:8,13:16)][idx1] <- sample(id[condition==2 & group==1],sum(idx1))
  idx2 <- id.perms[c(1:4,9:12)] %in% grp2
  id.perms[c(5:8,13:16)][idx2] <- sample(id[condition==2 & group==2],sum(idx2))
  text(1:(2*n), rep(i,2*n), id.perms, col=cols[condition], cex=2)
}
arrows(3,1.5,1.3,1.15,,length=.1)
arrows(3,1.5,4.7,1.15,length=.1)

## -----------------------------------------------------------------------------
sessionInfo()

Try the fishpond package in your browser

Any scripts or data that you put into this service are public.

fishpond documentation built on Nov. 8, 2020, 7:54 p.m.