In JohnReid/DeLorean: Estimates Pseudotimes for Single Cell Expression Data

devtools::load_all('../..')
# fit.model <- FALSE
fit.model <- TRUE
rmarkdown::render('Windram-DeLorean.Rmd')

library(knitr)
library(knitcitations)
library(rmarkdown)
#
# knitr options
#
opts_chunk$set(
    fig.path = 'figures/Windram-',
    stop_on_error = TRUE,
    fig.width = 12.5,
    fig.height = 8)
#
# Citations
#
cleanbib()
cite_options(
    # hyperlink = 'to.doc',
    hyperlink = TRUE,
    # style = 'html',
    # citation_format = 'text',
    citation_format = "pandoc",
    cite.style = "numeric",
    check.entries = TRUE)
    # hyperlink = TRUE)
bib <- read.bibtex("DeLorean.bib")
if (file.exists("config.R")) {
    source("config.R")
}
source(system.file("scripts/shared.R", package="DeLorean"))

# suppressMessages(loadfonts())
library(DeLorean)
#
# Stylesheet
#
options(markdown.HTML.stylesheet = system.file(file.path('Rmd', 'foghorn.css'),
                                               package="DeLorean"))
font.family <- "Verdana"
font.theme <- theme_update(text=element_text(family=font.family))
theme_set(font.theme)

r date()

r citet(bib[["windram_arabidopsis_2012"]]) assayed leaves at 24 time points in 2 conditions.

Data

Windram et al.'s data is available in the DeLorean R package.

library(DeLorean)
library(dplyr)
library(ggplot2)
data(WindramDeLorean)
seed <- getOption("Windram.seed", 1)
set.seed(seed)

Obfuscate time points

Reduce resolution of observed capture time points.

group.size <- 12
windram.cell.meta$obstime.orig <- windram.cell.meta$obstime
windram.cell.meta$capture.orig <- windram.cell.meta$capture
windram.cell.meta$obstime <- (
    floor((windram.cell.meta$obstime-1) / group.size) * group.size
    + group.size / 2)
windram.cell.meta$capture <- (
    factor(as.character(windram.cell.meta$obstime),
           ordered=TRUE,
           levels=unique(as.character(windram.cell.meta$obstime))))

Just consider the Botrytis cells

dl <- de.lorean(
    windram.expr,
    windram.gene.meta,
    windram.cell.meta)
botrytis.cells <- dl$cell.meta %>% filter(condition == "Botrytis")
dl <- filter_cells(dl, cells=botrytis.cells$cell)

Estimate hyperparameters

Examine data for empirical Bayes estimation of hyperparameters.

model.name <- getOption('Windram.model', 'exact')
dl <- estimate.hyper(dl, sigma.tau=group.size/4, model.name=model.name, adjust.cell.sizes=FALSE)
if (! dl$opts$model.estimates.cell.sizes) {
    dl <- adjust.by.cell.sizes(dl)
}
fits.omega <- test.fit(dl$gene.var$omega.hat)
print(fits.omega$gp)
fits.psi <- test.fit(dl$gene.var$psi.hat)
print(fits.psi$gp)

Choose cells

Select some cells at random if we have too many.

# 24 is all the cells
max.cells <- min(getOption("Windram.max.cells", 24))
dl <- filter_cells(dl, number=max.cells)

Choose genes

Choose a few (at most 100) genes to fit the model.

max.genes <- min(getOption("Windram.max.genes", 100))
dl <- filter_genes(dl, number=max.genes)

Choose some held out genes to validate the model with.

held.out.genes <- with(dl, gene.var
                           %>% left_join(gene.meta)
                           %>% filter(! gene %in% rownames(dl$expr))
                           %>% arrange(-psi.hat/omega.hat)
                           %>% head(getOption('Windram.held.out', 100)))
held.out.expr <- windram.expr[as.character(held.out.genes$gene),
                              colnames(dl$expr)]

Fit model

num.inits <- getOption("Windram.num.inits", default.num.cores())
inf.method=getOption("Windram.method", "vb")

Define and compile the model, find the best initialisation, and fit the model. The model name is "r model.name", the inference method is "r inf.method" with r num.inits initialisations, the seed is r seed and the data have r dim(dl)[1] genes and r dim(dl)[2] cells.

dl <- prepare.for.stan(dl, num.inducing=30)
# Save for input to Monocle
saveRDS(list(expr=dl$expr,
             held.out.expr=held.out.expr,
             cell.meta=dl$cell.map,
             gene.meta=dl$gene.map),
        file='Data/Windram-input.rds')
dl <- compile.model(dl)
dl <- find.good.ordering(dl, seriation.find.orderings)
plot(dl, type='orderings')
dl <- pseudotimes.from.orderings(dl, num.to.keep=num.inits)
system.time(dl <- fit.model(dl, method=inf.method, num.inits=num.inits))
plots <- list()
if ("vb" == inf.method) {
    plots$pair.plot <- pseudotimes.pair.plot(dl)
}

dl <- examine.convergence(dl)

Analyse posterior

Examine posterior.

dl <- process.posterior(dl)
dl <- analyse.noise.levels(dl)

Profiles

Calculate expression profiles.

dl <- make.predictions(dl)

Examine pseudotime

Did DeLorean learn the obfuscated pseudotime?

plots$pseudo <- with(dl, {
    (ggplot(samples.l$tau %>% filter(iter == best.sample),
                    aes(x=tau, y=obstime.orig, color=capture),
                    environment=environment())
        + geom_point()
        # + scale_x_continuous(name="pseudotime")
        # + scale_y_continuous(name="capture time")
        # + scale_color_discrete(name="low-resolution\ngroup")
        + scale_y_continuous(name="true capture time")
        + scale_x_continuous(name="pseudotime")
        + scale_color_discrete(name="model\ncapture\ntime")
    )
})
print(plots$pseudo)
do.call(ggsave,
        c(list('Windram-pseudotime-vs-obfuscated.pdf', plots$pseudo + bioinf.config),
          bioinf.sizes))
if ('sample' == inf.method) {
    # Save convergence as well
    ggsave('Windram-Rhat.png', plot(dl, type="Rhat"),
        width=slide.fig.width, height=slide.fig.width / html5$ratio,
        dpi=300, units="in")
}
# Save profiles as well
plots$profiles <- plot(dl, type="profiles", genes=dl$genes.high.psi)
plots$profiles$layers[[3]]$aes_params$size <- 0.8
do.call(ggsave,
        c(list('Windram-profiles.pdf', plots$profiles + bioinf.config),
          suppl.sizes))
# Save tau posterior
plots$tau.posterior <- (
    ggplot(dl$samples.l$tau, aes(x=capture.orig, y=tau, color=capture))
    + geom_boxplot(outlier.size=.3, lwd=.15)
    # + theme(axis.text.x = element_text(angle = 90, hjust = 1))
    # + theme_gray(base_size=24)
    + scale_x_discrete(name="true capture time")
    + scale_y_continuous(name="pseudotime")
    + scale_color_discrete(name="model\ncapture\ntime")
    + coord_flip())
do.call(ggsave,
        c(list('Windram-tau-posterior.pdf', plots$tau.posterior + bioinf.config),
          bioinf.sizes))

# Save best tau
plots$tau.best <- plot(dl, type="pseudotime")
ggsave('Windram-tau-best.pdf', plots$tau.best + plos.theme,
       width=text.width, height=text.width, units="in")
ggsave('Windram-tau-best.png', plots$tau.best + plos.theme,
       width=slide.fig.width, height=slide.fig.width / html5$ratio,
       dpi=300, units="in")

Look at the expected correlation between the obfuscated capture time with the pseudotime in the full posterior.

posterior.cor <- (
    dl$samples.l$tau
    %>% group_by(iter)
    %>% dplyr::summarise(pseudotime.capture.cor=cor(tau, obstime.orig,
                                             method="spearman"))
)
posterior.cor.mean <- mean(posterior.cor$pseudotime.capture.cor)
posterior.cor.mean
posterior.cor.best <- filter(posterior.cor,
                             dl$best.sample == iter)$pseudotime.capture.cor
posterior.cor.best
monocle.cor <- 0.9269565
plots$post.cor <- (ggplot(posterior.cor, aes(x=pseudotime.capture.cor))
    + geom_histogram()
    + geom_vline(xintercept=monocle.cor, linetype='dotted')
    + geom_vline(xintercept=posterior.cor.best, linetype='dashed')
    + scale_x_continuous(name="correlation"))
print(plots$post.cor)
do.call(ggsave,
        c(list('Windram-posterior-cor.pdf', plot=plots$post.cor + bioinf.config),
          bioinf.sizes))

The correlation between the obfuscated capture time with the pseudotime of the best sample.

with(dl$samples.l$tau %>% filter(iter == dl$best.sample),
     cor(tau, obstime.orig, method="spearman"))

Roughness permutation test

Permutation test for roughness.

dl <- roughness.test(dl, held.out.expr)
print(dl$roughness.test)
monocle.roughness <- 0.85
plots$roughnesses <- plot(dl, type="roughnesses") +
    geom_vline(xintercept=monocle.roughness, linetype='dotted')
do.call(ggsave,
        c(list('Windram-roughnesses.pdf', plots$roughnesses + bioinf.config),
          suppl.sizes))
print(plots$roughnesses)
save.output <- getOption('Windram.save.output', TRUE)

# Save DeLorean object without fit component
saveRDS({dl2 <- dl; dl2$fit <- NULL; dl2}, "Data/Windram.rds")
# Save plots
saveRDS(plots, "Data/Windram-plots.rds")

# Not executed
# Load DeLorean object without fit component
devtools::load_all('../..')
library(DeLorean)
library(dplyr)
library(ggplot2)
dl <- readRDS("Data/Windram.rds")
plots <- readRDS("Data/Windram-plots.rds")

Examine data and estimated tau using PCA

pca <- prcomp(t(dl$expr), center=TRUE, scale.=FALSE)
pca <- prcomp(t(dl$expr + dl$cell.sizes$S.hat), center=TRUE, scale.=FALSE)
# print(pca)
# summary(pca)
png('Windram-PCA-var.png')
plot(pca, type = "l")
dev.off()
pca.l <- reshape2::melt(pca$x, varnames=c("cell", "PC"), value.name="x")
# sample_n(pca.l, 10)
tau.best <- dl$samples.l$tau %>% filter(iter==dl$best.sample)
pca.df <-pca.l %>% reshape2::dcast(cell ~ PC) %>% mutate(cell=factor(cell, levels=cell.levels(dl))) %>% left_join(tau.best)
pca.plot <- ggplot(pca.df,
       aes(x=PC1, y=PC2, label=cell, color=tau, shape=capture)) +
    #geom_text() +
    geom_point() +
    scale_size_continuous(breaks=c(1,2)) +
    scale_colour_gradient(low="red", high="blue")
print(pca.plot)
ggsave('Windram-PCA.png', pca.plot)
#
# Check which tau has best log prob
extracted <- rstan::extract(dl$fit)
lapply(extracted, dim)
pars <- with(extracted, list(
    tau=tau[dl$best.sample,],
    psi=psi[dl$best.sample,],
    omega=omega[dl$best.sample,]))
if (dl$opts$model.estimates.cell.sizes) {
    pars$S <- extracted$S[dl$best.sample,]
}
best.tau <- tau.for.sample(dl)
desired.tau <- sort(best.tau)
fit <- make.fit.valid(dl)
pars$tau <- best.tau
print(rstan::log_prob(fit, rstan::unconstrain_pars(fit, pars), adjust_transform=FALSE))
pars$tau <- desired.tau
print(rstan::log_prob(fit, rstan::unconstrain_pars(fit, pars), adjust_transform=FALSE))

R version and packages used:

date()
sessionInfo()

JohnReid/DeLorean documentation built on Sept. 27, 2021, 5:45 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

JohnReid/DeLorean
Estimates Pseudotimes for Single Cell Expression Data

In JohnReid/DeLorean: Estimates Pseudotimes for Single Cell Expression Data

Data

Obfuscate time points

Estimate hyperparameters

Choose cells

Choose genes

Fit model

Analyse posterior

Profiles

Examine pseudotime

Roughness permutation test

Examine data and estimated tau using PCA

R Package Documentation

Browse R Packages

We want your feedback!

JohnReid/DeLorean Estimates Pseudotimes for Single Cell Expression Data

In JohnReid/DeLorean: Estimates Pseudotimes for Single Cell Expression Data

Data

Obfuscate time points

Estimate hyperparameters

Choose cells

Choose genes

Fit model

Analyse posterior

Profiles

Examine pseudotime

Roughness permutation test

Examine data and estimated tau using PCA

R Package Documentation

Browse R Packages

We want your feedback!

JohnReid/DeLorean
Estimates Pseudotimes for Single Cell Expression Data