Examine the computational complexity of the model

#
# Execute this block to render the Rmarkdown.
#
devtools::load_all('../..')
devtools::load_all('../../../DeLoreanData/')
rmarkdown::render('complexity.Rmd')

Load data

Load expression data and meta data.

library(dplyr)
library(ggplot2)
library(DeLorean)
library(DeLoreanData)
data(ShalekDeLorean)
dl <- de.lorean(
    shalek.A.expr,
    shalek.A.gene.meta,
    shalek.A.cell.meta)
dl$cell.meta <- mutate(dl$cell.meta,
                       precocious=cell %in% c('LPS_1h_S51', 'LPS_1h_S52'))
time.course.cells <- (
    dl$cell.meta
    %>% filter(! is.na(total),
               "" == assay,
               "LPS" == stimulant | "" == stimulant,
               "" == ko,
               FALSE == disrupted,
               total > 1e6,
               "" == replicate))
dl <- filter_cells(dl, cells=time.course.cells$cell)
dl$cell.meta$cell <- factor(
    dl$cell.meta$cell,
    levels=(shalek.A.cell.meta %>% arrange(capture))$cell)
dl <- adjust.by.cell.sizes(dl)
induced.genes <- dl$gene.meta %>% filter(! is.na(cluster))
shalek.key.genes <- unique(toupper(c(
    #
    # Cluster I d (core antiviral module; enriched for annotated antiviral and
    #             interferon response genes; for example,-
    "Ifit1", "Irf7",
    #
    # Cluster III c (peaked inflammatory module; showing rapid,
    # yet transient, induction under LPS; for example,
    "Tnf", "Il1a", "Cxcl2",
    #
    # Cluster III d (sustained inflammatory module; exhibiting
    # continued rise in expression under LPS; for example,
    "Mmp14", "Marco", "Il6",
    #
    # Cluster III b (‘maturity’ module; containing markers of
    # dendritic cell maturation; for example,
    "Cd83", "Ccr7", "Ccl22",
    #
    # At 2 h following LPS,
    "Ifnb1",
    # was bimodally expressed
    #
    # Genes encoding key inflammatory cytokines (for example,
    "Tnf", "Cxcl1",
    #
    # Figure 4: core antiviral targets.
    "Rsad2", "Stat2"
)))
clusters <- c("Id", "IIIb", "IIIc", "IIId")
# clusters <- c("Id")
dl <- analyse.variance(dl, adjust.cell.sizes=TRUE)
gene.variances <- dl$gene.var  # Save copy for later
max.genes <- getOption("Shalek.max.genes", 74)
genes.for.stan <- (
    dl$gene.var
    %>% left_join(dl$gene.meta)
    %>% mutate(key=gene %in% shalek.key.genes)
    %>% filter(cluster %in% clusters)
    %>% arrange(- Psi / Omega)
    %>% head(max.genes))
dl <- filter_genes(dl, genes=genes.for.stan$gene)
print(object.size(dl))
print(sapply(dl, object.size))

Time model fitting

seed <- getOption("complexity.seed", 1)
set.seed(seed)
number.of.cells <- getOption("complexity.num.cells", c(10, 18, 30, 55, 100, 180, 300))
number.of.genes <- getOption("complexity.num.genes", c(10, 20, 30, 40, 50, 60, 70))
set.seed(seed)
default.num.genes <- 30
model.names <- getOption('complexity.model.names', c("exact", "lowrank"))
num.inits <- getOption("complexity.num.inits", default.num.cores())
inf.method <- getOption("complexity.method", "vb")
time.fit <- function(dl) {
    dl.2 <- estimate.hyper(
        dl,
        sigma.tau=getOption("Shalek.sigma.tau", 1),
        length.scale=getOption("Shalek.length.scale", 5),
        model.name=model.name)
    dl.2 <- prepare.for.stan(dl.2)
    dl.2 <- compile.model(dl.2)
    dl.2 <- find.good.ordering(dl.2, seriation.find.orderings)
    dl.2 <- pseudotimes.from.orderings(dl.2, num.to.keep=num.inits)
    system.time(dl.2 <- fit.model(dl.2, method=inf.method, num.inits=num.inits))
}
cell.timings <- NULL
gene.timings <- NULL
for (model.name in model.names) {
    cell.timings <- rbind(
        cell.timings,
        as.data.frame(t(sapply(
            number.of.cells,
            function(num.cells) time.fit(filter_cells(dl, number=num.cells))
        ))) %>%
        mutate(
            num.genes=dim(dl)[1],
            num.cells=number.of.cells,
            data="Shalek",
            model.name=model.name,
            inf.method=inf.method,
            seed=seed))
    gene.timings <- rbind(
        gene.timings,
        as.data.frame(t(sapply(
            number.of.genes,
            function(num.genes) time.fit(filter_cells(filter_genes(dl, number=num.genes), number=default.num.genes))
        ))) %>%
        mutate(
            num.genes=number.of.genes,
            num.cells=default.num.genes,
            data="Shalek",
            model.name=model.name,
            inf.method=inf.method,
            seed=seed))
}
saveRDS(list(cell=cell.timings, gene=gene.timings), 'Data/timings.rds')

Plot timings

The model name is "r model.name", the inference method is "r inf.method" with r num.inits initialisations and the seed is r seed.

ggplot(gene.timings, aes(x=num.genes, y=elapsed, color=model.name)) + geom_line()
ggplot(cell.timings, aes(x=num.cells, y=elapsed, color=model.name)) + geom_line()

Session information

date()

R version and packages used:

devtools::session_info()


Try the DeLorean package in your browser

Any scripts or data that you put into this service are public.

DeLorean documentation built on May 2, 2019, 9:24 a.m.