knitr::opts_chunk$set(echo = TRUE)

Load packages

suppressPackageStartupMessages({
    library(scRNAseq)
    library(scuttle)
    library(scran)
    library(velociraptor)
    library(scater)
    library(cowplot)
    library(GGally)
    library(reticulate)
    library(SingleCellExperiment)
    library(basilisk)
})

Define virtual environment to use for reticulate

We use the velociraptor conda environment in order to run the scVelo commands also directly in python.

reticulate::use_virtualenv(basilisk:::.obtainEnvironmentPath(velociraptor:::velo.env))
sys <- reticulate::import("sys")
sys$version

Set temporary directory

Output objects (to move data from SCE to AnnData without using basilisk) are saved to a temporary directory.

tmpdir <- tempdir()

Prepare data

We use an example data set from scRNAseq, and subset to the first 500 cells. Next, we preprocess the data set by selecting highly variable genes and applying PCA and tSNE. The object is also saved to a h5ad file for use with scVelo directly in python.

## Load and subset data
sce <- scRNAseq::HermannSpermatogenesisData()[, 1:500]

## Add 'counts' assay
assay(sce, "counts") <- assay(sce, "spliced")

## Save to h5ad file for python scVelo analysis
zellkonverter::writeH5AD(sce, X_name = "counts", 
                         file=file.path(tmpdir, "anndata.h5ad"))

Run analysis directly in python

We use the same conda environment as in velociraptor to run the scVelo analysis directly in python, rather than via the R wrapper.

Steady-state model

import scanpy as sc
import sys
import numpy as np
import anndata
import scvelo as scv
import matplotlib
import pandas as pd
from pathlib import Path
from os import path
adata=anndata.read(r.tmpdir + "/anndata.h5ad")
scv.pp.filter_and_normalize(adata, enforce=True, n_top_genes=2000)
scv.pp.moments(adata)
scv.tl.velocity(adata, mode="steady_state")
scv.tl.velocity_graph(adata)
scv.tl.velocity_pseudotime(adata)
scv.tl.velocity_confidence(adata)
adata.write(r.tmpdir + "/anndata_withvelo_steadystate.h5ad")

Dynamical model

import scanpy as sc
import sys
import numpy as np
import anndata
import scvelo as scv
import matplotlib
import pandas as pd
adata=anndata.read(r.tmpdir + "/anndata.h5ad")
scv.pp.filter_and_normalize(adata, enforce=True, n_top_genes=2000)
scv.pp.moments(adata)
scv.tl.recover_dynamics(adata)
scv.tl.velocity(adata, mode="dynamical")
scv.tl.velocity_graph(adata)
scv.tl.velocity_pseudotime(adata)
scv.tl.latent_time(adata)
scv.tl.velocity_confidence(adata)
adata.write(r.tmpdir + "/anndata_withvelo_dynamical.h5ad")

Read back objects in R for later comparison

sceps <- zellkonverter::readH5AD(file.path(tmpdir, "anndata_withvelo_steadystate.h5ad"))
scepd <- zellkonverter::readH5AD(file.path(tmpdir, "anndata_withvelo_dynamical.h5ad"))

Process the data set

sce <- scuttle::logNormCounts(sce, assay.type=1)

## To use the HVGs determined by scVelo
# top.hvgs <- rownames(sceps)

## To get HVGs with scran
dec <- scran::modelGeneVar(sce)
top.hvgs <- scran::getTopHVGs(dec, n=2000)

set.seed(1)
sce <- scater::runPCA(sce, subset_row=top.hvgs)
sce <- scater::runTSNE(sce, dimred="PCA")

Helper function

The function below runs scVelo via the R wrapper, first using the pre-normalized data and PCA from the SingleCellExperiment object, and then with use_theirs=TRUE. It also performs some comparisons between the two results.

run_scv <- function(sce, top.hvgs, mode) {
    ## precomputed PCs and normalized values
    velo.out <- scvelo(
        sce, assay.X="counts", sf.X=sizeFactors(sce), 
        subset.row=top.hvgs, use.dimred="PCA",
        mode=mode
    )
    stopifnot(colnames(sce) == colnames(velo.out))
    sce$velocity_pseudotime <- velo.out$velocity_pseudotime

    ## using scVelo normalization, gene selection and PCA calculation
    velo.out_theirs <- scvelo(
        sce, assay.X="counts", use.theirs=TRUE, mode=mode,
        scvelo.params=list(filter_and_normalize=list(enforce=TRUE, 
                                                     n_top_genes=2000L))
    )
    stopifnot(colnames(sce) == colnames(velo.out_theirs))
    sce$velocity_pseudotime_theirs <- velo.out_theirs$velocity_pseudotime

    ## Inferred pseudotime
    print(cowplot::plot_grid(
        cowplot::plot_grid(
            plotTSNE(sce, colour_by="velocity_pseudotime"),
            plotTSNE(sce, colour_by="velocity_pseudotime_theirs"),
            nrow = 1),
        plotTSNE(sce, colour_by="celltype"), ncol = 1
    ))
    print(GGally::ggpairs(as.data.frame(colData(sce))[, c("velocity_pseudotime", 
                                                          "velocity_pseudotime_theirs")]))

    shared_genes <- Reduce(intersect, list(rownames(velo.out), rownames(velo.out_theirs)))
    message("Number of shared genes:")
    print(length(shared_genes))
    message("Ours:")
    print(table(shared = rownames(velo.out) %in% shared_genes,
                velogene = rowData(velo.out)$velocity_genes))
    message("Theirs:")
    print(table(shared = rownames(velo.out_theirs) %in% shared_genes,
                velogene = rowData(velo.out_theirs)$velocity_genes))
    message("Among shared genes, how many are velocity genes:")
    print(table(ours = rowData(velo.out[shared_genes, ])$velocity_genes,
                theirs = rowData(velo.out_theirs[shared_genes, ])$velocity_genes))

    ## Correlations of velocities
    velo_corrs <- diag(cor(assay(velo.out[shared_genes, ], "velocity"), 
                           assay(velo.out_theirs[shared_genes, colnames(velo.out)],
                                 "velocity"),
                           use = "pairwise.complete"))
    hist(velo_corrs, 100)

    ## Correlations of normalized spliced/unspliced values
    ms_corrs <- diag(cor(assay(velo.out[shared_genes, ], "Ms"), 
                         assay(velo.out_theirs[shared_genes, colnames(velo.out)],
                               "Ms"),
                         use = "pairwise.complete"))
    hist(ms_corrs, 100)
    mu_corrs <- diag(cor(assay(velo.out[shared_genes, ], "Mu"), 
                         assay(velo.out_theirs[shared_genes, colnames(velo.out)],
                               "Mu"),
                         use = "pairwise.complete"))
    hist(mu_corrs, 100)

    velo.out_theirs
}

Run scVelo

Dynamical model

sced <- run_scv(sce, top.hvgs=top.hvgs, mode="dynamical")

Compare our run with 'use_theirs=TRUE' to direct python calls

stopifnot(all(colnames(sced) == colnames(scepd)),
          all(rownames(sced) == rownames(scepd)))
## Correlations of velocities
velo_corrs <- diag(cor(assay(sced, "velocity"), 
                       assay(scepd, "velocity"),
                       use = "pairwise.complete"))
summary(velo_corrs)
plot(sced$velocity_pseudotime, scepd$velocity_pseudotime)
summary(abs(sced$velocity_pseudotime - scepd$velocity_pseudotime))
stopifnot(max(abs(sced$velocity_pseudotime - scepd$velocity_pseudotime)) < 1e-3)

Steady-state model

sces <- run_scv(sce, top.hvgs=top.hvgs, mode="steady_state")

Compare our run with 'use_theirs=TRUE' to direct python calls

stopifnot(all(colnames(sces) == colnames(sceps)),
          all(rownames(sces) == rownames(sceps)))
## Correlations of velocities
velo_corrs <- diag(cor(assay(sces, "velocity"), 
                       assay(sceps, "velocity"),
                       use = "pairwise.complete"))
summary(velo_corrs)
plot(sces$velocity_pseudotime, sceps$velocity_pseudotime)
summary(abs(sces$velocity_pseudotime - sceps$velocity_pseudotime))
stopifnot(max(abs(sces$velocity_pseudotime - sceps$velocity_pseudotime)) < 1e-3)

Session info

sessionInfo()


kevinrue/velociraptor documentation built on Feb. 12, 2024, 4:34 a.m.