knitr::opts_chunk$set( collapse = TRUE, comment = "#>", crop = NULL ## Related to https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html )
## Track time spent on making the vignette startTime <- Sys.time() ## Bib setup library("RefManageR") ## Write bibliography information bib <- c( R = citation(), BiocStyle = citation("BiocStyle")[1], knitr = citation("knitr")[1], RefManageR = citation("RefManageR")[1], rmarkdown = citation("rmarkdown")[1], sessioninfo = citation("sessioninfo")[1], testthat = citation("testthat")[1], DeconvoBuddies = citation("DeconvoBuddies")[1] )
Inferring the composition of different cell types in a bulk RNA-seq data
Deconvolution is a analysis that aims to calculate the proportion of different cell types that make up a sample of bulk RNA-seq, based off of cell type gene expression profiles in a single cell/nuclei RNA-seq dataset.
![Use single cell data to infer the composition of bulk RNA-seq samples](http://research.libd.org/DeconvoBuddies/reference/figures/Deconvolution.png){width=50%}
| Approach | Method | Citation | Availability |
| ---------- | -------- | ------- | ----------- |
| weighted least squares | DWLS | Tsoucas et al, Nature Comm, 2019 | R Package Cran |
| Bias correction: Assay | Bisque | Jew et al, Nature Comm, 2020 | R Package github |
| Bias correction: Sourse | MuSiC | Wang et al, Nature Communications, 2019 | R Package github |
| Machine Learning | CIBERSORTx | Newman et al., Nature BioTech, 2019 | Webtool |
| Bayesian | BayesPrism | Chu et al., Nature Cancer, 2022 | Webtool/R Package |
| linear | Hspe | Hunt et al., Ann. Appl. Stat, 2021 | R package github |
We will be demonstrating how to use DeconvoBuddies
tools when applying
deconvolution with the Bisque
package.
DeconvoBuddies
toolsBisqueRNA
DeconvoBuddies
toolsR
is an open-source statistical environment which can be easily modified to enhance its functionality via packages. r Biocpkg("DeconvoBuddies")
is a R
package available via the Bioconductor repository for packages. R
can be installed on any operating system from CRAN after which you can install r Biocpkg("DeconvoBuddies")
by using the following commands in your R
session:
DeconvoBuddies
if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } BiocManager::install("DeconvoBuddies") ## Check that you have a valid Bioconductor installation BiocManager::valid()
## install Bisque from cran # install.packages("BisqueRNA") library("spatialLIBD") library("DeconvoBuddies") library("SummarizedExperiment") library("SingleCellExperiment") library("BisqueRNA") library("dplyr") library("tidyr") library("tibble") library("ggplot2")
Access the 110 sample Human DLPFC bulk RNA-seq dataset for LIBD. These samples
are from 19 tissue blocks, and 10 neurotypical adult donors. Samples were sequenced
with two different library_types
(polyA and RiboZeroGold), and three different
RNA_extraction
(Cyto, Total, Nuc), post quality control n=110 samples.
## use fetch deconvon data to load rse_gene rse_gene <- fetch_deconvo_data("rse_gene") rse_gene # lobstr::obj_size(rse_gene) # 41.16 MB ## Use Symbol as rownames rownames(rse_gene) <- rowData(rse_gene)$Symbol ## bulk RNA seq samples were sequenced with different library types, and RNA extractions table(rse_gene$library_type, rse_gene$library_prep)
This data is paired with a single nucleus RNA-seq data set from spatialLIBD
.
This dataset can be accessed with spatialLIBD::fetch_data()
.
## Use spatialLIBD to fetch the snRNA-seq dataset sce_path_zip <- fetch_deconvo_data("sce") sce_path <- unzip(sce_path_zip, exdir = tempdir()) sce <- HDF5Array::loadHDF5SummarizedExperiment( file.path(tempdir(), "sce_DLPFC_annotated")) # lobstr::obj_size(sce) # 172.28 MB ## exclude Ambiguous cell type sce <- sce[,sce$cellType_broad_hc != "Ambiguous"] sce$cellType_broad_hc <- droplevels(sce$cellType_broad_hc) dim(sce) ## Check the broad cell type distribution table(sce$cellType_broad_hc) ## We're going to subset to the first 5k genes to save memory sce <- sce[seq_len(5000),]
The cell type proportions from the RNAScope experiment used to evaluate the deconvolution estimations.
Key columns:
SAMPLE_ID
: DLPFC Tissue block + RNAScope combination.
Sample
: DLFPC Tissue block (Donor BrNum + DLPFC position).
cell_type
: The cell type measured.
n_cell
: the number of cells counted for the Sample and cell type.
prop
: the calculated cell type proportion from n_cell
head(DeconvoBuddies::RNAScope_prop) ## plot the RNAScope compositions plot_composition_bar(prop_long = RNAScope_prop, sample_col = "SAMPLE_ID", x_col = "SAMPLE_ID", add_text = FALSE) + facet_wrap(~Combo, nrow = 2, scales = "free_x")
DeconvoBuddies::get_mean_ratio
Mean Ratio marker finding
![Use Mean Ratio to find less noisy marker genes htan 1vALL](http://research.libd.org/DeconvoBuddies/reference/figures/get_mean_ratio.png)
marker_stats <- get_mean_ratio(sce, cellType_col = "cellType_broad_hc", gene_ensembl = "gene_id", gene_name = "gene_name") marker_stats |> group_by(cellType.target) |> slice(1)
plot_marker_express(sce, stats = marker_stats, cell_type = "Excit", cellType_col = "cellType_broad_hc", rank_col = "MeanRatio.rank", anno_col = "MeanRatio.anno", gene_col = "gene" )
marker_genes <- marker_stats |> filter(MeanRatio.rank <= 25 & gene %in% rownames(rse_gene)) marker_genes |> count(cellType.target) marker_genes <- marker_genes |> pull(gene)
convert to ExpressionSet
format, filter for cells with no counts across marker genes
exp_set_bulk <- Biobase::ExpressionSet( assayData = assays(rse_gene[marker_genes, ])$counts, phenoData = AnnotatedDataFrame( as.data.frame(colData(rse_gene))[c("SAMPLE_ID")] ) ) exp_set_sce <- Biobase::ExpressionSet( assayData = as.matrix(assays(sce[marker_genes, ])$counts), phenoData = AnnotatedDataFrame( as.data.frame(colData(sce))[, c("cellType_broad_hc", "BrNum")] ) ) ## check for nuclei with 0 marker expression zero_cell_filter <- colSums(exprs(exp_set_sce)) != 0 message("Exclude ", sum(!zero_cell_filter), " cells") exp_set_sce <- exp_set_sce[, zero_cell_filter]
est_prop <- ReferenceBasedDecomposition( bulk.eset = exp_set_bulk, sc.eset = exp_set_sce, cell.types = "cellType_broad_hc", subject.names = "BrNum", use.overlap = FALSE )
est_prop$bulk.props <- t(est_prop$bulk.props) head(est_prop$bulk.props)
DeconvoBuddies
toolsPlot Composition Barplots of Bisque predicted compositions
## add Phenotype data to proportion estimates pd <- colData(rse_gene) |> as.data.frame() |> select(SAMPLE_ID, Sample, library_combo) ## make proption estimates long so they are ggplot friendly prop_long <- est_prop$bulk.props |> as.data.frame() |> tibble::rownames_to_column("SAMPLE_ID") |> tidyr::pivot_longer(!SAMPLE_ID, names_to = "cell_type", values_to = "prop") |> left_join(pd) ## create composition bar plots ## Average by sample plot_composition_bar(prop_long = prop_long, sample_col = "SAMPLE_ID", x_col = "Sample", add_text = FALSE) ## for all library preparations by sample n=110 plot_composition_bar(prop_long = prop_long, sample_col = "SAMPLE_ID", x_col = "SAMPLE_ID", add_text = FALSE) + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())
Note to compare the deconvolution results to the RNAScope proportions, Oligo and OPC need to be added together.
prop_long_opc <- prop_long |> mutate(cell_type = gsub("Oligo|OPC", "OligoOPC", cell_type)) |> group_by(SAMPLE_ID, Sample, library_combo, cell_type) |> summarize(prop = sum(prop)) |> ungroup() prop_long_opc |> count(cell_type) ## Join RNAScope and Bisque Proportions prop_compare <- prop_long_opc |> inner_join(RNAScope_prop |> select(Sample, cell_type, prop_RNAScope = prop, prop_sn), by = c("Sample", "cell_type"))
Calculate the correlation plot a scatter plot of the proportions
## correlation with RNAScope proportions cor(prop_compare$prop, prop_compare$prop_RNAScope) ## Scatter plot with RNAScope proportions prop_compare |> ggplot(aes(x = prop_RNAScope, y = prop, color = cell_type, shape = library_combo)) + geom_point() + geom_abline() ## correlation with snRNA-seq proportion cor(prop_compare$prop, prop_compare$prop_sn) ## Scatter plot with RNAScope proportions prop_compare |> ggplot(aes(x = prop_sn, y = prop, color = cell_type, shape = library_combo)) + geom_point() + geom_abline()
hspe
hspe
is downloadable as a tar.gz but
can't be shown on this vignettes
Code to prep-input and run hspe pseudobulk the snRNA-seq data (not run here)
if(FALSE){ sce_pb <- spatialLIBD::registration_pseudobulk(sce, var_registration = "cellType_broad_hc", var_sample_id = "Sample") ## mixture samples is the gene expression from the bulk rse_gene mixture_samples = t(assays(rse_gene)$logcounts) mixture_samples[1:5, 1:5] ## pure samples is a vector of indexes of the differnt cell types pure_samples = rafalib::splitit(sce_pb$cellType_broad_hc) ## refrence samples is the pseudobulked logcounts reference_samples = t(assays(sce_pb)$logcounts) reference_samples[1:5, 1:5] ncol(mixture_samples) == ncol(reference_samples) est_prop_hspe = hspe(Y = mixture_samples, reference = reference_samples, pure_samples = pure_samples, markers = marker_genes, seed = 10524) }
The r Biocpkg("DeconvoBuddies")
package r Citep(bib[["DeconvoBuddies"]])
was made possible thanks to:
r Citep(bib[["R"]])
r Biocpkg("BiocStyle")
r Citep(bib[["BiocStyle"]])
r CRANpkg("knitr")
r Citep(bib[["knitr"]])
r CRANpkg("RefManageR")
r Citep(bib[["RefManageR"]])
r CRANpkg("rmarkdown")
r Citep(bib[["rmarkdown"]])
r CRANpkg("sessioninfo")
r Citep(bib[["sessioninfo"]])
r CRANpkg("testthat")
r Citep(bib[["testthat"]])
This package was developed using r BiocStyle::Biocpkg("biocthis")
.
Code for creating the vignette
## Create the vignette library("rmarkdown") system.time(render("Deconvolution_Benchmark_DLPFC.Rmd", "BiocStyle::html_document")) ## Extract the R code library("knitr") knit("Deconvolution_Benchmark_DLPFC.Rmd", tangle = TRUE)
Date the vignette was generated.
## Date the vignette was generated Sys.time()
Wallclock time spent generating the vignette.
## Processing time in seconds totalTime <- diff(c(startTime, Sys.time())) round(totalTime, digits = 3)
R
session information.
## Session info library("sessioninfo") options(width = 120) session_info()
This vignette was generated using r Biocpkg("BiocStyle")
r Citep(bib[["BiocStyle"]])
with r CRANpkg("knitr")
r Citep(bib[["knitr"]])
and r CRANpkg("rmarkdown")
r Citep(bib[["rmarkdown"]])
running behind the scenes.
Citations made with r CRANpkg("RefManageR")
r Citep(bib[["RefManageR"]])
.
## Print bibliography PrintBibliography(bib, .opts = list(hyperlink = "to.doc", style = "html"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.