Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE
)
## ----eval=FALSE---------------------------------------------------------------
# # Bioconductor must be installed +/- updated first
# BiocManager::install(version = "3.xx") # set to latest version
#
# # minimum necessary Bioconductor packages to install cellGeometry package
# BiocManager::install(c("ensembldb", "DelayedArray"))
#
# # packages needed to read h5ad files
# BiocManager::install(c("zellkonverter", "rhdf5", "HDF5Array"))
#
# # optional, if you are using Seurat
# install.packages("Seurat")
#
# # package needed to convert ensembl gene ids to symbols
# BiocManager::install("AnnotationHub")
## ----eval=FALSE---------------------------------------------------------------
# install.packages("cellGeometry")
## ----eval=FALSE---------------------------------------------------------------
# devtools::install_github("myles-lewis/cellGeometry")
## ----out.width='100%', echo=FALSE---------------------------------------------
knitr::include_graphics("workflow.png")
## ----eval=FALSE---------------------------------------------------------------
# library(zellkonverter)
# library(SingleCellExperiment)
# library(cellGeometry)
#
# typist_h5 <- readH5AD("2ac906a5-9725-4258-8e36-21a9f6c0302a.h5ad",
# use_hdf5 = TRUE, reader = "R")
## ----eval=FALSE---------------------------------------------------------------
# mat <- typist_h5@assays@data$X
# rownames(mat) <- rownames(typist_h5)
# meta <- typist_h5@colData@listData
## ----eval=FALSE---------------------------------------------------------------
# library(Seurat)
# typist <- readRDS("2ac906a5-9725-4258-8e36-21a9f6c0302a.rds") # 15.5 GB in memory
#
# mat <- typist@assays$RNA$counts
# meta <- typist@meta.data
## ----eval=FALSE---------------------------------------------------------------
# table(meta$Majority_voting_CellTypist)
#
# subcl <- meta$Majority_voting_CellTypist
# cellgrp <- meta$Majority_voting_CellTypist_high
#
# # reduce dataset to only blood (optional)
# subcl[meta$tissue != "blood"] <- NA
# cellgrp[meta$tissue != "blood"] <- NA
## ----eval=FALSE---------------------------------------------------------------
# mk <- cellMarkers(mat, subclass = subcl, cellgroup = cellgrp,
# dual_mean = TRUE, cores = 2)
## ----eval=FALSE---------------------------------------------------------------
# # example code using future for parallelisation on windows
# library(future)
# plan(multisession, workers = 4)
#
# mk <- cellMarkers(mat, subclass = subcl, cellgroup = cellgrp,
# use_future = TRUE)
## ----eval=FALSE---------------------------------------------------------------
# library(AnnotationHub)
# ah <- AnnotationHub()
# ensDb_v110 <- ah[["AH113665"]]
# mk <- gene2symbol(mk, ensDb_v110)
## ----eval=FALSE---------------------------------------------------------------
# signature_heatmap(mk) # visualise whole signature
# signature_heatmap(mk, top = 5) # show top 5 genes for each subclass
## ----out.width='90%', echo=FALSE----------------------------------------------
knitr::include_graphics("typist_sig.png")
## ----eval=FALSE---------------------------------------------------------------
# spillover_heatmap(mk)
## ----out.width='80%', echo=FALSE----------------------------------------------
knitr::include_graphics("typist_spillover.png")
## ----eval=FALSE---------------------------------------------------------------
# cs <- cos_similarity(mk)
# ComplexHeatmap::Heatmap(cs)
## ----out.width='80%', echo=FALSE----------------------------------------------
knitr::include_graphics("typist_cos_sim.png")
## ----eval=FALSE---------------------------------------------------------------
# rank_angle(cs)
## ----eval=FALSE---------------------------------------------------------------
# diagnose(mk)
## ----eval=FALSE---------------------------------------------------------------
# mk <- updateMarkers(mk,
# remove_subclass = c("Helper T cells", "Cytotoxic T cells"))
## ----eval=FALSE---------------------------------------------------------------
# # simulated bulk
# set.seed(3)
# sim_counts <- generate_samples(mk, 25)
# sim_percent <- sim_counts / rowSums(sim_counts) * 100
## ----eval=FALSE---------------------------------------------------------------
# # sample from original sc count matrix
# # 1.43 mins (Intel); 45 secs (ARM)
# set.seed(99)
# times <- 1 # can be increased
# sim_sampled <- simulate_bulk(mat, sim_counts, subcl, times = times)
#
# # fix rownames
# rownames(sim_sampled) <- gene2symbol(rownames(sim_sampled), ensDb_v110)
## ----eval=FALSE---------------------------------------------------------------
# # near optimal deconvolution of counts sampled from the original scRNA-Seq
# fit <- deconvolute(mk, sim_sampled,
# use_filter = FALSE, arith_mean = TRUE)
#
# # plot results - multiple scatter plots
# plot_set(sim_counts, fit$subclass$output / times) # adjust for oversampling using `times`
# plot_set(sim_percent, fit$subclass$percent)
#
# # alternative plot - single scatter plot
# plot_pred(sim_counts, fit$subclass$output / times)
#
# # stats table
# metric_set(sim_percent, fit$subclass$percent)
## ----out.width='90%', echo=FALSE----------------------------------------------
knitr::include_graphics("typist_sim.png")
## ----eval=FALSE---------------------------------------------------------------
# mk <- updateMarkers(mk, bulkdata = my_bulk_matrix)
## ----eval=FALSE---------------------------------------------------------------
# res <- tune_deconv(mk, sim_sampled, sim_counts * times,
# grid = list(nsubclass = c(5, 10, 15, 25, 50, 100, 200, 500, 1000),
# expfilter = c(0.05, 0.1, 0.25, 0.5, 0.75, 1, 1.5),
# weight_method = c("none", "equal")),
# arith_mean = TRUE,
# use_filter = FALSE)
#
# ## Tuning parameters: nsubclass, expfilter, weight_method
# ## |==========================================================| 100%, Elapsed 00:07
# ## Best tune:
# ## nsubclass expfilter weight_method mean.RMSE
# ## 1000 0.25 equal 35.21
## ----eval=FALSE---------------------------------------------------------------
# plot_tune(res, xvar = "nsubclass", group = "weight_method")
# plot_tune(res, xvar = "nsubclass", group = "expfilter")
# plot_tune(res, xvar = "expfilter", group = "weight_method")
# plot_tune(res, xvar = "expfilter", group = "nsubclass")
## ----out.width='90%', echo=FALSE----------------------------------------------
knitr::include_graphics("typist_tune.png")
## ----eval=FALSE---------------------------------------------------------------
# # simple gaussian noise applied to counts
# sim_noise <- add_noise(sim_sampled)
#
# # noise applied to log2 counts
# sim_noise <- log_noise(sim_sampled)
#
# # noise applied to sqrt transformed counts
# sim_noise <- sqrt_noise(sim_sampled)
#
# # whole genes are scaled up/down by a random amount
# # this simulates differences in chemistry
# sim_noise <- shift_noise(sim_sampled)
## ----eval=FALSE---------------------------------------------------------------
# res2 <- tune_deconv(mk, sim_noise, sim_counts,
# grid = list(nsubclass = c(5, 10, 15, 25, 50, 100, 200, 500, 1000),
# expfilter = c(0.05, 0.1, 0.25, 0.5, 0.75, 1, 1.5),
# weight_method = c("none", "equal")),
# arith_mean = TRUE,
# use_filter = FALSE)
## ----eval=FALSE---------------------------------------------------------------
# # default method using var.e (see paper for details)
# se1 <- se(fit)
# head(se1)
#
# # ordinary least squares
# se2 <- se(fit, type = "OLS")
## ----eval=FALSE---------------------------------------------------------------
# # default method using var.e (see paper for details)
# ci1 <- confint(fit)
# str(ci1)
#
# # ordinary least squares
# ci2 <- confint(fit, type = "OLS")
## ----eval=FALSE---------------------------------------------------------------
# r <- residuals(fit)
#
# # full table including all genes
# r1 <- residuals(fit, test = sim_sampled)
## ----eval=FALSE---------------------------------------------------------------
# # fitted values for bulk on x axis
# plot_residuals(fit)
#
# # actual bulk gene expression - good for spotting outliers with massive expression
# plot_residuals(fit, sim_sampled)
# plot_residuals(fit, sim_sampled, type = "raw")
# plot_residuals(fit, sim_sampled, type = "student")
## ----eval=FALSE---------------------------------------------------------------
# rs1 <- rstandard(fit)
# rs2 <- rstudent(fit)
# cooksd <- cooks.distance(fit)
## ----eval=FALSE---------------------------------------------------------------
# sim_noise <- shift_noise(sim_sampled)
# fit2 <- deconvolute(mk, sim_noise, npass = 4,
# use_filter = FALSE, arith_mean = TRUE)
## ----eval=FALSE---------------------------------------------------------------
# fit2 <- deconvolute(mk, sim_noise,
# use_filter = FALSE, arith_mean = TRUE, comp_check = TRUE)
#
# # show minimum deconvolution values across all samples to see where comp cut-offs occur
# plot_comp(fit2)
#
# # show coefficient paths for a single subclass (across all samples)
# plot_path(fit2)
#
# # show coef paths as cell percentages
# plot_path(fit2, type = "percent")
#
# # show coef paths for the first bulk sample (across all subclasses)
# plot_path(fit2, sample = 1)
# plot_path(fit2, sample = 1, type = "percent")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.