library(Matrix)
library(spacexr)
library(spASE)
Registered S3 method overwritten by 'spASE':
method from
merge.RCTD.objects spacexr
Attaching package: 'spASE'
The following objects are masked from 'package:spacexr':
aggregate_cell_types, build.designmatrix.intercept,
build.designmatrix.nonparam, build.designmatrix.regions,
build.designmatrix.single, choose_sigma_c, convert.old.RCTD,
count_cell_types, create_RCTD_plots, create.RCTD,
create.RCTD.replicates, CSIDE.population.inference,
exvar.celltocell.interactions, exvar.point.density, fitBulk,
fitPixels, get_cell_type_info, get_de_genes, get_doublet_weights,
get_norm_ref, get_standard_errors, import_weights,
make_all_de_plots, make_de_plots_genes, make_de_plots_quant,
make_de_plots_regions, make_de_plots_replicates,
make_de_plots_spatial, normalize_weights, plot_all_cell_types,
plot_class, plot_cond_occur, plot_doub_occur_stack, plot_doublets,
plot_doublets_type, plot_gene_raw, plot_gene_regions,
plot_gene_two_regions, plot_occur_unthreshold,
plot_prediction_gene, plot_puck_continuous, plot_puck_wrapper,
plot_weights, plot_weights_doublet, plot_weights_unthreshold,
process_beads_batch, process_data, read.SpatialRNA,
read.VisiumSpatialRNA, Reference, restrict_counts, restrict_puck,
run.CSIDE, run.CSIDE.general, run.CSIDE.intercept,
run.CSIDE.nonparam, run.CSIDE.regions, run.CSIDE.replicates,
run.CSIDE.single, run.RCTD, run.RCTD.replicates,
save.CSIDE.replicates, set_cell_types_assigned,
set_likelihood_vars, SpatialRNA, write_de_summary
library(ggplot2)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(tidyr)
Attaching package: 'tidyr'
The following objects are masked from 'package:Matrix':
expand, pack, unpack
library(Rfast)
Loading required package: Rcpp
Loading required package: RcppZiggurat
Attaching package: 'Rfast'
The following object is masked from 'package:dplyr':
nth
library(data.table)
Attaching package: 'data.table'
The following object is masked from 'package:Rfast':
transpose
The following objects are masked from 'package:dplyr':
between, first, last
library(ggthemes)
library(latex2exp)
library(wesanderson)
source('../R/spase_sim_utils.R')
cere3 <- readRDS('results/results_celltype_cere_3_df_5.rds')
cere4 <- readRDS('results/results_celltype_cere_4_visium_df_5.rds')
hippo1 <- readRDS('results/results_celltype_hippo_1_df_5.rds')
hippo2 <- readRDS('results/results_celltype_hippo_2_df_5.rds')
hippo3 <- readRDS('results/results_celltype_hippo_3_df_5.rds')
hippo1@de_results$gene_fits$mean_val |>
melt() |>
mutate(sample = 'Hippocampus 1 (SS)') |>
bind_rows(hippo2@de_results$gene_fits$mean_val |>
melt() |>
mutate(sample = 'Hippocampus 2 (SS)')) |>
bind_rows(hippo3@de_results$gene_fits$mean_val |>
melt() |>
mutate(sample = 'Hippocampus 3 (SS)')) |>
ggplot(aes(x = value)) +
geom_histogram(bins=50) +
facet_grid(Var2 ~ sample) +
theme_minimal() +
xlim(c(-5,5)) +
xlab('Estimated coefficient (logit scale)')
Warning in melt(hippo1@de_results$gene_fits$mean_val): The melt generic in
data.table has been passed a matrix and will attempt to redirect to the
relevant reshape2 method; please note that reshape2 is deprecated, and this
redirection is now deprecated as well. To continue using melt methods from
reshape2 while both libraries are attached, e.g. melt.list, you can prepend the
namespace like reshape2::melt(hippo1@de_results$gene_fits$mean_val). In the
next version, this warning will become an error.
Warning in melt(hippo2@de_results$gene_fits$mean_val): The melt generic in
data.table has been passed a matrix and will attempt to redirect to the
relevant reshape2 method; please note that reshape2 is deprecated, and this
redirection is now deprecated as well. To continue using melt methods from
reshape2 while both libraries are attached, e.g. melt.list, you can prepend the
namespace like reshape2::melt(hippo2@de_results$gene_fits$mean_val). In the
next version, this warning will become an error.
Warning in melt(hippo3@de_results$gene_fits$mean_val): The melt generic in
data.table has been passed a matrix and will attempt to redirect to the
relevant reshape2 method; please note that reshape2 is deprecated, and this
redirection is now deprecated as well. To continue using melt methods from
reshape2 while both libraries are attached, e.g. melt.list, you can prepend the
namespace like reshape2::melt(hippo3@de_results$gene_fits$mean_val). In the
next version, this warning will become an error.
Warning: Removed 4571 rows containing non-finite values (`stat_bin()`).
ggsave('figures/03_cside_estimates_hippo.pdf', height=6, width=5)
Warning: Removed 4571 rows containing non-finite values (`stat_bin()`).
cere3@de_results$gene_fits$mean_val |>
melt() |>
mutate(sample = 'Cerebellum 3 (SS)') |>
bind_rows(cere4@de_results$gene_fits$mean_val |>
melt() |>
mutate(sample = 'Cerebellum 4 (V)')) |>
ggplot(aes(x = value)) +
geom_histogram(bins=50) +
facet_grid(Var2 ~ sample, scales='free') +
theme_minimal() +
xlim(c(-5,5)) +
xlab('Estimated coefficient (logit scale)')
Warning in melt(cere3@de_results$gene_fits$mean_val): The melt generic in
data.table has been passed a matrix and will attempt to redirect to the
relevant reshape2 method; please note that reshape2 is deprecated, and this
redirection is now deprecated as well. To continue using melt methods from
reshape2 while both libraries are attached, e.g. melt.list, you can prepend the
namespace like reshape2::melt(cere3@de_results$gene_fits$mean_val). In the next
version, this warning will become an error.
Warning in melt(cere4@de_results$gene_fits$mean_val): The melt generic in
data.table has been passed a matrix and will attempt to redirect to the
relevant reshape2 method; please note that reshape2 is deprecated, and this
redirection is now deprecated as well. To continue using melt methods from
reshape2 while both libraries are attached, e.g. melt.list, you can prepend the
namespace like reshape2::melt(cere4@de_results$gene_fits$mean_val). In the next
version, this warning will become an error.
Warning: Removed 7200 rows containing non-finite values (`stat_bin()`).
ggsave('figures/03_cside_estimates_cere.pdf', height=6, width=3)
Warning: Removed 7200 rows containing non-finite values (`stat_bin()`).
hippo1@de_results$gene_fits$s_mat |>
melt() |>
mutate(sample = 'Hippo 1 (SS)') |>
bind_rows(hippo2@de_results$gene_fits$s_mat |>
melt() |>
mutate(sample = 'Hippo 2 (SS)')) |>
bind_rows(hippo3@de_results$gene_fits$s_mat |>
melt() |>
mutate(sample = 'Hippo 3 (SS)')) |>
bind_rows(cere3@de_results$gene_fits$s_mat |>
melt() |>
mutate(sample = 'Cerebellum 3 (SS)')) |>
bind_rows(cere4@de_results$gene_fits$s_mat |>
melt() |>
mutate(sample = 'Cerebellum 4 (V)')) |>
mutate(sample = factor(sample, levels = c('Hippo 1 (SS)', 'Hippo 2 (SS)', 'Hippo 3 (SS)', 'Cerebellum 3 (SS)', 'Cerebellum 4 (V)'))) |>
ggplot(aes(x = value)) +
geom_histogram(bins=100) +
theme_minimal() +
xlim(c(0,10)) +
facet_wrap(sample ~ ., scales='free') +
xlab('Variance of estimated coefficient (logit scale)')
Warning in melt(hippo1@de_results$gene_fits$s_mat): The melt generic in
data.table has been passed a matrix and will attempt to redirect to the
relevant reshape2 method; please note that reshape2 is deprecated, and this
redirection is now deprecated as well. To continue using melt methods from
reshape2 while both libraries are attached, e.g. melt.list, you can prepend the
namespace like reshape2::melt(hippo1@de_results$gene_fits$s_mat). In the next
version, this warning will become an error.
Warning in melt(hippo2@de_results$gene_fits$s_mat): The melt generic in
data.table has been passed a matrix and will attempt to redirect to the
relevant reshape2 method; please note that reshape2 is deprecated, and this
redirection is now deprecated as well. To continue using melt methods from
reshape2 while both libraries are attached, e.g. melt.list, you can prepend the
namespace like reshape2::melt(hippo2@de_results$gene_fits$s_mat). In the next
version, this warning will become an error.
Warning in melt(hippo3@de_results$gene_fits$s_mat): The melt generic in
data.table has been passed a matrix and will attempt to redirect to the
relevant reshape2 method; please note that reshape2 is deprecated, and this
redirection is now deprecated as well. To continue using melt methods from
reshape2 while both libraries are attached, e.g. melt.list, you can prepend the
namespace like reshape2::melt(hippo3@de_results$gene_fits$s_mat). In the next
version, this warning will become an error.
Warning in melt(cere3@de_results$gene_fits$s_mat): The melt generic in
data.table has been passed a matrix and will attempt to redirect to the
relevant reshape2 method; please note that reshape2 is deprecated, and this
redirection is now deprecated as well. To continue using melt methods from
reshape2 while both libraries are attached, e.g. melt.list, you can prepend the
namespace like reshape2::melt(cere3@de_results$gene_fits$s_mat). In the next
version, this warning will become an error.
Warning in melt(cere4@de_results$gene_fits$s_mat): The melt generic in
data.table has been passed a matrix and will attempt to redirect to the
relevant reshape2 method; please note that reshape2 is deprecated, and this
redirection is now deprecated as well. To continue using melt methods from
reshape2 while both libraries are attached, e.g. melt.list, you can prepend the
namespace like reshape2::melt(cere4@de_results$gene_fits$s_mat). In the next
version, this warning will become an error.
Warning: Removed 32168 rows containing non-finite values (`stat_bin()`).
Warning: Removed 10 rows containing missing values (`geom_bar()`).
ggsave('figures/03_cside_estimate_variances.pdf', height=3, width=5)
Warning: Removed 32168 rows containing non-finite values (`stat_bin()`).
Removed 10 rows containing missing values (`geom_bar()`).
gene <- 'Cpe'
myRCTD <- cere4
puck <- myRCTD@originalSpatialRNA
weights <- myRCTD@results$weights
my_beta <- as.matrix(sweep(weights, 1, rowSums(weights), '/')) # Cell type weights from full mode RCTD
thresh <- 0.8
mean_val <- myRCTD@de_results$gene_fits$mean_val # Cell type mean gene expression from CSIDE
mean_val <- matrix(pmax(-10,pmin(10,mean_val)),
nrow = nrow(mean_val),
ncol=ncol(mean_val),
dimnames = dimnames(mean_val))# numerical stability
mean_val <- mean_val[,-5] # remove Bergmann
# find genes with well-behaved estimates i.e. low variance
tot <- rowSums(myRCTD@spatialRNA@maternalCounts) + rowSums(myRCTD@spatialRNA@paternalCounts)
tot <- tot[rownames(mean_val)]
genes <- rownames(mean_val)[which(rowVars(mean_val)<5 & tot>100)]
gene <- 'Chd7'
alphas <- sweep(my_beta[,colnames(mean_val)], 2, exp(mean_val[gene,]), '*')
alphas <- sweep(alphas, 1, rowSums(alphas), '/')
alphas |>
melt() |>
ggplot(aes(x = value)) +
geom_histogram() +
theme_minimal() +
facet_wrap(Var2 ~ .)
Warning in melt(alphas): The melt generic in data.table has been passed a
matrix and will attempt to redirect to the relevant reshape2 method; please
note that reshape2 is deprecated, and this redirection is now deprecated as
well. To continue using melt methods from reshape2 while both libraries are
attached, e.g. melt.list, you can prepend the namespace like
reshape2::melt(alphas). In the next version, this warning will become an error.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Granule has spatial effect. All other cell types are 50/50.
set.seed(9)
df <- 5
cell_type <- 'Granule'
X2 <- build.designmatrix.nonparam(myRCTD, df = df)
barcodes <- rownames(X2)
coords <- myRCTD@spatialRNA@coords[barcodes,]
coef <- rnorm(df, 0, 2)
p <- expit(X2%*%coef)
cbind(coords, p) |>
cbind(weights |> as.matrix()) |>
filter(Granule > 0.2) |>
ggplot(aes(x = x, y = y)) +
geom_point(aes(color = p)) +
scale_color_gradient2(low = 'blue', mid = 'white', high = 'red', midpoint = 0.5, limits = c(0,1)) +
theme_void() +
theme(legend.position = 'none')
ggsave('figures/03_granule_sim_p.pdf', height=2, width=3)
cbind(coords, p) |>
cbind(weights |> as.matrix()) |>
ggplot(aes(x = x, y = y)) +
geom_point(aes(color = Granule)) +
scale_color_gradient(low = 'white', high = 'forestgreen', limits = c(0,1)) +
theme_void() +
theme(legend.position = 'none')
ggsave('figures/03_granule_weight.pdf', height=3, width=3)
# Generate raw counts
## Compute p_ij
p_ij <- numeric(nrow(alphas))
for (k in 1:ncol(alphas)) {
if (colnames(alphas)[k] == cell_type) {
p_ij <- p_ij + alphas[,k]*expit(X2%*%coef)
} else {
p_ij <- p_ij + alphas[,k]*expit(0) # expit(0) = 0.5
}
}
## Fix overdispersion phi_j
phi_j <- 0.3
## Convert to Beta(a,b)
a <- p_ij*(1-phi_j)/phi_j
b <- (1-phi_j)*(1-p_ij)/phi_j
lambd_ij <- rbeta(length(a), a, b)
## Binomial sampling
N_ij <- myRCTD@spatialRNA@maternalCounts[gene,barcodes]+myRCTD@spatialRNA@paternalCounts[gene,barcodes]+20
y_ij <- rbinom(length(a), size = N_ij, prob = lambd_ij)
cbind(coords, data.frame(yij = y_ij, nij = N_ij)) |>
filter(nij > 0) |>
filter(y > 7500) |>
ggplot(aes(x = x, y = y)) +
geom_point(aes(color = yij/nij),size=0.5) +
scale_color_gradient2(low = 'blue', mid = 'white', high = 'red', midpoint = 0.5, limits = c(0,1)) +
theme_void() +
theme(legend.position='none')
ggsave('figures/03_Chd7_sim_counts.pdf', height=1, width=2)
The parameters that change are
Here I write csvs that have all combinations of the above that get read by an sbatch script to the cluster. Results/stats that we are interested in are:
set.seed(13579)
cell_types <- c('Granule', 'MLI2','Fibroblast','Oligodendrocytes')
phis <- c(0.25, 0.5, 0.75)
add_count <- c(0, 1, 2, 5, 25)
genes <- find_genes(cere4, cell_types)
niter <- 10
params <- expand.grid(cell_types, phis, add_count, genes)
params <- sapply(params, rep, each = niter)
params <- cbind(params, seq(1,nrow(params)))
colnames(params) <- c('cell_type', 'overdispersion', 'add_count', 'gene', 'seed')
params <- as.data.frame(params)
params$cell_type <- factor(params$cell_type, labels = cell_types)
params$gene <- factor(params$gene, labels = genes)
write.table(params, file = 'simulations_cere4_params.tsv', quote = F, sep = '\t',
row.names = F, col.names = T)
set.seed(13579)
cell_types <- c('Fibroblast', 'Granule', 'MLI2', 'Purkinje', 'Bergmann', 'Oligodendrocytes')
phis <- c(0.25, 0.5, 0.75)
add_count <- c(0, 1, 5, 25)
genes <- find_genes(cere3, cell_types)
genes <- sample(genes, 100)
niter <- 5
params <- expand.grid(cell_types, phis, add_count, genes)
params <- sapply(params, rep, each = niter)
params <- cbind(params, seq(1,nrow(params)))
colnames(params) <- c('cell_type', 'overdispersion', 'add_count', 'gene', 'seed')
params <- as.data.frame(params)
params$cell_type <- factor(params$cell_type, labels = cell_types)
params$gene <- factor(params$gene, labels = genes)
write.table(params, file = 'simulations_cere3_params.tsv', quote = F, sep = '\t',
row.names = F, col.names = T)
visium_params <- fread('simulations_cere4_params.tsv')
visium_results <- read.csv('simulation_results_cere4.csv', header=F, col.names=c('idx','total_N', 'avg_N', 'median_N', 'MSE', 'avg_var', 'coverage', 'cor_p'))
visium_params$idx <- 1:nrow(visium_params)
visium_results <- left_join(visium_params, visium_results, by = 'idx') |>
mutate(platform = 'Visium')
ss_params <- fread('simulations_cere3_params.tsv')
ss_results <- read.csv('simulation_results_cere3.csv', header=F, col.names=c('idx','total_N', 'avg_N', 'median_N', 'MSE', 'avg_var', 'coverage', 'cor_p'))
ss_params$idx <- 1:nrow(ss_params)
ss_results <- left_join(ss_params, ss_results, by = 'idx') |>
mutate(platform = 'Slide-seq')
all_results <- bind_rows(visium_results, ss_results)
mypal <- wes_palette('Darjeeling2',3)[2:3]
aa <- all_results |>
mutate(bin_N = cut(total_N, breaks=c(0,1000,5000,10000,1e6), include.lowest=T)) |>
filter(!is.na(cor_p)) # very few entries are NA due to occasional file writing errors; usually the NA rows are duplicates of other rows. Could avoid this in the future by having the simulation write to different files instead of the same file
aa |>
select(bin_N) |>
mutate(bin_N = as.character(bin_N)) |>
mutate(bin_N = gsub('\\(','',bin_N)) |>
mutate(bin_N = gsub(']', '', bin_N)) |>
mutate(bin_N = gsub('\\[', '', bin_N)) |>
separate(bin_N, into = c('start','end'), sep=',') |>
mutate(start = as.numeric(start), end=as.numeric(end)) |>
distinct()
start end
1 5000 1e+04
2 10000 1e+06
3 1000 5e+03
4 0 1e+03
aa |>
filter(cell_type != 'Bergmann', cell_type != 'Purkinje') |>
mutate(bin_N = factor(bin_N, labels = c('<1,000','1,000-5,000', '5,000-10,000', '>10,000'))) |>
ggplot(aes(x = bin_N, y = cor_p^2)) +
geom_boxplot(aes(color = platform),outlier.size=0.1) +
scale_color_manual(name='',values = mypal) +
theme_minimal() +
theme(axis.text.x = element_text(angle=45, hjust=1, size=7),
panel.grid = element_blank(),
panel.border = element_rect(color='black', fill=NA),
axis.ticks = element_line(color='black')) +
facet_grid(.~cell_type) +
ylab(TeX(r"($R^2$)")) +
xlab('Total UMI')
ggsave('figures/03_simulations_r2.pdf', height=3, width=7)
aa |>
mutate(bin_N = factor(bin_N, labels = c('<1,000','1,000-\n5,000', '5,000-\n10,000', '>10,000'))) |>
filter(cell_type != 'Bergmann', cell_type != 'Purkinje') |>
ggplot(aes(x = bin_N, y = sqrt(MSE))) +
geom_boxplot(aes(color = platform),outlier.size=0.1) +
scale_color_manual(name='',values = mypal) +
theme_minimal() +
theme(axis.text.x = element_text(angle=45, hjust=1, size=7),
panel.grid = element_blank(),
panel.border = element_rect(color='black', fill=NA),
axis.ticks = element_line(color='black')) +
facet_grid(.~cell_type) +
ylab('RMSE of estimated coefficients') +
xlab('Total UMI')
ggsave('figures/03_simulations_rmse.pdf', height=3, width=7)
all_results |>
filter(total_N > 10000) |> # only look at ones with high coverage
ggplot(aes(x = avg_var)) +
geom_density(aes(color = platform, fill = platform), alpha=0.25) +
theme_classic() +
scale_color_manual(name='',values = mypal) +
scale_fill_manual(name='',values = mypal) +
xlim(c(0,10)) +
xlab('Average variance of estimated coefficients') +
ylab('Density')
Warning: Removed 6389 rows containing non-finite values (`stat_density()`).
ggsave('figures/03_simulations_coef_var.pdf', height=3, width=4)
Warning: Removed 6389 rows containing non-finite values (`stat_density()`).
aa |>
filter(cell_type == 'Granule') |>
ggplot(aes(x = factor(overdispersion), y = cor_p^2)) +
geom_boxplot(aes(color=platform),outlier.size=0.1) +
scale_color_manual(name='',values = mypal) +
theme_classic() +
theme(legend.position='none') +
ylab(TeX(r"($R^2$)")) +
xlab('Overdispersion')
ggsave('figures/03_simulations_overdispersion.pdf', height=3, width=3)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.