knitr::opts_chunk$set(echo = T, warning = F, message = F, cache = T, cache.lazy = FALSE, results = 'hide')

CSIDE on the Slide-seq cerebellum

Setup

library(spacexr)
library(Matrix)
library(devtools)
library(ggplot2)
library(ggpubr)
library(reshape2)
library(dplyr)
library(ggrepel)
devtools::load_all()
id <- '08'
puck_no <- paste0('190926_', id)
datadir <- paste0('/Users/dcable/Documents/MIT/Research/Rafalab/Projects/spacexr/data/SpatialRNA/CerebellumReplicates/Puck_', '190926_11')
resultsdir <- paste0('/Users/dcable/Documents/MIT/Research/Rafalab/Projects/spacexr/data/SpatialRNA/CerebellumReplicates/Puck_', puck_no)
myRCTD<- readRDS(file.path(datadir,'myRCTD_cer_reps.rds'))

Merge samples and test for population-level DE

datadir_list <- c('../../../RCTD/data/SpatialRNA/CerebellumReplicates/Puck_190926_08', '../../../RCTD/data/SpatialRNA/CerebellumReplicates/Puck_190926_09',
                  '../../../RCTD/data/SpatialRNA/CerebellumReplicates/Puck_190926_11')
cell_types <- c('Astrocytes','Bergmann','Granule','Purkinje','Oligodendrocytes')
cell_types_present <- c('Astrocytes','Bergmann','Granule','Purkinje','MLI1','MLI2','Oligodendrocytes')
resultsdir <- '../../../RCTD/data/SpatialRNA/CerebellumReplicates/JointResults/'
RCTDde_list <- lapply(datadir_list, function(x) readRDS(file.path(x, 'myRCTDde.rds')))
myRCTD <- RCTDde_list[[1]]
de_results_list <- lapply(RCTDde_list, function(x) x@de_results)
plot_results <- F
if(!dir.exists(resultsdir))
  dir.create(resultsdir)
de_pop_all <- list()
gene_final_all <- list()
for(cell_type in cell_types) {
  res <- one_ct_genes(cell_type, RCTDde_list, de_results_list, resultsdir, cell_types_present, plot_results = plot_results)
  de_pop_all[[cell_type]] <- res$de_pop
  gene_final_all[[cell_type]] <- res$gene_final
}
cell_type_1 <- 'Bergmann'
cell_type_2 <- 'Purkinje'
if(plot_results)
  cell_type_comparison(de_results_list, RCTDde_list, cell_type_1, cell_type_2, cell_types_present)
saveRDS(de_pop_all, file.path(resultsdir, 'de_pop_all.rds'))
saveRDS(gene_final_all, file.path(resultsdir, 'gene_final_all.rds'))

Load CSIDE results

de_pop_all <-  readRDS(file.path(resultsdir, 'de_pop_all.rds'))
gene_final_all <- readRDS(file.path(resultsdir, 'gene_final_all.rds'))

Make volcano plot

plot_df_list <- list()
myRCTD <- RCTDde_list[[1]]
for(cell_type in cell_types[c(2,3,4)]) {
  de_pop <- de_pop_all[[cell_type]]
  gene_big <- Reduce(intersect, lapply(RCTDde_list, 
                                       function(myRCTD) get_gene_list_type_wrapper(myRCTD, cell_type, cell_types_present)))
  cell_type_means <- myRCTD@cell_type_info$info[[1]][gene_big,cell_types_present]
  cell_prop <- sweep(cell_type_means,1,apply(cell_type_means,1,max),'/')
  p_vals <- 2*(1-pnorm(abs(de_pop[gene_big,'Z_est'])))
  names(p_vals) <- gene_big
  plot_df <- data.frame(gene_big, cell_type, de_pop[gene_big,'mean_est'], -log(pmax(p_vals,1e-16),10),  gene_big %in% gene_final_all[[cell_type]])
  colnames(plot_df) <- c('gene', 'ct', 'mean', 'y', 'sig')
  plot_df_list[[cell_type]] <- plot_df
}
plot_df <- bind_rows(plot_df_list)
plot_df$label <- plot_df$gene
plot_df$label[!plot_df$sig] <- NA
plot_df$mean <- plot_df$mean * log(exp(1),2) #convert to log scale
p <- ggplot(plot_df, aes(x=mean, y = y, color = ct)) + geom_point() + theme_classic()  +
  geom_vline(xintercept = 0.4*log(exp(1),2), linetype = 'dotted') + geom_vline(xintercept = -0.4*log(exp(1),2), linetype = 'dotted') +
  geom_label_repel(aes(label = label),nudge_x = 0.1,na.rm = TRUE, show.legend = FALSE) + labs(color = 'Cell Type') + xlab('Estimated cell type-specific DE') + ylab('CSIDE p-value') + scale_y_continuous(lim = c(0,16.01), breaks = c(0,5,10,15),labels = c("10^0", "10^(-5)", "10^(-10)","10^(-15)") )
p
plot_df <- data.frame('cell_type' = character(), 'sig_p' = numeric())
for(cell_type in cell_types[c(2,3,4)]) {
  de_pop <- de_pop_all[[cell_type]]
  gene_big <- Reduce(intersect, lapply(RCTDde_list, 
                                         function(myRCTD) get_gene_list_type_wrapper(myRCTD, cell_type, cell_types_present)))
  plot_df <- rbind(plot_df, data.frame('cell_type' = cell_type, 'sig_p' = de_pop[gene_big, 'sig_p']))
} 

p <- ggplot(plot_df, aes(sig_p, fill = cell_type, color = cell_type)) + geom_density(alpha = 0.3) + theme_classic() + ylab('Density of genes') + scale_fill_discrete("Cell type") + scale_color_discrete("Cell type") + xlab('Standard deviation of batch effect across replicates')
p
plot_df <- data.frame('rep' = integer(), 'sig_g' = numeric())
for(rep_num in 1:3) {
  myRCTD <- RCTDde_list[[rep_num]]
  plot_df <- rbind(plot_df, data.frame('rep' = rep_num, 'sig_g' = as.numeric(myRCTD@de_results$gene_fits$sigma_g)/100))
} 
plot_df$rep <- factor(plot_df$rep)
p <- ggplot(plot_df, aes(sig_g, fill = rep, color = rep)) + geom_density(alpha = 0.3) + theme_classic() + ylab('Density of genes') + scale_fill_discrete("Replicate") + scale_color_discrete("Replicate") + xlab('Gene specific overdispersion magnitude')
p
myRCTD <- readRDS('../../data/SpatialRNA/CerebellumReplicates/Puck_190926_08/myRCTDde_subtype.rds')
# compare to coarse cell type
myRCTD_coarse <- readRDS('../../data/SpatialRNA/CerebellumReplicates/Puck_190926_08/myRCTDde.rds')
cell_type <- 'Granule'
granule_list <- spacexr:::get_gene_list_type_wrapper(myRCTD_coarse, 'Granule', myRCTD_coarse@internal_vars_de$cell_types)
granule_list <- intersect(rownames(myRCTD@de_results$gene_fits$mean_val),granule_list)
class_df <- myRCTD@internal_vars$class_df
subtypes <- rownames(class_df)[class_df$class == cell_type]
diff_genes <- numeric(length(subtypes))
names(diff_genes) <- subtypes
sig_genes <- diff_genes
for(granule_subtype in subtypes) {
  ct_diff <- myRCTD_coarse@de_results$gene_fits$mean_val[granule_list, cell_type]-
    myRCTD@de_results$gene_fits$mean_val[granule_list, granule_subtype]
  #hist(ct_diff)
  se_diff <- sqrt(myRCTD_coarse@de_results$gene_fits$I_mat[granule_list,'2_2_Granule']^2 +
                    myRCTD@de_results$gene_fits$s_mat[granule_list,paste0('2_2_',granule_subtype)]^2)
  pdiff <- 2*pnorm(-abs(ct_diff/se_diff))
  pdiff[is.nan(pdiff)] <- 1
  qdiff <- p.adjust(pdiff, method = 'BH')
  diff_genes[granule_subtype] <- sum(qdiff < 0.1)
  myp <- 2*pnorm(-abs(myRCTD@de_results$gene_fits$mean_val[granule_list, granule_subtype]/
    myRCTD@de_results$gene_fits$s_mat[granule_list,paste0('2_2_',granule_subtype)]))
  myp[is.na(myp)] <- 1
  sig_genes[granule_subtype] <- sum(myp < 0.01)
}

granule_subtype <- 'Granule_Galntl6'
se_diff <- sqrt(myRCTD_coarse@de_results$gene_fits$I_mat[granule_list,'2_2_Granule']^2 +
                  myRCTD@de_results$gene_fits$s_mat[granule_list,paste0('2_2_',granule_subtype)]^2)
thresh <- 0.35*log(2)
length(names(which(se_diff < thresh))) # filter out noisy points
R2 <- cor(myRCTD_coarse@de_results$gene_fits$mean_val[names(which(se_diff < thresh)), cell_type],
    myRCTD@de_results$gene_fits$mean_val[names(which(se_diff < thresh)), 'Granule_Galntl6'])^2
plot_df <- data.frame(cbind(myRCTD_coarse@de_results$gene_fits$mean_val[names(which(se_diff < thresh)), cell_type],
    myRCTD@de_results$gene_fits$mean_val[names(which(se_diff < thresh)), 'Granule_Galntl6']))/log(2)
colnames(plot_df) <- c('x','y')
ggplot(plot_df,aes(x=x,y=y)) + geom_point() + theme_classic() + geom_line(aes(x=x,y=x)) + xlab('Estimated Granule DE') + ylab('Estimated Granule Subtype Galntl6 DE') + ggtitle(paste0('R^2 = ', round(R2,2))) + coord_fixed()
myp <- 2*pnorm(-abs(myRCTD_coarse@de_results$gene_fits$mean_val[granule_list, cell_type]/
                      myRCTD_coarse@de_results$gene_fits$I_mat[granule_list,'2_2_Granule']))
myp[is.na(myp)] <- 1
cell_type_power <- sum(myp < 0.01)
names(cell_type_power) <- cell_type
sig_genes
cell_type_power
plot_df <- data.frame(c(sig_genes, cell_type_power), names(c(sig_genes, cell_type_power)))
colnames(plot_df) <- c('n', 'cell_type')
p1 <- ggplot(plot_df, aes(x = cell_type, y = n)) +
  geom_bar(position="dodge", stat="identity") + theme_classic() + ylab('Number of significant genes detected (p < 0.01)') +
  geom_hline(linetype = 'dashed', yintercept = .01*length(granule_list)) +
  geom_vline(linetype = 'dashed', xintercept = 1.5) + theme(axis.text.x = element_text(angle = 25, hjust = 1)) + xlab('Cell type')
p1
explanatory.variable <- myRCTD@internal_vars_de$X2[,2]
cell_counts <- spacexr:::count_cell_types(myRCTD, names(explanatory.variable), myRCTD@internal_vars_de$cell_types_present,
                           cell_type_threshold = 75)[subtypes]
myRCTD_coarse@config$RCTDmode <- 'doublet'
cell_type_counts <- spacexr:::count_cell_types(myRCTD_coarse, myRCTD_coarse@internal_vars_de$all_barc, myRCTD_coarse@internal_vars_de$cell_types_present,
                                               cell_type_threshold = 125)
cell_type_counts <- cell_type_counts[cell_type]
cell_counts
cell_type_counts

plot_df <- data.frame(c(cell_counts, cell_type_counts), names(c(cell_counts, cell_type_counts)))
colnames(plot_df) <- c('n', 'cell_type')
p1 <- ggplot(plot_df, aes(x = cell_type, y = n)) +
  geom_bar(position="dodge", stat="identity") + theme_classic() + ylab('Number of pixels') +
  geom_vline(linetype = 'dashed', xintercept = 1.5)+ theme(axis.text.x = element_text(angle = 25, hjust = 1)) + xlab('Cell type')
p1
myp <- 2*pnorm(-abs(myRCTD_coarse@de_results$gene_fits$mean_val[granule_list, cell_type]/
                      myRCTD_coarse@de_results$gene_fits$I_mat[granule_list,'2_2_Granule']))
myp[is.na(myp)] <- 1
cell_type_power <- sum(myp < 0.01)
names(cell_type_power) <- cell_type
sig_genes
cell_type_power
plot_df <- data.frame(c(diff_genes), names(c(diff_genes)))
colnames(plot_df) <- c('n', 'cell_type')
p1 <- ggplot(plot_df, aes(x = cell_type, y = n)) +
  geom_bar(position="dodge", stat="identity") + theme_classic() + ylab('Number of significantly different DE genes') +theme(axis.text.x = element_text(angle = 25, hjust = 1)) + xlab('Cell type')
p1


dmcable/RCTD documentation built on Feb. 24, 2024, 11:03 p.m.