knitr::opts_chunk$set(echo = T, warning = F, message = F, cache = T, results = 'hide')

CSIDE on the MERFISH Hypothalamus

Load in CSIDE results and calculate significant genes

# Load in spatialRNA data and Reference data
library(spacexr)
library(Matrix)
library(devtools)
library(ggplot2)
library(ggpubr)
library(reshape2)
library(dplyr)
library(ggrepel)
library(fields)
library(stringr)
source('~/Documents/MIT/Research/Rafalab/Projects/spacexr/AnalysisCSIDE/helper_functions/merge_de_helper.R')
load_all()
pwd = getwd()
datadir <- paste0('~/Documents/MIT/Research/Rafalab/Projects/slideseq/Cell Demixing/ContentStructure/DEGLAM/data/moffitt','/')
resultsdir <- paste0('~/Documents/MIT/Research/Rafalab/Projects/slideseq/Cell Demixing/ContentStructure/DEGLAM/results/ResultsMerfish','/')
myRCTD = readRDS(paste0(resultsdir,'myRCTDde.rds'))
myRCTDQ <- readRDS(file.path(resultsdir,'myRCTDQ.rds'))
cell_types_present <- myRCTD@internal_vars_de$cell_types_present
cell_types <- myRCTD@internal_vars_de$cell_types
gene_fits <- myRCTD@de_results$gene_fits

Plot predictive-variable and cell types

my_barc <- rownames(myRCTD@results$results_df)[which((myRCTD@results$results_df$first_type == 'monocyte/DC' & myRCTD@results$results_df$spot_class != 'reject') | (myRCTD@results$results_df$second_type == 'monocyte/DC' & myRCTD@results$results_df$spot_class == 'doublet_certain' ))]
p1 <- plot_puck_continuous(myRCTD@spatialRNA, colnames(myRCTD@spatialRNA@counts) , myRCTD@internal_vars_de$X2[,2], 
                     title ='') + geom_point(data = myRCTD@spatialRNA@coords[my_barc,], size = 0.6)+ ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+  geom_segment(aes(x = 1934.6, y = -3990, xend = 2184.6, yend = -3990), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())+ scale_color_gradientn(colors = pals::brewer.blues(20)[2:20],name = "Distance from midline", labels = c(0,1),breaks = c(0,1), limits = c(0,1))
results_df <- myRCTD@results$results_df
puck <- myRCTD@spatialRNA
barcodes = rownames(results_df[results_df$spot_class != "reject" & puck@nUMI >= 100,])
my_table = puck@coords[barcodes,]
my_table$class = results_df[barcodes,]$first_type
n_levels = myRCTD@cell_type_info$info[[3]]
my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)]
names(my_pal) = myRCTD@cell_type_info$info[[2]]
my_pal_curr <- my_pal
my_pal_curr[cell_types[1]] <- "#CC79A7"
my_pal_curr[cell_types[2]] <- "#E69F00"
my_pal_curr[cell_types[3]] <- "#D55E00"
my_pal_curr[cell_types[5]] <- "#009E73"
my_pal_curr[cell_types[4]] <- "#0072B2"
my_pal_curr[cell_types[6]] <- "#56B4E9"
pres = unique(as.integer(my_table$class))
pres = pres[order(pres)]
p2 <- ggplot2::ggplot(my_table, ggplot2::aes(x=x, y=y)) + ggplot2::geom_point(ggplot2::aes(size = .4, shape=19,color=class)) + ggplot2::scale_color_manual("",values = my_pal_curr[pres], breaks = cell_types, labels = cell_types)+ ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ guides(colour = guide_legend(override.aes = list(size=2)))+ geom_segment(aes(x = 1934.6, y = -4000, xend = 2184.6, yend = -4000), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())
ggarrange(p1,p2,nrow = 2)

Count genes that fit quadratic vs linear model

cur_cell_types <- c('Excitatory', 'Inhibitory', 'Mature oligodendrocyte')
res_mat <- matrix(0, nrow = length(cur_cell_types), ncol = 3)
cell_type <- 'Inhibitory'
rownames(res_mat) <- cur_cell_types
for(cell_type in cur_cell_types) {
  a <- sum(rownames(myRCTDQ@de_results$res_gene_list[[cell_type]]) %in% rownames(myRCTD@de_results$res_gene_list[[cell_type]]))
  b <- length(rownames(myRCTDQ@de_results$res_gene_list[[cell_type]]))
  d <- length(rownames(myRCTD@de_results$res_gene_list[[cell_type]]))
  res_mat[cell_type,] <- c(a,b,d)
}
res_mat <- as.data.frame(res_mat)
res_mat[,1:3] <- res_mat[,3:1]
colnames(res_mat) <- c('Linear', 'Quadratic', 'Both')
res_mat$ct <- rownames(res_mat)
plot_df <- reshape2::melt(res_mat, id = 'ct')
ggplot(plot_df) +  geom_bar(aes(x = ct, y = value, group = variable, fill = variable), stat = 'identity', position = 'dodge') + theme_classic() + ylab('Number of significant genes detected') + scale_fill_manual("Model", breaks = c('Linear','Quadratic','Both'), labels = c('Linear','Quadratic','Both'), values = unname(my_pal_curr[c(4,6,7)]))+ xlab('Cell type')

Plot CSIDE predictions vs raw data for each gene

all_barc <- myRCTD@internal_vars_de$all_barc
my_beta <- myRCTD@internal_vars_de$my_beta
puck <- myRCTD@spatialRNA
results_df <- data.frame(gene = character(), region = integer(), model = character(),
                             N = integer(), Y = numeric(), pred = numeric(), se = numeric(), cell_type = character())
X2 <- myRCTD@internal_vars_de$X2
gene_fits <- myRCTD@de_results$gene_fits
plot_df <- data.frame(bin = numeric(), mean = numeric(), pred = numeric(), ub = numeric(), lb = numeric(),
                      gene = character(), cell_type = character(), model = character())
model <- 'Linear'
for(cell_type in cur_cell_types) {
  res_genes <- myRCTD@de_results$res_gene_list[[cell_type]]
  cell_type_ind <- which(cell_types == cell_type)
  barcodes_sing <- names(which(my_beta[all_barc,cell_type] > 0.999))
  MULT = 500
  NR = 5;
  Threshes <- (0:NR)/NR
  D <- dim(X2)[2] 
  if(cell_type == 'Excitatory')
    gene_list <- c('Syt2','Cd24a')
  if(cell_type == 'Inhibitory')
    gene_list <- c('Ano3','Oprk1')
  if(cell_type == 'Mature oligodendrocyte')
    gene_list <- c('Etv1','Man1a')
  for(gene in gene_list) {
    q_df <- get_quant_df(myRCTD, gene_fits, cell_types, cell_type, gene, multi_region = F, prop_thresh = 0.999)
    for(cat in 1:NR) {
      cur_barcs <- rownames(q_df)[q_df$region >= Threshes[cat] & q_df$region <= Threshes[cat + 1]]
      n <- length(cur_barcs)
      Y <- mean(q_df[cur_barcs, 'Y'])
      pred <- mean(q_df[cur_barcs, 'pred'])
      se <- sqrt(mean(q_df[cur_barcs,'var'])/n)
      de<-data.frame(gene, cat, model, n, Y, pred, se, cell_type)
      names(de)<-c('gene','region', 'model','N','Y','pred', 'se', 'cell_type')
      results_df <- bind_rows(results_df, de)
    }
  }
}

model <- 'Quadratic'
X2 <- myRCTDQ@internal_vars_de$X2
gene_fits <- myRCTDQ@de_results$gene_fits
for(cell_type in c('Excitatory','Inhibitory')) {
  res_genes <- myRCTDQ@de_results$res_gene_list[[cell_type]]
  cell_type_ind <- which(cell_types == cell_type)
  barcodes_sing <- names(which(my_beta[all_barc,cell_type] > 0.999))
  MULT = 500
  NR = 5;
  Threshes <- (0:NR)/NR
  D <- dim(X2)[2] 
  if(cell_type == 'Excitatory')
    gene_list <- c('Htr2c','Ntsr1')
  if(cell_type == 'Inhibitory')
    gene_list <- c('Htr2c','Rgs2')
  for(gene in gene_list) {
    q_df <- get_quant_df(myRCTDQ, gene_fits, cell_types, cell_type, gene, multi_region = F, prop_thresh = 0.999)
    for(cat in 1:NR) {
      cur_barcs <- rownames(q_df)[q_df$region >= Threshes[cat] & q_df$region <= Threshes[cat + 1]]
      n <- length(cur_barcs)
      Y <- mean(q_df[cur_barcs, 'Y'])
      pred <- mean(q_df[cur_barcs, 'pred'])
      se <- sqrt(mean(q_df[cur_barcs,'var'])/n)
      de<-data.frame(gene, cat, model, n, Y, pred, se, cell_type)
      names(de)<-c('gene','region', 'model','N','Y','pred', 'se', 'cell_type')
      results_df <- bind_rows(results_df, de)
    }
  }
}
results_df[,c('Y','pred','se')] <- results_df[,c('Y','pred','se')]*500 # convert to counts per 500
results_df$region <- (results_df$region - 1)/(NR - 1)
cur_gene_list <- c('Syt2','Ano3','Etv1','Man1a','Htr2c')
p3 <- ggplot2::ggplot(results_df[results_df$gene %in% cur_gene_list,],mapping = ggplot2::aes(x=region,y=log(Y,2), color = gene, linetype = model)) + ggplot2::geom_point() + ggplot2::theme_classic() +
  ggplot2::geom_line(ggplot2::aes(region,log(pred,2))) + ggplot2::geom_errorbar(aes(ymin = log(Y - 1.96*se,2), ymax = log(Y + 1.96*se,2)), width = 0.05) + facet_wrap(cell_type ~.) + xlab('Distance from midline') + ylab('Log gene expression') + labs(linetype = 'Model') + scale_color_manual("Gene", breaks = cur_gene_list, labels = cur_gene_list, values = unname(my_pal_curr[c(1,4,6,7,9)]))
p3

Make spatial gene plots

X2 <- myRCTD@internal_vars_de$X2
gene_fits <- myRCTD@de_results$gene_fits
all_barc <- myRCTD@internal_vars_de$all_barc
my_beta <- myRCTD@internal_vars_de$my_beta
puck <- myRCTD@spatialRNA

cell_type <- "Inhibitory"
gene = 'Slc18a2'
barcodes_sing <- names(which(my_beta[all_barc,cell_type] > 0.999))
MULT = 500
density_thresh <- 0.5
barc_plot <- intersect(barcodes_sing,colnames(puck@counts)[puck@nUMI >= 200])
Y_plot <- MULT*puck@counts[gene,]/puck@nUMI
ge_thresh <- 10
my_class <- rep(0,length(barc_plot)); names(my_class) <- barc_plot
my_class[(X2[barc_plot,2] <= density_thresh) & (Y_plot[barc_plot] <= ge_thresh)] <- 2
my_class[(X2[barc_plot,2] <= density_thresh) & (Y_plot[barc_plot] > ge_thresh)] <- 4
my_class[(X2[barc_plot,2] > density_thresh) & (Y_plot[barc_plot] <= ge_thresh)] <- 1
my_class[(X2[barc_plot,2] > density_thresh) & (Y_plot[barc_plot] > ge_thresh)] <- 3
p3 <- plot_class(puck, barc_plot[order(my_class[barc_plot])], factor(my_class)) + ggtitle(gene)
suppressMessages(p3 <- p3 + scale_color_manual(values=c("#CCE2EF","#F6DECC","#0072B2","#D55E00"))+ ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ guides(colour = guide_legend(override.aes = list(size=2)))+ geom_segment(aes(x = 1934.6, y = -4000, xend = 2184.6, yend = -4000), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank()))
p3
#ggarrange(p1,p2, nrow = 2)

Compare with and without segmentation

resultsdir <- '../../data/SpatialRNA/MERFISH_24'
myRCTD <- readRDS(file.path(resultsdir,'myRCTDde_seg.rds'))
myRCTD_pix <- readRDS(file.path(resultsdir,'myRCTDde.rds'))
plot_df <- data.frame(cbind(myRCTD_pix@de_results$all_gene_list$Inhibitory$log_fc,
     myRCTD@de_results$all_gene_list$Inhibitory$log_fc))
colnames(plot_df) <- c('x','y')
R2 <- cor(plot_df$x, plot_df$y)^2
p1 <- ggplot(plot_df, aes(x=x,y=y)) + geom_point() + theme_classic() + geom_line(aes(x=x,y=x)) + xlab('Estimated Inhibitory DE without Segmentation') + ylab('Estimated Inhibitory DE with Segmentation') + ggtitle(paste0('R^2 = ', round(R2,2))) + coord_fixed()
plot_df <- data.frame(cbind(myRCTD_pix@de_results$all_gene_list$Excitatory$log_fc,
     myRCTD@de_results$all_gene_list$Excitatory$log_fc))
colnames(plot_df) <- c('x','y')
R2 <- cor(plot_df$x, plot_df$y)^2
p2 <- ggplot(plot_df, aes(x=x,y=y)) + geom_point() + theme_classic() + geom_line(aes(x=x,y=x)) + xlab('Estimated Excitatory DE without Segmentation') + ylab('Estimated Excitatory DE with Segmentation') + ggtitle(paste0('R^2 = ', round(R2,2))) + coord_fixed()
ggarrange(p1,p2,nrow = 2)

Compare standard errors

cell_types <- c('Excitatory', 'Inhibitory')
plot_df <- data.frame(cbind(c(sapply(myRCTD@de_results$all_gene_list, function(x) median(x$se))[cell_types],
  sapply(myRCTD_pix@de_results$all_gene_list, function(x) median(x$se))[cell_types]),c(cell_types, cell_types),
  c("With segmentation", "With segmentation", "Without segmentation", "Without segmentation")))
colnames(plot_df) <- c('y','cell_type','seg')
plot_df$y <- as.numeric(plot_df$y)
ggplot(plot_df,aes(x=factor(cell_type), y =y, fill = factor(seg))) +
  geom_bar(stat = 'identity', position = 'dodge') + theme_classic() + ylab('Median DE standard error') + xlab('Cell type') + scale_fill_manual('', breaks=c("With segmentation","Without segmentation"),  values = c("#D55E00", "#0072B2")) +
  scale_y_continuous(limits = c(0, .15))

Plot all cell types (unbinned)

cell_types <- myRCTD@internal_vars_de$cell_types
my_barc <- rownames(myRCTD@results$results_df)[which((myRCTD@results$results_df$first_type == 'monocyte/DC' & myRCTD@results$results_df$spot_class != 'reject') | (myRCTD@results$results_df$second_type == 'monocyte/DC' & myRCTD@results$results_df$spot_class == 'doublet_certain' ))]
p1 <- plot_puck_continuous(myRCTD@spatialRNA, colnames(myRCTD@spatialRNA@counts) , myRCTD@internal_vars_de$X2[,2], 
                     title ='') + geom_point(data = myRCTD@spatialRNA@coords[my_barc,], size = 0.6)+ ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+  geom_segment(aes(x = 30, y = 0, xend = 30 + 250/14, yend = 0), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())+ scale_color_gradientn(colors = pals::brewer.blues(20)[2:20],name = "Distance from midline", labels = c(0,1),breaks = c(0,1), limits = c(0,1))
results_df <- myRCTD@results$results_df
puck <- myRCTD@spatialRNA
barcodes = rownames(results_df[results_df$spot_class != "reject" & puck@nUMI >= 100,])
my_table = puck@coords[barcodes,]
my_table$class = results_df[barcodes,]$first_type
n_levels = myRCTD@cell_type_info$info[[3]]
my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)]
names(my_pal) = myRCTD@cell_type_info$info[[2]]
my_pal_curr <- my_pal
my_pal_curr[cell_types[1]] <- "#CC79A7"
my_pal_curr[cell_types[2]] <- "#E69F00"
my_pal_curr[cell_types[3]] <- "#D55E00"
my_pal_curr[cell_types[5]] <- "#009E73"
my_pal_curr[cell_types[4]] <- "#0072B2"
my_pal_curr[cell_types[6]] <- "#56B4E9"
pres = unique(as.integer(my_table$class))
pres = pres[order(pres)]
p2 <- ggplot2::ggplot(my_table, ggplot2::aes(x=x, y=y)) + ggplot2::geom_point(ggplot2::aes(size = .4, shape=19,color=class)) + ggplot2::scale_color_manual("",values = my_pal_curr[pres], breaks = cell_types, labels = cell_types)+ ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ guides(colour = guide_legend(override.aes = list(size=2)))+ geom_segment(aes(x = -3200, y = -4000, xend = -3200 + 250, yend = -4000), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())
ggarrange(p1,p2,nrow = 2)

Plot all cell types (binned)

myRCTD <- myRCTD_pix
cell_types <- myRCTD@internal_vars_de$cell_types
my_barc <- rownames(myRCTD@results$results_df)[which((myRCTD@results$results_df$first_type == 'monocyte/DC' & myRCTD@results$results_df$spot_class != 'reject') | (myRCTD@results$results_df$second_type == 'monocyte/DC' & myRCTD@results$results_df$spot_class == 'doublet_certain' ))]
p1 <- plot_puck_continuous(myRCTD@spatialRNA, colnames(myRCTD@spatialRNA@counts) , myRCTD@internal_vars_de$X2[,2], 
                     title ='') + geom_point(data = myRCTD@spatialRNA@coords[my_barc,], size = 0.6)+ ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+  geom_segment(aes(x = 30, y = 0, xend = 30 + 250/14, yend = 0), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())+ scale_color_gradientn(colors = pals::brewer.blues(20)[2:20],name = "Distance from midline", labels = c(0,1),breaks = c(0,1), limits = c(0,1))
results_df <- myRCTD@results$results_df
puck <- myRCTD@spatialRNA
barcodes = rownames(results_df[results_df$spot_class != "reject" & puck@nUMI >= 100,])
my_table = puck@coords[barcodes,]
my_table$class = results_df[barcodes,]$first_type
n_levels = myRCTD@cell_type_info$info[[3]]
my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)]
names(my_pal) = myRCTD@cell_type_info$info[[2]]
my_pal_curr <- my_pal
my_pal_curr[cell_types[1]] <- "#CC79A7"
my_pal_curr[cell_types[2]] <- "#E69F00"
my_pal_curr[cell_types[3]] <- "#D55E00"
my_pal_curr[cell_types[5]] <- "#009E73"
my_pal_curr[cell_types[4]] <- "#0072B2"
my_pal_curr[cell_types[6]] <- "#56B4E9"
pres = unique(as.integer(my_table$class))
pres = pres[order(pres)]
p2 <- ggplot2::ggplot(my_table, ggplot2::aes(x=x, y=y)) + ggplot2::geom_point(ggplot2::aes(size = .4, shape=19,color=class)) + ggplot2::scale_color_manual("",values = my_pal_curr[pres], breaks = cell_types, labels = cell_types)+ ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ guides(colour = guide_legend(override.aes = list(size=2)))+ geom_segment(aes(x = 5, y = -5, xend = 5 + 250/14, yend = -5), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())
ggarrange(p1,p2,nrow = 2)


dmcable/RCTD documentation built on Feb. 24, 2024, 11:03 p.m.