In dmcable/RCTD: SpatialeXpressionR: Cell type identification and cell type-specific differential expression in spatial transcriptomics

knitr::opts_chunk$set(echo = T, warning = F, message = F, cache = T, results = 'hide')

Finding Spatially Variable Genes

library(spacexr)
library(Matrix)
library(ggplot2)
library(ggpubr)
library(gridExtra)
library(reshape2)
library(Seurat)

Setup and filter for genes expressed in CA3

#given a puck object, returns a puck with counts filtered based on UMI threshold and gene list
restrict_counts <- function(puck, gene_list, UMI_thresh = 1, UMI_max = 20000) {
  keep_loc = (puck@nUMI >= UMI_thresh) & (puck@nUMI <= UMI_max)
  puck@counts = puck@counts[gene_list,keep_loc]
  if(length(puck@cell_labels) > 0) #check cell_labels non null
    puck@cell_labels = puck@cell_labels[keep_loc]
  puck@nUMI = puck@nUMI[keep_loc]
  return(puck)
}

puck <- readRDS('../../Data/SpatialRNA/Puck_200115_08/puckCropped.RDS')
reference <- readRDS('../../Data/Reference/DropVizHC/scRefSubsampled1000.RDS')
cell_type_info <- get_cell_type_info(reference@assays$RNA@counts, reference@meta.data$liger_ident_coarse, reference@meta.data$nUMI)
gene_list <- intersect(rownames(cell_type_info[[1]]),rownames(puck@counts))
gene_list <- gene_list[rowSums(puck@counts[gene_list,]) >= 10]
gene_list <- gene_list[apply(cell_type_info[[1]][gene_list,],1,function(x) max(x)) >= 2e-5]
puck <- restrict_puck(puck, names(which(puck@nUMI >= 100)))
puck <- restrict_counts(puck, gene_list, UMI_max = 200000)
CA3_genes <- names(which(cell_type_info[[1]][gene_list,]$CA3*2 > apply(cell_type_info[[1]][gene_list,!(cell_type_info[[2]] %in% c('CA1','CA3','Denate','Neuron.Slc17a6', "Entorihinal", "Neurogenesis"))],1,max)))
CA3_genes <- CA3_genes[cell_type_info[[1]][CA3_genes,"CA3"] >= 2e-5]

Compute expected cell type specific gene expression

#Command used to save the data from the gather_results.R script:
#save(puck_d, iv, results, file = 'Data/SpatialRNA/Puck_200115_08/results/final_analysis/gathered_results.RData')
#loading in that data:
load('../../Data/SpatialRNA/Puck_200115_08/results/final_analysis/gathered_results_6.RData')
puck_d <- get_decomposed_data(results$results_df, CA3_genes, puck, results$weights_doublet, cell_type_info)

Find Spatially Variable Genes within CA3

cell_barc <- puck_d@cell_labels == "CA3" & puck_d@nUMI >= 300
gene_df <- puck_d@coords[cell_barc,]
gene_df$nUMI <- puck_d@nUMI[cell_barc]
get_int_genes <- function(gene_df,cell_type, gene_list_ct, puck_d, cell_barc) {
  pvals <- numeric(length(gene_list_ct)); names(pvals) = gene_list_ct
  cvs <- numeric(length(gene_list_ct)); names(cvs) = gene_list_ct
  toti = 0
  for(gene in gene_list_ct) {
    print(toti)
    toti = toti + 1
    gene_df$nUMI <- puck_d@nUMI[cell_barc]
    Tr = 100
    f_dist <- numeric(Tr)
    F_keep = 0
    big_count = 0
    for(i in 1:Tr) {
      gene_df$gene <- puck_d@counts[gene,cell_barc]/ puck_d@nUMI[cell_barc]
      if(i > 1)
        gene_df$gene <- sample(gene_df$gene)
      n = dim(gene_df)[1]
      fit <- loess(gene ~ x*y, span = 0.8, data = gene_df, degree = 1)
      #with(dat, plot(x, y, col = getcol(f), pch = 16, cex = 0.5, main = i))
      p <- fit$enp+1
      ss_total <-   sum((gene_df$gene- mean(gene_df$gene))^2)
      rss <- sum(fit$residuals^2)
      ss_reg <- ss_total - rss
      ms_reg <- ss_reg / (p-1)
      ms_res <- rss/(n-p)
      fstat <- ms_reg/ms_res
      if(i == 1) {
        gene_df$fitted <- fitted(fit)
        F_keep = fstat
        cvs[gene] <- sd(gene_df$fitted)/mean(gene_df$fitted)
        if(cvs[gene] < 0.5)
          break
      } else {
        if(fstat > F_keep)
          big_count = big_count + 1
      }
      if(big_count > 3)
        break
      #print(c(fstat, 1  - pf(fstat, p-1, n-p)))
      f_dist[i] <- fstat
    }
    pvals[gene] = (big_count + 1) / Tr
  }
  means_df <- data.frame(pvals,cvs)
  return(means_df)
}
means_df_CA3 <- get_int_genes(gene_df,"CA3", CA3_genes, puck_d, cell_barc)
int_genes_CA3 <- rownames(means_df_CA3[means_df_CA3$cvs >= 0.5 & means_df_CA3$pvals < 0.02,])

Find Spatially Variable Genes Ignoring Cell Type

hippo <- CreateSeuratObject(puck@counts, project = "Slideseq",assay = "RNA",min.cells = 0,min.features = 0,names.field = 1,names.delim = "_",meta.data = NULL)
hippo <- NormalizeData(hippo, verbose = FALSE)
hippo <- FindVariableFeatures(hippo, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
hippo <- ScaleData(hippo)
hippo[['image']] <- new(Class='SlideSeq',assay='RNA',coordinates = puck@coords[colnames(puck@counts),])
resultsdir = "Data/Slideseq/NewCerPuck_190926_08/SeuratResults/"
DefaultAssay(hippo) <- "RNA"
hippo <- FindSpatiallyVariableFeatures(hippo, assay = "RNA", slot = "scale.data", features = VariableFeatures(hippo)[1:1000],
                                       selection.method = "moransi", x.cuts = 100, y.cuts = 100)
spatial_genes <- t(cell_type_info[[1]][head(SpatiallyVariableFeatures(hippo, selection.method = "moransi"),20),])

Plot coefficient of variation of spatially variable genes, across and within cell types

CA3_spatial <- intersect(colnames(spatial_genes),rownames(means_df_CA3))
p_val <- means_df_CA3[int_genes_CA3,'pvals']
names(p_val) <- int_genes_CA3
write.csv(data.frame(p_val), file = "Results/CA3.csv")
all_cv <- apply(cell_type_info[[1]][gene_list,],1,function(x) sd(x)/mean(x))
plot_df <- data.frame(c(all_cv[colnames(spatial_genes)],sample(all_cv,50)), c(rep('glob',length(colnames(spatial_genes))),rep('null',50))) 
colnames(plot_df) <- c('quant','colr')
plot_df$cat <- 1
p2 <- ggplot(plot_df, aes(x = colr, y = quant)) +
  geom_jitter(alpha = 0.75) +
  geom_boxplot(fill = NA, width = 0.4, outlier.alpha = 0) +
  theme_classic() + scale_y_continuous(breaks = c(0,2,4), limits = c(0,4.2)) +xlab('Globally Variable Genes') + ylab('Coefficient of Variation Across Cell Types') +  scale_x_discrete(labels = c('Genes Detected Ignoring Cell Type','Randomly Selected Genes')) +  theme(axis.title.x=element_blank(),axis.text=element_text(size=8)) 

fc_df <- data.frame(means_df_CA3[int_genes_CA3,]$cvs, 'Local')
fc_df2 <- data.frame(means_df_CA3[CA3_spatial,]$cvs,'Global')
colnames(fc_df) <- c('score','class'); colnames(fc_df2) <- c('score','class')
fc_df <- rbind(fc_df2,fc_df)
p1 <- ggplot(fc_df, aes(x = class, y = score)) +
  geom_jitter(alpha = .75) +
  geom_boxplot(fill = NA, width = 0.4, outlier.alpha = 0) +
  theme_classic() + ylim(c(0,1.7)) + ylab('Coefficient of Variation Within CA3')  + scale_x_discrete(labels = c('Genes Detected Ignoring Cell Type','Genes Detected within Cell Type')) +  theme(axis.title.x=element_blank(),axis.text=element_text(size=7))


ggarrange(p2,p1,nrow = 1)

Plot the gene Ptk2b in excitatory neurons and in general

my_mod <- function(p) {
  p + scale_x_continuous(breaks = c(1500,3500,5500), limits = c(1450,5700)) + scale_y_continuous(breaks =c(2000,3250,4500), limits = c(1800,4700)) + geom_segment(aes(x = 1700, y = 2100, xend = 2084.6, yend = 2100), color = "black")+ theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())+ theme(legend.position="top")
}
results_df <- results$results_df
gene = "Ptk2b"
my_class <- rep(0,length(colnames(puck@counts)))
names(my_class) <- colnames(puck@counts)
type_list <- c("CA1","CA3","Denate")
my_ind <- (results_df$spot_class != "reject" & results_df$first_type %in% type_list) | (results_df$spot_class == "doublet_certain" & results_df$second_type %in% type_list) 
my_ind2 <- (results_df$spot_class %in% c("singlet", "doublet_certain")) & !my_ind
my_class[names(which(puck@counts[gene,my_ind] < 0.1))] <- 1
my_class[names(which(puck@counts[gene,my_ind] >= 0.1))] <- 3
my_class[names(which(puck@counts[gene,my_ind2] < 0.1))] <- 2
my_class[names(which(puck@counts[gene,my_ind2] >= 0.1))] <- 4
my_barc <- names(my_class[my_class > 0])
p <- plot_class(puck, my_barc[order(my_class[my_barc])], factor(my_class)) 
p <- my_mod(p) + scale_color_manual(values=c("#CCE2EF","#F6DECC","#0072B2","#D55E00"))
ggarrange(p)

2D local regression smoothing of Rgs14 and Cpne9 genes within CA3 cell types

my_pal = pals::brewer.ylorrd(20)[2:20]

my_mod <- function(p) {
  p + xlim(c(3800,5700)) + ylim(c(2100,3500)) + geom_segment(aes(x = 4000, y = 2400, xend = 4154, yend = 2400), color = "black")+ theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())+ theme(legend.position="top")
}

#int_genes <- c("Rgs14","Spink8",'Cpne9','Kcnf1')
int_genes <- c("Rgs14",'Cpne9')
MULT <- 500
p1 <- list()
p2 <- list()
for(ind in 1:length(int_genes)) {
  gene <- int_genes[ind]
  max_val <- 20*mean((puck_d@counts[gene,]/puck_d@nUMI)[cell_barc])
  cur_range <- c(0, MULT*.00087)
  if(ind == 2)
    cur_range <- c(0,MULT*.00025)
  barcodes <- rownames(gene_df)
  raw_df <- puck_d@coords[barcodes,]
  raw_df$val <- as.integer((puck_d@counts[gene,]/puck_d@nUMI)[barcodes] > 0.0002)
  raw_df <- raw_df[raw_df$val > 0,]
  raw_df$val <- factor(raw_df$val)



  gene_df$gene <- puck_d@counts[gene,cell_barc]/puck_d@nUMI[cell_barc]
  fit <- loess(gene ~ x*y, span = 0.8, data = gene_df, degree = 1)
  gene_df$fitted <- fitted(fit)
  plot_val <- pmax(0,gene_df$fitted)
  names(plot_val) <- as.character(which(cell_barc))
  min_val = MULT*min(plot_val);max_val = MULT*quantile(plot_val[gene_df$x  >= 3800 & gene_df$x < 5700 & gene_df$y > 2100 & gene_df$y < 3500],0.95)
  p2[[ind]]<- plot_puck_continuous(puck_d,barcodes,MULT*plot_val[names(puck_d@cell_labels)], ylimit = c(min_val,max_val), size = 1.25, alpha = 0.2)
  cur_range <- c(-.00001*MULT, signif(max_val,2))
  p2[[ind]] <- my_mod(p2[[ind]])+ ggplot2::scale_colour_gradientn(paste(gene, "Smoothed"), colors = pals::brewer.orrd(20)[2:20],limits = cur_range, breaks = c(0,signif(max_val,2)), labels = c(0,signif(max_val,2)))
  p2[[ind]] <- p2[[ind]] + geom_point(data=raw_df, aes(fill=val),,size = 0.1) + scale_fill_manual("Raw",values = c("#000000"),labels = c("")) + guides(fill = guide_legend(override.aes = list(size=2)))
}
#ggarrange(p1[[1]], p2[[1]], p1[[2]], p2[[2]],p1[[3]], p2[[3]],p1[[4]], p2[[4]], nrow = 4,ncol=2)
#ggarrange(p1[[1]], p2[[1]],p1[[2]], p2[[2]], nrow = 2,ncol=2)
ggarrange(p2[[1]], p2[[2]],nrow = 1,ncol=2)

dmcable/RCTD documentation built on Feb. 24, 2024, 11:03 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

dmcable/RCTD
SpatialeXpressionR: Cell type identification and cell type-specific differential expression in spatial transcriptomics

In dmcable/RCTD: SpatialeXpressionR: Cell type identification and cell type-specific differential expression in spatial transcriptomics

Finding Spatially Variable Genes

Setup and filter for genes expressed in CA3

Compute expected cell type specific gene expression

Find Spatially Variable Genes within CA3

Find Spatially Variable Genes Ignoring Cell Type

Plot coefficient of variation of spatially variable genes, across and within cell types

Plot the gene Ptk2b in excitatory neurons and in general

2D local regression smoothing of Rgs14 and Cpne9 genes within CA3 cell types

R Package Documentation

Browse R Packages

We want your feedback!

dmcable/RCTD SpatialeXpressionR: Cell type identification and cell type-specific differential expression in spatial transcriptomics

In dmcable/RCTD: SpatialeXpressionR: Cell type identification and cell type-specific differential expression in spatial transcriptomics

Finding Spatially Variable Genes

Setup and filter for genes expressed in CA3

Compute expected cell type specific gene expression

Find Spatially Variable Genes within CA3

Find Spatially Variable Genes Ignoring Cell Type

Plot coefficient of variation of spatially variable genes, across and within cell types

Plot the gene Ptk2b in excitatory neurons and in general

2D local regression smoothing of Rgs14 and Cpne9 genes within CA3 cell types

R Package Documentation

Browse R Packages

We want your feedback!

dmcable/RCTD
SpatialeXpressionR: Cell type identification and cell type-specific differential expression in spatial transcriptomics