knitr::opts_chunk$set(echo = T, warning = F, message = F, cache = T, cache.lazy = FALSE, results = 'hide')
library(spacexr) library(Matrix) library(devtools) library(ggplot2) library(ggpubr) library(reshape2) library(dplyr) library(ggrepel) devtools::load_all() id <- '08' puck_no <- paste0('190926_', id) datadir <- paste0('/Users/dcable/Documents/MIT/Research/Rafalab/Projects/spacexr/data/SpatialRNA/CerebellumReplicates/Puck_', '190926_11') resultsdir <- paste0('/Users/dcable/Documents/MIT/Research/Rafalab/Projects/spacexr/data/SpatialRNA/CerebellumReplicates/Puck_', puck_no) myRCTD<- readRDS(file.path(datadir,'myRCTD_cer_reps.rds'))
datadir_list <- c('../../../RCTD/data/SpatialRNA/CerebellumReplicates/Puck_190926_08', '../../../RCTD/data/SpatialRNA/CerebellumReplicates/Puck_190926_09', '../../../RCTD/data/SpatialRNA/CerebellumReplicates/Puck_190926_11') cell_types <- c('Astrocytes','Bergmann','Granule','Purkinje','Oligodendrocytes') cell_types_present <- c('Astrocytes','Bergmann','Granule','Purkinje','MLI1','MLI2','Oligodendrocytes') resultsdir <- '../../../RCTD/data/SpatialRNA/CerebellumReplicates/JointResults/' RCTDde_list <- lapply(datadir_list, function(x) readRDS(file.path(x, 'myRCTDde.rds')))
myRCTD <- RCTDde_list[[1]] de_results_list <- lapply(RCTDde_list, function(x) x@de_results) plot_results <- F if(!dir.exists(resultsdir)) dir.create(resultsdir) de_pop_all <- list() gene_final_all <- list() for(cell_type in cell_types) { res <- one_ct_genes(cell_type, RCTDde_list, de_results_list, resultsdir, cell_types_present, plot_results = plot_results) de_pop_all[[cell_type]] <- res$de_pop gene_final_all[[cell_type]] <- res$gene_final } cell_type_1 <- 'Bergmann' cell_type_2 <- 'Purkinje' if(plot_results) cell_type_comparison(de_results_list, RCTDde_list, cell_type_1, cell_type_2, cell_types_present) saveRDS(de_pop_all, file.path(resultsdir, 'de_pop_all.rds')) saveRDS(gene_final_all, file.path(resultsdir, 'gene_final_all.rds'))
de_pop_all <- readRDS(file.path(resultsdir, 'de_pop_all.rds')) gene_final_all <- readRDS(file.path(resultsdir, 'gene_final_all.rds'))
plot_df_list <- list() myRCTD <- RCTDde_list[[1]] for(cell_type in cell_types[c(2,3,4)]) { de_pop <- de_pop_all[[cell_type]] gene_big <- Reduce(intersect, lapply(RCTDde_list, function(myRCTD) get_gene_list_type_wrapper(myRCTD, cell_type, cell_types_present))) cell_type_means <- myRCTD@cell_type_info$info[[1]][gene_big,cell_types_present] cell_prop <- sweep(cell_type_means,1,apply(cell_type_means,1,max),'/') p_vals <- 2*(1-pnorm(abs(de_pop[gene_big,'Z_est']))) names(p_vals) <- gene_big plot_df <- data.frame(gene_big, cell_type, de_pop[gene_big,'mean_est'], -log(pmax(p_vals,1e-16),10), gene_big %in% gene_final_all[[cell_type]]) colnames(plot_df) <- c('gene', 'ct', 'mean', 'y', 'sig') plot_df_list[[cell_type]] <- plot_df } plot_df <- bind_rows(plot_df_list) plot_df$label <- plot_df$gene plot_df$label[!plot_df$sig] <- NA plot_df$mean <- plot_df$mean * log(exp(1),2) #convert to log scale p <- ggplot(plot_df, aes(x=mean, y = y, color = ct)) + geom_point() + theme_classic() + geom_vline(xintercept = 0.4*log(exp(1),2), linetype = 'dotted') + geom_vline(xintercept = -0.4*log(exp(1),2), linetype = 'dotted') + geom_label_repel(aes(label = label),nudge_x = 0.1,na.rm = TRUE, show.legend = FALSE) + labs(color = 'Cell Type') + xlab('Estimated cell type-specific DE') + ylab('CSIDE p-value') + scale_y_continuous(lim = c(0,16.01), breaks = c(0,5,10,15),labels = c("10^0", "10^(-5)", "10^(-10)","10^(-15)") ) p
plot_df <- data.frame('cell_type' = character(), 'sig_p' = numeric()) for(cell_type in cell_types[c(2,3,4)]) { de_pop <- de_pop_all[[cell_type]] gene_big <- Reduce(intersect, lapply(RCTDde_list, function(myRCTD) get_gene_list_type_wrapper(myRCTD, cell_type, cell_types_present))) plot_df <- rbind(plot_df, data.frame('cell_type' = cell_type, 'sig_p' = de_pop[gene_big, 'sig_p'])) } p <- ggplot(plot_df, aes(sig_p, fill = cell_type, color = cell_type)) + geom_density(alpha = 0.3) + theme_classic() + ylab('Density of genes') + scale_fill_discrete("Cell type") + scale_color_discrete("Cell type") + xlab('Standard deviation of batch effect across replicates') p
plot_df <- data.frame('rep' = integer(), 'sig_g' = numeric()) for(rep_num in 1:3) { myRCTD <- RCTDde_list[[rep_num]] plot_df <- rbind(plot_df, data.frame('rep' = rep_num, 'sig_g' = as.numeric(myRCTD@de_results$gene_fits$sigma_g)/100)) } plot_df$rep <- factor(plot_df$rep) p <- ggplot(plot_df, aes(sig_g, fill = rep, color = rep)) + geom_density(alpha = 0.3) + theme_classic() + ylab('Density of genes') + scale_fill_discrete("Replicate") + scale_color_discrete("Replicate") + xlab('Gene specific overdispersion magnitude') p
myRCTD <- readRDS('../../data/SpatialRNA/CerebellumReplicates/Puck_190926_08/myRCTDde_subtype.rds') # compare to coarse cell type myRCTD_coarse <- readRDS('../../data/SpatialRNA/CerebellumReplicates/Puck_190926_08/myRCTDde.rds') cell_type <- 'Granule' granule_list <- spacexr:::get_gene_list_type_wrapper(myRCTD_coarse, 'Granule', myRCTD_coarse@internal_vars_de$cell_types) granule_list <- intersect(rownames(myRCTD@de_results$gene_fits$mean_val),granule_list) class_df <- myRCTD@internal_vars$class_df subtypes <- rownames(class_df)[class_df$class == cell_type] diff_genes <- numeric(length(subtypes)) names(diff_genes) <- subtypes sig_genes <- diff_genes for(granule_subtype in subtypes) { ct_diff <- myRCTD_coarse@de_results$gene_fits$mean_val[granule_list, cell_type]- myRCTD@de_results$gene_fits$mean_val[granule_list, granule_subtype] #hist(ct_diff) se_diff <- sqrt(myRCTD_coarse@de_results$gene_fits$I_mat[granule_list,'2_2_Granule']^2 + myRCTD@de_results$gene_fits$s_mat[granule_list,paste0('2_2_',granule_subtype)]^2) pdiff <- 2*pnorm(-abs(ct_diff/se_diff)) pdiff[is.nan(pdiff)] <- 1 qdiff <- p.adjust(pdiff, method = 'BH') diff_genes[granule_subtype] <- sum(qdiff < 0.1) myp <- 2*pnorm(-abs(myRCTD@de_results$gene_fits$mean_val[granule_list, granule_subtype]/ myRCTD@de_results$gene_fits$s_mat[granule_list,paste0('2_2_',granule_subtype)])) myp[is.na(myp)] <- 1 sig_genes[granule_subtype] <- sum(myp < 0.01) } granule_subtype <- 'Granule_Galntl6' se_diff <- sqrt(myRCTD_coarse@de_results$gene_fits$I_mat[granule_list,'2_2_Granule']^2 + myRCTD@de_results$gene_fits$s_mat[granule_list,paste0('2_2_',granule_subtype)]^2) thresh <- 0.35*log(2) length(names(which(se_diff < thresh))) # filter out noisy points R2 <- cor(myRCTD_coarse@de_results$gene_fits$mean_val[names(which(se_diff < thresh)), cell_type], myRCTD@de_results$gene_fits$mean_val[names(which(se_diff < thresh)), 'Granule_Galntl6'])^2 plot_df <- data.frame(cbind(myRCTD_coarse@de_results$gene_fits$mean_val[names(which(se_diff < thresh)), cell_type], myRCTD@de_results$gene_fits$mean_val[names(which(se_diff < thresh)), 'Granule_Galntl6']))/log(2) colnames(plot_df) <- c('x','y') ggplot(plot_df,aes(x=x,y=y)) + geom_point() + theme_classic() + geom_line(aes(x=x,y=x)) + xlab('Estimated Granule DE') + ylab('Estimated Granule Subtype Galntl6 DE') + ggtitle(paste0('R^2 = ', round(R2,2))) + coord_fixed()
myp <- 2*pnorm(-abs(myRCTD_coarse@de_results$gene_fits$mean_val[granule_list, cell_type]/ myRCTD_coarse@de_results$gene_fits$I_mat[granule_list,'2_2_Granule'])) myp[is.na(myp)] <- 1 cell_type_power <- sum(myp < 0.01) names(cell_type_power) <- cell_type sig_genes cell_type_power plot_df <- data.frame(c(sig_genes, cell_type_power), names(c(sig_genes, cell_type_power))) colnames(plot_df) <- c('n', 'cell_type') p1 <- ggplot(plot_df, aes(x = cell_type, y = n)) + geom_bar(position="dodge", stat="identity") + theme_classic() + ylab('Number of significant genes detected (p < 0.01)') + geom_hline(linetype = 'dashed', yintercept = .01*length(granule_list)) + geom_vline(linetype = 'dashed', xintercept = 1.5) + theme(axis.text.x = element_text(angle = 25, hjust = 1)) + xlab('Cell type') p1
explanatory.variable <- myRCTD@internal_vars_de$X2[,2] cell_counts <- spacexr:::count_cell_types(myRCTD, names(explanatory.variable), myRCTD@internal_vars_de$cell_types_present, cell_type_threshold = 75)[subtypes] myRCTD_coarse@config$RCTDmode <- 'doublet' cell_type_counts <- spacexr:::count_cell_types(myRCTD_coarse, myRCTD_coarse@internal_vars_de$all_barc, myRCTD_coarse@internal_vars_de$cell_types_present, cell_type_threshold = 125) cell_type_counts <- cell_type_counts[cell_type] cell_counts cell_type_counts plot_df <- data.frame(c(cell_counts, cell_type_counts), names(c(cell_counts, cell_type_counts))) colnames(plot_df) <- c('n', 'cell_type') p1 <- ggplot(plot_df, aes(x = cell_type, y = n)) + geom_bar(position="dodge", stat="identity") + theme_classic() + ylab('Number of pixels') + geom_vline(linetype = 'dashed', xintercept = 1.5)+ theme(axis.text.x = element_text(angle = 25, hjust = 1)) + xlab('Cell type') p1
myp <- 2*pnorm(-abs(myRCTD_coarse@de_results$gene_fits$mean_val[granule_list, cell_type]/ myRCTD_coarse@de_results$gene_fits$I_mat[granule_list,'2_2_Granule'])) myp[is.na(myp)] <- 1 cell_type_power <- sum(myp < 0.01) names(cell_type_power) <- cell_type sig_genes cell_type_power plot_df <- data.frame(c(diff_genes), names(c(diff_genes))) colnames(plot_df) <- c('n', 'cell_type') p1 <- ggplot(plot_df, aes(x = cell_type, y = n)) + geom_bar(position="dodge", stat="identity") + theme_classic() + ylab('Number of significantly different DE genes') +theme(axis.text.x = element_text(angle = 25, hjust = 1)) + xlab('Cell type') p1
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.