knitr::opts_chunk$set(echo = T, warning = F, message = F, cache = T, cache.lazy = FALSE, results = 'hide')
library(spacexr) library(Matrix) library(devtools) library(ggplot2) library(ggpubr) library(reshape2) library(dplyr) library(ggrepel) source('~/Documents/MIT/Research/Rafalab/Projects/spacexr/AnalysisCSIDE/helper_functions/testes_helper.R') source('~/Documents/MIT/Research/Rafalab/Projects/spacexr/AnalysisCSIDE/helper_functions/merge_de_helper.R') devtools::load_all() datadir <- '../../data/SpatialRNA/Testes' puck <- readRDS(file.path(datadir,'puck.rds')) ### END PRELUDE myRCTD <- readRDS(file.path(datadir,'myRCTD_testes.rds')) myRCTD@originalSpatialRNA <- puck
codes_1 <- c() stadir <- file.path(datadir,'Stage/I-III') id <- 0 cvx_df <- data.frame(x=numeric(),y=numeric(),stage=numeric(),id = numeric()) for(file in list.files(stadir)) { labels <- read.csv(file.path(stadir,file)) tubule_codes <- as.character(labels$barcode) codes_1 <- c(codes_1,tubule_codes) print(length(codes_1)) my_coords <- puck@coords[tubule_codes,1:2] my_ind <- chull(puck@coords[tubule_codes,]) my_coords <- my_coords[my_ind,] temp_df <- cbind(my_coords, 1,id) colnames(temp_df)[3] <- 'stage' cvx_df <- bind_rows(cvx_df, temp_df) id <- id + 1 } codes_2 <- c() stadir <- file.path(datadir,'Stage/IV-VI') for(file in list.files(stadir)) { labels <- read.csv(file.path(stadir,file)) tubule_codes <- as.character(labels$barcode) codes_2 <- c(codes_2,tubule_codes) print(length(codes_2)) my_coords <- puck@coords[tubule_codes,1:2] my_ind <- chull(puck@coords[tubule_codes,]) my_coords <- my_coords[my_ind,] temp_df <- cbind(my_coords, 2,id) colnames(temp_df)[3] <- 'stage' cvx_df <- bind_rows(cvx_df, temp_df) id <- id + 1 } codes_3 <- c() stadir <- file.path(datadir,'Stage/VII-VIII') for(file in list.files(stadir)) { labels <- read.csv(file.path(stadir,file)) tubule_codes <- as.character(labels$barcode) codes_3 <- c(codes_3,tubule_codes) print(length(codes_3)) my_coords <- puck@coords[tubule_codes,1:2] my_ind <- chull(puck@coords[tubule_codes,]) my_coords <- my_coords[my_ind,] temp_df <- cbind(my_coords, 3,id) colnames(temp_df)[3] <- 'stage' cvx_df <- bind_rows(cvx_df, temp_df) id <- id + 1 } codes_4 <- c() stadir <- file.path(datadir,'Stage/IX-XII') for(file in list.files(stadir)) { labels <- read.csv(file.path(stadir,file)) tubule_codes <- as.character(labels$barcode) codes_4 <- c(codes_4,tubule_codes) print(length(codes_4)) my_coords <- puck@coords[tubule_codes,1:2] my_ind <- chull(puck@coords[tubule_codes,]) my_coords <- my_coords[my_ind,] temp_df <- cbind(my_coords, 4,id) colnames(temp_df)[3] <- 'stage' cvx_df <- bind_rows(cvx_df, temp_df) id <- id + 1 } barcodes_1 <- intersect(codes_1,names(myRCTD@spatialRNA@nUMI)) barcodes_2 <- intersect(codes_2,names(myRCTD@spatialRNA@nUMI)) barcodes_3 <- intersect(codes_3,names(myRCTD@spatialRNA@nUMI)) barcodes_4 <- intersect(codes_4,names(myRCTD@spatialRNA@nUMI)) region_list <- list(barcodes_1, barcodes_2, barcodes_3, barcodes_4) saveRDS(cvx_df, file.path(datadir, 'cvx_df.rds')) saveRDS(region_list, file.path(datadir, 'region_list.rds'))
region_list <- readRDS(file.path(datadir, 'region_list.rds')) cvx_df <- readRDS(file.path(datadir, 'cvx_df.rds')) cvx_df$stage <- factor(cvx_df$stage) cvx_df <- bind_rows(cvx_df, cvx_df[which(!duplicated(cvx_df$id)),]) all_barc <- intersect(Reduce(union,region_list), colnames(myRCTD@spatialRNA@counts)) aggregate_cell_types(myRCTD,all_barc,doublet_mode = F) myRCTD@config$max_cores <- 4 cur_cell_types = c('1','2','4','5','6') cell_types <- cur_cell_types
myRCTDde <- run.de.regions(myRCTD, region_list, datadir = datadir, cell_types = cur_cell_types, doublet_mode = F) myRCTDde@internal_vars_de$delta <- 0; myRCTDde@internal_vars_de$test_mode <- 'multi' myRCTDde@reference <- gen_small_reference() myRCTDde <- add_res_genes(myRCTDde, datadir = datadir, plot_genes = F) saveRDS(myRCTDde, file.path(datadir,'myRCTDde_updated2.rds')) make_all_de_plots(myRCTDde, datadir)
myRCTDde <- readRDS(file.path(datadir,'myRCTDde_updated2.rds')) cell_type_mapping <- c('1'="ES", '2'="RS", '3'='Myoid', '4'="SPC", '5'="SPG", '6'="Sertoli", '7'="Leydig",'8'="Endothelial",'9'="Macrophage") #extract relevant variables X2 <- myRCTDde@internal_vars_de$X2 my_beta <- myRCTDde@internal_vars_de$my_beta cell_types_present <- myRCTDde@internal_vars_de$cell_types_present cur_cell_types <- myRCTDde@internal_vars_de$cell_types all_barc <- myRCTDde@internal_vars_de$all_barc n_regions <- dim(X2)[2] gene_fits <- myRCTDde@de_results$gene_fits
plot_df <- get_Z_test_res(c('1','2','4'), myRCTDde, X2, puck) saveRDS(plot_df, file.path(datadir, 'z_test_plot_df.rds')) ct_df <- get_Z_test_res_ct('1', cur_cell_types, myRCTDde, X2,puck, fdr = .01) saveRDS(ct_df, file.path(datadir, 'ct_df.rds'))
n_levels = myRCTD@cell_type_info$info[[3]] my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)] names(my_pal) = myRCTD@cell_type_info$info[[2]] my_pal_curr <- my_pal my_pal_curr[cell_types[4]] <- "#CC79A7" my_pal_curr[cell_types[5]] <- "#E69F00" my_pal_curr[cell_types[3]] <- "#D55E00" my_pal_curr[cell_types[1]] <- "#009E73" my_pal_curr[cell_types[2]] <- "#0072B2" plot_df <- readRDS(file.path(datadir, 'z_test_plot_df.rds')) p1 <- ggplot(plot_df, aes(fill = method, x = cur_cell_types, y = count)) + geom_bar(position="dodge", stat="identity") + theme_classic() + ylab('Number of significant genes detected') + scale_fill_manual("Method", breaks = c('RCTD','Z'), labels = c('CSIDE','Z-test'), values = unname(my_pal_curr[c(4,2)])) + scale_x_discrete('Cell Type', breaks = c(1,2,4), labels = c('ES','RS','SPC')) p2 <- ggplot(plot_df, aes(fill = method, x = cur_cell_types, y = n_total)) + geom_bar(position="dodge", stat="identity") + theme_classic() + ylab('Number of pixels used for each cell type') + scale_fill_manual("Method", breaks = c('RCTD','Z'), labels = c('CSIDE','Z-test'), values = unname(my_pal_curr[c(4,2)])) + scale_x_discrete('Cell Type', breaks = c(1,2,4), labels = c('ES','RS','SPC')) ggarrange(p1,p2, nrow = 2)
ct_df <- readRDS(file.path(datadir, 'ct_df.rds')) both_genes <- intersect(rownames(myRCTDde@de_results$res_gene_list[['1']]), rownames(ct_df)) print(cor(gene_fits$all_vals[both_genes,1,1] - gene_fits$all_vals[both_genes,2,1], ct_df[both_genes, 'log_fc_12'])^2) plot(gene_fits$all_vals[both_genes,1,1] - gene_fits$all_vals[both_genes,2,1], ct_df[both_genes, 'log_fc_12'])
cell_type_1 <- '2'; cell_type_2 <- '4' cell_type_1 <- '1'; cell_type_2 <- '4' cell_type_1 <- '1'; cell_type_2 <- '2' cor_res <- cor_ct_patterns(cell_type_1, cell_type_2, myRCTDde, cell_types_present, X2, gene_fits, cur_cell_types) hist(cor_res$res_n) hist(cor_res$res_corr[cor_res$res_corr > -1]) cor_mat <- cor_ct_patterns_quant(cell_type_1, cell_type_2, myRCTDde, cell_types_present, X2, gene_fits, cur_cell_types) corrplot::corrplot(cor_mat)
all_ct <- c('1','2','4') info_1 <- get_marker_by_region(all_ct, '1', n_regions, myRCTDde, cell_types_present, gene_fits, trials = 50000, p_val_thresh = .001) saveRDS(info_1, file.path(datadir, 'info_1.rds')) info_1 <- readRDS(file.path(datadir, 'info_1.rds')) info_2 <- get_marker_by_region(all_ct, '2', n_regions, myRCTDde, cell_types_present, gene_fits, trials = 50000, p_val_thresh = .001) saveRDS(info_2, file.path(datadir, 'info_2.rds')) info_2 <- readRDS(file.path(datadir, 'info_2.rds')) info_4 <- get_marker_by_region(all_ct, '4', n_regions, myRCTDde, cell_types_present, gene_fits, trials = 50000, p_val_thresh = .001) saveRDS(info_4, file.path(datadir, 'info_4.rds')) info_4 <- readRDS(file.path(datadir, 'info_4.rds')) table(info_1$first_region) table(info_2$first_region) table(info_4$first_region) info_1[order(info_1$first_region),] infodf <- info_1[info_1$first_region == 4,] infodf[order(-infodf$diff),]
region <- 4 info_1 <- readRDS(file.path(datadir, 'info_1.rds')) gene_list <- rownames(info_1)[info_1$first_region == region] final_gene_list <- gene_list fit_df <- cbind(gene_fits$all_vals[final_gene_list,,1], gene_fits$all_vals[final_gene_list,,2], gene_fits$all_vals[final_gene_list,,3]) fit_df <- exp(fit_df) norm_df <- sweep(fit_df, 1, apply(fit_df,1,max),'/') plot_df <- reshape2::melt(norm_df) colnames(plot_df) <- c('gene', 'region_id', 'expr') plot_df$region <- ((plot_df$region_id - 1) %% n_regions)+ 1 plot_df$cell_type <- cur_cell_types[floor((plot_df$region_id - 1) / n_regions) + 1] cur_range = c(0,1) p <- ggplot(plot_df, aes(region_id, gene, fill = expr)) + geom_tile() + scale_fill_gradientn(colors = pals::brewer.blues(20)[2:20],name = "Normalized Expression", labels = c(0,1),breaks = c(0,1)) p
library(ggnewscale) cell_type <- '1'; region <- 4 gene_thresh <- 7.5; gene <- 'Tnp1' make_ct_region_plot(cur_cell_types, cell_type, region, my_beta, X2, gene_thresh, puck, gene, sing_thresh = 0.8) + new_scale_color()+ geom_path(data = cvx_df,aes(x=x,y=y, group = id, color = stage), linetype = 'dashed') + scale_color_manual('Stage', values = c('#E69F00','#CC79A7', '#EFCB00',"#000000"), labels = c('I-III','IV-VI', 'VII-VIII', 'IX-XII'))+ geom_segment(aes(x = 1300, y = 1000, xend = 1684.6, yend = 1000), color = "black")+labs(color='') + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank()) + coord_fixed()
region <- 4 gene_list <- rownames(info_1)[info_1$first_region == region] cell_type <- '1'; region <- 4 gene_thresh <- 45; gene <- 'Tnp1' make_ct_region_plot(cur_cell_types, cell_type, region, my_beta, X2, gene_thresh, puck, gene_list, sing_thresh = 0.8) + new_scale_color()+ geom_path(data = cvx_df,aes(x=x,y=y, group = id, color = stage), linetype = 'dashed') + scale_color_manual('Stage', values = c('#E69F00','#CC79A7', '#EFCB00',"#000000"), labels = c('I-III','IV-VI', 'VII-VIII', 'IX-XII'))+ geom_segment(aes(x = 1300, y = 1000, xend = 1684.6, yend = 1000), color = "black")+labs(color='') + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank()) + coord_fixed()
gene_list_final <- c() region_list <- c() ct_list <- c() mean_list <- matrix(0, 12,4) sd_list <- matrix(0,12,4) cur_ind <- 1 for(ct_ind in 1:3) { Imat_ind <- (ct_ind-1)*4 + (1:4) sig_gene_list <- rownames(myRCTDde@de_results$res_gene_list[[ct_ind]]) mean_mat <- gene_fits$all_vals[sig_gene_list,,ct_ind] sd_mat <- gene_fits$I_mat[sig_gene_list, Imat_ind] cur_gene_list <- names(which(rowSums(sd_mat < 0.2) == 4)) cur_gene_list <- names(which(apply(mean_mat[cur_gene_list,],1, is.maxcyclic))) maxcyc_df <- cbind(apply(mean_mat[cur_gene_list,],1,which.max), apply(mean_mat[cur_gene_list,],1,breathing.room)) for(region in unique(maxcyc_df[,1])) { match_list <- which(maxcyc_df[,1] == region) reg_df <- maxcyc_df[match_list,] if(length(match_list) > 1) cur_gene <- names(which.max(reg_df[,2])) else cur_gene <- rownames(maxcyc_df)[which(maxcyc_df[,1] == region)] gene_list_final <- c(gene_list_final,cur_gene) region_list <- c(region_list,region) ct_list <- c(ct_list, ct_ind) mean_list[cur_ind,] <- mean_mat[cur_gene,] sd_list[cur_ind,] <- sd_mat[cur_gene,] cur_ind <- cur_ind + 1 } } plot_df <- data.frame(gene_list_final,region_list,ct_list) mean_list <- mean_list[1:(cur_ind-1),] sd_list <- sd_list[1:(cur_ind-1),] df1 <- cbind(plot_df, 1, mean_list[,1], sd_list[,1]) plot_df <- rbind(setNames(df1,colnames(df1)), setNames(cbind(plot_df, 2, mean_list[,2], sd_list[,2]),colnames(df1)), setNames(cbind(plot_df, 3, mean_list[,3], sd_list[,3]),colnames(df1)), setNames(cbind(plot_df, 4, mean_list[,4], sd_list[,4]),colnames(df1))) colnames(plot_df) <- c('gene','regionmax','cell_type','region','mean','sd') plot_df$regionmax <- factor(plot_df$regionmax) plot_df$cell_type <- factor(plot_df$cell_type) library(ggrepel) plot_df$label <- plot_df$gene plot_df$label[plot_df$region > 1] <- NA plot_df$region <- factor(plyr::mapvalues(plot_df$region, from = c(1:4), to= c('I-III','IV-VI','VII-VIII','IX-XII')), levels = c('I-III','IV-VI','VII-VIII','IX-XII')) plot_df$regionmax <- factor(plyr::mapvalues(plot_df$regionmax, from = c(1:4), to= c('I-III','IV-VI','VII-VIII','IX-XII')), levels = c('I-III','IV-VI','VII-VIII','IX-XII')) plot_df[,c('mean','sd')] <- plot_df[,c('mean','sd')]*log(exp(1),2) # scale to log 2 plot_df$mean <- plot_df$mean + log(500,2)# counts per 500 ggplot(plot_df, aes(x = region, y = mean, group = gene, color = cell_type)) + geom_line() + geom_point(size = 3) + theme_classic() + geom_label_repel(aes(label = label),nudge_x = -0.05,na.rm = TRUE, show.legend = F) + facet_wrap(regionmax ~.) + geom_errorbar(aes(ymin = mean - 1.96*sd, ymax = mean+1.96*sd, width = 0.1)) + scale_color_manual('Cell type', breaks = c(1,2,3),values = unname(my_pal_curr[c(1,2,4)]), labels = c('ES','RS','SPC')) + xlab('Stage') + ylab('Log estimated expression')
results_mat <- matrix(0,3,9) for(ct_ind in 1:3) { Imat_ind <- (ct_ind-1)*4 + (1:4) sig_gene_list <- rownames(myRCTDde@de_results$res_gene_list[[ct_ind]]) mean_mat <- gene_fits$all_vals[sig_gene_list,,ct_ind] cyclic <- table(apply(mean_mat, 1, is.cyclic)) percent_cyclic <- cyclic[2] / sum(cyclic) maxcyclic <- table(apply(mean_mat, 1, is.maxcyclic.thresh)) percent_maxcyclic <- maxcyclic[2] / sum(maxcyclic) random_chance<- mean(apply(mean_mat, 1, maxcyclic.thresh.prob)) norm_mean_mat <- t(apply(mean_mat, 1, norm_vec)) results_vec <- c(D_thresh, percent_cyclic, 3/4, percent_maxcyclic, random_chance, colMeans(norm_mean_mat)) results_mat[ct_ind, ] <- results_vec } colnames(results_mat) <- c('D_thresh', 'cyclic', 'randomc', 'cyclicmax', 'randomm', 'pos1','pos2','pos3','pos4') results_mat <- as.data.frame(results_mat) results_mat$ct_ind <- 1:3 plot_df <- melt(results_mat[,c(2:5,10)], id = 'ct_ind') ggplot(plot_df[plot_df$variable %in% c('cyclicmax','randomm'),],aes(x=factor(ct_ind), y =value, fill = factor(variable))) + geom_bar(stat = 'identity', position = 'dodge') + ylim(c(0,1)) + theme_classic()+ ylab('Cyclic genes as a proportion of significant genes') + scale_fill_manual("", breaks = c('cyclicmax','randomm'), labels = c('CSIDE Estimates','Shuffled'), values = unname(my_pal_curr[c(4,2)])) + scale_x_discrete('Cell Type', breaks = c(1,2,3), labels = c('ES','RS','SPC'))
barcodes <- myRCTDde@internal_vars_de$all_barc O <- myRCTDde@internal_vars_de$my_beta > 0.25 S <- myRCTDde@internal_vars_de$my_beta[,1] + myRCTDde@internal_vars_de$my_beta[,2] > 0.25 vec <- c(sum(S & O[,3] & myRCTDde@internal_vars_de$X2[all_barc,1]), sum(!S & O[,3]& myRCTDde@internal_vars_de$X2[all_barc,1]), sum(S & O[,3] & myRCTDde@internal_vars_de$X2[all_barc,2]), sum(!S & O[,3]& myRCTDde@internal_vars_de$X2[all_barc,2]), sum(S & O[,3] & myRCTDde@internal_vars_de$X2[all_barc,3]), sum(!S & O[,3]& myRCTDde@internal_vars_de$X2[all_barc,3]), sum(S & O[,3] & myRCTDde@internal_vars_de$X2[all_barc,4]), sum(!S & O[,3]& myRCTDde@internal_vars_de$X2[all_barc,4])) plot_df <- data.frame(cbind(vec, c(T,F,T,F,T,F,T,F), c(1,1,2,2,3,3,4,4))) colnames(plot_df) <- c('n','S','region') ggplot(plot_df,aes(x=factor(region), y =n, fill = factor(S))) + geom_bar(stat = 'identity', position = 'dodge') + theme_classic() + ylab('Number of pixels') + xlab('Region') + scale_fill_manual('', breaks=c(0,1), labels=c('ES/RS Absent','ES/RS Present'), values = unname(my_pal_curr[c(4,2)]))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.