knitr::opts_chunk$set(echo = T, warning = F, message = F, cache = T, cache.lazy = FALSE, results = 'hide')
library(spacexr) library(Matrix) library(devtools) library(ggplot2) library(ggpubr) library(reshape2) library(dplyr) library(ggrepel) source('~/Documents/MIT/Research/Rafalab/Projects/spacexr/AnalysisCSIDE/helper_functions/testes_helper.R') source('~/Documents/MIT/Research/Rafalab/Projects/spacexr/AnalysisCSIDE/helper_functions/merge_de_helper.R') source('~/Documents/MIT/Research/Rafalab/Projects/spacexr/R/CSIDE_plots.R') load_all() datadir <- '../../../spacexr/data/SpatialRNA/Testes' puck <- readRDS(file.path(datadir,'puck.rds')) ### END PRELUDE myRCTD <- readRDS(file.path(datadir,'myRCTD_testes.rds')) myRCTD@config$doublet_mode <- 'full' myRCTD@originalSpatialRNA <- puck
codes_1 <- c() stadir <- file.path(datadir,'Stage/I-III') id <- 0 cvx_df <- data.frame(x=numeric(),y=numeric(),stage=numeric(),id = numeric()) for(file in list.files(stadir)) { labels <- read.csv(file.path(stadir,file)) tubule_codes <- as.character(labels$barcode) codes_1 <- c(codes_1,tubule_codes) print(length(codes_1)) my_coords <- puck@coords[tubule_codes,1:2] my_ind <- chull(puck@coords[tubule_codes,]) my_coords <- my_coords[my_ind,] temp_df <- cbind(my_coords, 1,id) colnames(temp_df)[3] <- 'stage' cvx_df <- bind_rows(cvx_df, temp_df) id <- id + 1 } codes_2 <- c() stadir <- file.path(datadir,'Stage/IV-VI') for(file in list.files(stadir)) { labels <- read.csv(file.path(stadir,file)) tubule_codes <- as.character(labels$barcode) codes_2 <- c(codes_2,tubule_codes) print(length(codes_2)) my_coords <- puck@coords[tubule_codes,1:2] my_ind <- chull(puck@coords[tubule_codes,]) my_coords <- my_coords[my_ind,] temp_df <- cbind(my_coords, 2,id) colnames(temp_df)[3] <- 'stage' cvx_df <- bind_rows(cvx_df, temp_df) id <- id + 1 } codes_3 <- c() stadir <- file.path(datadir,'Stage/VII-VIII') for(file in list.files(stadir)) { labels <- read.csv(file.path(stadir,file)) tubule_codes <- as.character(labels$barcode) codes_3 <- c(codes_3,tubule_codes) print(length(codes_3)) my_coords <- puck@coords[tubule_codes,1:2] my_ind <- chull(puck@coords[tubule_codes,]) my_coords <- my_coords[my_ind,] temp_df <- cbind(my_coords, 3,id) colnames(temp_df)[3] <- 'stage' cvx_df <- bind_rows(cvx_df, temp_df) id <- id + 1 } codes_4 <- c() stadir <- file.path(datadir,'Stage/IX-XII') for(file in list.files(stadir)) { labels <- read.csv(file.path(stadir,file)) tubule_codes <- as.character(labels$barcode) codes_4 <- c(codes_4,tubule_codes) print(length(codes_4)) my_coords <- puck@coords[tubule_codes,1:2] my_ind <- chull(puck@coords[tubule_codes,]) my_coords <- my_coords[my_ind,] temp_df <- cbind(my_coords, 4,id) colnames(temp_df)[3] <- 'stage' cvx_df <- bind_rows(cvx_df, temp_df) id <- id + 1 } barcodes_1 <- intersect(codes_1,names(myRCTD@spatialRNA@nUMI)) barcodes_2 <- intersect(codes_2,names(myRCTD@spatialRNA@nUMI)) barcodes_3 <- intersect(codes_3,names(myRCTD@spatialRNA@nUMI)) barcodes_4 <- intersect(codes_4,names(myRCTD@spatialRNA@nUMI)) region_list <- list(barcodes_1, barcodes_2, barcodes_3, barcodes_4) saveRDS(cvx_df, file.path(datadir, 'cvx_df.rds')) saveRDS(region_list, file.path(datadir, 'region_list.rds'))
region_list <- readRDS(file.path(datadir, 'region_list.rds')) cvx_df <- readRDS(file.path(datadir, 'cvx_df.rds')) cvx_df$stage <- factor(cvx_df$stage) cvx_df <- bind_rows(cvx_df, cvx_df[which(!duplicated(cvx_df$id)),]) all_barc <- intersect(Reduce(union,region_list), colnames(myRCTD@spatialRNA@counts)) aggregate_cell_types(myRCTD,all_barc,doublet_mode = F) myRCTD@config$max_cores <- 4 cur_cell_types = c('1','2','4','5','6') cell_types <- cur_cell_types
region_no <- rep(0, length(names(puck@nUMI))) names(region_no) <- names(puck@nUMI) for(i in 1:4) region_no[region_list[[i]]] <- i p1 <- plot_class(puck, names(region_no), factor(region_no))+ ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ guides(colour = guide_legend(override.aes = list(size=2)))+ ggplot2::scale_color_manual("Stage",values = c('grey', '#E69F00','#CC79A7', '#EFCB00',"#000000"), breaks = c(0,1,2,3,4), labels = c('Not Classified','I-III','IV-VI', 'VII-VIII', 'IX-XII'))+ geom_segment(aes(x = 1300, y = 1000, xend = 1684.6, yend = 1000), color = "black")+ theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank()) my_barc <- rownames(myRCTD@results$results_df) results_df <- myRCTD@results$results_df puck <- myRCTD@spatialRNA barcodes = rownames(results_df[results_df$spot_class != "reject" & puck@nUMI >= 100,]) my_table = puck@coords[barcodes,] my_table$class = results_df[barcodes,]$first_type n_levels = myRCTD@cell_type_info$info[[3]] my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)] names(my_pal) = myRCTD@cell_type_info$info[[2]] my_pal_curr <- my_pal my_pal_curr[cell_types[4]] <- "#CC79A7" my_pal_curr[cell_types[5]] <- "#E69F00" my_pal_curr[cell_types[3]] <- "#D55E00" my_pal_curr[cell_types[1]] <- "#009E73" my_pal_curr[cell_types[2]] <- "#0072B2" pres = unique(as.integer(my_table$class)) pres = pres[order(pres)] p2 <- ggplot2::ggplot(my_table, ggplot2::aes(x=x, y=y)) + ggplot2::geom_point(ggplot2::aes(size = .1, shape=19,color=class)) + ggplot2::scale_color_manual(values = my_pal_curr[pres], breaks = cell_types, labels = c('ES','RS','SPC','SPG','Sertoli'))+ ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ guides(colour = guide_legend(override.aes = list(size=2)))+ geom_segment(aes(x = 1300, y = 1000, xend = 1684.6, yend = 1000), color = "black")+labs(color='') + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank()) ggarrange(p1,p2,nrow = 2)
myRCTDde <- run.de.regions(myRCTD, region_list, datadir = datadir, cell_types = cur_cell_types, doublet_mode = F) myRCTDde@internal_vars_de$delta <- 0; myRCTDde@internal_vars_de$test_mode <- 'multi' myRCTDde@reference <- gen_small_reference() myRCTDde <- add_res_genes(myRCTDde, datadir = datadir, plot_genes = F) saveRDS(myRCTDde, file.path(datadir,'myRCTDde_updated2.rds')) make_all_de_plots(myRCTDde, datadir)
myRCTDde <- readRDS(file.path(datadir,'myRCTDde_updated2.rds')) cell_type_mapping <- c('1'="ES", '2'="RS", '3'='Myoid', '4'="SPC", '5'="SPG", '6'="Sertoli", '7'="Leydig",'8'="Endothelial",'9'="Macrophage") #extract relevant variables X2 <- myRCTDde@internal_vars_de$X2 my_beta <- myRCTDde@internal_vars_de$my_beta cell_types_present <- myRCTDde@internal_vars_de$cell_types_present cur_cell_types <- myRCTDde@internal_vars_de$cell_types all_barc <- myRCTDde@internal_vars_de$all_barc n_regions <- dim(X2)[2] gene_fits <- myRCTDde@de_results$gene_fits
res_genes <- myRCTDde@de_results$res_gene_list$`4` get_best_pair <- function(gene) { Z_max <- 0 imax <- 0 jmax <- 0 for(i in 1:3) { for(j in (i+1):4) { se <- sqrt(gene_fits$I_mat[gene, i+8]^2 + gene_fits$I_mat[gene, j+8]^2) Z <- abs(res_genes[gene,i + 4] - res_genes[gene,j+4]) / se if(Z > Z_max) { imax <- i jmax <- j Z_max <- Z } } } return(c(imax,jmax,Z_max)) } gl_type12 <- intersect(get_gene_list_type_wrapper(myRCTDde, '1', cell_types_present), get_gene_list_type_wrapper(myRCTDde, '2', cell_types_present)) res_genes <- myRCTDde@de_results$res_gene_list$`4` int_genes <- setdiff(intersect(rownames(res_genes), gl_type12), rownames(myRCTDde@de_results$res_gene_list$`1`)) int_genes <- setdiff(int_genes, rownames(myRCTDde@de_results$res_gene_list$`2`)) cur_cell_types <- c('1','2', '4') ct_thresh <- 0.9 cur_ress <- matrix(0, nrow = length(int_genes), ncol = 9) rownames(cur_ress) <- int_genes for(gene in int_genes) { bp <- get_best_pair(gene) se <- gene_fits$I_mat[gene, c(bp[1], bp[2], bp[1] + 4, bp[2] + 4, bp[1] + 8, bp[2]+ 8)]^2 se <- sqrt(c(se[1] + se[2], se[3] + se[4], se[5]+se[6])) cur_ress[gene, ] <- c(bp,(myRCTDde@de_results$gene_fits$all_vals[gene,bp[1],1:3] - myRCTDde@de_results$gene_fits$all_vals[gene,bp[2],1:3]), se) } is_good <- cur_ress[,6] > 2*pmax(abs(cur_ress[,4]),abs(cur_ress[,5])) int_genes <- int_genes[is_good] int_genes <- c('Prss40','Snx3') detect_thresh <- .25 results_df <- data.frame(gene = character(), one_pres = logical(), two_pres = logical(), four_pres = logical(), region = integer(), N = integer(), Y = numeric(), pred = numeric(), se = numeric()) for(gene in int_genes) { p_df <- get_quant_df(myRCTDde, myRCTDde@de_results$gene_fits, myRCTDde@internal_vars_de$cell_types, cur_cell_types, gene, multi_region = T, prop_thresh = ct_thresh) for(op in c(F,T)) for(fp in c(F,T)) { for(region in c(cur_ress[gene,1],cur_ress[gene,2])) { tp <- op cur_barcs <- rownames(p_df) if(op) { cur_barcs <- cur_barcs[which(p_df[cur_barcs,'X1'] + p_df[cur_barcs,'X2'] > detect_thresh)] } else { cur_barcs <- cur_barcs[which(p_df[cur_barcs,'X1'] + p_df[cur_barcs,'X2'] < detect_thresh)] } if(fp) { cur_barcs <- cur_barcs[which(p_df[cur_barcs,'X4'] > detect_thresh)] } else { cur_barcs <- cur_barcs[which(p_df[cur_barcs,'X4'] < detect_thresh)] } cur_barcs <- cur_barcs[which(p_df[cur_barcs,'region'] == region)] n <- length(cur_barcs) Y <- mean(p_df[cur_barcs, 'Y']) pred <- mean(p_df[cur_barcs, 'pred']) se <- sqrt(mean(p_df[cur_barcs,'var'])/n) de<-data.frame(gene, op, tp, fp, region, n, Y, pred, se) names(de)<-c('gene',"one_pres","two_pres", 'four_pres','region','N','Y','pred', 'se') if(op | fp) results_df <- bind_rows(results_df, de) } } } results_df$region <- factor(results_df$region) results_df$category <- factor(as.integer(results_df$four_pres)*2 + as.integer(results_df$one_pres), levels = c(1,3,2)) results_df$category <- plyr::mapvalues(results_df$category, from = c(1,2,3), to = c('S+, SPC-', 'S-, SPC+', 'S+, SPC+')) p<- ggplot(results_df, aes(x = category, y = log(pmax(500*Y, 500*10^(-4.5)),2), color = region)) + geom_point(position = position_dodge(0.2)) + geom_point(aes(group = region, y = log(500*pred,2)),position = position_dodge(0.2), shape = 5)+ facet_wrap(gene ~.) + geom_errorbar(aes(ymin = log(pmax(500*(Y - 1.96*se),500*10^(-4.5)),2), ymax = log(500*(Y + 1.96*se),2)), width = 0.2, position = position_dodge(0.2)) + theme_classic() + ylab('Log average expression') + xlab('Cell types present') + scale_color_manual("Stage", breaks = c(1,4), labels = c('I-III','IX-XII'), values = c('#E69F00',"#000000")) p_df <- cbind(results_df, 'Raw data') p_df_2 <- cbind(results_df, 'CSIDE model') p_df_2$Y <- p_df_2$pred colnames(p_df_2)[11] <- 'model' colnames(p_df)[11] <- 'model' plot_df <- rbind(p_df, p_df_2) p<- ggplot(plot_df, aes(x = category, y = log(pmax(500*Y, 500*10^(-4.5)),2), color = region, shape = model, group = region)) + geom_point(position = position_dodge(0.2)) + facet_wrap(gene ~.) + geom_errorbar(data = plot_df[plot_df$model == 'Raw data',], aes(ymin = log(pmax(500*(Y - 1.96*se),500*10^(-4.5)),2), ymax = log(500*(Y + 1.96*se),2)), width = 0.2, position = position_dodge(0.2)) + theme_classic() + ylab('Log average expression') + xlab('Cell types present') + scale_color_manual("Stage", breaks = c(1,4), labels = c('I-III','IX-XII'), values = c('#E69F00',"#000000")) + scale_shape_discrete("") p
all_ct <- c('1','2','4') info_1 <- get_marker_by_region(all_ct, '1', n_regions, myRCTDde, cell_types_present, gene_fits, trials = 50000, p_val_thresh = .001) saveRDS(info_1, file.path(datadir, 'info_1.rds')) info_1 <- readRDS(file.path(datadir, 'info_1.rds')) info_2 <- get_marker_by_region(all_ct, '2', n_regions, myRCTDde, cell_types_present, gene_fits, trials = 50000, p_val_thresh = .001) saveRDS(info_2, file.path(datadir, 'info_2.rds')) info_2 <- readRDS(file.path(datadir, 'info_2.rds')) info_4 <- get_marker_by_region(all_ct, '4', n_regions, myRCTDde, cell_types_present, gene_fits, trials = 50000, p_val_thresh = .001) saveRDS(info_4, file.path(datadir, 'info_4.rds')) info_4 <- readRDS(file.path(datadir, 'info_4.rds')) table(info_1$first_region) table(info_2$first_region) table(info_4$first_region) info_1[order(info_1$first_region),] infodf <- info_1[info_1$first_region == 4,] infodf[order(-infodf$diff),]
final_gene_list <- c('Ms4a5', 'Iqcf4', 'Hdac11', 'Tnp1','Prr22', 'Tesk2','Acbd3', 'Tmx4', 'Aurka', 'Syt11', 'Piwil1', 'Smg9') table(gene_fits$con_all[final_gene_list,1:12]) fit_df <- cbind(gene_fits$all_vals[final_gene_list,,1], gene_fits$all_vals[final_gene_list,,2], gene_fits$all_vals[final_gene_list,,3]) fit_df <- exp(fit_df) norm_df <- sweep(fit_df, 1, apply(fit_df,1,max),'/') plot_df <- reshape2::melt(norm_df) colnames(plot_df) <- c('gene', 'region_id', 'expr') plot_df$region <- ((plot_df$region_id - 1) %% n_regions)+ 1 plot_df$cell_type <- cur_cell_types[floor((plot_df$region_id - 1) / n_regions) + 1] cur_range = c(0,1) p <- ggplot(plot_df, aes(region_id, gene, fill = expr)) + geom_tile() + scale_fill_gradientn(colors = pals::brewer.blues(20)[2:20],name = "Normalized estimated expression", labels = c(0,1),breaks = c(0.001,1)) + theme_classic() + ylab('Gene')+ ggplot2::scale_size_identity() + coord_fixed() p
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.