knitr::opts_chunk$set(echo = F, warning = F, message = F, cache = T, results = 'hide')
library(RCTD) library(Matrix) library(ggplot2) library(ggpubr) library(gridExtra) library(reshape2) library(readr) library(Seurat)
load(file = '../Results2/revisions3-1-1_plot.RData') p_df <- as.data.frame(l1_errors) p1 <- ggplot(p_df, aes(x=l1_errors)) + geom_density() + theme_classic() + xlab('L1 Norm of Difference in Weight Vector')+ylab('Density of Pixels') + scale_x_continuous(breaks = c(0,0.002,0.004), limits = c(0,0.004)) p3 <- ggplot(plot_df, aes(x=log_eps, y = value, colour = barcode)) + geom_line() + theme_classic() + theme(legend.position="none") + ylim(c(0,1)) + xlab('Log Epsilon') + ylab('Estimated Granule Proportion') p_df <- as.data.frame(as.vector(l1e_mat)) colnames(p_df) <- 'x' p2 <- ggplot(p_df, aes(x=x)) + geom_density() + theme_classic() + xlab('L1 Norm of Difference in Weight Vector')+ylab('Density of Pixels') + scale_x_continuous(breaks = c(0,2e-6,4e-6), limits = c(0,4e-6)) ggarrange(p1,p3,p2, nrow = 2, ncol = 2)
load('../../data/SpatialRNA/Puck_Viz/results/gathered_results.RData') results_df <- results$results_df num_calls <- table(unlist(list(results_df$first_type[results_df$spot_class != 'reject'],results_df$second_type[results_df$spot_class == 'doublet_certain']))) not_pres <- names(num_calls)[c(3,4,6,8,11,12,13)] total_miss <- sum(num_calls[not_pres]) total_hit <- sum(num_calls[names(num_calls)[! (names(num_calls) %in% not_pres)]]) plot_df <- cbind(c(total_hit, total_miss), c('in', 'out')) colnames(plot_df) <- c('count', 'class') plot_df <- as.data.frame(plot_df) plot_df$count <- c(total_hit,total_miss) plot_df$class <- as.character(plot_df$class) plot_df$class[1] <- 'Cell Types Present' plot_df$class[2] <- 'Cell Types Absent' p2 <- ggplot(plot_df, aes(x=class, y=count)) + geom_bar(stat="identity") + theme_classic() + ylab('Number of Predictions') + xlab('') of_int <- num_calls[not_pres] of_int['In'] <- total_hit / 12 plot_df <- as.data.frame(cbind(names(of_int), of_int)) plot_df$of_int <- of_int colnames(plot_df) <- c('type', 'value') plot_df$type <- as.character(plot_df$type) plot_df['In','type'] <- 'Average Present Type' p1 <- ggplot(plot_df, aes(x=type,y=value)) + geom_bar(stat="identity") + theme_classic() + ylab('Number of Predictions') + xlab('Cell Type') + theme(axis.text.x = element_text(angle = 45, hjust = 1)) ggarrange(p2,p1)
load(file = '../Results2/out_ref_0.RData') transform_plot <- function(plot_df) { colnames(plot_df) <- c('Reference', 'Prediction', 'value') targ_lev <- levels(plot_df$Reference) prev_lev <- levels(plot_df$Prediction) plot_df$Prediction <- factor(plot_df$Prediction,c(prev_lev[prev_lev %in% targ_lev], prev_lev[! prev_lev %in% targ_lev])) plot_df$diag = as.character(plot_df$Prediction) == as.character(plot_df$Reference) plot_df$diag[!plot_df$diag] <- NA p1 <- ggplot(plot_df, aes(Reference, Prediction, fill= value)) + geom_tile() +theme_classic() +scale_fill_gradientn(colors = pals::brewer.blues(20)[2:20], limits= c(0,1),name = "Classification Proportion")+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + xlab('True Cell Type')+ ylab('Predicted Cell Type') + geom_tile(data = plot_df, aes(color = diag), size = 0.7) + scale_color_manual(guide = FALSE, values = c('TRUE' = "#D55E00")) return(p1) } gen_plot <- function(classify_mat) { classify_mat[is.na(classify_mat)] <- 0 plot_df <- melt(sweep(classify_mat, 1, rowSums(classify_mat),'/')) p1 <- transform_plot(plot_df) return(p1) } p1 <- gen_plot(classify_mat) load(file = '../Results2/out_ref_1.RData') p2 <- gen_plot(classify_mat) load(file = '../Results2/out_ref_2.RData') p3 <- gen_plot(classify_mat) load(file = '..//Results2/out_ref_3.RData') p4 <- gen_plot(classify_mat) load(file = '../Results2/out_ref_4.RData') p5 <- gen_plot(classify_mat) ggarrange(p1,p2,p3,p4,p5,nrow = 3, ncol = 2, common.legend = T)
load(file = '../Results2/confidence_class_remove.RData') library(plyr) metadir <- "../../data/SpatialRNA/Puck_Viz/MetaData" meta_data <- readRDS(file.path(metadir,"meta_data.RDS")) keep_barc <- colnames(myRCTD@spatialRNA@counts)[1:25740 %% 30 <= 2] meta_df <- meta_data$meta_df meta_df_sm <- meta_df[keep_barc,] meta_df_sm <- do.call("rbind", replicate(10, meta_df_sm, simplify = FALSE)) rownames(meta_df_sm) <- as.character(1:25740) meta_df_sm$UMI_tot <- ceiling(1:25740 / 2574)* 100 #first UMI is no longer accurate, only proportional cur_types <- myRCTD@cell_type_info$info[[2]][c(1,2,5,7,14,15,16,17)] my_barc <- (meta_df_sm$first_UMI == 1000 & meta_df_sm$first_type %in% cur_types) | (meta_df_sm$first_UMI == 0 & meta_df_sm$second_type %in% cur_types) singl_df <- aggregate(myRCTD@results$results_df$spot_class[my_barc] != 'reject', list(floor(myRCTD@spatialRNA@nUMI/100 )[my_barc]), mean) diff <- singl_df$x - colMeans(conf_df_all_types_alt[cur_types,])/30 plot_df <- as.data.frame(cbind(c(singl_df$x,colMeans(conf_df_all_types_alt[cur_types,])/30),c(rep(1,10),rep(2,10)),rep((1:10)*100,2))) colnames(plot_df) <- c('perf','class','UMI') plot_df$class <- factor(plot_df$class) plot_df$class <- mapvalues(plot_df$class,c(1,2),c('Regular','Class Removed')) p1 <- ggplot(plot_df, aes(x=UMI,y=perf, group = class, colour = class)) + geom_line() + xlim(0,1000) + ylim(0,1) + theme_classic() + xlab('UMI Counts per Pixel') + ylab('Percent Pixels Confident') + theme(legend.title = element_blank(), legend.position = 'top') print(mean(diff)) results_df <- myRCTD@results$results_df first_pred <- myRCTD@internal_vars$class_df[results_df$first_type,] == myRCTD@internal_vars$class_df[as.character(meta_df_sm$first_type),] | myRCTD@internal_vars$class_df[results_df$first_type,] == myRCTD@internal_vars$class_df[as.character(meta_df_sm$second_type),] first_conf <- results_df$spot_class != 'reject' accuracy_first <- aggregate(first_pred[first_conf], list(myRCTD@spatialRNA@nUMI[first_conf]), mean) second_pred <- myRCTD@internal_vars$class_df[results_df$second_type,] == myRCTD@internal_vars$class_df[as.character(meta_df_sm$first_type),] | myRCTD@internal_vars$class_df[results_df$second_type,] == myRCTD@internal_vars$class_df[as.character(meta_df_sm$second_type),] second_conf <- results_df$spot_class == 'doublet_certain' accuracy_second <- aggregate(second_pred[second_conf], list(myRCTD@spatialRNA@nUMI[second_conf]), mean) accuracy_tot <- (aggregate(first_pred[first_conf], list(myRCTD@spatialRNA@nUMI[first_conf]), sum)$x + aggregate(second_pred[second_conf], list(myRCTD@spatialRNA@nUMI[second_conf]), sum)$x) / (aggregate(first_pred[first_conf], list(myRCTD@spatialRNA@nUMI[first_conf]), length)$x + aggregate(second_pred[second_conf], list(myRCTD@spatialRNA@nUMI[second_conf]), length)$x) names(accuracy_tot) <- accuracy_first$Group.1 plot_df <- melt(accuracy_tot) plot_df$UMI <- as.numeric(rownames(plot_df)) p3<- ggplot(plot_df, aes(x=UMI,y=value)) + geom_line() + ylim(0,1) + theme_classic() + xlab('UMI Counts per Pixel') + ylab('Percent Pixels Accurate') + theme(legend.position = 'top') + xlim(0,1000) + ylim(0,1) + labs(colour = "Number of Cell Types per Pixel") load(file = '../Results2/confidence_cer.RData') all_df <- rbind(singl_df,doub_df) all_df$class <- c(rep(1,dim(singl_df)[1]),rep(2,dim(doub_df)[1])) all_df$class <- factor(all_df$class) p2<- ggplot(all_df, aes(x=Group.1*100,y=x, group = class, colour = class)) + geom_line() + ylim(0,1) + theme_classic() + xlab('UMI Counts per Pixel') + ylab('Percent Pixels Confident') + theme(legend.position = 'top') + xlim(0,1000) + ylim(0,1) + labs(colour = "Number of Cell Types per Pixel") load(file = '../Results2/umi_sim.RData') keep_barc <- colnames(myRCTD@spatialRNA@counts)[1:25740 %% 30 <= 2] metadir <- "../../data/SpatialRNA/Puck_Viz/MetaData" meta_data <- readRDS(file.path(metadir,"meta_data.RDS")) meta_df <- meta_data$meta_df meta_df_sm <- meta_df[keep_barc,] meta_df_sm <- do.call("rbind", replicate(10, meta_df_sm, simplify = FALSE)) rownames(meta_df_sm) <- as.character(1:25740) meta_df_sm$UMI_tot <- ceiling(1:25740 / 2574)* 100 #first UMI is no longer accurate, only proportional singl_df <- aggregate(myRCTD@results$results_df$spot_class != 'reject', list(floor(myRCTD@spatialRNA@nUMI/100 )), mean) doublets = myRCTD@results$results_df$spot_class %in% c('doublet_certain','doublet_uncertain') doub_df <- aggregate(myRCTD@results$results_df$spot_class[doublets] == 'doublet_certain', list(floor(myRCTD@spatialRNA@nUMI[doublets]/100 )), mean) all_df <- rbind(singl_df,doub_df) all_df$class <- c(rep(1,dim(singl_df)[1]),rep(2,dim(doub_df)[1])) all_df$class <- factor(all_df$class) p4<- ggplot(all_df, aes(x=Group.1*100,y=x, group = class, colour = class)) + geom_line() + ylim(0,1) + theme_classic() + xlab('UMI Counts per Pixel') + ylab('Percent Pixels Confident') + theme(legend.position = 'top') + xlim(0,1000) + ylim(0,1) + labs(colour = "Number of Cell Types per Pixel") ggarrange(p1,p2,p3,p4, nrow = 2, ncol = 2)
my_barc <- rownames(res_df)[res_df$meta_df.first_type == 'Bergmann' & res_df$meta_df.second_type == "Granule"] err_barc <- my_barc[results_df[my_barc,'second_type'] %in% c('Endothelial', 'Fibroblast')] all_classifications <- table(results_df[250 <= meta_df$first_UMI & meta_df$first_UMI <= 750 & meta_df$first_type == 'Bergmann' & meta_df$second_type == 'Granule' & results_df$spot_class == 'doublet_certain','second_type']) + table(results_df[250 <= meta_df$first_UMI & meta_df$first_UMI <= 750 & meta_df$first_type == 'Bergmann' & meta_df$second_type == 'Granule' & results_df$spot_class != 'reject','first_type']) plot_df <- as.data.frame(all_classifications) ggplot(plot_df, aes(Var1,Freq)) + geom_bar(stat="identity") + theme_classic() + ylab('Number of Predictions') + xlab('Cell Type') + theme(axis.text.x = element_text(angle = 45, hjust = 1))
###PLOT TRANSCRIPTIONALLY SIMILAR DOUBLET RATE### load('../../data/SpatialRNA/Puck_Viz/results/gathered_results.RData') results_df <- results$results_df metadir <- "../../data/SpatialRNA/Puck_Viz/MetaData" meta_data <- readRDS(file.path(metadir,"meta_data.RDS")) meta_df <- meta_data$meta_df sim_barc <- (meta_df$first_type == "Astrocytes" & meta_df$second_type == 'Bergmann') | (meta_df$first_type == "Endothelial" & meta_df$second_type == 'Fibroblast') | (meta_df$first_type == "MLI1" & meta_df$second_type == 'MLI2') | (meta_df$first_type == "Oligodendrocytes" & meta_df$second_type == 'Polydendrocytes') UMI_list <- meta_data$UMI_list N_UMI_cond <- (length(UMI_list) + 1)/2 spot_class_df <- as.data.frame(Matrix(0,nrow = N_UMI_cond, ncol = 2)) rownames(spot_class_df) <- UMI_list[1:N_UMI_cond]; colnames(spot_class_df) <- c('singlets','doublets') barcodes <- rownames(results_df) for(nUMI in UMI_list) { vec = table(results_df[barcodes[meta_df$first_UMI == nUMI],]$spot_class) spot_class_df[as.character(nUMI), 'singlets'] = vec["singlet"] spot_class_df[as.character(nUMI), 'doublets'] = sum(vec) - vec["singlet"] - vec["reject"] vec = table(results_df[barcodes[meta_df$first_UMI == nUMI & sim_barc],]$spot_class) spot_class_df[as.character(nUMI), 'singlets_sim'] = vec["singlet"] spot_class_df[as.character(nUMI), 'doublets_sim'] = sum(vec) - vec["singlet"] - vec["reject"] } frac_error <- spot_class_df["500",'singlets_sim'] / spot_class_df["500",'singlets'] spot_class_df[1:6,] <- spot_class_df[1:6,] + spot_class_df[13:8,] spot_class_df <- spot_class_df[1:7,c(3,4)] spot_class_df <- sweep(spot_class_df, 1, rowSums(spot_class_df),'/') spot_class_df[,"nUMI"] <- UMI_list[1:N_UMI_cond] df <- melt(spot_class_df, id.vars = 'nUMI', variable.name = 'series') df$std <- sqrt(df$value*(1 - df$value)/(240)[1]) df$std[df$nUMI == 500] <- df$std[df$nUMI == 500]*sqrt(2) doublet_df <- df my_pal = pals::coolwarm(20) doublet_df$prop <- doublet_df$nUMI / 1000 plot_df = doublet_df[doublet_df$series == "doublets_sim",] p1 <- ggplot(plot_df, aes(prop,value)) + geom_line() + geom_errorbar(aes(ymin=value-1.96*std, ymax=value+1.96*std), width=.02,position=position_dodge(.001)) + theme_classic() + geom_hline(yintercept=0, linetype="dashed", color = "grey", size=0.5) + geom_hline(yintercept=1, linetype="dashed", color = "grey", size=0.5) + xlab("UMI Proportion of Minority Cell Type") + ylab("Doublet Classification Rate") + scale_color_manual(values=c(my_pal[1], my_pal[20]),labels = c("Doublet"), name = "Class") + scale_y_continuous(breaks = c(0,0.5,1), limits = c(0,1))+ scale_x_continuous(breaks = c(0,0.25,0.5), limits = c(-.03,0.53)) p1
myRCTDsn <- readRDS('../../myRCTD_cer.rds') #percent of genes small sum(rowMeans(myRCTDsn@cell_type_info$renorm[[1]][myRCTDsn@internal_vars$gene_list_reg,]) < 1e-3) / length(myRCTDsn@internal_vars$gene_list_reg) myRCTDsc <- readRDS('../../myRCTD_cer_viz.rds') success <- 0 total <- 0 results_df_sc <- myRCTDsc@results$results_df results_df_sn <- myRCTDsn@results$results_df for(i in 1:dim(myRCTDsn@results$results_df)[1]) { if(results_df_sc$spot_class[i] == 'reject' | results_df_sc$first_class[i]) { if(results_df_sc$spot_class[i] == 'doublet_certain' & !results_df_sc$second_class[i]) sc_types <- c(as.character(results_df_sc$second_type[i])) else sc_types <- c() } else if(results_df_sc$spot_class[i] == 'doublet_certain' & !(results_df_sc$second_class[i])) { sc_types <- c(as.character(results_df_sc$first_type[i]), as.character(results_df_sc$second_type[i])) } else sc_types <- c(as.character(results_df_sc$first_type[i])) if(results_df_sn$spot_class[i] == 'reject' | results_df_sn$first_class[i]) { if(results_df_sn$spot_class[i] == 'doublet_certain' & !results_df_sn$second_class[i]) sn_types <- c(as.character(results_df_sn$second_type[i])) else sn_types <- c() } else if(results_df_sn$spot_class[i] == 'doublet_certain' & !(results_df_sn$second_class[i])) { sn_types <- c(as.character(results_df_sn$first_type[i]), as.character(results_df_sn$second_type[i])) } else sn_types <- c(as.character(results_df_sn$first_type[i])) success <- success + length(intersect(sc_types,sn_types)) total <- total + min(length(sc_types), length(sn_types)) } print(success/total) sing_barc <- results_df_sn$spot_class == 'singlet' & results_df_sc$spot_class == 'singlet' & !results_df_sn$first_class & !results_df_sc$first_class co_df <- data.frame(results_df_sn$first_type[sing_barc], results_df_sc$first_type[sing_barc], 1) colnames(co_df) <- c('x','y','z') plot_df <- aggregate(. ~x+y, co_df ,sum) tot_df <- aggregate(. ~ y, plot_df, sum) rownames(tot_df) <- tot_df$y plot_df$prop <- plot_df$z / tot_df[plot_df$y,]$z colnames(plot_df) <- c('Reference','Prediction','skip','value') p_df <- plot_df[!(plot_df[,2] %in% c('Choroid','Microglia')),c(2,1,4)] p_mat <- dcast(p_df, formula = Prediction ~ Reference) rownames(p_mat) <- p_mat$Prediction p_mat$Prediction <- NULL p_mat[is.na(p_mat)] <- 0 p1 <- gen_plot(as.matrix(p_mat)) + theme(legend.position = 'top') + xlab('Predicted Cell Type Single-Cell') + ylab('Predicted Cell Type Single-Nucleus') ggarrange(p1)
##PLOT### myRCTDmulti <- readRDS('../Results2/RCTDmulti_cer.rds') num_types_all <- unlist(lapply(myRCTDmulti@results, function(x) length(x$conf_list))) p_df <- as.data.frame(num_types_all) p2 <- ggplot(p_df, aes(x=num_types_all)) + geom_histogram() + theme_classic() + xlab('Predicted Number of Cell Types per Pixel')+ylab('Number of Pixels') sum(num_types_all >= 3) / length(num_types_all) myRCTDmulti <- readRDS('../Results2/RCTDmulti_hc.rds') num_types_all <- unlist(lapply(myRCTDmulti@results, function(x) length(x$conf_list))) p_df <- as.data.frame(num_types_all) p1 <- ggplot(p_df, aes(x=num_types_all)) + geom_histogram() + theme_classic() + xlab('Predicted Number of Cell Types per Pixel')+ylab('Number of Pixels') sum(num_types_all >= 3) / length(num_types_all) myRCTDmulti <- readRDS('../Results2/RCTDmulti_som.rds') num_types_all <- unlist(lapply(myRCTDmulti@results, function(x) length(x$conf_list))) p_df <- as.data.frame(num_types_all) p3 <- ggplot(p_df, aes(x=num_types_all)) + geom_histogram() + theme_classic() + xlab('Predicted Number of Cell Types per Pixel')+ylab('Number of Pixels') sum(num_types_all >= 3) / length(num_types_all) myRCTDmulti <- readRDS('../Results2/RCTDmulti_visium.rds') num_types_all <- unlist(lapply(myRCTDmulti@results, function(x) length(x$conf_list))) p_df <- as.data.frame(num_types_all) p4 <- ggplot(p_df, aes(x=num_types_all)) + geom_histogram() + theme_classic() + xlab('Predicted Number of Cell Types per Pixel')+ylab('Number of Pixels') sum(num_types_all >= 3) / length(num_types_all) ggarrange(p2,p1,p3,p4)
weights <- readRDS('../../DWLS-master/ref/DWLS-weights.RDS') common_cell_types = c("Astrocytes", "Bergmann", "Endothelial", "Fibroblast", "Golgi", "Granule", "MLI1", "MLI2", "Oligodendrocytes", "Polydendrocytes", "Purkinje", "UBCs") class_df <- data.frame(colnames(weights), row.names = colnames(weights)); colnames(class_df)[1] = "class" class_df["Bergmann","class"] = "Astrocytes" class_df["Fibroblast","class"] = "Endothelial" class_df["MLI2","class"] = "MLI1" class_df["Macrophages","class"] = "Microglia" class_df["Polydendrocytes","class"] = "Oligodendrocytes" meta_data <- readRDS('../../DWLS-master/ref/MetaData/meta_data.RDS') beads <- readRDS('../../DWLS-master/ref/beads.rds') meta_df <- meta_data$meta_df keep_barc <- colnames(beads)[(meta_df$first_UMI == 0 & meta_df$second_type %in% common_cell_types) | (meta_df$first_UMI == 1000 & meta_df$first_type %in% common_cell_types)] pred <- colnames(weights)[apply(weights,1,which.max)] names(pred) <- keep_barc first_barc = keep_barc[meta_df[keep_barc,]$first_UMI == 1000] sec_barc = keep_barc[meta_df[keep_barc,]$first_UMI == 0] res_df <- cbind(c(as.character(meta_df[first_barc,'first_type']), as.character(meta_df[sec_barc,'second_type'])), c(pred[first_barc],pred[sec_barc])) colnames(res_df) <- c('true','pred') res_df <- as.data.frame(res_df) res_df$y = 1 plot_df <- aggregate(y ~ true + pred, res_df, length) plot_df <- plot_df[plot_df$true %in% common_cell_types,] p_mat <- dcast(plot_df, formula = true ~ pred) rownames(p_mat) <- p_mat$true p_mat$true <- NULL p_mat[is.na(p_mat)] <- 0 p1 <- gen_plot(as.matrix(p_mat)) + theme(legend.position = 'top') accuracy = sum(plot_df[as.character(plot_df$true) == as.character(plot_df$pred),'y']) / sum(plot_df$y) accuracy_class = (sum(plot_df[class_df[as.character(plot_df$true),'class'] == class_df[as.character(plot_df$pred),'class'],'y'])) / sum(plot_df$y) weights <- readRDS('../../DWLS-master/DWLS-weights.RDS') meta_data <- readRDS('../../DWLS-master/MetaData/meta_data.RDS') beads <- readRDS('../../DWLS-master/beads.rds') meta_df <- meta_data$meta_df keep_barc <- colnames(beads)[(meta_df$first_UMI == 0 | meta_df$first_UMI == 1000) & 1:25740 %% 30 <= 2] pred <- colnames(weights)[apply(weights,1,which.max)] names(pred) <- keep_barc first_barc = keep_barc[meta_df[keep_barc,]$first_UMI == 1000] sec_barc = keep_barc[meta_df[keep_barc,]$first_UMI == 0] res_df <- cbind(c(as.character(meta_df[first_barc,'first_type']), as.character(meta_df[sec_barc,'second_type'])), c(pred[first_barc],pred[sec_barc])) colnames(res_df) <- c('true','pred') res_df <- as.data.frame(res_df) res_df$y = 1 plot_df <- aggregate(y ~ true + pred, res_df, length) p_mat <- dcast(plot_df, formula = true ~ pred) rownames(p_mat) <- p_mat$true p_mat$true <- NULL p_mat[is.na(p_mat)] <- 0 p2 <- gen_plot(as.matrix(p_mat)) + theme(legend.position = 'top') accuracy = sum(plot_df[as.character(plot_df$true) == as.character(plot_df$pred),'y']) / sum(plot_df$y) accuracy_class = (sum(plot_df[class_df[as.character(plot_df$true),'class'] == class_df[as.character(plot_df$pred),'class'],'y'])) / sum(plot_df$y) ggarrange(p1,p2,common.legend = T)
myRCTDsc <- readRDS('../../myRCTD_cer_viz.rds') my_pal = pals::brewer.blues(20)[2:20] my_mod <- function(p) { p + scale_x_continuous(breaks = c(1000,3000,5000), limits = c(900,5600)) + scale_y_continuous(breaks = c(1000,3000,5000), limits = c(1000,4900))+ theme(legend.position="top") + geom_segment(aes(x = 1300, y = 1700, xend = 1684.6, yend = 1700), color = "black")+ theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank()) } results_df <- myRCTDsc@results$results_df weights_doublet <- myRCTDsc@results$weights_doublet plots <- list() puck <- myRCTDsc@spatialRNA for(i in 1:length(myRCTDsc@cell_type_info$info[[2]])) { cell_type = myRCTDsc@cell_type_info$info[[2]][i] cur_range <- c(0,1) all_weights <- weights_doublet[results_df$spot_class == "doublet_certain" & results_df$second_type == cell_type,2, drop=FALSE] all_weights <- rbind(all_weights, weights_doublet[!(results_df$spot_class == "reject") & results_df$first_type == cell_type,1,drop=FALSE]) all_weights_vec <- as.vector(all_weights); names(all_weights_vec) <- rownames(all_weights) plots[[i]] <- plot_puck_continuous(puck, rownames(all_weights), all_weights_vec, ylimit = cur_range,title=cell_type) plots[[i]] <- my_mod(plots[[i]])+ggplot2::scale_colour_gradientn("Weight", colors = my_pal, limits = cur_range, breaks = cur_range) } ggarrange(plotlist = plots,ncol=4,nrow=5, common.legend = T)
myRCTD <- readRDS('../../data/SpatialRNA/Somato_200306_02/myRCTD_somato_enlarged.rds') my_pal = pals::brewer.blues(20)[2:20] my_mod <- function(p) { p + scale_x_continuous(breaks = c(1000,3000,5000), limits = c(2300,5600)) + scale_y_continuous(breaks = c(1000,3000,5000), limits = c(2200,5400))+ theme(legend.position="top") + geom_segment(aes(x = 2400, y = 2400, xend = 2784.6, yend = 2400), color = "black")+ theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank()) } results_df <- myRCTD@results$results_df weights_doublet <- myRCTD@results$weights_doublet plots <- list() puck <- myRCTD@spatialRNA for(i in 1:length(myRCTD@cell_type_info$info[[2]])) { cell_type = myRCTD@cell_type_info$info[[2]][i] cur_range <- c(0,1) all_weights <- weights_doublet[results_df$spot_class == "doublet_certain" & results_df$second_type == cell_type,2, drop=FALSE] all_weights <- rbind(all_weights, weights_doublet[!(results_df$spot_class == "reject") & results_df$first_type == cell_type,1,drop=FALSE]) all_weights_vec <- as.vector(all_weights); names(all_weights_vec) <- rownames(all_weights) plots[[i]] <- plot_puck_continuous(puck, rownames(all_weights), all_weights_vec, ylimit = cur_range,title=cell_type) plots[[i]] <- my_mod(plots[[i]])+ggplot2::scale_colour_gradientn("Weight", colors = my_pal, limits = cur_range, breaks = cur_range) } ggarrange(plotlist = plots,ncol=4,nrow=5, common.legend = T)
my_mod <- function(p) { p + scale_x_continuous(breaks = c(1000,3000,5000), limits = c(2300,5600)) + scale_y_continuous(breaks = c(1000,3000,5000), limits = c(2200,5400))+ theme(legend.position="top") + geom_segment(aes(x = 2400, y = 2400, xend = 2784.6, yend = 2400), color = "black")+ theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank()) } cell_type_names <- c(myRCTD@cell_type_info$info[[2]][3:9]) results_df <- myRCTD@results$results_df puck <- myRCTD@spatialRNA barcodes = rownames(results_df[results_df$spot_class != "reject" & puck@nUMI >= 100,]) my_table = puck@coords[barcodes,] my_table$class = results_df[barcodes,]$first_type n_levels = myRCTD@cell_type_info$info[[3]] my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)] names(my_pal) = myRCTD@cell_type_info$info[[2]] my_pal_curr <- my_pal my_pal_curr[cell_type_names[1]] <- "#CC79A7" my_pal_curr[cell_type_names[2]] <- "#E69F00" my_pal_curr[cell_type_names[5]] <- "#56B4E9" my_pal_curr[cell_type_names[4]] <- "#009E73" my_pal_curr[cell_type_names[3]] <- "#F0E442" my_pal_curr[cell_type_names[6]] <- "#0072B2" my_pal_curr[cell_type_names[7]] <- "#D55E00" #my_pal_curr[cell_type_names[8]] <- "#000000" my_table <- my_table[my_table$class %in% cell_type_names,] pres = unique(as.integer(my_table$class)) pres = pres[order(pres)] p1 <- ggplot2::ggplot(my_table, ggplot2::aes(x=x, y=y)) + ggplot2::geom_point(ggplot2::aes(size = .1, shape=19,color=class)) + ggplot2::scale_color_manual("",values = my_pal_curr[pres], breaks = cell_type_names, labels = cell_type_names)+ ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ guides(colour = guide_legend(override.aes = list(size=2))) p1 <- my_mod(p1) ggarrange(p1)
load(file = '../Results2/four_types_viz.RData') map_class <- function(list_my, class_df) { return(as.character(class_df[list_my,'class'])) } class_df <- RCTD@internal_vars$class_df N_pixels <- length(RCTD@results) num_types_all <- unlist(lapply(RCTD@results, function(x) length(x$conf_list))) p_df <- as.data.frame(num_types_all) p3 <- ggplot(p_df, aes(x=num_types_all)) + geom_histogram() + theme_classic() + xlab('Predicted Number of Cell Types per Pixel')+ylab('Number of Pixels') sum(num_types_all >= 3) / length(num_types_all) #num_types <- unlist(lapply(RCTD@results, function(x) length(unique(map_class(names(x$conf_list),class_df))))) #hist(num_types) #num cell types predicted conf_types_all <- unlist(lapply(RCTD@results, function(x) sum(x$conf_list))) conf_types_class <- unlist(lapply(RCTD@results, function(x) length(unique(map_class(names(which(x$conf_list)),class_df))))) correct_num <- unlist(lapply(1:N_pixels, function(i) sum(unique(map_class(RCTD@results[[i]]$cell_type_list[RCTD@results[[i]]$conf_list],class_df)) %in% map_class(names(which(truth[i,] > 0)),class_df)))) sum(correct_num) / sum(conf_types_class) #overall accuracy plot_df <- numeric(4) for(i in 1:4) plot_df[i] <- mean(correct_num[conf_types_class == i])/i plot_df <- data.frame(cbind(1:4,plot_df)) colnames(plot_df) <- c('n','accuracy') p2 <- ggplot(plot_df, aes(x=n,y=accuracy)) + geom_line() + ylim(0,1) + theme_classic() + xlab('Number of Cell Types per Pixel') + ylab('Percent Pixels Accurate') + theme(legend.title = element_blank(), legend.position = 'top') #for(i in 1:4) # hist(correct_num[conf_types == i]) plot_df <- numeric(4) for(i in 1:4) plot_df[i] <- sum(conf_types_all >= i) / sum(num_types_all >= i) plot_df <- data.frame(cbind(1:4,plot_df)) colnames(plot_df) <- c('n','confidence') p1 <- ggplot(plot_df, aes(x=n,y=confidence)) + geom_line() + ylim(0,1) + theme_classic() + xlab('Number of Cell Types per Pixel') + ylab('Percent Pixels Confident') + theme(legend.title = element_blank(), legend.position = 'top') print(mean(diff)) load(file = '../Results2/three_types_viz.RData') map_class <- function(list_my, class_df) { return(as.character(class_df[list_my,'class'])) } class_df <- RCTD@internal_vars$class_df N_pixels <- length(RCTD@results) num_types_all <- unlist(lapply(RCTD@results, function(x) length(x$conf_list))) p_df <- as.data.frame(num_types_all) p6 <- ggplot(p_df, aes(x=num_types_all)) + geom_histogram() + theme_classic() + xlab('Predicted Number of Cell Types per Pixel')+ylab('Number of Pixels') sum(num_types_all >= 3) / length(num_types_all) #num_types <- unlist(lapply(RCTD@results, function(x) length(unique(map_class(names(x$conf_list),class_df))))) #hist(num_types) #num cell types predicted conf_types_all <- unlist(lapply(RCTD@results, function(x) sum(x$conf_list))) conf_types_class <- unlist(lapply(RCTD@results, function(x) length(unique(map_class(names(which(x$conf_list)),class_df))))) correct_num <- unlist(lapply(1:N_pixels, function(i) sum(unique(map_class(RCTD@results[[i]]$cell_type_list[RCTD@results[[i]]$conf_list],class_df)) %in% map_class(names(which(truth[i,] > 0)),class_df)))) sum(correct_num) / sum(conf_types_class) #overall accuracy plot_df <- numeric(4) for(i in 1:4) plot_df[i] <- mean(correct_num[conf_types_class == i])/i plot_df <- data.frame(cbind(1:4,plot_df)) colnames(plot_df) <- c('n','accuracy') p5 <- ggplot(plot_df, aes(x=n,y=accuracy)) + geom_line() + ylim(0,1) + theme_classic() + xlab('Number of Cell Types per Pixel') + ylab('Percent Pixels Accurate') + theme(legend.title = element_blank(), legend.position = 'top') #for(i in 1:4) # hist(correct_num[conf_types == i]) plot_df <- numeric(4) for(i in 1:4) plot_df[i] <- sum(conf_types_all >= i) / sum(num_types_all >= i) plot_df <- data.frame(cbind(1:4,plot_df)) colnames(plot_df) <- c('n','confidence') p4 <- ggplot(plot_df, aes(x=n,y=confidence)) + geom_line() + ylim(0,1) + theme_classic() + xlab('Number of Cell Types per Pixel') + ylab('Percent Pixels Confident') + theme(legend.title = element_blank(), legend.position = 'top') print(mean(diff)) ggarrange(p4,p5,p6,p1,p2,p3,nrow = 2,ncol = 3)
load(file = '../Results2/four_types_viz.RData') RCTD <- readRDS(file = '../Results2/four_type_full.rds') transform_class <- function(weights, class_df) { class_weights <- matrix(0, nrow = dim(weights)[1], ncol = dim(weights)[2]) colnames(class_weights) <- colnames(weights) for(type in colnames(weights)) { class_weights[,class_df[type,]] <- class_weights[,class_df[type,]] + weights[,type] } return(class_weights) } normWeights <- sweep(RCTD@results$weights,1,rowSums(RCTD@results$weights),'/') class_weights <- transform_class(normWeights, RCTD@internal_vars$class_df) UMI_beads <- rowSums(truth) normTruth <- sweep(truth,1,UMI_beads,'/') class_truth <- transform_class(normTruth, RCTD@internal_vars$class_df) mean(rowSums(abs(normWeights - normTruth)) / rowSums(abs(normTruth))) class_weights_random <- class_weights[sample(1:dim(class_weights)[1]),] par(mfrow = c(1,2)) #hist(sqrt(rowSums(abs(class_weights - class_truth)^2) / rowSums(abs(class_truth)^2))) #hist(sqrt(rowSums(abs(class_weights_random - class_truth)^2) / rowSums(abs(class_truth)^2))) R2_val <- numeric(length(common_cell_types)); names(R2_val) = common_cell_types plots <- list() i <- 1 cell_classes <- common_cell_types[c(1,3,5,6,7,9,11,12)] cell_class_names <- c('Astrocytes-Bergmann','Endothelial-Fibroblast','Golgi','Granule','MLI','Oligoden.-Polyden.','Purkinje','UBCs') names(cell_class_names) <- cell_classes for(type in cell_classes) { plot_df <-as.data.frame(cbind(class_weights[,type],class_truth[,type])) plots[[i]] <- ggplot(plot_df, aes(V2,V1)) + geom_point() + xlim(0,1) + ylim(0,1) + theme_classic() + xlab('True Proportion') + ylab('Predicted Proportion') + ggplot2::ggtitle(cell_class_names[type]) i <- i + 1 R2_val[type] <- cor(plot_df$V1,plot_df$V2)^2 } ggarrange(plotlist = plots,ncol=2,nrow=4)
load(file = '../Results2/three_types_viz.RData') RCTD <- readRDS(file = '../Results2/three_type_full.rds') transform_class <- function(weights, class_df) { class_weights <- matrix(0, nrow = dim(weights)[1], ncol = dim(weights)[2]) colnames(class_weights) <- colnames(weights) for(type in colnames(weights)) { class_weights[,class_df[type,]] <- class_weights[,class_df[type,]] + weights[,type] } return(class_weights) } normWeights <- sweep(RCTD@results$weights,1,rowSums(RCTD@results$weights),'/') class_weights <- transform_class(normWeights, RCTD@internal_vars$class_df) UMI_beads <- rowSums(truth) normTruth <- sweep(truth,1,UMI_beads,'/') class_truth <- transform_class(normTruth, RCTD@internal_vars$class_df) mean(rowSums(abs(normWeights - normTruth)) / rowSums(abs(normTruth))) class_weights_random <- class_weights[sample(1:dim(class_weights)[1]),] par(mfrow = c(1,2)) #hist(sqrt(rowSums(abs(class_weights - class_truth)^2) / rowSums(abs(class_truth)^2))) #hist(sqrt(rowSums(abs(class_weights_random - class_truth)^2) / rowSums(abs(class_truth)^2))) R2_val <- numeric(length(common_cell_types)); names(R2_val) = common_cell_types plots <- list() i <- 1 cell_classes <- common_cell_types[c(1,3,5,6,7,9,11,12)] cell_class_names <- c('Astrocytes-Bergmann','Endothelial-Fibroblast','Golgi','Granule','MLI','Oligoden.-Polyden.','Purkinje','UBCs') names(cell_class_names) <- cell_classes for(type in cell_classes) { plot_df <-as.data.frame(cbind(class_weights[,type],class_truth[,type])) plots[[i]] <- ggplot(plot_df, aes(V2,V1)) + geom_point() + xlim(0,1) + ylim(0,1) + theme_classic() + xlab('True Proportion') + ylab('Predicted Proportion') + ggplot2::ggtitle(cell_class_names[type]) i <- i + 1 R2_val[type] <- cor(plot_df$V1,plot_df$V2)^2 } ggarrange(plotlist = plots,ncol=2,nrow=4)
get_marker_data <- function(cell_type_names, cell_type_means, gene_list) { marker_means = cell_type_means[gene_list,] marker_norm = marker_means / rowSums(marker_means) marker_data = as.data.frame(cell_type_names[max.col(marker_means)]) marker_data$max_epr <- apply(cell_type_means[gene_list,],1,max) colnames(marker_data) = c("cell_type",'max_epr') rownames(marker_data) = gene_list marker_data$log_fc <- 0 epsilon <- 1e-9 for(cell_type in unique(marker_data$cell_type)) { cur_genes <- gene_list[marker_data$cell_type == cell_type] other_mean = rowMeans(cell_type_means[cur_genes,cell_type_names != cell_type]) marker_data$log_fc[marker_data$cell_type == cell_type] <- log(epsilon + cell_type_means[cur_genes,cell_type]) - log(epsilon + other_mean) } return(marker_data) } myRCTD <- readRDS('../../myRCTD_cer.rds') cur_cell_types <- c("Bergmann","Granule","Purkinje","MLI2","Oligodendrocytes") puck <- myRCTD@spatialRNA cell_type_info_restr = myRCTD@cell_type_info$info cell_type_info_restr[[1]] = cell_type_info_restr[[1]][,cur_cell_types] cell_type_info_restr[[2]] = cur_cell_types; cell_type_info_restr[[3]] = length(cur_cell_types) de_genes <- get_de_genes(cell_type_info_restr, puck, fc_thresh = 4, expr_thresh = .0001, MIN_OBS = 3) marker_data_de = get_marker_data(cell_type_info_restr[[2]], cell_type_info_restr[[1]], de_genes) saveRDS(marker_data_de, '../../Data/SpatialRNA/NewCerPuck_190926_08/results/marker_data_de_MLI2.RDS')
load(file = '../../Data/SpatialRNA/NewCerPuck_190926_08/results/gathered_results.RData') results_df <- results$results_df weights_doublet <- results$weights_doublet puck <- iv$puck marker_data_de <- readRDS('../../Data/SpatialRNA/NewCerPuck_190926_08/results/marker_data_de_standard.RDS') barcodes = rownames(results_df[results_df$spot_class != "reject" & puck@nUMI >= 100,]) my_table = puck@coords[barcodes,] my_table$class = results_df[barcodes,]$first_type n_levels = iv$cell_type_info[[3]] my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)] names(my_pal) = iv$cell_type_info[[2]] my_pal_curr <- my_pal marker_data_de <- readRDS('../../Data/SpatialRNA/NewCerPuck_190926_08/results/marker_data_de_MLI2.RDS') my_pal = pals::brewer.blues(20)[2:20] my_mod <- function(p) { p + scale_x_continuous(breaks = c(1000,3000,5000), limits = c(900,5600)) + scale_y_continuous(breaks = c(1000,3000,5000), limits = c(1000,4900))+ theme(legend.position="top") + geom_segment(aes(x = 1300, y = 1700, xend = 1684.6, yend = 1700), color = "black")+ theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank()) } cell_type = "Granule" MULT <- 500 cur_range <- c(0,MULT*0.03) gran_genes <- intersect(rownames(marker_data_de[marker_data_de$cell_type == cell_type,]), rownames(puck@counts)) p1 <- plot_puck_continuous(puck,colnames(puck@counts)[puck@nUMI >= 200],MULT*colSums(puck@counts[gran_genes,])/puck@nUMI, ylimit = cur_range) p1 <- my_mod(p1) +ggplot2::scale_colour_gradientn(paste(cell_type,"Markers"), colors = my_pal, limits = cur_range, breaks = cur_range) cur_range <- c(0,1) all_weights <- weights_doublet[results_df$spot_class == "doublet_certain" & results_df$second_type == cell_type,2, drop=FALSE] all_weights <- rbind(all_weights, weights_doublet[!(results_df$spot_class == "reject") & results_df$first_type == cell_type,1,drop=FALSE]) all_weights_vec <- as.vector(all_weights); names(all_weights_vec) <- rownames(all_weights) p2 <- plot_puck_continuous(puck, rownames(all_weights), all_weights_vec, ylimit = cur_range) p2 <- my_mod(p2)+ggplot2::scale_colour_gradientn(paste(cell_type,"Weight"), colors = my_pal, limits = cur_range, breaks = cur_range) cell_type = "Oligodendrocytes" cur_range <- c(0,MULT*0.05) gran_genes <- intersect(rownames(marker_data_de[marker_data_de$cell_type == cell_type,]), rownames(puck@counts)) p3 <- plot_puck_continuous(puck,colnames(puck@counts)[puck@nUMI >= 200], MULT*colSums(puck@counts[gran_genes,])/puck@nUMI, ylimit = cur_range) p3 <- my_mod(p3)+ggplot2::scale_colour_gradientn(paste("Oligo","Markers"), colors = my_pal, limits = cur_range, breaks = cur_range) cur_range <- c(0,1) all_weights <- weights_doublet[results_df$spot_class == "doublet_certain" & results_df$second_type == cell_type,2, drop=FALSE] all_weights <- rbind(all_weights, weights_doublet[!(results_df$spot_class == "reject") & results_df$first_type == cell_type,1,drop=FALSE]) all_weights_vec <- as.vector(all_weights); names(all_weights_vec) <- rownames(all_weights) p4 <- plot_puck_continuous(puck, rownames(all_weights), all_weights_vec, ylimit = cur_range) p4 <- my_mod(p4)+ggplot2::scale_colour_gradientn(paste("Oligo","Weight"), colors = my_pal, limits = cur_range, breaks = cur_range) cell_type = "MLI2" cur_range <- c(0,MULT*0.005) gran_genes <- intersect(rownames(marker_data_de[marker_data_de$cell_type == cell_type,]), rownames(puck@counts)) p5 <- plot_puck_continuous(puck,colnames(puck@counts)[puck@nUMI >= 200], MULT*colSums(puck@counts[gran_genes,])/puck@nUMI, ylimit = cur_range) p5 <- my_mod(p5)+ggplot2::scale_colour_gradientn(paste(cell_type,"Markers"), colors = my_pal, limits = cur_range, breaks = cur_range) cur_range <- c(0,1) all_weights <- weights_doublet[results_df$spot_class == "doublet_certain" & results_df$second_type == cell_type,2, drop=FALSE] all_weights <- rbind(all_weights, weights_doublet[!(results_df$spot_class == "reject") & results_df$first_type == cell_type,1,drop=FALSE]) all_weights_vec <- as.vector(all_weights); names(all_weights_vec) <- rownames(all_weights) p6 <- plot_puck_continuous(puck, rownames(all_weights), all_weights_vec, ylimit = cur_range) p6 <- my_mod(p6)+ggplot2::scale_colour_gradientn(paste(cell_type,"Weight"), colors = my_pal, limits = cur_range, breaks = cur_range) ggarrange(p1,p2,p3,p4,p5,p6,nrow=3,ncol=2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.