knitr::opts_chunk$set(echo = F, warning = F, message = F, cache = T, results = 'hide')
library(RCTD)
library(Matrix)
library(ggplot2)
library(ggpubr)
library(gridExtra)
library(reshape2)
library(readr)
library(Seurat)

Convergence Properties of RCTD

load(file = '../Results2/revisions3-1-1_plot.RData')
p_df <- as.data.frame(l1_errors)
p1 <- ggplot(p_df, aes(x=l1_errors)) + geom_density() + theme_classic() + xlab('L1 Norm of Difference in Weight Vector')+ylab('Density of Pixels') + scale_x_continuous(breaks = c(0,0.002,0.004), limits = c(0,0.004)) 
p3 <- ggplot(plot_df, aes(x=log_eps, y = value, colour = barcode)) + geom_line() + theme_classic() + theme(legend.position="none") + ylim(c(0,1)) + xlab('Log Epsilon') + ylab('Estimated Granule Proportion')
p_df <- as.data.frame(as.vector(l1e_mat))
colnames(p_df) <- 'x'
p2 <- ggplot(p_df, aes(x=x)) + geom_density() + theme_classic() + xlab('L1 Norm of Difference in Weight Vector')+ylab('Density of Pixels') + scale_x_continuous(breaks = c(0,2e-6,4e-6), limits = c(0,4e-6)) 
ggarrange(p1,p3,p2, nrow = 2, ncol = 2)

Performance of RCTD on Cell Types that do not appear in the Spatial Data

load('../../data/SpatialRNA/Puck_Viz/results/gathered_results.RData')
results_df <- results$results_df
num_calls <- table(unlist(list(results_df$first_type[results_df$spot_class != 'reject'],results_df$second_type[results_df$spot_class == 'doublet_certain'])))
not_pres <- names(num_calls)[c(3,4,6,8,11,12,13)]
total_miss <- sum(num_calls[not_pres])
total_hit <- sum(num_calls[names(num_calls)[! (names(num_calls) %in% not_pres)]])
plot_df <- cbind(c(total_hit, total_miss), c('in', 'out'))
colnames(plot_df) <- c('count', 'class')
plot_df <- as.data.frame(plot_df)
plot_df$count <- c(total_hit,total_miss)
plot_df$class <- as.character(plot_df$class)
plot_df$class[1] <- 'Cell Types Present' 
plot_df$class[2] <- 'Cell Types Absent' 
p2 <- ggplot(plot_df, aes(x=class, y=count)) + geom_bar(stat="identity") + theme_classic() + ylab('Number of Predictions') + xlab('')
of_int <- num_calls[not_pres]
of_int['In'] <- total_hit / 12
plot_df <- as.data.frame(cbind(names(of_int), of_int))
plot_df$of_int <- of_int
colnames(plot_df) <- c('type', 'value')
plot_df$type <- as.character(plot_df$type)
plot_df['In','type'] <- 'Average Present Type'
p1 <- ggplot(plot_df, aes(x=type,y=value)) + geom_bar(stat="identity") + theme_classic() + ylab('Number of Predictions') + xlab('Cell Type') + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggarrange(p2,p1)

Performance of RCTD on Cell Types that do not appear in the Reference

load(file = '../Results2/out_ref_0.RData')

transform_plot <- function(plot_df) {
  colnames(plot_df) <- c('Reference', 'Prediction', 'value')
  targ_lev <- levels(plot_df$Reference)
  prev_lev <- levels(plot_df$Prediction)
  plot_df$Prediction <- factor(plot_df$Prediction,c(prev_lev[prev_lev %in% targ_lev], prev_lev[! prev_lev %in% targ_lev]))
  plot_df$diag = as.character(plot_df$Prediction) == as.character(plot_df$Reference)
  plot_df$diag[!plot_df$diag] <- NA
  p1 <- ggplot(plot_df, aes(Reference, Prediction, fill= value)) +  geom_tile() +theme_classic() +scale_fill_gradientn(colors = pals::brewer.blues(20)[2:20], limits= c(0,1),name = "Classification Proportion")+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + xlab('True Cell Type')+ ylab('Predicted Cell Type') + geom_tile(data = plot_df, aes(color = diag), size = 0.7) +
    scale_color_manual(guide = FALSE, values = c('TRUE' = "#D55E00"))
  return(p1)
}

gen_plot <- function(classify_mat) {
  classify_mat[is.na(classify_mat)] <- 0
  plot_df <- melt(sweep(classify_mat, 1, rowSums(classify_mat),'/'))
  p1 <- transform_plot(plot_df)
  return(p1)
}
p1 <- gen_plot(classify_mat)
load(file = '../Results2/out_ref_1.RData')
p2 <- gen_plot(classify_mat)
load(file = '../Results2/out_ref_2.RData')
p3 <- gen_plot(classify_mat)
load(file = '..//Results2/out_ref_3.RData')
p4 <- gen_plot(classify_mat)
load(file = '../Results2/out_ref_4.RData')
p5 <- gen_plot(classify_mat)
ggarrange(p1,p2,p3,p4,p5,nrow = 3, ncol = 2, common.legend = T)

Confidence of RCTD

load(file = '../Results2/confidence_class_remove.RData')
library(plyr)
metadir <- "../../data/SpatialRNA/Puck_Viz/MetaData"
meta_data <- readRDS(file.path(metadir,"meta_data.RDS"))
keep_barc <- colnames(myRCTD@spatialRNA@counts)[1:25740 %% 30 <= 2]
meta_df <- meta_data$meta_df
meta_df_sm <- meta_df[keep_barc,]
meta_df_sm <- do.call("rbind", replicate(10, meta_df_sm, simplify = FALSE))
rownames(meta_df_sm) <- as.character(1:25740)
meta_df_sm$UMI_tot <- ceiling(1:25740 / 2574)* 100 #first UMI is no longer accurate, only proportional
cur_types <- myRCTD@cell_type_info$info[[2]][c(1,2,5,7,14,15,16,17)]
my_barc <- (meta_df_sm$first_UMI == 1000 & meta_df_sm$first_type %in% cur_types) | (meta_df_sm$first_UMI == 0 & meta_df_sm$second_type %in% cur_types)
singl_df <- aggregate(myRCTD@results$results_df$spot_class[my_barc] != 'reject', list(floor(myRCTD@spatialRNA@nUMI/100 )[my_barc]), mean)
diff <- singl_df$x - colMeans(conf_df_all_types_alt[cur_types,])/30
plot_df <- as.data.frame(cbind(c(singl_df$x,colMeans(conf_df_all_types_alt[cur_types,])/30),c(rep(1,10),rep(2,10)),rep((1:10)*100,2)))
colnames(plot_df) <- c('perf','class','UMI')
plot_df$class <- factor(plot_df$class)
plot_df$class <- mapvalues(plot_df$class,c(1,2),c('Regular','Class Removed'))
p1 <- ggplot(plot_df, aes(x=UMI,y=perf, group = class, colour = class)) + geom_line() + xlim(0,1000) + ylim(0,1) + theme_classic() + xlab('UMI Counts per Pixel') + ylab('Percent Pixels Confident') + theme(legend.title = element_blank(), legend.position = 'top')
print(mean(diff))

results_df <- myRCTD@results$results_df
first_pred <- myRCTD@internal_vars$class_df[results_df$first_type,] == myRCTD@internal_vars$class_df[as.character(meta_df_sm$first_type),] |
  myRCTD@internal_vars$class_df[results_df$first_type,] == myRCTD@internal_vars$class_df[as.character(meta_df_sm$second_type),]
first_conf <- results_df$spot_class != 'reject'
accuracy_first <- aggregate(first_pred[first_conf], list(myRCTD@spatialRNA@nUMI[first_conf]), mean)

second_pred <- myRCTD@internal_vars$class_df[results_df$second_type,] == myRCTD@internal_vars$class_df[as.character(meta_df_sm$first_type),] |
  myRCTD@internal_vars$class_df[results_df$second_type,] == myRCTD@internal_vars$class_df[as.character(meta_df_sm$second_type),]
second_conf <- results_df$spot_class == 'doublet_certain'
accuracy_second <- aggregate(second_pred[second_conf], list(myRCTD@spatialRNA@nUMI[second_conf]), mean)

accuracy_tot <- (aggregate(first_pred[first_conf], list(myRCTD@spatialRNA@nUMI[first_conf]), sum)$x +
  aggregate(second_pred[second_conf], list(myRCTD@spatialRNA@nUMI[second_conf]), sum)$x) /
  (aggregate(first_pred[first_conf], list(myRCTD@spatialRNA@nUMI[first_conf]), length)$x +
     aggregate(second_pred[second_conf], list(myRCTD@spatialRNA@nUMI[second_conf]), length)$x)
names(accuracy_tot) <- accuracy_first$Group.1
plot_df <- melt(accuracy_tot)
plot_df$UMI <- as.numeric(rownames(plot_df))
p3<- ggplot(plot_df, aes(x=UMI,y=value)) + geom_line() + ylim(0,1) + theme_classic() + xlab('UMI Counts per Pixel') + ylab('Percent Pixels Accurate') + theme(legend.position = 'top') + xlim(0,1000) + ylim(0,1) + labs(colour = "Number of Cell Types per Pixel")


load(file = '../Results2/confidence_cer.RData')
all_df <- rbind(singl_df,doub_df)
all_df$class <- c(rep(1,dim(singl_df)[1]),rep(2,dim(doub_df)[1]))
all_df$class <- factor(all_df$class)
p2<- ggplot(all_df, aes(x=Group.1*100,y=x, group = class, colour = class)) + geom_line() + ylim(0,1) + theme_classic() + xlab('UMI Counts per Pixel') + ylab('Percent Pixels Confident') + theme(legend.position = 'top') + xlim(0,1000) + ylim(0,1) + labs(colour = "Number of Cell Types per Pixel")

load(file = '../Results2/umi_sim.RData')
keep_barc <- colnames(myRCTD@spatialRNA@counts)[1:25740 %% 30 <= 2]
metadir <- "../../data/SpatialRNA/Puck_Viz/MetaData"
meta_data <- readRDS(file.path(metadir,"meta_data.RDS"))
meta_df <- meta_data$meta_df
meta_df_sm <- meta_df[keep_barc,]
meta_df_sm <- do.call("rbind", replicate(10, meta_df_sm, simplify = FALSE))
rownames(meta_df_sm) <- as.character(1:25740)
meta_df_sm$UMI_tot <- ceiling(1:25740 / 2574)* 100 #first UMI is no longer accurate, only proportional
singl_df <- aggregate(myRCTD@results$results_df$spot_class != 'reject', list(floor(myRCTD@spatialRNA@nUMI/100 )), mean)
doublets = myRCTD@results$results_df$spot_class %in% c('doublet_certain','doublet_uncertain')
doub_df <- aggregate(myRCTD@results$results_df$spot_class[doublets] == 'doublet_certain', list(floor(myRCTD@spatialRNA@nUMI[doublets]/100 )), mean)
all_df <- rbind(singl_df,doub_df)
all_df$class <- c(rep(1,dim(singl_df)[1]),rep(2,dim(doub_df)[1]))
all_df$class <- factor(all_df$class)
p4<- ggplot(all_df, aes(x=Group.1*100,y=x, group = class, colour = class)) + geom_line() + ylim(0,1) + theme_classic() + xlab('UMI Counts per Pixel') + ylab('Percent Pixels Confident') + theme(legend.position = 'top') + xlim(0,1000) + ylim(0,1) + labs(colour = "Number of Cell Types per Pixel")

ggarrange(p1,p2,p3,p4, nrow = 2, ncol = 2)

RCTD Cell Class Classification

my_barc <- rownames(res_df)[res_df$meta_df.first_type == 'Bergmann' & res_df$meta_df.second_type == "Granule"]
err_barc <- my_barc[results_df[my_barc,'second_type'] %in% c('Endothelial', 'Fibroblast')]
all_classifications <- table(results_df[250 <= meta_df$first_UMI & meta_df$first_UMI <= 750 & meta_df$first_type == 'Bergmann' & meta_df$second_type == 'Granule' & results_df$spot_class == 'doublet_certain','second_type']) +
  table(results_df[250 <= meta_df$first_UMI & meta_df$first_UMI <= 750 & meta_df$first_type == 'Bergmann' & meta_df$second_type == 'Granule' & results_df$spot_class != 'reject','first_type'])
plot_df <- as.data.frame(all_classifications)
ggplot(plot_df, aes(Var1,Freq)) + geom_bar(stat="identity") + theme_classic() + ylab('Number of Predictions') + xlab('Cell Type') + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Transcriptionally Similar Cell Type Doublets

###PLOT TRANSCRIPTIONALLY SIMILAR DOUBLET RATE###
load('../../data/SpatialRNA/Puck_Viz/results/gathered_results.RData')
results_df <- results$results_df
metadir <- "../../data/SpatialRNA/Puck_Viz/MetaData"
meta_data <- readRDS(file.path(metadir,"meta_data.RDS"))
meta_df <- meta_data$meta_df
sim_barc <- (meta_df$first_type == "Astrocytes" & meta_df$second_type == 'Bergmann') |
  (meta_df$first_type == "Endothelial" & meta_df$second_type == 'Fibroblast') |
  (meta_df$first_type == "MLI1" & meta_df$second_type == 'MLI2') |
  (meta_df$first_type == "Oligodendrocytes" & meta_df$second_type == 'Polydendrocytes')
UMI_list <- meta_data$UMI_list
N_UMI_cond <- (length(UMI_list) + 1)/2
spot_class_df <- as.data.frame(Matrix(0,nrow = N_UMI_cond, ncol = 2))
rownames(spot_class_df) <- UMI_list[1:N_UMI_cond]; colnames(spot_class_df) <- c('singlets','doublets')
barcodes <- rownames(results_df)
for(nUMI in UMI_list) {
  vec = table(results_df[barcodes[meta_df$first_UMI == nUMI],]$spot_class)
  spot_class_df[as.character(nUMI), 'singlets'] = vec["singlet"]
  spot_class_df[as.character(nUMI), 'doublets'] = sum(vec) - vec["singlet"] - vec["reject"]
  vec = table(results_df[barcodes[meta_df$first_UMI == nUMI & sim_barc],]$spot_class)
  spot_class_df[as.character(nUMI), 'singlets_sim'] = vec["singlet"]
  spot_class_df[as.character(nUMI), 'doublets_sim'] = sum(vec) - vec["singlet"] - vec["reject"]
}
frac_error <- spot_class_df["500",'singlets_sim'] / spot_class_df["500",'singlets']
spot_class_df[1:6,] <- spot_class_df[1:6,] + spot_class_df[13:8,]
spot_class_df <- spot_class_df[1:7,c(3,4)]
spot_class_df <- sweep(spot_class_df, 1, rowSums(spot_class_df),'/')
spot_class_df[,"nUMI"] <- UMI_list[1:N_UMI_cond]
df <- melt(spot_class_df,  id.vars = 'nUMI', variable.name = 'series')
df$std <- sqrt(df$value*(1 - df$value)/(240)[1])
df$std[df$nUMI == 500] <- df$std[df$nUMI == 500]*sqrt(2)
doublet_df <- df
my_pal = pals::coolwarm(20)
doublet_df$prop <- doublet_df$nUMI / 1000
plot_df = doublet_df[doublet_df$series == "doublets_sim",]

p1 <- ggplot(plot_df, aes(prop,value)) + geom_line()  +
  geom_errorbar(aes(ymin=value-1.96*std, ymax=value+1.96*std), width=.02,position=position_dodge(.001)) + theme_classic() +
  geom_hline(yintercept=0, linetype="dashed", color = "grey", size=0.5) + geom_hline(yintercept=1, linetype="dashed", color = "grey", size=0.5) + xlab("UMI Proportion of Minority Cell Type") + ylab("Doublet Classification Rate") + scale_color_manual(values=c(my_pal[1], my_pal[20]),labels = c("Doublet"), name = "Class") + scale_y_continuous(breaks = c(0,0.5,1), limits = c(0,1))+ scale_x_continuous(breaks = c(0,0.25,0.5), limits = c(-.03,0.53))
p1

Agreement of Single-Cell and Single-Nucleus Predictions

myRCTDsn <- readRDS('../../myRCTD_cer.rds')
#percent of genes small
sum(rowMeans(myRCTDsn@cell_type_info$renorm[[1]][myRCTDsn@internal_vars$gene_list_reg,]) < 1e-3) / length(myRCTDsn@internal_vars$gene_list_reg)
myRCTDsc <- readRDS('../../myRCTD_cer_viz.rds')
success <- 0
total <- 0
results_df_sc <- myRCTDsc@results$results_df
results_df_sn <- myRCTDsn@results$results_df
for(i in 1:dim(myRCTDsn@results$results_df)[1]) {
  if(results_df_sc$spot_class[i] == 'reject' | results_df_sc$first_class[i]) {
    if(results_df_sc$spot_class[i] == 'doublet_certain' & !results_df_sc$second_class[i])
      sc_types <- c(as.character(results_df_sc$second_type[i]))
    else
      sc_types <- c()
  } else if(results_df_sc$spot_class[i] == 'doublet_certain' & !(results_df_sc$second_class[i])) {
    sc_types <- c(as.character(results_df_sc$first_type[i]), as.character(results_df_sc$second_type[i]))
  } else
    sc_types <- c(as.character(results_df_sc$first_type[i]))
  if(results_df_sn$spot_class[i] == 'reject' | results_df_sn$first_class[i]) {
    if(results_df_sn$spot_class[i] == 'doublet_certain' & !results_df_sn$second_class[i])
      sn_types <- c(as.character(results_df_sn$second_type[i]))
    else
      sn_types <- c()
  } else if(results_df_sn$spot_class[i] == 'doublet_certain' & !(results_df_sn$second_class[i])) {
    sn_types <- c(as.character(results_df_sn$first_type[i]), as.character(results_df_sn$second_type[i]))
  } else
    sn_types <- c(as.character(results_df_sn$first_type[i]))
  success <- success + length(intersect(sc_types,sn_types))
  total <- total + min(length(sc_types), length(sn_types))
}
print(success/total)


sing_barc <- results_df_sn$spot_class == 'singlet' & results_df_sc$spot_class == 'singlet' & !results_df_sn$first_class & !results_df_sc$first_class
co_df <- data.frame(results_df_sn$first_type[sing_barc], results_df_sc$first_type[sing_barc], 1)
colnames(co_df) <- c('x','y','z')
plot_df <- aggregate(. ~x+y, co_df ,sum)
tot_df <- aggregate(. ~ y, plot_df, sum)
rownames(tot_df) <- tot_df$y
plot_df$prop <- plot_df$z / tot_df[plot_df$y,]$z
colnames(plot_df) <- c('Reference','Prediction','skip','value')
p_df <- plot_df[!(plot_df[,2] %in% c('Choroid','Microglia')),c(2,1,4)]
p_mat <- dcast(p_df, formula = Prediction ~ Reference)
rownames(p_mat) <- p_mat$Prediction
p_mat$Prediction <- NULL
p_mat[is.na(p_mat)] <- 0
p1 <- gen_plot(as.matrix(p_mat)) + theme(legend.position = 'top') + xlab('Predicted Cell Type Single-Cell') + ylab('Predicted Cell Type Single-Nucleus')
ggarrange(p1)

Predicted Number of Cell Types per Pixel

##PLOT###
myRCTDmulti <- readRDS('../Results2/RCTDmulti_cer.rds')
num_types_all <- unlist(lapply(myRCTDmulti@results, function(x) length(x$conf_list)))
p_df <- as.data.frame(num_types_all)
p2 <- ggplot(p_df, aes(x=num_types_all)) + geom_histogram() + theme_classic() + xlab('Predicted Number of Cell Types per Pixel')+ylab('Number of Pixels')
sum(num_types_all >= 3) / length(num_types_all)

myRCTDmulti <- readRDS('../Results2/RCTDmulti_hc.rds')
num_types_all <- unlist(lapply(myRCTDmulti@results, function(x) length(x$conf_list)))
p_df <- as.data.frame(num_types_all)
p1 <- ggplot(p_df, aes(x=num_types_all)) + geom_histogram() + theme_classic() + xlab('Predicted Number of Cell Types per Pixel')+ylab('Number of Pixels')
sum(num_types_all >= 3) / length(num_types_all)

myRCTDmulti <- readRDS('../Results2/RCTDmulti_som.rds')
num_types_all <- unlist(lapply(myRCTDmulti@results, function(x) length(x$conf_list)))
p_df <- as.data.frame(num_types_all)
p3 <- ggplot(p_df, aes(x=num_types_all)) + geom_histogram() + theme_classic() + xlab('Predicted Number of Cell Types per Pixel')+ylab('Number of Pixels')
sum(num_types_all >= 3) / length(num_types_all)

myRCTDmulti <- readRDS('../Results2/RCTDmulti_visium.rds')
num_types_all <- unlist(lapply(myRCTDmulti@results, function(x) length(x$conf_list)))
p_df <- as.data.frame(num_types_all)
p4 <- ggplot(p_df, aes(x=num_types_all)) + geom_histogram() + theme_classic() + xlab('Predicted Number of Cell Types per Pixel')+ylab('Number of Pixels')
sum(num_types_all >= 3) / length(num_types_all)
ggarrange(p2,p1,p3,p4)

Performance of DWLS

weights <- readRDS('../../DWLS-master/ref/DWLS-weights.RDS')
common_cell_types = c("Astrocytes", "Bergmann", "Endothelial", "Fibroblast", "Golgi", "Granule", "MLI1", "MLI2", "Oligodendrocytes", "Polydendrocytes", "Purkinje", "UBCs")
class_df <- data.frame(colnames(weights), row.names = colnames(weights)); colnames(class_df)[1] = "class"
class_df["Bergmann","class"] = "Astrocytes"
class_df["Fibroblast","class"] = "Endothelial"
class_df["MLI2","class"] = "MLI1"
class_df["Macrophages","class"] = "Microglia"
class_df["Polydendrocytes","class"] = "Oligodendrocytes"
meta_data <- readRDS('../../DWLS-master/ref/MetaData/meta_data.RDS')
beads <- readRDS('../../DWLS-master/ref/beads.rds')
meta_df <- meta_data$meta_df
keep_barc <- colnames(beads)[(meta_df$first_UMI == 0 & meta_df$second_type %in% common_cell_types) |
                               (meta_df$first_UMI == 1000 & meta_df$first_type %in% common_cell_types)]
pred <- colnames(weights)[apply(weights,1,which.max)]
names(pred) <- keep_barc
first_barc = keep_barc[meta_df[keep_barc,]$first_UMI == 1000]
sec_barc = keep_barc[meta_df[keep_barc,]$first_UMI == 0]
res_df <- cbind(c(as.character(meta_df[first_barc,'first_type']), as.character(meta_df[sec_barc,'second_type'])),
                c(pred[first_barc],pred[sec_barc]))
colnames(res_df) <- c('true','pred')
res_df <- as.data.frame(res_df)
res_df$y = 1

plot_df <- aggregate(y ~ true + pred, res_df, length)
plot_df <- plot_df[plot_df$true %in% common_cell_types,]
p_mat <- dcast(plot_df, formula = true ~ pred)
rownames(p_mat) <- p_mat$true
p_mat$true <- NULL
p_mat[is.na(p_mat)] <- 0
p1 <- gen_plot(as.matrix(p_mat)) + theme(legend.position = 'top') 
accuracy = sum(plot_df[as.character(plot_df$true) == as.character(plot_df$pred),'y']) / sum(plot_df$y)
accuracy_class = (sum(plot_df[class_df[as.character(plot_df$true),'class'] == class_df[as.character(plot_df$pred),'class'],'y'])) / sum(plot_df$y)


weights <- readRDS('../../DWLS-master/DWLS-weights.RDS')
meta_data <- readRDS('../../DWLS-master/MetaData/meta_data.RDS')
beads <- readRDS('../../DWLS-master/beads.rds')
meta_df <- meta_data$meta_df
keep_barc <- colnames(beads)[(meta_df$first_UMI == 0 | meta_df$first_UMI == 1000) & 1:25740 %% 30 <= 2]
pred <- colnames(weights)[apply(weights,1,which.max)]
names(pred) <- keep_barc
first_barc = keep_barc[meta_df[keep_barc,]$first_UMI == 1000]
sec_barc = keep_barc[meta_df[keep_barc,]$first_UMI == 0]
res_df <- cbind(c(as.character(meta_df[first_barc,'first_type']), as.character(meta_df[sec_barc,'second_type'])),
                c(pred[first_barc],pred[sec_barc]))
colnames(res_df) <- c('true','pred')
res_df <- as.data.frame(res_df)
res_df$y = 1
plot_df <- aggregate(y ~ true + pred, res_df, length)
p_mat <- dcast(plot_df, formula = true ~ pred)
rownames(p_mat) <- p_mat$true
p_mat$true <- NULL
p_mat[is.na(p_mat)] <- 0
p2 <- gen_plot(as.matrix(p_mat)) + theme(legend.position = 'top') 
accuracy = sum(plot_df[as.character(plot_df$true) == as.character(plot_df$pred),'y']) / sum(plot_df$y)
accuracy_class = (sum(plot_df[class_df[as.character(plot_df$true),'class'] == class_df[as.character(plot_df$pred),'class'],'y'])) / sum(plot_df$y)
ggarrange(p1,p2,common.legend = T)

All Cell Types Cerebellum (from Single-Cell)

myRCTDsc <- readRDS('../../myRCTD_cer_viz.rds')
my_pal = pals::brewer.blues(20)[2:20]
my_mod <- function(p) {
  p + scale_x_continuous(breaks = c(1000,3000,5000), limits = c(900,5600)) + scale_y_continuous(breaks = c(1000,3000,5000), limits = c(1000,4900))+ theme(legend.position="top") + geom_segment(aes(x = 1300, y = 1700, xend = 1684.6, yend = 1700), color = "black")+  theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())
}
results_df <- myRCTDsc@results$results_df
weights_doublet <- myRCTDsc@results$weights_doublet
plots <- list()
puck <- myRCTDsc@spatialRNA
for(i in 1:length(myRCTDsc@cell_type_info$info[[2]])) {
  cell_type = myRCTDsc@cell_type_info$info[[2]][i]
  cur_range <- c(0,1)
  all_weights <- weights_doublet[results_df$spot_class == "doublet_certain" & results_df$second_type == cell_type,2, drop=FALSE]
  all_weights <- rbind(all_weights, weights_doublet[!(results_df$spot_class == "reject") & results_df$first_type == cell_type,1,drop=FALSE])
  all_weights_vec <- as.vector(all_weights); names(all_weights_vec) <- rownames(all_weights)
  plots[[i]] <- plot_puck_continuous(puck, rownames(all_weights), all_weights_vec, ylimit = cur_range,title=cell_type)
  plots[[i]] <- my_mod(plots[[i]])+ggplot2::scale_colour_gradientn("Weight", colors = my_pal, limits = cur_range, breaks = cur_range)
}

ggarrange(plotlist = plots,ncol=4,nrow=5, common.legend = T)

All Somato Cell Types

myRCTD <- readRDS('../../data/SpatialRNA/Somato_200306_02/myRCTD_somato_enlarged.rds')
my_pal = pals::brewer.blues(20)[2:20]
my_mod <- function(p) {
  p + scale_x_continuous(breaks = c(1000,3000,5000), limits = c(2300,5600)) + scale_y_continuous(breaks = c(1000,3000,5000), limits = c(2200,5400))+ theme(legend.position="top") + geom_segment(aes(x = 2400, y = 2400, xend = 2784.6, yend = 2400), color = "black")+  theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())
}
results_df <- myRCTD@results$results_df
weights_doublet <- myRCTD@results$weights_doublet
plots <- list()
puck <- myRCTD@spatialRNA
for(i in 1:length(myRCTD@cell_type_info$info[[2]])) {
  cell_type = myRCTD@cell_type_info$info[[2]][i]
  cur_range <- c(0,1)
  all_weights <- weights_doublet[results_df$spot_class == "doublet_certain" & results_df$second_type == cell_type,2, drop=FALSE]
  all_weights <- rbind(all_weights, weights_doublet[!(results_df$spot_class == "reject") & results_df$first_type == cell_type,1,drop=FALSE])
  all_weights_vec <- as.vector(all_weights); names(all_weights_vec) <- rownames(all_weights)
  plots[[i]] <- plot_puck_continuous(puck, rownames(all_weights), all_weights_vec, ylimit = cur_range,title=cell_type)
  plots[[i]] <- my_mod(plots[[i]])+ggplot2::scale_colour_gradientn("Weight", colors = my_pal, limits = cur_range, breaks = cur_range)
}
ggarrange(plotlist = plots,ncol=4,nrow=5, common.legend = T)

Somatosensory Cortex Cortical Cell Types

my_mod <- function(p) {
  p + scale_x_continuous(breaks = c(1000,3000,5000), limits = c(2300,5600)) + scale_y_continuous(breaks = c(1000,3000,5000), limits = c(2200,5400))+ theme(legend.position="top") + geom_segment(aes(x = 2400, y = 2400, xend = 2784.6, yend = 2400), color = "black")+  theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())
}
cell_type_names <- c(myRCTD@cell_type_info$info[[2]][3:9])
results_df <- myRCTD@results$results_df
puck <- myRCTD@spatialRNA
barcodes = rownames(results_df[results_df$spot_class != "reject" & puck@nUMI >= 100,])
my_table = puck@coords[barcodes,]
my_table$class = results_df[barcodes,]$first_type
n_levels = myRCTD@cell_type_info$info[[3]]
my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)]
names(my_pal) = myRCTD@cell_type_info$info[[2]]
my_pal_curr <- my_pal
my_pal_curr[cell_type_names[1]] <- "#CC79A7"
my_pal_curr[cell_type_names[2]] <- "#E69F00"
my_pal_curr[cell_type_names[5]] <- "#56B4E9"
my_pal_curr[cell_type_names[4]] <- "#009E73"
my_pal_curr[cell_type_names[3]] <- "#F0E442"
my_pal_curr[cell_type_names[6]] <- "#0072B2"
my_pal_curr[cell_type_names[7]] <- "#D55E00"
#my_pal_curr[cell_type_names[8]] <- "#000000"
my_table <- my_table[my_table$class %in% cell_type_names,]
pres = unique(as.integer(my_table$class))
pres = pres[order(pres)]
p1 <- ggplot2::ggplot(my_table, ggplot2::aes(x=x, y=y)) + ggplot2::geom_point(ggplot2::aes(size = .1, shape=19,color=class)) + ggplot2::scale_color_manual("",values = my_pal_curr[pres], breaks = cell_type_names, labels = cell_type_names)+ ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ guides(colour = guide_legend(override.aes = list(size=2)))
p1 <- my_mod(p1)
ggarrange(p1)

Visium Simulation Multi Mode

load(file = '../Results2/four_types_viz.RData')
map_class <- function(list_my, class_df) {
  return(as.character(class_df[list_my,'class']))
}
class_df <- RCTD@internal_vars$class_df
N_pixels <- length(RCTD@results)
num_types_all <- unlist(lapply(RCTD@results, function(x) length(x$conf_list)))
p_df <- as.data.frame(num_types_all)
p3 <- ggplot(p_df, aes(x=num_types_all)) + geom_histogram() + theme_classic() + xlab('Predicted Number of Cell Types per Pixel')+ylab('Number of Pixels')
sum(num_types_all >= 3) / length(num_types_all)
#num_types <- unlist(lapply(RCTD@results, function(x) length(unique(map_class(names(x$conf_list),class_df)))))
#hist(num_types) #num cell types predicted
conf_types_all <- unlist(lapply(RCTD@results, function(x) sum(x$conf_list)))
conf_types_class <- unlist(lapply(RCTD@results, function(x) length(unique(map_class(names(which(x$conf_list)),class_df)))))
correct_num <- unlist(lapply(1:N_pixels, function(i) sum(unique(map_class(RCTD@results[[i]]$cell_type_list[RCTD@results[[i]]$conf_list],class_df)) %in% map_class(names(which(truth[i,] > 0)),class_df))))
sum(correct_num) / sum(conf_types_class) #overall accuracy
plot_df <- numeric(4)
for(i in 1:4)
  plot_df[i] <- mean(correct_num[conf_types_class == i])/i
plot_df <- data.frame(cbind(1:4,plot_df))
colnames(plot_df) <- c('n','accuracy')
p2 <- ggplot(plot_df, aes(x=n,y=accuracy)) + geom_line() + ylim(0,1) + theme_classic() + xlab('Number of Cell Types per Pixel') + ylab('Percent Pixels Accurate') + theme(legend.title = element_blank(), legend.position = 'top')
#for(i in 1:4)
#  hist(correct_num[conf_types == i])
plot_df <- numeric(4)
for(i in 1:4)
  plot_df[i] <- sum(conf_types_all >= i) / sum(num_types_all >= i)
plot_df <- data.frame(cbind(1:4,plot_df))
colnames(plot_df) <- c('n','confidence')
p1 <- ggplot(plot_df, aes(x=n,y=confidence)) + geom_line() + ylim(0,1) + theme_classic() + xlab('Number of Cell Types per Pixel') + ylab('Percent Pixels Confident') + theme(legend.title = element_blank(), legend.position = 'top')
print(mean(diff))
load(file = '../Results2/three_types_viz.RData')
map_class <- function(list_my, class_df) {
  return(as.character(class_df[list_my,'class']))
}
class_df <- RCTD@internal_vars$class_df
N_pixels <- length(RCTD@results)
num_types_all <- unlist(lapply(RCTD@results, function(x) length(x$conf_list)))
p_df <- as.data.frame(num_types_all)
p6 <- ggplot(p_df, aes(x=num_types_all)) + geom_histogram() + theme_classic() + xlab('Predicted Number of Cell Types per Pixel')+ylab('Number of Pixels')
sum(num_types_all >= 3) / length(num_types_all)
#num_types <- unlist(lapply(RCTD@results, function(x) length(unique(map_class(names(x$conf_list),class_df)))))
#hist(num_types) #num cell types predicted
conf_types_all <- unlist(lapply(RCTD@results, function(x) sum(x$conf_list)))
conf_types_class <- unlist(lapply(RCTD@results, function(x) length(unique(map_class(names(which(x$conf_list)),class_df)))))
correct_num <- unlist(lapply(1:N_pixels, function(i) sum(unique(map_class(RCTD@results[[i]]$cell_type_list[RCTD@results[[i]]$conf_list],class_df)) %in% map_class(names(which(truth[i,] > 0)),class_df))))
sum(correct_num) / sum(conf_types_class) #overall accuracy
plot_df <- numeric(4)
for(i in 1:4)
  plot_df[i] <- mean(correct_num[conf_types_class == i])/i
plot_df <- data.frame(cbind(1:4,plot_df))
colnames(plot_df) <- c('n','accuracy')
p5 <- ggplot(plot_df, aes(x=n,y=accuracy)) + geom_line() + ylim(0,1) + theme_classic() + xlab('Number of Cell Types per Pixel') + ylab('Percent Pixels Accurate') + theme(legend.title = element_blank(), legend.position = 'top')
#for(i in 1:4)
#  hist(correct_num[conf_types == i])
plot_df <- numeric(4)
for(i in 1:4)
  plot_df[i] <- sum(conf_types_all >= i) / sum(num_types_all >= i)
plot_df <- data.frame(cbind(1:4,plot_df))
colnames(plot_df) <- c('n','confidence')
p4 <- ggplot(plot_df, aes(x=n,y=confidence)) + geom_line() + ylim(0,1) + theme_classic() + xlab('Number of Cell Types per Pixel') + ylab('Percent Pixels Confident') + theme(legend.title = element_blank(), legend.position = 'top')
print(mean(diff))
ggarrange(p4,p5,p6,p1,p2,p3,nrow = 2,ncol = 3)

Visium Simulation Full Mode

load(file = '../Results2/four_types_viz.RData')
RCTD <- readRDS(file = '../Results2/four_type_full.rds')
transform_class <- function(weights, class_df) {
  class_weights <- matrix(0, nrow = dim(weights)[1], ncol = dim(weights)[2])
  colnames(class_weights) <- colnames(weights)
  for(type in colnames(weights)) {
    class_weights[,class_df[type,]] <- class_weights[,class_df[type,]] + weights[,type]
  }
  return(class_weights)
}
normWeights <- sweep(RCTD@results$weights,1,rowSums(RCTD@results$weights),'/')
class_weights <- transform_class(normWeights, RCTD@internal_vars$class_df)
UMI_beads <- rowSums(truth)
normTruth <- sweep(truth,1,UMI_beads,'/')
class_truth <- transform_class(normTruth, RCTD@internal_vars$class_df)
mean(rowSums(abs(normWeights - normTruth)) / rowSums(abs(normTruth)))
class_weights_random <- class_weights[sample(1:dim(class_weights)[1]),]
par(mfrow = c(1,2))
#hist(sqrt(rowSums(abs(class_weights - class_truth)^2) / rowSums(abs(class_truth)^2)))
#hist(sqrt(rowSums(abs(class_weights_random - class_truth)^2) / rowSums(abs(class_truth)^2)))
R2_val <- numeric(length(common_cell_types)); names(R2_val) = common_cell_types
plots <- list()
i <- 1
cell_classes <- common_cell_types[c(1,3,5,6,7,9,11,12)]
cell_class_names <- c('Astrocytes-Bergmann','Endothelial-Fibroblast','Golgi','Granule','MLI','Oligoden.-Polyden.','Purkinje','UBCs')
names(cell_class_names) <- cell_classes
for(type in cell_classes) {
  plot_df <-as.data.frame(cbind(class_weights[,type],class_truth[,type]))
  plots[[i]] <- ggplot(plot_df, aes(V2,V1)) + geom_point() + xlim(0,1) + ylim(0,1) + theme_classic() + xlab('True Proportion') + ylab('Predicted Proportion') +
    ggplot2::ggtitle(cell_class_names[type])
  i <- i + 1
  R2_val[type] <- cor(plot_df$V1,plot_df$V2)^2
}
ggarrange(plotlist = plots,ncol=2,nrow=4)
load(file = '../Results2/three_types_viz.RData')
RCTD <- readRDS(file = '../Results2/three_type_full.rds')
transform_class <- function(weights, class_df) {
  class_weights <- matrix(0, nrow = dim(weights)[1], ncol = dim(weights)[2])
  colnames(class_weights) <- colnames(weights)
  for(type in colnames(weights)) {
    class_weights[,class_df[type,]] <- class_weights[,class_df[type,]] + weights[,type]
  }
  return(class_weights)
}
normWeights <- sweep(RCTD@results$weights,1,rowSums(RCTD@results$weights),'/')
class_weights <- transform_class(normWeights, RCTD@internal_vars$class_df)
UMI_beads <- rowSums(truth)
normTruth <- sweep(truth,1,UMI_beads,'/')
class_truth <- transform_class(normTruth, RCTD@internal_vars$class_df)
mean(rowSums(abs(normWeights - normTruth)) / rowSums(abs(normTruth)))
class_weights_random <- class_weights[sample(1:dim(class_weights)[1]),]
par(mfrow = c(1,2))
#hist(sqrt(rowSums(abs(class_weights - class_truth)^2) / rowSums(abs(class_truth)^2)))
#hist(sqrt(rowSums(abs(class_weights_random - class_truth)^2) / rowSums(abs(class_truth)^2)))
R2_val <- numeric(length(common_cell_types)); names(R2_val) = common_cell_types
plots <- list()
i <- 1
cell_classes <- common_cell_types[c(1,3,5,6,7,9,11,12)]
cell_class_names <- c('Astrocytes-Bergmann','Endothelial-Fibroblast','Golgi','Granule','MLI','Oligoden.-Polyden.','Purkinje','UBCs')
names(cell_class_names) <- cell_classes
for(type in cell_classes) {
  plot_df <-as.data.frame(cbind(class_weights[,type],class_truth[,type]))
  plots[[i]] <- ggplot(plot_df, aes(V2,V1)) + geom_point() + xlim(0,1) + ylim(0,1) + theme_classic() + xlab('True Proportion') + ylab('Predicted Proportion') +
    ggplot2::ggtitle(cell_class_names[type])
  i <- i + 1
  R2_val[type] <- cor(plot_df$V1,plot_df$V2)^2
}
ggarrange(plotlist = plots,ncol=2,nrow=4)

Marker genes for MLI2

get_marker_data <- function(cell_type_names, cell_type_means, gene_list) {
  marker_means = cell_type_means[gene_list,]
  marker_norm = marker_means / rowSums(marker_means)
  marker_data = as.data.frame(cell_type_names[max.col(marker_means)])
  marker_data$max_epr <- apply(cell_type_means[gene_list,],1,max)
  colnames(marker_data) = c("cell_type",'max_epr')
  rownames(marker_data) = gene_list
  marker_data$log_fc <- 0
  epsilon <- 1e-9
  for(cell_type in unique(marker_data$cell_type)) {
    cur_genes <- gene_list[marker_data$cell_type == cell_type]
    other_mean = rowMeans(cell_type_means[cur_genes,cell_type_names != cell_type])
    marker_data$log_fc[marker_data$cell_type == cell_type] <- log(epsilon + cell_type_means[cur_genes,cell_type]) - log(epsilon + other_mean)
  }
  return(marker_data)
}

myRCTD <- readRDS('../../myRCTD_cer.rds')
cur_cell_types <- c("Bergmann","Granule","Purkinje","MLI2","Oligodendrocytes")
puck <- myRCTD@spatialRNA
cell_type_info_restr = myRCTD@cell_type_info$info
cell_type_info_restr[[1]] = cell_type_info_restr[[1]][,cur_cell_types]
cell_type_info_restr[[2]] = cur_cell_types; cell_type_info_restr[[3]] = length(cur_cell_types)

de_genes <- get_de_genes(cell_type_info_restr, puck, fc_thresh = 4, expr_thresh = .0001, MIN_OBS = 3)
marker_data_de = get_marker_data(cell_type_info_restr[[2]], cell_type_info_restr[[1]], de_genes)
saveRDS(marker_data_de, '../../Data/SpatialRNA/NewCerPuck_190926_08/results/marker_data_de_MLI2.RDS')
load(file = '../../Data/SpatialRNA/NewCerPuck_190926_08/results/gathered_results.RData')
results_df <- results$results_df
weights_doublet <- results$weights_doublet
puck <- iv$puck
marker_data_de <- readRDS('../../Data/SpatialRNA/NewCerPuck_190926_08/results/marker_data_de_standard.RDS')
barcodes = rownames(results_df[results_df$spot_class != "reject" & puck@nUMI >= 100,])
my_table = puck@coords[barcodes,]
my_table$class = results_df[barcodes,]$first_type
n_levels = iv$cell_type_info[[3]]
my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)]
names(my_pal) = iv$cell_type_info[[2]]
my_pal_curr <- my_pal

marker_data_de <- readRDS('../../Data/SpatialRNA/NewCerPuck_190926_08/results/marker_data_de_MLI2.RDS')
my_pal = pals::brewer.blues(20)[2:20]
my_mod <- function(p) {
  p + scale_x_continuous(breaks = c(1000,3000,5000), limits = c(900,5600)) + scale_y_continuous(breaks = c(1000,3000,5000), limits = c(1000,4900))+ theme(legend.position="top") + geom_segment(aes(x = 1300, y = 1700, xend = 1684.6, yend = 1700), color = "black")+  theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())
}

cell_type = "Granule"
MULT <- 500
cur_range <- c(0,MULT*0.03)
gran_genes <- intersect(rownames(marker_data_de[marker_data_de$cell_type == cell_type,]), rownames(puck@counts))
p1 <- plot_puck_continuous(puck,colnames(puck@counts)[puck@nUMI >= 200],MULT*colSums(puck@counts[gran_genes,])/puck@nUMI, ylimit = cur_range)
p1 <- my_mod(p1) +ggplot2::scale_colour_gradientn(paste(cell_type,"Markers"), colors = my_pal, limits = cur_range, breaks = cur_range)

cur_range <- c(0,1)
all_weights <- weights_doublet[results_df$spot_class == "doublet_certain" & results_df$second_type == cell_type,2, drop=FALSE]
all_weights <- rbind(all_weights, weights_doublet[!(results_df$spot_class == "reject") & results_df$first_type == cell_type,1,drop=FALSE])
all_weights_vec <- as.vector(all_weights); names(all_weights_vec) <- rownames(all_weights)
p2 <- plot_puck_continuous(puck, rownames(all_weights), all_weights_vec, ylimit = cur_range)
p2 <- my_mod(p2)+ggplot2::scale_colour_gradientn(paste(cell_type,"Weight"), colors = my_pal, limits = cur_range, breaks = cur_range)

cell_type = "Oligodendrocytes"
cur_range <- c(0,MULT*0.05)
gran_genes <- intersect(rownames(marker_data_de[marker_data_de$cell_type == cell_type,]), rownames(puck@counts))
p3 <- plot_puck_continuous(puck,colnames(puck@counts)[puck@nUMI >= 200], MULT*colSums(puck@counts[gran_genes,])/puck@nUMI, ylimit = cur_range)
p3 <- my_mod(p3)+ggplot2::scale_colour_gradientn(paste("Oligo","Markers"), colors = my_pal, limits = cur_range, breaks = cur_range)

cur_range <- c(0,1)
all_weights <- weights_doublet[results_df$spot_class == "doublet_certain" & results_df$second_type == cell_type,2, drop=FALSE]
all_weights <- rbind(all_weights, weights_doublet[!(results_df$spot_class == "reject") & results_df$first_type == cell_type,1,drop=FALSE])
all_weights_vec <- as.vector(all_weights); names(all_weights_vec) <- rownames(all_weights)
p4 <- plot_puck_continuous(puck, rownames(all_weights), all_weights_vec, ylimit = cur_range)
p4 <- my_mod(p4)+ggplot2::scale_colour_gradientn(paste("Oligo","Weight"), colors = my_pal, limits = cur_range, breaks = cur_range)

cell_type = "MLI2"
cur_range <- c(0,MULT*0.005)
gran_genes <- intersect(rownames(marker_data_de[marker_data_de$cell_type == cell_type,]), rownames(puck@counts))
p5 <- plot_puck_continuous(puck,colnames(puck@counts)[puck@nUMI >= 200], MULT*colSums(puck@counts[gran_genes,])/puck@nUMI, ylimit = cur_range)
p5 <- my_mod(p5)+ggplot2::scale_colour_gradientn(paste(cell_type,"Markers"), colors = my_pal, limits = cur_range, breaks = cur_range)

cur_range <- c(0,1)
all_weights <- weights_doublet[results_df$spot_class == "doublet_certain" & results_df$second_type == cell_type,2, drop=FALSE]
all_weights <- rbind(all_weights, weights_doublet[!(results_df$spot_class == "reject") & results_df$first_type == cell_type,1,drop=FALSE])
all_weights_vec <- as.vector(all_weights); names(all_weights_vec) <- rownames(all_weights)
p6 <- plot_puck_continuous(puck, rownames(all_weights), all_weights_vec, ylimit = cur_range)
p6 <- my_mod(p6)+ggplot2::scale_colour_gradientn(paste(cell_type,"Weight"), colors = my_pal, limits = cur_range, breaks = cur_range)

ggarrange(p1,p2,p3,p4,p5,p6,nrow=3,ncol=2)


dmcable/RCTD documentation built on Feb. 24, 2024, 11:03 p.m.