knitr::opts_chunk$set(echo = T, warning = F, message = F, cache = T, results = 'hide')
library(spacexr) library(Matrix) library(devtools) library(ggplot2) library(ggpubr) library(reshape2) library(dplyr) source('~/Documents/MIT/Research/Rafalab/Projects/spacexr/AnalysisCSIDE/helper_functions/de_simulation_helper.R') devtools::load_all() load_all() set_global_Q_all() ref <- readRDS("../../data/Reference/10xCer/reference.rds") resultsdir <- '../../data/Reference/10xCer/de_simulation' common_cell_types <- c('Bergmann', 'Purkinje') n_cell_types = length(common_cell_types) trials = 77 n_conditions = 13 boundary = ceiling(n_conditions / 2) N_samples = (n_cell_types * trials * n_conditions * (n_cell_types - 1))/2 first_UMI = numeric(N_samples); first_type = character(N_samples); second_type = character(N_samples) UMI_tot = 1000; UMI_step = UMI_tot / (n_conditions-1) UMI_tot = round(UMI_step * (n_conditions-1)); UMI1_vec = round(0:(n_conditions-1)*UMI_step) nUMI = ref@nUMI UMI1_vec = round(0:(n_conditions-1)*UMI_step) UMI1 <- rep(UMI1_vec,each = trials) UMI_vect <- rep(UMI_tot, N_samples) cell_type_names <- levels(ref@cell_types)
puck.original <- SpatialRNA(NULL, ref@counts, nUMI = ref@nUMI, T) print('create.RCTD: getting regression differentially expressed genes: ') cell_type_info <- list(info = process_cell_type_info(ref, cell_type_names, CELL_MIN = 25), renorm = NULL) gene_list= get_de_genes(cell_type_info$info, puck.original) puck <- generate_sim_puck(common_cell_types, gene_list, ref) reference <- restrict_reference_cell_types(ref, common_cell_types) # only relevant cell types myRCTD <- create.RCTD(puck, reference, max_cores = 1) myRCTD <- run.RCTD(myRCTD) saveRDS(myRCTD, file.path(resultsdir, 'myRCTD.rds'))
myRCTD <- readRDS(file.path(resultsdir, 'myRCTD.rds')) puck <- myRCTD@originalSpatialRNA sigma_init <- as.character(100*myRCTD@internal_vars$sigma) results <- myRCTD@results norm_weights = sweep(results$weights, 1, rowSums(results$weights), '/') beta <- as.matrix(norm_weights) cell_type_info <- myRCTD@cell_type_info$info
de_gt_vals <- exp((-3:3)/5) gt_val <- 1 COND <- length(de_gt_vals) # number of conditions for DE REPLICATES <- 100 # 100 alpha_res <- array(0, dim = c(REPLICATES, 2, COND)) thresh <- 5 #flip 50% of them region_base <- c(rep(0, floor(N_samples/2)), rep(1, ceiling(N_samples/2))) region <- region_base N_int <- 10 region[((1:N_samples) %% N_int) < thresh] <- 1 - region[((1:N_samples) %% 10) < thresh] de_gene <- 'Nrxn3' for(condi in 1:COND) { de_ground_truth <- c(gt_val, de_gt_vals[condi]) de_res <- sim_SPARK(de_ground_truth, REPLICATES, de_gene, ref, N_samples, nUMI, common_cell_types, UMI1, UMI_tot, sigma_init, puck, myRCTD@cell_type_info, beta,UMI_vect, other_methods = T, region_orig = region) alpha_res[,, condi] <- de_res } saveRDS(alpha_res, 'spark-contam.rds')
alpha_res <- readRDS(file.path(resultsdir, 'other_methods_contamination/alpha_res.rds')) dec_res <- readRDS(file.path(resultsdir, 'other_methods_contamination/dec_res.rds')) bulk_res <- readRDS(file.path(resultsdir, 'other_methods_contamination/bulk_res.rds')) sing_res <- readRDS(file.path(resultsdir, 'other_methods_contamination/sing_res.rds')) spark_res <- readRDS(file.path(resultsdir, 'spark/spark-contam.rds')) REPLICATES <- 100 de_gt_vals <- exp((-3:3)/5) gt_val <- 1 plot_df <- as.data.frame(cbind( log(gt_val), log(de_gt_vals), apply(alpha_res,3:2,mean), apply(spark_res, 3:2, mean), apply(dec_res,3:2,mean), apply(bulk_res,2,mean), apply(sing_res,3:2,mean), apply(alpha_res,3:2,sd),apply(spark_res, 3:2, sd), apply(dec_res,3:2,sd), apply(bulk_res,2,sd), apply(sing_res,3:2,sd))) colnames(plot_df) <- c('log_fc_b', 'log_fc_p', 'CSIDE_b', 'CSIDE_p', 'SPARK_b', 'SPARK_p','dec_b', 'dec_p', 'bulk','sing_b','sing_p', 'sd_CSIDE_b','sd_CSIDE_p','sd_SPARK_b','sd_SPARK_p', 'sd_dec_b','sd_dec_p','sd_bulk', 'sd_sing_b','sd_sing_p') graph_df <- plot_df %>% select(log_fc_p, CSIDE_b, sd_CSIDE_b) %>% rename(estimate=CSIDE_b, sd=sd_CSIDE_b) %>% mutate(type = 'CSIDE_b') %>% bind_rows(plot_df %>% select(log_fc_p, CSIDE_p, sd_CSIDE_p) %>% rename(estimate=CSIDE_p, sd=sd_CSIDE_p) %>% mutate(type = 'CSIDE_p')) %>% bind_rows(plot_df %>% select(log_fc_p, SPARK_p, sd_SPARK_p) %>% rename(estimate=SPARK_p, sd=sd_SPARK_p) %>% mutate(type = 'SPARK_p')) %>% bind_rows(plot_df %>% select(log_fc_p, SPARK_b, sd_SPARK_b) %>% rename(estimate=SPARK_b, sd=sd_SPARK_b) %>% mutate(type = 'SPARK_b')) %>% bind_rows(plot_df %>% select(log_fc_p, dec_b, sd_dec_b) %>% rename(estimate=dec_b, sd=sd_dec_b) %>% mutate(type = 'dec_b')) %>% bind_rows(plot_df %>% select(log_fc_p, dec_p, sd_dec_p) %>% rename(estimate=dec_p, sd=sd_dec_p) %>% mutate(type = 'dec_p')) %>% bind_rows(plot_df %>% select(log_fc_p, bulk, sd_bulk) %>% rename(estimate=bulk, sd=sd_bulk) %>% mutate(type = 'bulk')) %>% bind_rows(plot_df %>% select(log_fc_p, sing_p, sd_sing_p) %>% rename(estimate=sing_p, sd=sd_sing_p) %>% mutate(type = 'sing_p')) %>% bind_rows(plot_df %>% select(log_fc_p, sing_b, sd_sing_b) %>% rename(estimate=sing_b, sd=sd_sing_b) %>% mutate(type = 'sing_b')) graph_df[,1:3] <- graph_df[,1:3]*log(exp(1),2) #scale to log 2 p1 <- ggplot(graph_df[graph_df$type %in% c('CSIDE_p', 'dec_p', 'bulk', 'sing_p','SPARK_p'),], aes(x=log_fc_p,y=estimate,group=type, color = type)) + geom_line() + theme_classic() + geom_abline(slope=1,intercept=0) + geom_errorbar(aes(ymin = estimate - sd*1.96/sqrt(REPLICATES),ymax=estimate+sd*1.96/sqrt(REPLICATES)), width = 0.03) + ylab('Estimated cell type A DE')+ xlab('Differential expression in cell type A') + ggplot2::scale_color_manual("Method",values = c('#0072B2','#009E73','#D55E00', '#EFCB00','#CC79A7'), breaks = c('bulk', 'dec_p' ,'CSIDE_p', 'sing_p','SPARK_p'), labels = c('Bulk','Decompose','CSIDE','Single','SPARK')) + scale_x_continuous(breaks = unique(graph_df$log_fc_p), labels = round(unique(graph_df$log_fc_p),2)) p2 <- ggplot(graph_df[graph_df$type %in% c('CSIDE_b', 'dec_b', 'bulk', 'sing_b', 'SPARK_b'),], aes(x=log_fc_p,y=estimate,group=type, color = type)) + geom_line() + theme_classic() + geom_abline(slope=0,intercept=0) + geom_errorbar(aes(ymin = estimate - sd*1.96/sqrt(REPLICATES),ymax=estimate+sd*1.96/sqrt(REPLICATES)), width = 0.03)+ ylab('Estimated cell type B DE')+ xlab('Differential expression in the other cell type')+ ggplot2::scale_color_manual("Method",values = c('#0072B2','#009E73','#D55E00','#EFCB00', '#CC79A7'), breaks = c('bulk', 'dec_b' ,'CSIDE_b', 'sing_b','SPARK_b'), labels = c('Bulk','Decompose','CSIDE', 'Single','SPARK')) + scale_x_continuous(breaks = unique(graph_df$log_fc_p), labels = round(unique(graph_df$log_fc_p),2)) ggarrange(p1,p2, nrow = 2)
de_ground_truth <- c(1,1) de_gene <- 'Astn2' de_gt_vals <- 2:8 #cell type proportions N_int <- 10 region_base <- c(rep(0, floor(N_samples/2)), rep(1, ceiling(N_samples/2))) COND <- length(de_gt_vals) # number of conditions for DE REPLICATES <- 100 alpha_res <- array(0, dim = c(REPLICATES, 2, COND)) for(condi in 1:COND) { print(condi) thresh <- de_gt_vals[condi] region <- region_base region[((1:N_samples) %% N_int) < thresh] <- 1 - region[((1:N_samples) %% 10) < thresh] de_res <- sim_SPARK(de_ground_truth, REPLICATES, de_gene, ref, N_samples, nUMI, common_cell_types, UMI1, UMI_tot, sigma_init, puck, myRCTD@cell_type_info, beta, UMI_vect, other_methods = T,region_orig = region) alpha_res[,, condi] <- de_res } saveRDS(alpha_res,'spark_proportion.rds')
REPLICATES <- 100 de_ground_truth <- c(1,1) de_gt_vals <- 2:8 alpha_res <- readRDS(file.path(resultsdir, 'other_methods_proportion/alpha_res.rds')) dec_res <- readRDS(file.path(resultsdir, 'other_methods_proportion/dec_res.rds')) bulk_res <- readRDS(file.path(resultsdir, 'other_methods_proportion/bulk_res.rds')) sing_res <- readRDS(file.path(resultsdir, 'other_methods_proportion/sing_res.rds')) spark_res <- readRDS(file.path(resultsdir, 'spark/spark_proportion.rds')) N_int <- 10 ct_prop <- de_gt_vals / N_int plot_df <- as.data.frame(cbind(ct_prop, log(de_ground_truth[1]), log(de_ground_truth[2]), apply(alpha_res,3:2,mean), apply(spark_res, 3:2, mean),apply(dec_res,3:2,mean), apply(bulk_res,2,mean), apply(sing_res,3:2,mean), apply(alpha_res,3:2,sd), apply(spark_res, 3:2, sd),apply(dec_res,3:2,sd), apply(bulk_res,2,sd), apply(sing_res,3:2,sd))) colnames(plot_df) <- c('ct_prop','log_fc_b', 'log_fc_p', 'CSIDE_b', 'CSIDE_p','SPARK_b', 'SPARK_p', 'dec_b', 'dec_p', 'bulk', 'sing_b', 'sing_p', 'sd_CSIDE_b','sd_CSIDE_p','sd_SPARK_b','sd_SPARK_p', 'sd_dec_b','sd_dec_p','sd_bulk', 'sd_sing_b', 'sd_sing_p') graph_df <- plot_df %>% select(ct_prop, CSIDE_b, sd_CSIDE_b) %>% rename(estimate=CSIDE_b, sd=sd_CSIDE_b) %>% mutate(type = 'CSIDE_b') %>% bind_rows(plot_df %>% select(ct_prop, CSIDE_p, sd_CSIDE_p) %>% rename(estimate=CSIDE_p, sd=sd_CSIDE_p) %>% mutate(type = 'CSIDE_p')) %>% bind_rows(plot_df %>% select(ct_prop, SPARK_p, sd_SPARK_p) %>% rename(estimate=SPARK_p, sd=sd_SPARK_p) %>% mutate(type = 'SPARK_p')) %>% bind_rows(plot_df %>% select(ct_prop, SPARK_b, sd_SPARK_b) %>% rename(estimate=SPARK_b, sd=sd_SPARK_b) %>% mutate(type = 'SPARK_b')) %>% bind_rows(plot_df %>% select(ct_prop, dec_b, sd_dec_b) %>% rename(estimate=dec_b, sd=sd_dec_b) %>% mutate(type = 'dec_b')) %>% bind_rows(plot_df %>% select(ct_prop, dec_p, sd_dec_p) %>% rename(estimate=dec_p, sd=sd_dec_p) %>% mutate(type = 'dec_p')) %>% bind_rows(plot_df %>% select(ct_prop, bulk, sd_bulk) %>% rename(estimate=bulk, sd=sd_bulk) %>% mutate(type = 'bulk')) %>% bind_rows(plot_df %>% select(ct_prop, sing_p, sd_sing_p) %>% rename(estimate=sing_p, sd=sd_sing_p) %>% mutate(type = 'sing_p')) %>% bind_rows(plot_df %>% select(ct_prop, sing_b, sd_sing_b) %>% rename(estimate=sing_b, sd=sd_sing_b) %>% mutate(type = 'sing_b')) graph_df[,2:3] <- graph_df[,2:3]*log(exp(1),2) #scale to log 2 graph_df$ct_prop <- 2*(graph_df$ct_prop - 0.5) p1 <- ggplot(graph_df[graph_df$type %in% c('CSIDE_p', 'dec_p', 'bulk', 'sing_p','SPARK_p'),], aes(x=ct_prop,y=estimate,group=type, color = type)) + geom_line() + theme_classic() + geom_abline(slope=0,intercept=0) + geom_errorbar(aes(ymin = estimate - sd*1.96/sqrt(REPLICATES),ymax=estimate+sd*1.96/sqrt(REPLICATES)), width = 0.03) + ylab('Estimated cell type A DE')+ xlab('Difference of mean cell type proportion across regions') + ggplot2::scale_color_manual("Method",values = c('#0072B2','#009E73','#D55E00','#EFCB00', '#CC79A7'), breaks = c('bulk', 'dec_p' ,'CSIDE_p', 'sing_p', 'SPARK_p'), labels = c('Bulk','Decompose','CSIDE','Single', 'SPARK')) + scale_x_continuous(breaks = unique(graph_df$ct_prop), labels = round(unique(graph_df$ct_prop),2)) p2 <- ggplot(graph_df[graph_df$type %in% c('CSIDE_b', 'dec_b', 'bulk', 'sing_b', 'SPARK_b'),], aes(x=ct_prop,y=estimate,group=type, color = type)) + geom_line() + theme_classic() + geom_abline(slope=0,intercept=0) + geom_errorbar(aes(ymin = estimate - sd*1.96/sqrt(REPLICATES),ymax=estimate+sd*1.96/sqrt(REPLICATES)), width = 0.03) + ylab('Estimated cell type B DE')+ xlab('Difference of mean cell type proportion across regions') + ggplot2::scale_color_manual("Method",values = c('#0072B2','#009E73','#D55E00','#EFCB00', '#CC79A7'), breaks = c('bulk', 'dec_b' ,'CSIDE_b','sing_b', 'SPARK_b'), labels = c('Bulk','Decompose','CSIDE','Single','SPARK'))+ scale_x_continuous(breaks = unique(graph_df$ct_prop), labels = round(unique(graph_df$ct_prop),2)) ggarrange(p1,p2, nrow = 2)
gene_list <- rownames(myRCTD@spatialRNA@counts) N_genes <- 15 for(non_null in c(T,F)) { high_genes <- gene_list[which(cell_type_info[[1]][gene_list, common_cell_types][,1] > 3e-4 & cell_type_info[[1]][gene_list, common_cell_types][,2] > 3e-4)] if(T) high_genes <- high_genes[which(cell_type_info[[1]][high_genes, common_cell_types][,1] < 3e-2 & cell_type_info[[1]][high_genes, common_cell_types][,2] < 3e-2)] set.seed(123) cur_gene_list <- sample(high_genes,N_genes) if(!non_null) { REPLICATES <- 500 cur_gene_list <- sample(high_genes, N_genes) } else { REPLICATES <- 100 } e_all <- array(0, dim = c(REPLICATES,2,length(cur_gene_list))) s_all <- array(0, dim = c(REPLICATES,2,length(cur_gene_list))) de_ground_truth <- c(1,1) if(non_null) { de_vals <- (((1:length(cur_gene_list)) - 1)/(length(cur_gene_list) - 1))*2-1 } else { de_vals <- rep(0, length(cur_gene_list)) } for(i in 1:length(cur_gene_list)) { print(i) de_gene <- cur_gene_list[i] de_ground_truth[2] <- exp(de_vals[i]) de_res <- sim_CSIDE(de_ground_truth, REPLICATES, de_gene, ref, N_samples, nUMI, common_cell_types, UMI1, UMI_tot, sigma_init, puck, myRCTD@cell_type_info, beta, UMI_vect) e_all[,, i] <- de_res$e_res s_all[,,i] <- de_res$s_res } if(non_null) { saveRDS(e_all, file.path(resultsdir, 'non_null_mean/e_all.rds')) saveRDS(s_all, file.path(resultsdir, 'non_null_mean/s_all.rds')) } else { saveRDS(e_all, file.path(resultsdir, 'null_mean/e_all.rds')) saveRDS(s_all, file.path(resultsdir, 'null_mean/s_all.rds')) } }
gene_list <- rownames(myRCTD@spatialRNA@counts) high_genes <- gene_list[which(cell_type_info[[1]][gene_list, common_cell_types][,1] > 3e-4 & cell_type_info[[1]][gene_list, common_cell_types][,2] > 3e-4)] N_genes <- 15 REPLICATES <- 500 non_null <- F set.seed(123) cur_gene_list <- sample(high_genes,N_genes) if(!non_null) { cur_gene_list <- sample(high_genes, N_genes) } de_vals <- rep(0, length(cur_gene_list)) if(non_null) { e_all <- readRDS(file.path(resultsdir, 'non_null_mean/e_all.rds')) s_all <- readRDS(file.path(resultsdir, 'non_null_mean/s_all.rds')) } else { e_all <- readRDS(file.path(resultsdir, 'null_mean/e_all.rds')) s_all <- readRDS(file.path(resultsdir, 'null_mean/s_all.rds')) } se <- apply(e_all[,2,], 2,sd)/sqrt(REPLICATES) results <- colMeans(e_all[,2,]) plot_df <- data.frame(cur_gene_list[1:length(results)], results, se, de_vals) colnames(plot_df) <- c('gene', 'mean', 'se', 'de') plot_df[,c('mean','se')] <- plot_df[,c('mean','se')] * log(exp(1),2) # rescale to log 2 if(! non_null) { p <- ggplot(plot_df,aes(x=gene, y = mean)) + geom_point() + geom_errorbar(aes(x=gene,ymin = mean-1.96*se, ymax = mean + 1.96*se), width = .2)+ geom_hline(yintercept = 0) + theme_classic() + theme(axis.text.x = element_text(angle=25,hjust = 1)) + xlab('Gene') + ylab('Log CSIDE estimated differential expression')#+ ggrepel::geom_label_repel(aes(label = gene),nudge_x = 0.15,na.rm = TRUE) p } else { p <- ggplot(plot_df,aes(x=de, y = mean)) + geom_point() + geom_errorbar(aes(ymin = mean-1.96*se, ymax = mean + 1.96*se), width = 0.05) + geom_line(aes(x = de, y = de)) + theme_classic() + ggrepel::geom_label_repel(aes(label = gene),nudge_x = 0.15,na.rm = TRUE) p }
z_all <- e_all / s_all p_res <- (2*(1 - pnorm(abs(z_all)))) alpha_vals = c(.0025,.005,.01, .02, .03, .05, .1, .2) enrichment = numeric(length(alpha_vals)) se = numeric(length(alpha_vals)) N_trials <- length(p_res) for(i in 1:length(alpha_vals)) { alpha <- alpha_vals[i] p_curr <- (sum(p_res < alpha) / (N_trials)) enrichment[i] <- p_curr / alpha se[i] <- sqrt(p_curr*(1-p_curr)/N_trials)/alpha } plot_df <- data.frame(alpha_vals, enrichment, se) p1 <- ggplot(plot_df, aes(x=alpha_vals,y = log(enrichment,2))) + geom_point() + ylim(c(-1.5,1.5)) + theme_classic() + geom_hline(yintercept = 0, linetype = 'dotted') + geom_errorbar(aes(ymin = log(enrichment - 1.96*se,2), ymax = log(enrichment + 1.96*se,2)), width = .005) + xlab('Significance level') + ylab('Log ratio of observed vs expected false positive rate') p1
errors <- sweep(e_all,3,de_vals,'-') vars <- s_all^2 errors <- errors[order(vars)]^2 vars <- vars[order(vars)] bin_size <- 500 N_bins <- length(vars) / bin_size pred_list <- numeric(N_bins) obs_list <- numeric(N_bins) se_list <- numeric(N_bins) for(i in 1:N_bins) { offset <- bin_size * (i-1) pred_list[i] <- mean(vars[(1 + offset):(100+offset)]) obs_list[i] <- mean(errors[(1 + offset):(100+offset)]) se_list[i] <- sd(errors[(1 + offset):(100+offset)])/sqrt(bin_size) } plot_df <- data.frame(pred_list, obs_list, se_list) colnames(plot_df) <- c('pred', 'obs', 'se') plot_df <- plot_df * log(exp(1),2) p <- ggplot(plot_df) + geom_point(aes(sqrt(pred), sqrt(obs))) + geom_errorbar(aes(x = sqrt(pred), ymin = sqrt(obs - 1.96*se), ymax = sqrt(obs + 1.96*se))) + geom_abline() + theme_classic() + xlim(c(0,0.4)) + ylim(c(0,0.4)) + xlab('CSIDE predicted standard error') + ylab('Average observed standard error') p
gene_list <- rownames(myRCTD@spatialRNA@counts) high_genes <- gene_list[which(cell_type_info[[1]][gene_list, common_cell_types][,1] > 3e-4 & cell_type_info[[1]][gene_list, common_cell_types][,2] > 3e-4)] high_genes <- high_genes[which(cell_type_info[[1]][high_genes, common_cell_types][,1] < 3e-2 & cell_type_info[[1]][high_genes, common_cell_types][,2] < 3e-2)] REPLICATES <- 100 DE_CONDITIONS <- 7 de_vals <- (((1:DE_CONDITIONS) - 1)/(DE_CONDITIONS - 1))*2-1 de_ground_truth <- c(1,1) cur_gene_list <- high_genes NUM_CELLS <- c(250, 500, 1000) e_all <- array(0, dim = c(REPLICATES,2,length(cur_gene_list), length(NUM_CELLS), DE_CONDITIONS)) s_all <- array(0, dim = c(REPLICATES,2,length(cur_gene_list), length(NUM_CELLS), DE_CONDITIONS)) dimnames(e_all) <- list(NULL,NULL,high_genes,NULL,NULL) dimnames(s_all) <- list(NULL,NULL,high_genes,NULL,NULL) for(k in 1:DE_CONDITIONS) { print(paste("DE",k)) for(j in 1:length(NUM_CELLS)) { print(paste('Ncells',j)) for(i in 1:length(cur_gene_list)) { print(paste('gene',i)) de_gene <- cur_gene_list[i] de_ground_truth[2] <- exp(de_vals[k]) de_res <- sim_CSIDE(de_ground_truth, REPLICATES, de_gene, ref, N_samples, nUMI, common_cell_types, UMI1, UMI_tot, sigma_init, puck, myRCTD@cell_type_info, beta, UMI_vect, subset_cells = NUM_CELLS[j], regularize_expr = T) e_all[,, i, j , k] <- de_res$e_res s_all[,,i, j, k] <- de_res$s_res } } } saveRDS(e_all, file.path(resultsdir, 'power/e_all.rds')) saveRDS(s_all, file.path(resultsdir, 'power/s_all.rds'))
NUM_CELLS <- c(250, 500, 1000) DE_CONDITIONS <- 7 de_vals <- (((1:DE_CONDITIONS) - 1)/(DE_CONDITIONS - 1))*2-1 gene_list <- rownames(myRCTD@spatialRNA@counts) high_genes <- gene_list[which(cell_type_info[[1]][gene_list, common_cell_types][,1] > 3e-4 & cell_type_info[[1]][gene_list, common_cell_types][,2] > 3e-4)] high_genes <- high_genes[which(cell_type_info[[1]][high_genes, common_cell_types][,1] < 3e-2 & cell_type_info[[1]][high_genes, common_cell_types][,2] < 3e-2)] e_all <- readRDS(file.path(resultsdir, 'power/e_all.rds')) s_all <- readRDS(file.path(resultsdir, 'power/s_all.rds')) z_all <- e_all / s_all p_all <- 2*(1 - pnorm(abs(z_all))) alpha <- 0.01 power_estimates <- apply(p_all[,2,,,] < 0.01,c(2,3,4),mean) purk_expr <- log(cell_type_info[[1]][high_genes,'Purkinje']) plot_df <- data.frame(purk_expr, rowMeans(power_estimates[,1,])) colnames(plot_df) <- c('expr','power') R2 <- cor(plot_df$expr, plot_df$power)^2 p1 <- ggplot(plot_df, aes(x = expr, y = power)) + geom_point() + ggtitle(paste('R2=',R2)) + ylim(c(0,1)) + theme_classic() my_order <- order(purk_expr) pre_df <- as.data.frame(t(power_estimates[my_order,1,])) pre_df$de <- de_vals pre_df$ncell <- NUM_CELLS[1] plot_df1 <- reshape2::melt(pre_df, id = c('de', 'ncell')) pre_df <- as.data.frame(t(power_estimates[my_order,2,])) pre_df$de <- de_vals pre_df$ncell <- NUM_CELLS[2] plot_df2 <- reshape2::melt(pre_df, id = c('de', 'ncell')) pre_df <- as.data.frame(t(power_estimates[my_order,3,])) pre_df$de <- de_vals pre_df$ncell <- NUM_CELLS[3] plot_df3 <- reshape2::melt(pre_df, id = c('de', 'ncell')) plot_df <- rbind(plot_df1, plot_df2, plot_df3) norm_purk_expr <- purk_expr - min(purk_expr) norm_purk_expr <- norm_purk_expr / max(norm_purk_expr) plot_df <- rbind(plot_df, data.frame('de' = 0, 'ncell' = "Expression", 'variable' = high_genes,'value' = norm_purk_expr)) plot_df$ncell <- factor(plot_df$ncell, levels = c('Expression','250','500','1000')) plot_df$label = round(plot_df$value, 2) plot_df$label[plot_df$ncell == 'Expression'] <- NA plot_df$de <- plot_df$de*log(exp(1),2) my_labels <- names(table(plot_df$de)) p2 <- ggplot(plot_df[plot_df$ncell != 'Expression',], aes(y = variable, x = de, fill = value)) + facet_wrap(ncell ~., nrow = 1) + geom_raster() + theme_classic() + scale_fill_continuous("Power",high = "#132B43", low = "#56B1F7") + geom_text(aes(label = label), size = 2, color = 'white') + scale_x_continuous(breaks=round(as.numeric(my_labels),2)) + ylab("Gene") + xlab("Cell type A differential expression") + theme(axis.ticks.y = element_blank(), axis.text.y = element_blank()) p3 <- ggplot(plot_df[plot_df$ncell == 'Expression',], aes(y = variable, x = de, fill = value)) + facet_wrap(ncell ~., nrow = 1) + geom_raster() + theme_classic() + scale_x_continuous(breaks=round(as.numeric(my_labels),2)) +scale_fill_continuous("Log cell type A expression",high = pals::brewer.reds(20)[20], low = pals::brewer.reds(20)[2], breaks = c(0,1), labels =round(c(min(purk_expr), max(purk_expr))*log(exp(1),2),2)) + xlab("") + ylab("Gene")+ theme(axis.ticks.y = element_blank(), axis.text.y = element_blank()) ggarrange(p2,p3,nrow = 1, widths = c(3.3,1))
p_my <- p_all[,2,,,] ind <- 3 # DE alpha <- 0.05 ncells <- 3 alpha_vals <- c((1:1000)/1000) alpha_vals <- c(alpha_vals / 1000^3, alpha_vals / 1000^2, alpha_vals / 1000, alpha_vals) all_res <- array(dim = c(2,3,4,length(alpha_vals))) # fpr/tpr ncells ind alpha_vals for(k in 1:2) { for(ncells in 1:3) { for(ind in 1:4) { fpr <- numeric(length(alpha_vals)) tpr <- numeric(length(alpha_vals)) for(i in 1:length(alpha_vals)) { alpha <- alpha_vals[i] all_res[2,ncells,ind,i] <- mean(p_my[,,ncells,c(ind, 8 - ind)] < alpha) all_res[1,ncells,ind,i] <- mean(p_my[,,ncells,4] < alpha) } } } } plot_df <- melt(all_res[,,1:3,]) plot_df <- cbind(plot_df[(1:(dim(plot_df)[1]/2))*2,], plot_df[(1:(dim(plot_df)[1]/2))*2 - 1,]) plot_df <- plot_df[c(1,2,3,4,5,10)] colnames(plot_df) <- c('type', 'cells', 'ind', 'aind', 'tpr', 'fpr') #plot_df$ind <- factor(plot_df$ind) plot_df$cells <- (2^plot_df$cells*125) plot_df <- bind_rows(data.frame(type=c(2,2,2),cells=c(250,500,1000),ind=c(3,3,3),aind=c(3,3,3),tpr=c(0,0,0),fpr=c(0,0,0)), plot_df) plot_df$ind <- factor(plot_df$ind) plot_df$cells <- factor(plot_df$cells) ggplot(plot_df, aes(x=fpr, y = tpr, color = ind)) + geom_line() + facet_wrap(cells ~., nrow = 1) + theme_classic() + xlab('False positive rate') + ylab('True positive rate') + scale_color_manual("True DE",values = c('#0072B2','#009E73','#D55E00'), labels = c(1.44,.96,.48))+ coord_fixed() + geom_abline(lty = 'dashed') + xlim(c(0,1)) + ylim(c(0,1)) #plot(fpr, tpr, ylim = c(0,1), main = ind)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.