For each entry in Cell by Gene count matrix, split read counts using binomial.
# for each entry in raw_data, split read counts using binomial raw_data <- readRDS("../data/merged-mouse_counts_tsne-QC_low-mt.rds") # create train data set.seed(562393) raw_data_train <- apply(raw_data, 2, function(y) sapply(y, function(x) rbinom(1, size = x, prob = 0.8))) raw_data_train2 <- Matrix(raw_data_train, sparse = TRUE) rm(raw_data_train) # create test data raw_data_test <- raw_data - raw_data_train2 # force compression raw_data_test <- Matrix(as.matrix(raw_data_test), sparse = TRUE) # sanity check sum(raw_data < raw_data_train2) saveRDS(raw_data_train2, "../data/imputation/raw_data_train.rds") saveRDS(raw_data_test,"../data/imputation/raw_data_test.rds")
Calculate information required to untransorm the data back to counts. We need the standard deviations we divided by and the library sizes.
raw_data <- readRDS("../data/raw_data_train.rds") dge <- t(raw_data) library(Matrix) library_size <- Matrix::colSums(dge) #gene_sizes <- rowSums(dge) lib_size_correction_factor <- library_size/median(sqrt(library_size)) dge <- t(t(dge)/lib_size_correction_factor) cell_subset <- library_size >= 200 gene_mean <- Matrix::rowMeans(dge[, cell_subset]) gene_subset <- data.table(gene_mean, names(gene_mean))[order(-gene_mean)][1:round(length(gene_mean) * 2/3)]$V2 dge <- dge[gene_subset, cell_subset] dge <- sqrt(10000 * dge) dge <- t(dge) colSdColMeans <- function(x, na.rm = TRUE) { if (na.rm) { n <- Matrix::colSums(!is.na(x)) } else { n <- nrow(x) } colVar <- Matrix::colMeans(x * x, na.rm = na.rm) - (Matrix::colMeans(x, na.rm = na.rm))^2 return(sqrt(colVar * n/(n - 1))) } sds <- colSdColMeans(dge) dge <- t(t(dge)/colSdColMeans(dge)) save(gene_subset, sds, lib_size_correction_factor, cell_subset, file = "../data/count_dge_recovery_information_train.rds") # this part can't be reversed as throwing information away # dge2 <- copy(dge) # sum(dge2 > 10) # dge2[dge2 > 10] <- 10 # dge2[dge2 < (-10)] <- (-10) # check reverse_normalisation works # tmp <- reverse_normalisation(dge) # identical(round(tmp), raw_data[cell_subset, gene_subset]) # TRUE
data <- readRDS("../data/imputation/raw_data_train.rds") normalised_data <- dropsim::normaliseDGE(t(data), center = FALSE, scale = TRUE, threshold = 10, min_library_size = 200, gene_subset = (2/3)) saveRDS(normalised_data,"../data/imputation/train_0.8_normalised.rds")
library(SDAtools) export_data(as.matrix(normalised_data), name = "merged_mouse_V3_SDA_train_0.8.data", path="") run_SDA(sda_location = "../sda", out = "../results/conradV3_sda_train_0.8_1", data = "merged_mouse_V3_SDA_train_0.8.data", num_comps = 50, max_iter = 10000, save_freq = 1000, set_seed = "79151 17351", N = 20036, eigen_parallel = TRUE, num_openmp_threads = 5, num_blocks = 5)
library(SDAtools) # predicted with mask results_train <- load_results(results_folder = "../data/SDA/conradV3_sda_train_0.8_1/", data_path = "../data/count_matrices/") #data/SDA #data/count_matrices/ rownames(results_train$loadings[[1]]) <- paste0("V",1:50) rownames(results_train$pips[[1]]) <- paste0("V",1:50) predicted_train <- results_train$scores %*% results_train$loadings[[1]] str(results_train) str(predicted_train)
library(flashpcaR) pca_training <- flashpca(as.matrix(normalised_data), ndim=50, stan="none", verbose=T, seed=42, do_loadings=T, divisor = "none") rownames(pca_training$projection) <- rownames(normalised_data) rownames(pca_training$loadings) <- colnames(normalised_data) saveRDS(pca_training,"../data/imputation/pca_training.rds")
pca_training <- readRDS("../data/imputation/pca_training.rds") predicted_train_PCA <- pca_training$projection %*% t(pca_training$loadings) str(predicted_train_PCA)
normalised_data <- readRDS("../data/imputation/train_0.8_normalised.rds") a <- Sys.time() ICA_decomp <- fastICA::fastICA(as.matrix(normalised_data), n.comp = 50, verbose = TRUE) b <- Sys.time() b-a # 10 hours saveRDS(ICA_decomp,"../data/imputation/ICA_decomp_training.rds") colnames(ICA_decomp$A) <- colnames(ICA_decomp$X) ICA_decomp$X_Center <- attributes(ICA_decomp$X)[3][[1]] saveRDS(ICA_decomp[-1],"../data/imputation/ICA_decomp_training_sansX.rds") ###### normalised_data <- readRDS("../data/imputation/train_0.8_normalised.rds") a <- Sys.time() ICA_decomp <- fastICA::fastICA(as.matrix(normalised_data), n.comp = 50, verbose = TRUE, method="C") b <- Sys.time()
ica_training <- readRDS("../data/imputation/ICA_decomp_training_sansX.rds") # X = SA predicted_train_ica <- ica_training$S %*% ica_training$A # add center back, which ICA removes automatically predicted_train_ica <- sweep(predicted_train_ica, 2, ica_training$X_Center, "+") str(predicted_train_ica)
normalised_data <- readRDS("../data/imputation/train_0.8_normalised.rds") NNMF_training <- NNLM::nnmf(as.matrix(normalised_data), 50, rel.tol = 1e-5) saveRDS(NNMF_training,"../data/imputation/NNMF_training.rds")
NNMF_training <- readRDS("../data/imputation/NNMF_training.rds") predicted_train_NNMF <- NNMF_training$W %*% NNMF_training$H str(predicted_train_NNMF)
normalised <- readRDS("../data/imputation/train_0.8_normalised.rds") library(Rmagic) MAGIC_data <- Rmagic::magic(as.matrix(normalised), n.jobs=3) MAGIC_data$params$data <- NULL saveRDS(MAGIC_data, file = "../data/imputation/MAGIC_data.rds") MAGIC_AUCs <- calculateAUCs(as.matrix(MAGIC_data$result)) saveRDS(MAGIC_AUCs[[1]], file = "../data/imputation/cellAUCs_MAGIC.rds") save(MAGIC_AUCs, file = "../data/imputation/imputed_ranks_MAGIC.rds") # MAGIC expression jumps around quite a lot... ggplot(cell_data[sda_predict("Prdm9")], aes(-PseudoTime, Prdm9)) + geom_point(stroke=0, alpha=0.3, size=0.5) ggplot(cell_data[nmf_predict("Prdm9")], aes(-PseudoTime, Prdm9)) + geom_point(stroke=0, alpha=0.3, size=0.5) ggplot(cell_data[magic_predict("Prdm9",MAGIC_data)], aes(-PseudoTime, Prdm9)) + geom_point(stroke=0, alpha=0.3, size=0.5) ggplot(cell_data[magic_predict("Ssxb1",MAGIC_data)], aes(-PseudoTime, Ssxb1)) + geom_point(stroke=0, alpha=0.3, size=0.5) ggplot(cell_data[nmf_predict("Ssxb1")], aes(-PseudoTime, Ssxb1)) + geom_point(stroke=0, alpha=0.3, size=0.5) ggplot(cell_data[sda_predict("Ssxb1")], aes(-PseudoTime, Ssxb1)) + geom_point(stroke=0, alpha=0.3, size=0.5)
MAGIC_data <- readRDS("../data/imputation/MAGIC_data.rds") results_train2 <- results_train results_train2$loadings[[1]] <- as.numeric(results_train2$pips[[1]]>0.5) * results_train2$loadings[[1]] results_train2$loadings[[1]] <- Matrix(results_train2$loadings[[1]], sparse = TRUE) format(object.size(MAGIC_data$result), units="Mb", digits=1) format(object.size(results_train[c("scores","loadings")]), units="Mb", digits=1) format(object.size(data), units="Mb", digits=1) format(object.size(results_train2[c("scores","loadings")]), units="Mb", digits=1) format(object.size(results_train[c("loadings")]), units="Mb", digits=1) format(object.size(results_train2[c("loadings")]), units="Mb", digits=1)
# pip3 install dca --user wells # pip3 install tensorflow --user wells # # from dca.api import dca # import anndata # # adata = anndata.read_text("../data/conradV3/V3_SDAmerged_mouse_V3_SDA.data") # # adata.obs_names = cellinfo.Cell # adata.var_names = geneinfo.Gene raw_data <- readRDS("../data/imputation/raw_data_train.rds") normalised <- readRDS("../data/imputation/train_0.8_normalised.rds") write.csv(as.matrix(normalised), "../data/imputation/counts_train_normalised.csv") write.csv(as.matrix(t(raw_data[rownames(normalised), colnames(normalised)])), "../data/imputation/counts_train.csv") library(data.table) write.csv(as.matrix(t(raw_data[rownames(normalised), colnames(normalised)])), "../data/imputation/counts_train.csv") str(fread("../data/imputation/counts_train.csv")) # ~.local/bin/dca ../data/imputation/counts_train.csv ../data/imputation/DCAresults dcapredict <- fread("../data/imputation/DCAresults/mean.tsv") dcapredict_m <- t(as.matrix(dcapredict[,-c("V1"),with=F])) colnames(dcapredict_m) <- dcapredict$V1 # Not sure this will work due to different normalisation within DCA DCA_AUCs <- calculateAUCs(dcapredict_m) saveRDS(DCA_AUCs[[1]], file = "../data/imputation/cellAUCs_DCA.rds") save(DCA_AUCs, file = "../data/imputation/imputed_ranks_DCA.rds")
Not sure calculateAUC will work due to different normalisation within DCA
# NB this function wastes time by recaculating train and average AUCs calculateAUCs <- function(predicted_train, raw_data_test_file="../data/imputation/raw_data_test.rds", raw_data_train_file="../data/imputation/raw_data_train.rds", count_recovery_file="../data/imputation/count_dge_recovery_information.rds", shift=F){ # Reverse Normalisation of training data load(count_recovery_file) cell_subset <- rownames(predicted_train) predicted_train <- reverse_normalisation(predicted_train, sign = TRUE, cells=cell_subset, lib_correction=lib_size_correction_factor, standard_dev=sds) mean(predicted_train<0) if(shift){ predicted_train <- predicted_train + abs(quantile(predicted_train, 1/1e3)) } predicted_train[predicted_train < 0] <- 0 mean(predicted_train==0) # Normalise each cell by total # row = cell, col = gene predicted_train <- predicted_train/rowSums(predicted_train) str(predicted_train) # rm(predicted_train_counts_0) # rm(predicted_train) # Load raw test & train data raw_data_test <- readRDS(raw_data_test_file) raw_data_test <- raw_data_test[rownames(predicted_train), colnames(predicted_train)] # row = cell, gene = col raw_data_train <- readRDS(raw_data_train_file) raw_data_train <- raw_data_train[rownames(predicted_train), colnames(predicted_train)] # AUC # randomise ordering, because gene order is from average high to low by default, leaking information set.seed(42) reorder = sample(1:ncol(raw_data_train)) # row = cell, gene = col av_exp_order <- names(sort(Matrix::colSums(raw_data_train), T)) predicted_train = predicted_train[,reorder] raw_data_test2 = raw_data_test[,reorder] raw_data_train2 = raw_data_train[,reorder] cumsum_predictF <- function(i){ testvec = raw_data_test2[i,] cumsum(testvec[order(predicted_train[i,], decreasing=T)]) / sum(testvec) } cumsum_trainF <- function(i){ testvec = raw_data_test2[i,] cumsum(testvec[order(raw_data_train2[i,], decreasing=T)]) / sum(testvec) } cumsum_meanF <- function(i){ testvec = raw_data_test2[i,] cumsum(testvec[av_exp_order]) / sum(testvec) #order(reorder) } cumsum_predict <- sapply(1:nrow(raw_data_train), cumsum_predictF) cumsum_train <- sapply(1:nrow(raw_data_train), cumsum_trainF) cumsum_mean <- sapply(1:nrow(raw_data_train), cumsum_meanF) cellAUCs <- data.table(cell = rownames(raw_data_train), predict = colSums(cumsum_predict) /ncol(raw_data_train), train = colSums(cumsum_train) /ncol(raw_data_train), mean = colSums(cumsum_mean) /ncol(raw_data_train)) return(list(cellAUCs, cumsum_predict, cumsum_train, cumsum_mean)) }
NB do this part on a server with lots of memory >16GB
SDA_AUCs <- calculateAUCs(predicted_train) saveRDS(SDA_AUCs[[1]], file = "../data/imputation/cellAUCs_SDA.rds") save(SDA_AUCs, file = "../data/imputation/imputed_ranks.rds") SDA_AUCs_shifted <- calculateAUCs(predicted_train, shift=T) saveRDS(SDA_AUCs_shifted[[1]], file = "../data/imputation/cellAUCs_SDA_shifted.rds") save(SDA_AUCs_shifted, file = "../data/imputation/imputed_ranks_SDA_shifted.rds") PCA_AUCs <- calculateAUCs(predicted_train_PCA) saveRDS(PCA_AUCs[[1]], file = "../data/imputation/cellAUCs_PCA.rds") save(PCA_AUCs, file = "../data/imputation/imputed_ranks_PCA.rds") ICA_AUCs <- calculateAUCs(predicted_train_ica) saveRDS(ICA_AUCs[[1]], file = "../data/imputation/cellAUCs_ICA.rds") save(ICA_AUCs, file = "../data/imputation/imputed_ranks_ICA.rds") NNMF_AUCs <- calculateAUCs(predicted_train_NNMF) saveRDS(NNMF_AUCs[[1]], file = "../data/imputation/cellAUCs_NNMF.rds") save(NNMF_AUCs, file = "../data/imputation/imputed_ranks_NNMF.rds")
cellAUCs <- readRDS("../data/imputation/cellAUCs_SDA.rds") density_auc <- get_density(cellAUCs$mean, cellAUCs$predict, kern=0.025) density_auc_t <- get_density(cellAUCs$train, cellAUCs$predict, kern=0.025) plot_train <- ggplot(cbind(cellAUCs, density_auc_t), aes(train, predict, color=density_auc_t)) + geom_point(size=0.1) + geom_abline(colour="red") + scale_colour_viridis(option="inferno") + theme_minimal() + theme(legend.position = "none") + labs(colour="Cell density", x="Cellwise Training Expression AUC", y="Imputed Expression AUC") plot_mean <- ggplot(cbind(cellAUCs, density_auc), aes(mean, predict, color=density_auc)) + geom_point(size=0.1) + geom_abline(colour="red") + scale_colour_viridis(option="inferno") + theme_minimal() + theme(legend.position = "none") + labs(colour="Cell density", x="Average Training Expression AUC", y="Imputed Expression AUC") plot_train plot_mean
cellAUCs <- readRDS("../data/imputation/cellAUCs_SDA.rds") cellAUCs_SDA_shifted <- readRDS("../data/imputation/cellAUCs_SDA_shifted.rds") cellAUCs_PCA <- readRDS("../data/imputation/cellAUCs_PCA.rds") cellAUCs_ICA <- readRDS("../data/imputation/cellAUCs_ICA.rds") cellAUCs_NNMF <- readRDS("../data/imputation/cellAUCs_NNMF.rds") cellAUCs_MAGIC <- readRDS("../data/imputation/cellAUCs_MAGIC.rds") #cellAUCs_DCA <- readRDS("../data/imputation/cellAUCs_DCA.rds") cellAUCs <- merge(cellAUCs, cellAUCs_PCA, by="cell", suffixes = c("",".PCA")) cellAUCs <- merge(cellAUCs, cellAUCs_ICA, by="cell", suffixes = c("",".ICA")) cellAUCs <- merge(cellAUCs, cellAUCs_NNMF, by="cell", suffixes = c("",".NNMF")) cellAUCs <- merge(cellAUCs, cellAUCs_MAGIC, by="cell", suffixes = c("",".MAGIC")) #cellAUCs <- merge(cellAUCs, cellAUCs_SDA_shifted, by="cell", suffixes = c("",".SDA_shifted")) cellAUCs <- cellAUCs[,c("cell","predict","train","mean","predict.PCA","predict.ICA","predict.NNMF","predict.MAGIC"),with=F] saveRDS(cellAUCs, "../data/imputation/cellAUCs.rds") ggplot(cellAUCs, aes(train, predict)) + geom_hex(bins=150) + scale_fill_viridis(option="inferno") + geom_abline(colour="red") + ylim(0.8,1) #ggplot(cellAUCs, aes(train, predict.SDA_shifted)) + geom_hex(bins=150) + scale_fill_viridis(option="inferno") + geom_abline(colour="red") + ylim(0.8,1)
# fraction cells better or not vs SDA cellAUCs[,mean(predict>predict.PCA)] # 0.74 cellAUCs[,mean(predict<predict.PCA)] # 0.26 cellAUCs[,mean(predict>predict.NNMF)] # 0.21 cellAUCs[,mean(predict<predict.NNMF)] # 0.79 # X vs Y plot ggplot(cellAUCs, aes(predict.PCA, predict)) + geom_point(stroke=0, size=1, alpha=0.3) + geom_abline(intercept = 0,slope=1,col='red') ggplot(cellAUCs, aes(predict.ICA, predict)) + geom_point(stroke=0, size=1, alpha=0.3) + geom_abline(intercept = 0,slope=1,col='red') ggplot(cellAUCs, aes(predict.NNMF, predict)) + geom_point(stroke=0, size=1, alpha=0.3) + geom_abline(intercept = 0,slope=1,col='red') # density difference plot ggplot(cellAUCs, aes(predict.PCA - predict)) + geom_density() ggplot(cellAUCs, aes(predict.ICA - predict)) + geom_density() ggplot(cellAUCs, aes(predict.NNMF - predict)) + geom_density() # Overlaping Densities plot tmp <- melt(cellAUCs, id="cell", variable.name = "Method", value.name = "AUC")[Method %in% c("predict","train","mean","predict.PCA","predict.ICA","predict.NNMF","predict.MAGIC")] tmp[,Method := gsub("predict\\.?","",Method)] tmp[,Method := gsub("train","Unimputed",Method)] tmp[,Method := gsub("mean","Mean_Cell",Method)] tmp[Method=="",Method := "SDA"] tmp[, Method := factor(Method, levels=c("Unimputed","Mean_Cell","SDA","NNMF","ICA","PCA","MAGIC"))] compare_imputation <- ggplot(tmp, aes(AUC, colour=Method)) + geom_density() + scale_color_manual(values=c("#E41A1C", "black",brewer.pal(7, "Set1")[-c(6,1)])) + theme(legend.position = "none") + facet_zoom(xlim = c(0.91,0.94), zoom.size = 1) saveRDS(compare_imputation, "../data/plots/compare_imputation.rds") pdf("../results/other_factorisations/compare_imputation.pdf") compare_imputation dev.off()
which cells are better worse in NNMF vs SDA
ggplot(cell_data[cellAUCs], aes(Tsne1_QC1, Tsne2_QC1, colour=predict.NNMF/predict < 0.99)) + geom_jitter(size=0.5, stroke=0, height = 0.25, width = 0.25) + scale_color_manual(values=c("lightgrey","red")) + theme_minimal() ggplot(cell_data[cellAUCs], aes(Tsne1_QC1, Tsne2_QC1, colour=predict.NNMF/predict > 1.01)) + geom_jitter(size=0.5, stroke=0, height = 0.25, width = 0.25) + scale_color_manual(values=c("lightgrey","red")) + theme_minimal()
All methods are similar
example_gene = "Smok2b" pred_exp_example <- data.table("Unimputed"=(normalised[,c(example_gene)]), cell=rownames(normalised)) setkey(pred_exp_example, cell) pred_exp_example <- pred_exp_example[sda_predict(example_gene, results_train, ".SDA")][nmf_predict(example_gene, NNMF_training, ".NNMF")][ica_predict(example_gene, ica_training, ".ICA")][pca_predict(example_gene, pca_training, ".PCA")][magic_predict(example_gene, MAGIC_data,".MAGIC")] setnames(pred_exp_example, names(pred_exp_example), gsub(paste0(example_gene,"."),"",names(pred_exp_example))) ex_imputed <- ggplot(melt(pred_exp_example, id.vars = "cell", value.name = example_gene, variable.name = "Method")[cell_data, on="cell"][!is.na(Method)], aes(-PseudoTime, get(example_gene), colour=Method)) + geom_point(stroke=0, size=0.3, alpha=0.5)+ scale_colour_manual(values = brewer.pal(7, "Set1")[-6])+ facet_wrap(~Method, ncol=1, scales = "free_y") + ylab(paste0(example_gene, " Imputed Expression")) + theme(legend.position = "none")
set.seed(42) sample_cells <- c(5749, sample(1:20036, size = 99, replace = F)) load(file = "../data/imputation/imputed_ranks_NNMF.rds") NNMF_cumsum_pred <- NNMF_AUCs[[2]][,sample_cells] load(file = "../data/imputation/imputed_ranks_ICA.rds") ICA_cumsum_pred <- ICA_AUCs[[2]][,sample_cells] load(file = "../data/imputation/imputed_ranks_PCA.rds") PCA_cumsum_pred <- cumsum_predict[,sample_cells] load(file = "../data/imputation/imputed_ranks.rds") SDA_cumsum_pred <- cumsum_predict[,sample_cells] load(file = "../data/imputation/imputed_ranks_MAGIC.rds") MAGIC_cumsum_pred <- MAGIC_AUCs[[2]][,sample_cells] saveRDS(list("Unimputed" = cumsum_train[,sample_cells], "Mean_Cell" = cumsum_mean[,sample_cells], "NNMF" = NNMF_AUCs[[2]][,sample_cells], "ICA" = ICA_AUCs[[2]][,sample_cells], "PCA" = PCA_cumsum_pred, "SDA" = SDA_cumsum_pred, "MAGIC" = MAGIC_cumsum_pred), "../data/imputation/imputed_ranks_sample_all_methods.rds")
imputed_ranks <- readRDS("../data/imputation/imputed_ranks_sample_all_methods.rds") plotCellAUC(1, imputed_ranks) plotCellAUC(4, imputed_ranks) plotCellAUC(6, imputed_ranks)
Which cells are better with / without imputation
vs the mean cell apprach
setkey(cellAUCs, cell) # which cells are best c <- ggplot(merge(cell_data, cellAUCs)[predict/mean>1], aes(Tsne1_QC1, Tsne2_QC1, colour=predict/mean)) + geom_point(size=0.3, stroke=0) + scale_color_viridis(direction = -1) + theme_classic(base_size=8) + theme(legend.position = "top", legend.justification = "right", legend.margin=margin(t = 0, unit='cm'), axis.line=element_blank(), axis.text.x=element_blank(), axis.text.y=element_blank(), axis.ticks=element_blank(), axis.title.x=element_blank(), axis.title.y=element_blank(), rect = element_rect(fill = "transparent"), panel.background = element_rect(fill = "transparent"), # bg of the panel plot.background = element_rect(fill = "transparent", color = NA)) + guides(colour=guide_colourbar(barwidth=5, barheight=0.5, title.position="top")) + labs(x="", y="", colour="Ratio of Imputed to\n Average Expression AUC") c2 <- ggplot(merge(cell_data, cellAUCs)[predict/mean>1], aes(Tsne1_QC1, Tsne2_QC1, colour=predict/mean)) + geom_point(size=1, stroke=0) + scale_color_viridis(direction = -1) + labs(x="tSNE1", y="tSNE2", colour="Ratio of Imputed to\n Average Expression AUC") + theme(legend.position = "bottom") + guides(colour=guide_colourbar(barwidth=15, barheight=0.5, title.position="top")) c
vs the no imputation
density_lib <- get_density(log(merge(cell_data, cellAUCs)$library_size), merge(cell_data, cellAUCs)[,predict/train], kern=0.07) ggplot(merge(cell_data, cellAUCs), aes(log(library_size), predict/train, colour=density_lib)) + geom_point(size=0.1)+ scale_colour_viridis() + theme_minimal() + geom_hline(yintercept = 1, colour="red") + labs(y="Ratio of SDA Imputed AUC\n to Unimputed AUC", x="Log Library Size (Total UMI Counts)") lib_size_plot <- ggplot(merge(cell_data, cellAUCs), aes(log10(library_size), predict/train, colour=-PseudoTime)) + geom_point(size=0.1)+ scale_colour_viridis(guide_colorbar(title="PseudoTime")) + theme_minimal() + scale_y_continuous(breaks=c(1.0,1.4,1.8,2.2))+ guides(colour=guide_colourbar(barwidth=10,label.position="bottom")) + theme(legend.position = "bottom") + geom_hline(yintercept = 1, colour="black") + labs(y="Ratio of SDA Imputed AUC\n to Unimputed AUC", x="Log10 Library Size (Total UMI Counts)") lib_size_plot
predict80 <- apply(cumsum_predict, 2, function(x) min(which(x>0.8)/nrow(cumsum_predict))) train80 <- apply(cumsum_train, 2, function(x) min(which(x>0.8)/nrow(cumsum_predict))) mean80 <- apply(cumsum_mean, 2, function(x) min(which(x>0.8)/nrow(cumsum_predict))) frac80 <- data.table(mean80, train80, predict80) density_frac80 <- get_density(frac80$mean80, frac80$predict80, kern=0.025) density_frac80_t <- get_density(frac80$train80, frac80$predict80, kern=0.025) ggplot(cbind(frac80, density_frac80_t), aes(train80, predict80, color=density_frac80_t)) + geom_point(size=0.1) + geom_abline(colour="red") + scale_colour_viridis() + theme_minimal() + theme(legend.position = "none") + labs(colour="Cell density", x="Cellwise Training Fraction of Genes for 80% Cumsum", y="Imputed Fraction of Genes for 80% Cumsum") ggplot(cbind(cellAUCs, density_frac80), aes(mean80, predict80, color=density_frac80)) + geom_point(size=0.1) + geom_abline(colour="red") + scale_colour_viridis() + theme_minimal() + theme(legend.position = "none") + labs(colour="Cell density", x="Average Training Fraction of Genes for 80% Cumsum", y="Imputed Fraction of Genes for 80% Cumsum")
# add tSNE subplot plot_mean_2 <- ggdraw() + draw_plot(plot_mean, 0, 0, 1, 1) + draw_plot(c, 0.5, 0.07, 0.5, 0.5)
genes_tmp <- c("Zbtb16","Prdm9","Piwil1","Ccna1","Ssxb1","Lrrd1","Gapdhs") imputation_plot <- imputed_vs_raw(genes_tmp) imputation_plot compare_imputation <- readRDS("../data/plots/compare_imputation.rds") right_panelA <- plot_grid(plotCellAUC(1, imputed_ranks[c(1:2,6,3:5,7)]) + scale_colour_manual(values=c("#E41A1C", "black",brewer.pal(7, "Set1")[-c(6,1)])), compare_imputation, ncol=2, labels=c("B","C")) right_panelB <- plot_grid(gonia_SDA_zoom, gonia_NNMF, ncol=2, rel_widths = c(3,4.5), labels = c("D","")) right_panelC <- plot_grid(Rhox2h_prediction, blood_SDA_vs_NNMF, ncol=2, rel_widths = c(3,2), labels = c("E","F")) right_panel <- plot_grid(right_panelA, right_panelB, right_panelC, ncol=1) pdf(width = 13, height = 11, file = "../results/imputation/imputation.pdf") plot_grid(imputation_plot, right_panel, labels = c("A",""), ncol=2) dev.off() #pdf(width = 4250, height = 2500, res = 300, file = "../results/imputation/imputationAUC2.png")
heatmapgrob <- readRDS("../data/plots/heatmapgrob.rds") right <- plot_grid(heatmapgrob, plot_grid(NULL, lib_size_plot),ncol=1, rel_heights = c(3,1), labels = c("B","C")) pdf("../results/Imputation_Supp.pdf", height = 12, width=15) plot_grid(ex_imputed, right, ncol=2, rel_widths = c(1,3), labels = c("A","")) dev.off()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.