We can get rid of some of the batch effects by running tSNE on the SDA scores rather than counts directly
set.seed(1234) # s42? 28 - AV2? tsne_data3 <- Rtsne::Rtsne(SDAresults$scores[,-QC_fail_components], verbose=TRUE, pca=FALSE, perplexity = 50, max_iter=1000) # PCA_onSDA <- prcomp(SDAresults$scores) # # ggplot(data.table(PCA_onSDA$x[,1:4]), aes(PC1,PC2)) +geom_point() # add tsne to cell_data tsne_data_dt <- data.table(tsne_data3$Y) setnames(tsne_data_dt, c("Tsne1_QC1", "Tsne2_QC1")) tsne_data_dt$cell <- rownames(SDAresults$scores) setkey(tsne_data_dt, cell) cell_data[,Tsne1_QC1 := NULL] cell_data[,Tsne2_QC1 := NULL] setkey(cell_data, cell) cell_data <- merge(cell_data, tsne_data_dt) ggplot(cell_data, aes(Tsne1_QC1, Tsne2_QC1, color=log(library_size))) + geom_point(size=0.25) + scale_color_viridis(direction=-1) + theme(legend.position = "bottom") + ggtitle("t-SNE - library size") ggplot(cell_data, aes(Tsne1_QC1, Tsne2_QC1, color=experiment)) + geom_point(size=0.5) + theme(legend.position = "bottom") + ggtitle("t-SNE - experiment") + scale_color_manual(values = c(rep(brewer.pal(12,"Paired"),2),"black","grey")) #scale_colour_brewer(palette="Paired") ggplot(cell_data, aes(Tsne1_QC1, Tsne2_QC1, color=group)) + geom_point(size=0.5) + scale_color_manual(values = brewer.pal(11,"Paired")) + theme(legend.position = "bottom") + ggtitle("t-SNE - experiment")
component_tsne <- plot_grid(plotlist = create_grob_list(fn = print_tsne, input = c(11,32,40,26,37,45,31,33,2,5,38,13,42,39,20,30,35,15,17,18,34)), nrow=3) component_tsne pdf("../results/component_tsne.pdf", width=24, height=13.5) component_tsne dev.off() component_tsne <- plot_grid(plotlist = create_grob_list(fn = function(x) print_tsne(x, point_size = 0.5), input = component_order_dt[!grep("Single",name)][order(-pseudotime_average, na.last = F)]$component_number), nrow=6) pdf("../results/component_tsne_all.pdf", width=25, height=20) component_tsne dev.off()
Odd components:
grid.arrange(grobs=create_grob_list(fn = print_tsne, input = QC_fail_components), nrow=3, heights = c(1,1,1))
marker_genes <- c("Csf1r", "Dcn","Cyp11a1","Aard","Trim7","Gfra1","Nanos1","Esx1","Ctcfl","Prdm9","Ccnb3","Rhox2d","Piwil1","Ccna1","Ssxb1","Acrv1","Lrrd1","Fam71b","Tnp2","Prm2") marker_tsne <- plot_grid(plotlist = create_grob_list(fn = function(x){print_tsne(x,point_size = 0.25, predict = T) + scale_color_gradient2(mid="lightgrey", high="midnightblue", low=muted("red"))}, input = marker_genes), ncol = 5) pdf("../results/markers.pdf", width=9, height=6) marker_tsne dev.off()
Check Tsne is stable over different seeds
library(SDAtools) library(ggplot2) library(data.table) SDAresults <- load_results(results_folder = "../results/conradV3_sda_1/", data_path = "../data/conradV3/") rownames(SDAresults$loadings[[1]]) <- paste0("V",1:50) rownames(SDAresults$pips[[1]]) <- paste0("V",1:50) str(SDAresults) do_tsne <- function(i){ seed <- sample(10^6,1) set.seed(seed) tsne_data5 <- Rtsne::Rtsne(SDAresults$scores[,-QC_fail_components], verbose=TRUE, pca=FALSE, perplexity = 50, max_iter=1000) tsne_data5$seed <- seed return(tsne_data5) } QC_fail_components <- c(22,6,25,29,12,28,41,1,46,4,8,14,9,43) tsne_repo_rand_seeds <- mclapply(1:20, function(x) do_tsne(x), mc.cores = 16, mc.silent=TRUE, mc.preschedule = FALSE) saveRDS(tsne_repo_rand_seeds, "../data/tmp_cache/tsne_repo_rand_seeds.rds") tsne_repo_rand_seeds <- readRDS("../data/tmp_cache/tsne_repo_rand_seeds.rds") #c(262279, 609512, 225666, 140187, 857033, 857033, 880642, 940864, 293105, 648670) selected <- which(sapply(tsne_repo_rand_seeds, function(x) x$seed) %in% c(225666, 262279, 648670)) #880642, rotations <- c(-165, -50, 45) #95,
Compared to t-SNE it preserves as much of the local, and more of the global data structure, with a shorter runtime.
# devtools::install_github("jlmelville/uwot") library(uwot) set.seed(116189) # nb need to set threads to 1 for repoduciability to work sda_umap <- umap(SDAresults$scores[,-QC_fail_components], n_neighbors = 100, n_threads = 1) colnames(sda_umap) <- c("Umap1","Umap2") saveRDS(sda_umap, file = "../data/tmp_cache/umap.rds") str(sda_umap) cell_data$Umap1 <- NULL cell_data$Umap2 <- NULL cell_data <- merge(cell_data, cbind(data.table(sda_umap), cell=rownames(SDAresults$scores)) ) saveRDS(cell_data, file = "../data/cache/cell_data.rds") print_tsne("PseudoTime",dim="Umap1",dim2="Umap2") print_tsne(5,dim="Umap1",dim2="Umap2") print_tsne(20,dim="Umap1",dim2="Umap2") print_tsne(34,dim="Umap1",dim2="Umap2") print_tsne(38,dim="Umap1",dim2="Umap2") # X activation print_tsne(48,dim="Umap1",dim2="Umap2") # odd pachytene clump print_tsne(9,dim="Umap1",dim2="Umap2") # respiration print_tsne(22,dim="Umap1",dim2="Umap2") # ribosomal print_tsne("Prdm9",dim="Umap1",dim2="Umap2")
tsne_repo_plots <- vector("list", 4) for(i in 1:3){ tsne_repo_plots[[i]] <- ggplot(data.table(rotate_matrix(tsne_repo_rand_seeds[[selected[i]]]$Y, rotations[i])), aes(V1, V2, colour=cell_data$PseudoTime[match(rownames(SDAresults$scores), cell_data$cell)])) + geom_point(size=0.1) + scale_color_viridis() + theme(legend.position = "none") + labs(x="tSNE 1", y="tSNE 2") + ggtitle(paste0("Random Seed: ",tsne_repo_rand_seeds[[selected[i]]]$seed," (rotated ",rotations[i],"°)")) } tsne_repo_plots[[4]] <- ggplot(cell_data, aes(Umap1, -Umap2, colour=PseudoTime)) + geom_point(size=0.1) + scale_color_viridis() + theme(legend.position = "none") + labs(x="UMAP 1", y="-UMAP 2") + ggtitle("UMAP, Random Seed: 116189") png("../results/tsne_random_seeds.png", width = 3000, height = 2400, res=300) plot_grid(plotlist = tsne_repo_plots, labels = "AUTO") dev.off()
by transposing cell scores matrix
set.seed(123) tsne_scores <- Rtsne::Rtsne(abs(t(SDAresults$scores)), verbose=TRUE, pca=FALSE, perplexity=2, max_iter=5000) set.seed(12345) comp_clusters <- data.table(tsne_scores$Y, Component = paste0("C",sort(component_labels)," ", names(sort(component_labels))), cluster = kmeans(tsne_scores$Y, 7, nstart = 3)$cluster) comp_clusters <- comp_clusters[data.table(cluster=c(1,2,3,4,5,6,7), Cluster=c("Spermatogonia","Leptotene/Zygotene","Somatic","Somatic (Sertoli)","Pachytene","Spermiogenesis","Round Spermatid (Acrosomal)")), on="cluster"] pdf("../results/tSNE_Components.pdf", width = 20, height=11) ggplot(comp_clusters, aes(V1, V2)) + geom_mark_ellipse(aes(fill = Cluster, label=Cluster),label.buffer = unit(25, 'mm'), label.fontsize=17) + geom_point() + geom_label_repel(aes(label=Component)) + scale_fill_brewer(palette = "Set1", guide=F) + labs(x="tSNE1", y="tSNE2") + expand_limits(y=-110) dev.off()
library(uwot) set.seed(116189) # nb need to set threads to 1 for repoduciability to work sda_umap <- umap(abs(t(SDAresults$scores)), n_neighbors = 2, n_threads = 1) colnames(sda_umap) <- c("Umap1","Umap2") comp_clusters <- data.table(sda_umap, Component = paste0("C",sort(component_labels)," ", names(sort(component_labels))), cluster = kmeans(sda_umap, 9, nstart = 3)$cluster) ggplot(comp_clusters, aes(Umap1, Umap2)) + geom_point(aes(colour=factor(cluster))) + geom_label_repel(aes(label=Component)) + scale_fill_brewer(palette = "Set1", guide=F) + labs(x="tSNE1", y="tSNE2")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.