library(data.table) data <- fread("../data/conradV3/V3_SDAmerged_mouse_V3_SDA.data") dimnames_conradV3 <- readRDS("../data/conradV3/V3_SDAmerged_mouse_V3_SDA_dimnames.rds") data <- as.matrix(data) rownames(data) <- dimnames_conradV3[[1]] colnames(data) <- dimnames_conradV3[[2]] Sys.time() nnmf_decomp <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5) Sys.time() nnmf_decomp saveRDS(nnmf_decomp,"../data/conradV3/nnmf_decomp_v1.rds") #nnmf_decomp$run.time["user.self"]/60 # 27.5 mins gc() # Try with different Beta values for regularisation Sys.time() nnmf_decomp2 <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5, beta = c(0,0,10)) Sys.time() saveRDS(nnmf_decomp2,"../data/conradV3/nnmf_decomp_v2.rds") Sys.time() nnmf_decomp3 <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5, beta = c(0,0,1e4)) Sys.time() saveRDS(nnmf_decomp3,"../data/conradV3/nnmf_decomp_v3.rds") Sys.time() nnmf_decomp4 <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5, beta = c(0,0,1e3)) Sys.time() saveRDS(nnmf_decomp4,"../data/conradV3/nnmf_decomp_v4.rds") Sys.time() nnmf_decomp5 <- NNLM::nnmf(as.matrix(data), 100, rel.tol = 1e-5) Sys.time() saveRDS(nnmf_decomp5,"../data/conradV3/nnmf_decomp_v5.rds") Sys.time() nnmf_decomp <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5, n.threads = 15, loss="mkl") Sys.time() nnmf_decomp saveRDS(nnmf_decomp,"../data/conradV3/nnmf_decomp_v6.rds")
[1] "2019-02-26 22:27:55 GMT" nnmf_decomp <- nnmf(as.matrix(data), 50, rel.tol = 1e-5)
0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----| ********|
Sys.time() [1] "2019-02-26 22:55:02 GMT" nnmf_decomp Non-negative matrix factorization: Algorithm: Sequential coordinate-wise descent Loss: Mean squared error MSE: 0.6380184 MKL: 0.3526576 Target: 0.3190092 Rel. tol.: 9.69e-06 Total epochs: 1605
Running time: user system elapsed 1653.837 194.547 1622.978
[1] "2019-02-27 19:35:29 GMT"
nnmf_decomp2 <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5, beta = c(0,0,10)) 0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----| ********| Sys.time() [1] "2019-02-27 20:11:48 GMT"
Sys.time() [1] "2019-03-26 17:38:30 GMT"
nnmf_decomp <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5) 0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----|
********|
Sys.time() [1] "2019-03-26 18:16:34 GMT" gc() used (Mb) gc trigger (Mb) max used (Mb) Ncells 427117 22.9 878160 46.9 878160 46.9 Vcells 394326611 3008.5 1129838049 8620.0 1371726427 10465.5
Sys.time() [1] "2019-03-27 02:06:05 GMT"
nnmf_decomp5 <- NNLM::nnmf(as.matrix(data), 100, rel.tol = 1e-5) 0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----| ********| Sys.time() [1] "2019-03-27 03:14:12 GMT"
with 15 threads
[1] "2019-04-05 15:49:08 BST"
nnmf_decomp <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5, n.threads = 15) 0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----| ********| Sys.time() [1] "2019-04-05 15:59:28 BST" nnmf_decomp Non-negative matrix factorization: Algorithm: Sequential coordinate-wise descent Loss: Mean squared error MSE: 0.6379496 MKL: 0.3527002 Target: 0.3189748 Rel. tol.: 7.92e-06 Total epochs: 1856
Running time: user system elapsed 1897.522 164.514 618.858
dimnames_conradV3 <- readRDS("../data/conradV3/V3_SDAmerged_mouse_V3_SDA_dimnames.rds") rownames(nnmf_decomp$W) <- dimnames_conradV3[[1]] colnames(nnmf_decomp$W) <- paste0("V",1:50) colnames(nnmf_decomp$H) <- dimnames_conradV3[[2]] rownames(nnmf_decomp$H) <- paste0("V",1:50)
ICA_decomp <- fastICA::fastICA(data, n.comp = 50, verbose = TRUE) # a <- Sys.time() # ICA_decomp <- fastICA::fastICA(data, n.comp = 50, verbose = TRUE, method="C") # b <- Sys.time() # Error in fastICA::fastICA(as.matrix(normalised_data), n.comp = 50, verbose = TRUE, : # svd on 19262 x 197201408 exceeds Fortran indexing limits saveRDS(ICA_decomp,"../data/conradV3/ICA_decomp_v1.rds")
library(flashpcaR) pcaresult <- flashpca(as.matrix(data), ndim=50, stan="none", verbose=T, seed=42, do_loadings=T, divisor = "none") rownames(pcaresult$loadings) <- dimnames_conradV3[[2]] colnames(pcaresult$loadings) <- paste0("V",1:50) rownames(pcaresult$vectors) <- dimnames_conradV3[[1]] colnames(pcaresult$vectors) <- paste0("V",1:50) rownames(pcaresult$projection) <- rownames(pcaresult$vectors) colnames(pcaresult$projection) <- paste0("V",1:50) saveRDS(pcaresult, "../data/conradV3/pcaresultV1.rds")
nnmf_decomp <- readRDS("../data/alt_factorisations/nnmf_decomp_v1.rds") nnmf_decomp2 <- readRDS("../data/alt_factorisations/nnmf_decomp_v2.rds") nnmf_decomp3 <- readRDS("../data/alt_factorisations/nnmf_decomp_v3.rds") nnmf_decomp4 <- readRDS("../data/alt_factorisations/nnmf_decomp_v4.rds") nnmf_decomp5 <- readRDS("../data/alt_factorisations/nnmf_decomp_v5.rds") rownames(nnmf_decomp2$H) <- paste0("V",1:50) rownames(nnmf_decomp3$H) <- paste0("V",1:50) rownames(nnmf_decomp4$H) <- paste0("V",1:50) rownames(nnmf_decomp5$H) <- paste0("V",1:100) pcaresult <- readRDS("../data/alt_factorisations/pcaresultV1.rds") # X = SA ica_decomp <- readRDS("../data/alt_factorisations/ICA_decomp_v1.rds") ica_decomp$X <- NULL rownames(ica_decomp$K) <- colnames(ica_decomp$A) <- colnames(nnmf_decomp$H) colnames(ica_decomp$K) <- rownames(ica_decomp$A) <- paste0("V",1:50)
threshold = exp(seq(-10,0,0.1)) loading_sparsity <- data.table(threshold, t(sapply(threshold, function(x) c("NNMF"=mean(abs(nnmf_decomp$H)/max(abs(nnmf_decomp$H)) < x), "SDA"=mean(abs(SDAresults$loadings[[1]])/max(abs(SDAresults$loadings[[1]])) < x), "PCA"=mean(abs(pcaresult$loadings)/max(abs(pcaresult$loadings)) < x), "ICA"=mean(abs(ica_decomp$A)/max(abs(ica_decomp$A)) < x))))) loading_sparsity <- melt(loading_sparsity, id.vars = "threshold", variable.name = "Method", value.name = "Fraction") gene_loading_sparsity <- ggplot(loading_sparsity, aes(threshold, Fraction, colour=Method)) + geom_line() + ylab("Fraction of absolute gene\n loadings below threshold") + xlab("Scaled Threshold \n(threshold * max(abs(loadings)))") + scale_x_continuous(trans = reverselog_trans())+ scale_color_brewer(palette = "Set1") + theme_minimal() + theme(legend.position = "none") + geom_text_repel(data=loading_sparsity[threshold==threshold[45]], aes(label = Method), nudge_x = c(0.5,0.5,-0.5,0.5), nudge_y = 0.02, na.rm = TRUE, segment.colour='darkgrey', colour="black") gene_loading_sparsity saveRDS(gene_loading_sparsity, "../data/plots/gene_loading_sparsity.rds") pdf("../results/other_factorisations/gene_loading_sparsity.pdf", width = 5, height = 5) gene_loading_sparsity dev.off()
Non log scale version
gene_loading_sparsity + scale_x_continuous() + coord_flip()
NMF is highest as expected
threshold = exp(seq(-12,0,0.1)) cellscore_sparsity <- data.table(threshold, t(sapply(threshold, function(x) c("PCA"=mean(abs(pcaresult$projection)/max(abs(pcaresult$projection)) < x), "ICA"=mean(abs(ica_decomp$S)/max(abs(ica_decomp$S)) < x), "SDA"=mean(abs(SDAresults$scores)/max(abs(SDAresults$scores)) < x), "NNMF"=mean(abs(nnmf_decomp$W)/max(abs(nnmf_decomp$W)) < x))))) cellscore_sparsity <- melt(cellscore_sparsity, id.vars = "threshold", variable.name = "Method", value.name = "Fraction") cellscore_sparsity_p <- ggplot(cellscore_sparsity, aes(threshold, Fraction, colour=Method)) + geom_line() + ylab("Fraction of absolute cell scores below threshold") + xlab("Scaled Threshold (t * max(abs(cell_score)))") + scale_x_continuous(trans = reverselog_trans())+ scale_color_brewer(palette = "Set1") + theme_minimal() + theme(legend.position = "none")+ geom_text_repel(data=cellscore_sparsity[threshold==threshold[70]], aes(label = Method), nudge_x = c(-0.5,0.5,1,-1.5), na.rm = TRUE, segment.colour='darkgrey', colour="black") cellscore_sparsity_p pdf("../results/other_factorisations/cell_score_sparsity.pdf", width = 5, height = 5) cellscore_sparsity_p dev.off()
Find which component is the Prdm9 one
sort(nnmf_decomp$H[,"Prdm9"]) sort(ica_decomp$A[,"Prdm9"]) sort(pcaresult$loadings["Prdm9",]) head(sort(-nnmf_decomp$H[50,]),20) head(sort(ica_decomp$A[15,]),20) head(sort(pcaresult$loadings[,"V14"], F),20)
SDAtools::genome_loadings(SDAresults$loadings[[1]][5,], label_both = T, max.items = 20, label.size = 3, gene_locations = gene_annotations, hide_unknown = T) + ylab("Gene Loadings (SDA Component 5)") + scale_y_reverse() genome_loadings(nnmf_decomp$H["V50",], label_both = FALSE, max.items = 20, label.size = 3, gene_locations = gene_annotations, hide_unknown = T) + ylab("Gene Loadings (NNMF Component 50)") SDAtools::genome_loadings(pcaresult$loadings[,"V14"], label_both = T, max.items = 20, label.size = 3, gene_locations = gene_annotations, hide_unknown = T) + ylab("Gene Loadings (PCA Component 14)") + scale_y_reverse() SDAtools::genome_loadings(ica_decomp$A[15,], label_both = F, max.items = 20, label.size = 3, gene_locations = gene_annotations, hide_unknown = T) + ylab("Gene Loadings (ICA Component 15)") + scale_y_reverse()
SDAtools::genome_loadings(SDAresults$loadings[[1]][38,], label_both = T, max.items = 20, label.size = 3, gene_locations = gene_annotations, hide_unknown = T) + ylab("Gene Loadings (SDA Component 38)") genome_loadings(nnmf_decomp$H["V3",], label_both = FALSE, max.items = 20, label.size = 3, gene_locations = gene_annotations, hide_unknown = T) + ylab("Gene Loadings (NNMF Component 3)")
To check if anything interesting looking
for(i in 1:50){ print(genome_loadings(nnmf_decomp$H[i,], label_both = FALSE, max.items = 20, label.size = 3, gene_locations = gene_annotations, hide_unknown = T) + ylab(i)) }
Seems to just make whole components 0? not what I wanted
plot(rowSums(nnmf_decomp$H)) plot(rowSums(nnmf_decomp2$H)) plot(rowSums(nnmf_decomp3$H)) plot(nnmf_decomp$H[,"Prdm9"]) plot(nnmf_decomp3$H[,"Prdm9"]) plot(sort(nnmf_decomp$H[50,]), pch='.') plot(sort(nnmf_decomp3$H[28,]), pch='.') plot(sort(rowSums(nnmf_decomp$H))) plot(sort(rowSums(nnmf_decomp3$H)))
Very similar predictions
plot(sda_predict(c("Prdm9"))$Prdm9, nmf_predict(c("Prdm9"))$Prdm9, pch=".") plot(sda_predict(c("Prdm9"))$Prdm9, pca_predict(c("Prdm9"))$Prdm9, pch=".") plot(sda_predict(c("Prm1"))$Prm1, pca_predict(c("Prm1"))$Prm1, pch=".") plot(sda_predict(c("Acrv1"))$Acrv1, pca_predict(c("Acrv1"))$Acrv1, pch=".")
ggplot(cell_data[nmf_predict(c("Prdm9"))], aes(-PseudoTime, Prdm9)) + geom_point(stroke=0, alpha=0.3) + ggtitle("NNMF") ggplot(cell_data[sda_predict(c("Prdm9"))], aes(-PseudoTime, Prdm9)) + geom_point(stroke=0, alpha=0.3) + ggtitle("SDA") ggplot(cell_data[pca_predict(c("Prdm9"))], aes(-PseudoTime, Prdm9)) + geom_point(stroke=0, alpha=0.3) + ggtitle("PCA")
C <- chol(S <- toeplitz(.9 ^ (0:31))) set.seed(17) X <- matrix(rnorm(32000), 1000, 32) Z <- X %*% C pZ <- prcomp(Z, tol = 0.1) pZ2 <- flashpca(Z, ndim=14, stan="none", verbose=T, seed=42, do_loadings=T, divisor = "none") plot(pZ$rotation[,1], -pZ2$loadings[,1]);abline(0,1,col='red') all.equal(pZ$rotation[,1], -pZ2$loadings[,1]) plot(pZ$x[,1], -pZ2$projection[,1]);abline(0,1,col='red') plot(pZ$x[,1], -pZ2$vectors[,1]) all.equal(pZ$x[,1], -pZ2$projection[,1])
compare_factorisations(nnmf_decomp$H, SDAresults$loadings[[1]], names=c("NNMF","SDA")) compare_factorisations(t(pcaresult$loadings), SDAresults$loadings[[1]], names=c("PCA","SDA")) compare_factorisations(ica_decomp$A, SDAresults$loadings[[1]], names=c("ICA","SDA"))
compare_factorisations(SDAresults$loadings[[1]],nnmf_decomp5$H, names=c("SDA","NNMF"), method="pearson")
named_loadings <- SDAresults$loadings[[1]] rownames(named_loadings) <- paste0("Mix",1:50," ",names(sort(component_labels))) #paste0("Mix_",1:50)
pdf("../results/other_factorisations/NNMF_original.pdf", height=9, width=10) compare_factorisations(nnmf_decomp$H, named_loadings, names=c("NNMF","SDA"), method = "pearson") dev.off() compare_factorisations(t(pcaresult$loadings), SDAresults$loadings[[1]], names=c("PCA","SDA"), method = "pearson") compare_factorisations(ica_decomp$A, SDAresults$loadings[[1]], names=c("ICA","SDA"), method = "pearson")
rot_NNMF <- vegan::procrustes(t(SDAresults$loadings[[1]]), t(nnmf_decomp$H)) colnames(rot_NNMF$Yrot) <- paste0("V",1:50) pdf("../results/other_factorisations/NNMF_rotated.pdf", height=9, width=10) compare_factorisations(t(rot_NNMF$Yrot), named_loadings, c("NNMF Rotated","SDA"), method = "pearson") dev.off()
rot_named_loadings <- vegan::procrustes(t(nnmf_decomp$H), t(named_loadings)) colnames(rot_named_loadings$Yrot) <- paste0("V",1:50) pdf("../results/other_factorisations/SDA_rotated.pdf", height=9, width=10) compare_factorisations(t(rot_named_loadings$Yrot), nnmf_decomp$H, c("SDA Rotated","NNMF"), method = "pearson") dev.off()
corrs <- compare_factorisations(t(rot_NNMF$Yrot), named_loadings, c("NNMF Rotated","SDA"), method = "pearson", return="cor") heatmapgrob <- ggplot(melt(corrs[50:1,], value.name="Pearson_correlation"), aes(`NNMF Rotated`,SDA)) + geom_tile(aes(fill=Pearson_correlation)) + scale_fill_viridis() + theme_bw(base_size = 12) + theme(axis.text.x = element_text(angle = 90), legend.position = "bottom") saveRDS(heatmapgrob, "../data/plots/heatmapgrob.rds")
sda_load_promax <- promax(t(SDAresults$loadings[[1]])) nnmf_load_promax <- promax(t(nnmf_decomp$H)) ## compare_factorisations(t(nnmf_load_promax$loadings), t(sda_load_promax$loadings), c("NNMF","SDA"), method = "pearson") # compare promax to original compare_factorisations(t(sda_load_promax$loadings), named_loadings, c("NNMF","SDA"), method = "pearson") compare_factorisations(t(nnmf_load_promax$loadings), nnmf_decomp$H, c("NNMF","SDA"), method = "pearson") plot(sda_load_promax$loadings[,"V5"],SDAresults$loadings[[1]]["V5",]) plot(nnmf_load_promax$loadings[,"V50"],nnmf_decomp$H["V50",]) plot(rot_NNMF$Yrot[,"V5"], SDAresults$loadings[[1]]["V5",]) plot(nnmf_decomp$H["V50",],SDAresults$loadings[[1]]["V5",])
gene_cor <- numeric(length = length(colnames(SDAresults$loadings[[1]]))) for(i in seq_along(gene_cor)){ gene_cor[i] <- cor(nmf_predict(c(colnames(SDAresults$loadings[[1]])[i]))[,2][[1]], sda_predict(c(colnames(SDAresults$loadings[[1]])[i]))[,2][[1]]) } names(gene_cor) <- colnames(SDAresults$loadings[[1]]) saveRDS(gene_cor, "../data/imputation/NNMF_vs_SDA_Gene_correlations.rds") gene_cor <- readRDS("../data/imputation/NNMF_vs_SDA_Gene_correlations.rds") manhatten_plot(component_enrichment(names(head(sort(gene_cor),500))), topn = 14) manhatten_plot(component_enrichment(names(head(sort(gene_cor),500)), loadings=nnmf_decomp$H), topn = 14)
Gene Correlation without single cell components
named_SDAresults <- SDAresults rownames(named_SDAresults$loadings[[1]]) <- paste0("Mix",1:50," ",names(sort(component_labels))) colnames(named_SDAresults$scores) <- paste0("Mix",1:50," ",names(sort(component_labels))) sc_components <- grep("Single",rownames(named_SDAresults$loadings[[1]])) named_SDAresults$loadings[[1]] <- named_SDAresults$loadings[[1]][-sc_components,] named_SDAresults$scores <- named_SDAresults$scores[,-sc_components] gene_cor <- numeric(length = length(colnames(SDAresults$loadings[[1]]))) for(i in seq_along(gene_cor)){ gene_cor[i] <- cor(nmf_predict(c(colnames(SDAresults$loadings[[1]])[i]))[,2][[1]], sda_predict(c(colnames(SDAresults$loadings[[1]])[i]), factorisation = named_SDAresults)[,2][[1]]) } names(gene_cor) <- colnames(SDAresults$loadings[[1]]) saveRDS(gene_cor, "../data/imputation/NNMF_vs_SDA_Gene_correlations_sans_single_cell.rds") gene_cor <- readRDS("../data/imputation/NNMF_vs_SDA_Gene_correlations_sans_single_cell.rds") plot(sort(gene_cor)) head(sort(gene_cor)) str(rank(gene_cor)["Gfra1"]) manhatten_plot(component_enrichment(names(head(sort(gene_cor),500))), topn = 7) manhatten_plot(component_enrichment(names(head(sort(gene_cor),500)), loadings=nnmf_decomp$H, orderSDA = F), topn = 0)
blood_SDA <- tricolour_tsne(c("Cd3g","Csf1r")) + facet_zoom(xlim=c(-10,0), ylim=c(-6,-1)) + ggtitle("SDA Imputed Expression") + labs(y="t-SNE 2", x="t-SNE 1") blood_NNMF <- tricolour_tsne(c("Cd3g","Csf1r"), predict = "NNMF") + facet_zoom(xlim=c(-10,0), ylim=c(-6,-1)) + ggtitle("NNMF Imputed Expression") + labs(y="t-SNE 2", x="t-SNE 1") blood_SDA blood_NNMF pdf("../results/other_factorisations/blood_SDA.pdf", width = 7, height = 5) blood_SDA dev.off() pdf("../results/other_factorisations/blood_NNMF.pdf", width = 7, height = 5) blood_NNMF dev.off()
Scatter Plot
corss <- rbind(sda_predict(c("Cd3g","Csf1r")), nmf_predict(c("Cd3g","Csf1r"))) corss$Method <- "NNMF" corss[1:(nrow(corss)/2)]$Method <- "SDA" corss$rgb <- ternaryColours(corss, c("Cd3g","Csf1r")) ggplot(corss, aes(Csf1r, Cd3g, colour=rgb)) + scale_colour_identity() + geom_point() + facet_wrap(~Method, scales = "free") blood_SDA_vs_NNMF <- ggplot(corss, aes(Csf1r, Cd3g, colour=Method)) + geom_point(alpha=0.5) + scale_color_brewer(palette = "Set1") + theme_minimal() + theme(legend.position = "bottom") blood_SDA_vs_NNMF pdf("../results/other_factorisations/blood_SDA_vs_NNMF.pdf", width = 5, height = 5) blood_SDA_vs_NNMF dev.off()
tricolour_tsne(c("Gfra1","Upp1","Crabp1")) + facet_zoom(xlim=c(-5,5), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction") tricolour_tsne(c("Gfra1","Upp1","Crabp1"), predict = "NNMF") + facet_zoom(xlim=c(-5,5), ylim=c(35,43), zoom.size = 2) + ggtitle("NNMF Prediction") gonia_SDA <- tricolour_tsne(c("Gfra1","Lin28a","Nanos1")) + facet_zoom(xlim=c(-5,5), ylim=c(36,43), zoom.size = 2) + ggtitle("SDA Imputed Expression") + labs(y="t-SNE 2", x="t-SNE 1") gonia_SDA_zoom <- tricolour_tsne(c("Gfra1","Lin28a","Nanos1")) + xlim(c(-4,5)) + ylim(c(36,44)) + ggtitle("SDA Imputed Expression") + labs(y="t-SNE 2", x="t-SNE 1") gonia_NNMF <- tricolour_tsne(c("Gfra1","Lin28a","Nanos1"), predict = "NNMF") + facet_zoom(xlim=c(-4,5), ylim=c(36,44), zoom.size = 2) + ggtitle("NNMF Imputed Expression") + labs(y="t-SNE 2", x="t-SNE 1") # ccnd2 or gfra1 # upp1 or foxf1 # ccna2 or crabp1 gonia_SDA gonia_NNMF pdf("../results/other_factorisations/Spermatogonia_SDA.pdf", width = 7, height = 5) gonia_SDA dev.off() pdf("../results/other_factorisations/Spermatogonia_NNMF.pdf", width = 7, height = 5) gonia_NNMF dev.off()
Scatter plot
corss <- rbind(sda_predict(c("Gfra1","Upp1")), nmf_predict(c("Gfra1","Upp1"))) corss$Method <- "NNMF" corss[1:(nrow(corss)/2)]$Method <- "SDA" corss$rgb <- ternaryColours(corss, c("Gfra1","Upp1")) ggplot(corss, aes(Upp1, Gfra1, colour=rgb)) + scale_colour_identity() + geom_point() + facet_wrap(~Method, scales = "free") ggplot(corss, aes(Upp1, Gfra1, colour=Method)) + geom_point() + scale_color_brewer(palette = "Set1") + theme_minimal()
corss <- rbind(sda_predict(c("Igfbp5","Upp1")), nmf_predict(c("Igfbp5","Upp1"))) corss$Method <- "NNMF" corss[1:(nrow(corss)/2)]$Method <- "SDA" ggplot(corss, aes(Upp1, Igfbp5, colour=Method)) + geom_point() + scale_color_brewer(palette = "Set1") + theme_minimal() corss <- rbind(sda_predict(c("Gm14019","Upp1")), nmf_predict(c("Gm14019","Upp1"))) corss$Method <- "NNMF" corss[1:(nrow(corss)/2)]$Method <- "SDA" ggplot(corss, aes(Upp1, Gm14019, colour=Method)) + geom_point() + scale_color_brewer(palette = "Set1") + theme_minimal()
100 component NNMF versions
tricolour_tsne(c("Cd3g","Csf1r"), predict = "NNMF2") + facet_zoom(xlim=c(-10,0), ylim=c(-6,-1)) + ggtitle("NNMF 100C Prediction") tricolour_tsne(c("Gfra1","Upp1","Crabp1"), predict = "NNMF2") + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("NNMF 100C Prediction")
(with expression as colour)
#plot(nnmf_decomp$W[,"V4"][cell_data[order(experiment)]$cell], pch=".") tmp <- tricolour_tsne(c("Cd3g","Csf1r"), returndf = T, predict = F) ggplot(tmp, aes(V3,V11,colour=rgb)) + geom_point() + scale_colour_identity() + theme_dark() + annotate("text", Inf, Inf, hjust = 2, vjust = 2, label = "Cd3g", colour="red", fontface="bold") + annotate("text", Inf, Inf, hjust = 2, vjust = 4, label = "Csf1r", colour="blue", fontface="bold") tmp$NNMF_V4 <- nnmf_decomp$W[,"V4"][cell_data$cell] tmp$NNMF_V11 <- nnmf_decomp$W[,"V11"][cell_data$cell] tmp$NNMF_V34 <- nnmf_decomp$W[,"V34"][cell_data$cell] ggplot(tmp[order(Cd3g/2+Csf1r)], aes(NNMF_V4,NNMF_V11,colour=rgb)) + geom_point() + scale_colour_identity() + theme_dark() + annotate("text", Inf, Inf, hjust = 2, vjust = 2, label = "Cd3g", colour="red", fontface="bold") + annotate("text", Inf, Inf, hjust = 2, vjust = 4, label = "Csf1r", colour="blue", fontface="bold")
ggplot(tmp[order(Cd3g/2+Csf1r)], aes(NNMF_V4,NNMF_V11,colour=rgb)) + geom_point() + scale_colour_identity() + theme_dark() + annotate("text", Inf, Inf, hjust = 2, vjust = 2, label = "Cd3g", colour="red", fontface="bold") + annotate("text", Inf, Inf, hjust = 2, vjust = 4, label = "Csf1r", colour="blue", fontface="bold") tmp$NNMF_Crgb <- ternaryColours(tmp, c("NNMF_V4","NNMF_V11","NNMF_V34")) ggplot(tmp, aes(Tsne1_QC1, Tsne2_QC1)) + geom_point(aes(colour=NNMF_Crgb), stroke=0, size=1) + scale_color_identity() + theme_dark() + facet_zoom(xlim=c(-10,0), ylim=c(-6,-1), zoom.size = 1) ggplot(tmp[order(Cd3g)], aes(Tsne1_QC1, Tsne2_QC1)) + geom_point(aes(colour=Cd3g), stroke=0, size=1) + scale_color_viridis(direction = -1) + facet_zoom(xlim=c(-10,0), ylim=c(-6,-1), zoom.size = 1)
tmp$V3L <- asinh(tmp$V3) tmp$V11L <- asinh(tmp$V11) tmp$SDA_Crgb <- ternaryColours(tmp, c("V11L","V3L")) ggplot(tmp, aes(Tsne1_QC1, Tsne2_QC1)) + geom_point(aes(colour=SDA_Crgb), stroke=0, size=1) + scale_color_identity() + theme_dark() + facet_zoom(xlim=c(-9.5,0), ylim=c(-5.5,-1), zoom.size = 1) + annotate("text", Inf, Inf, hjust = 1.5, vjust = 2, label = "SDA_C11", colour="red", fontface="bold") + annotate("text", Inf, Inf, hjust = 1.5, vjust = 4, label = "SDA_C3", colour="blue", fontface="bold")
tricolour_tsne(c("Cd52","Tyrobp"), predict = F, ptsize = 1) + facet_zoom(xlim=c(-10,0), ylim=c(-6,-1), zoom.size = 1)
etc_enrich_nnmf[order(p.value)][1:5] etc_enrich[order(p.value)][1:5] manhatten_plot(etc_enrich, legend_position = c(0.2,0.8)) manhatten_plot(etc_enrich_nnmf, legend_position = c(0.2,0.8)) plot(single_component_enrichment(nnmf_decomp$H["V46",], etc_genes_clean, threshold = 500)$ranks) plot(single_component_enrichment(SDAresults$loadings[[1]][9,], etc_genes_clean, threshold = 500, pos = F)$ranks) plot(single_component_enrichment(nnmf_decomp$H["V46",], etc_genes_clean, threshold = 500)$ranks, single_component_enrichment(SDAresults$loadings[[1]][9,], etc_genes_clean, threshold = 500, pos = F)$ranks)
nnmf_ranks <- single_component_enrichment(nnmf_decomp$H["V46",], etc_genes_clean, threshold = 500)$ranks sda_ranks <- single_component_enrichment(SDAresults$loadings[[1]][9,], etc_genes_clean, threshold = 500, pos = F)$ranks plot(nnmf_ranks, sda_ranks) abline(0,1) plot(nnmf_ranks / sda_ranks, ylab="Ratio rank NNMF/SDA") abline(h=1) qplot(1:length(nnmf_ranks), nnmf_ranks[order(names(nnmf_ranks))] / sda_ranks[order(names(sda_ranks))], ylab="Ratio paired rank NNMF/SDA") + scale_y_log10() + geom_hline(yintercept = 1)
kojima_stra8_enrich <- component_enrichment(kojima_stra8_chip) kojima_stra8_enrich_nnmf <- component_enrichment(kojima_stra8_chip, loadings_matrix = nnmf_decomp$H, orderSDA = F) kojima_stra8_enrich[order(p.value)][1:5] kojima_stra8_enrich_nnmf[order(p.value)][1:5] manhatten_plot(kojima_stra8_enrich, legend_position = c(0.2,0.8)) manhatten_plot(kojima_stra8_enrich_nnmf, legend_position = c(0.2,0.8)) plot(single_component_enrichment(SDAresults$loadings[[1]][5,], kojima_stra8_chip, threshold = 500, pos = F)$ranks) plot(single_component_enrichment(nnmf_decomp$H["V50",], kojima_stra8_chip, threshold = 500)$ranks)
Meiotic Sex Chromosome Activation in Hormad1 KO
In NNMF this effect bleeds into the WT cells
# plot(SDAresults$scores[cell_data[order(-PseudoTime)]$cell,"V38"],col=rgb(0,0,0,1-as.double(grepl("Hormad",cell_data[order(-PseudoTime)]$experiment))),xlim=c(0,5000),ylim=c(-5,20)) # plot(nnmf_decomp$W[cell_data[order(-PseudoTime)]$cell,"V3"],col=rgb(0,0,0,1-as.double(grepl("Hormad",cell_data[order(-PseudoTime)]$experiment))),xlim=c(0,5000)) # # plot(SDAresults$scores[cell_data[order(-PseudoTime)]$cell,"V38"],col=1+as.double(grepl("Hormad",cell_data[order(-PseudoTime)]$experiment)),xlim=c(0,5000),ylim=c(-5,20)) # plot(nnmf_decomp$W[cell_data[order(-PseudoTime)]$cell,"V3"],col=1+as.double(grepl("Hormad",cell_data[order(-PseudoTime)]$experiment)),xlim=c(0,5000)) tmp <- data.table("NNMF"=nnmf_decomp$W[cell_data[order(-PseudoTime)]$cell,"V3"], "SDA"=SDAresults$scores[cell_data[order(-PseudoTime)]$cell,"V38"], "Hormad1KO"=grepl("Hormad",cell_data[order(-PseudoTime)]$experiment), "PseudoTime" = cell_data[order(-PseudoTime)]$PseudoTime, "cell" = cell_data[order(-PseudoTime)]$cell, "msci_ratio" = cell_data[order(-PseudoTime)]$msci_ratio) tmp[Hormad1KO==TRUE, Hormad_Genotype := "Hormad -/-"] tmp[Hormad1KO==FALSE, Hormad_Genotype := "Hormad WT"] tmp <- melt(tmp[PseudoTime>14500], id.vars = c("cell","msci_ratio","PseudoTime","Hormad1KO","Hormad_Genotype"), variable.name = "Method", value.name = "Cell_Score") hormad_cell_score <- ggplot(tmp, aes(-PseudoTime, Cell_Score, colour=Method)) + geom_point(alpha=0.5, stroke=0) + facet_grid(Hormad_Genotype~Method, scales="free_y") + scale_color_brewer(palette = "Set1", guide=FALSE) + geom_smooth(colour="black") + theme_bw() pdf("../results/other_factorisations/MSCA_Cell_Score.pdf", width = 7) hormad_cell_score dev.off()
hormad_gene <- "Rhox2h" cell_data[grepl("Hormad",experiment), Hormad_Genotype := "Hormad -/-"] cell_data[!grepl("Hormad",experiment), Hormad_Genotype := "Hormad WT"] tmp <- cell_data[nmf_predict(hormad_gene,name_extension = "_NNMF")][sda_predict(hormad_gene, name_extension = "_SDA")] tmp <- melt(tmp[PseudoTime>14500],id.vars = c("cell","PseudoTime","Hormad_Genotype"), measure.vars = paste0(hormad_gene,"_",c("NNMF","SDA")), variable.name = "Method", value.name = hormad_gene) tmp[,Method := gsub(paste0(hormad_gene,"_"),"",Method)] Rhox2h_prediction <- ggplot(tmp, aes(-PseudoTime, get(hormad_gene), colour=Method)) + geom_point(alpha=0.5, stroke=0) + facet_grid(Hormad_Genotype~Method, scales="free_y") + scale_color_brewer(palette = "Set1", guide=FALSE) + geom_smooth(colour="black") + ylab(paste(hormad_gene,"Imputed Expression")) + scale_x_continuous(breaks = c(-15000,-16000)) + theme_bw() Rhox2h_prediction pdf("../results/other_factorisations/MSCA_Rhox2h.pdf", width = 7) Rhox2h_prediction dev.off()
With 100 NNMF components
Better
cell_data[grepl("Hormad1_3",experiment), Genotype := "2"] cell_data[!grepl("Hormad1_3",experiment), Genotype := "1"] tmp <- cell_data[nmf_predict(c("Rhox2h"),nnmf_decomp5, name_extension = "_NNMF")][sda_predict(c("Rhox2h"), name_extension = "_SDA")] tmp <- melt(tmp[PseudoTime>14500],id.vars = c("cell","PseudoTime","Hormad_Genotype"), measure.vars = c("Rhox2h_NNMF","Rhox2h_SDA"), variable.name = "Method", value.name = "Rhox2h") #tmp[,Method := gsub("Rhox2h_","",Method)] Rhox2h_prediction <- ggplot(tmp, aes(-PseudoTime, Rhox2h, colour=Method)) + geom_point(alpha=0.5) + facet_grid(Hormad_Genotype~Method, scales="free_y") + scale_color_brewer(palette = "Set1") + geom_smooth(colour="black") + theme_bw() Rhox2h_prediction
too slow didn't finish
library(data.table) data <- fread("../data/conradV3/V3_SDAmerged_mouse_V3_SDA.data") dimnames_conradV3 <- readRDS("../data/conradV3/V3_SDAmerged_mouse_V3_SDA_dimnames.rds") data <- as.matrix(data) rownames(data) <- dimnames_conradV3[[1]] colnames(data) <- dimnames_conradV3[[2]] rfa <- factanal(data, factors = 50, rotation = "promax")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.