In MyersGroup/testisAtlas: testisAtlas: R functions and analysis scripts used in the analysis of testis single cell RNAseq

NNMF

library(data.table)
data <- fread("../data/conradV3/V3_SDAmerged_mouse_V3_SDA.data")

dimnames_conradV3 <- readRDS("../data/conradV3/V3_SDAmerged_mouse_V3_SDA_dimnames.rds")
data <- as.matrix(data)
rownames(data) <- dimnames_conradV3[[1]]
colnames(data) <- dimnames_conradV3[[2]]



Sys.time()
nnmf_decomp <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5)
Sys.time()
nnmf_decomp

saveRDS(nnmf_decomp,"../data/conradV3/nnmf_decomp_v1.rds")

#nnmf_decomp$run.time["user.self"]/60
# 27.5 mins

gc()

# Try with different Beta values for regularisation

Sys.time()
nnmf_decomp2 <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5, beta = c(0,0,10))
Sys.time()

saveRDS(nnmf_decomp2,"../data/conradV3/nnmf_decomp_v2.rds")


Sys.time()
nnmf_decomp3 <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5, beta = c(0,0,1e4))
Sys.time()

saveRDS(nnmf_decomp3,"../data/conradV3/nnmf_decomp_v3.rds")

Sys.time()
nnmf_decomp4 <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5, beta = c(0,0,1e3))
Sys.time()

saveRDS(nnmf_decomp4,"../data/conradV3/nnmf_decomp_v4.rds")


Sys.time()
nnmf_decomp5 <- NNLM::nnmf(as.matrix(data), 100, rel.tol = 1e-5)
Sys.time()

saveRDS(nnmf_decomp5,"../data/conradV3/nnmf_decomp_v5.rds")


Sys.time()
nnmf_decomp <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5, n.threads = 15, loss="mkl")
Sys.time()
nnmf_decomp

saveRDS(nnmf_decomp,"../data/conradV3/nnmf_decomp_v6.rds")

[1] "2019-02-26 22:27:55 GMT" nnmf_decomp <- nnmf(as.matrix(data), 50, rel.tol = 1e-5)

0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----| ********|

Sys.time() [1] "2019-02-26 22:55:02 GMT" nnmf_decomp Non-negative matrix factorization: Algorithm: Sequential coordinate-wise descent Loss: Mean squared error MSE: 0.6380184 MKL: 0.3526576 Target: 0.3190092 Rel. tol.: 9.69e-06 Total epochs: 1605

Interation: 39

Running time: user system elapsed 1653.837 194.547 1622.978

[1] "2019-02-27 19:35:29 GMT"

nnmf_decomp2 <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5, beta = c(0,0,10)) 0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----| ********| Sys.time() [1] "2019-02-27 20:11:48 GMT"

Sys.time() [1] "2019-03-26 17:38:30 GMT"

nnmf_decomp <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5) 0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----|

********|

Sys.time() [1] "2019-03-26 18:16:34 GMT" gc() used (Mb) gc trigger (Mb) max used (Mb) Ncells 427117 22.9 878160 46.9 878160 46.9 Vcells 394326611 3008.5 1129838049 8620.0 1371726427 10465.5

Sys.time() [1] "2019-03-27 02:06:05 GMT"

nnmf_decomp5 <- NNLM::nnmf(as.matrix(data), 100, rel.tol = 1e-5) 0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----| ********| Sys.time() [1] "2019-03-27 03:14:12 GMT"

with 15 threads

[1] "2019-04-05 15:49:08 BST"

nnmf_decomp <- NNLM::nnmf(as.matrix(data), 50, rel.tol = 1e-5, n.threads = 15) 0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----| ********| Sys.time() [1] "2019-04-05 15:59:28 BST" nnmf_decomp Non-negative matrix factorization: Algorithm: Sequential coordinate-wise descent Loss: Mean squared error MSE: 0.6379496 MKL: 0.3527002 Target: 0.3189748 Rel. tol.: 7.92e-06 Total epochs: 1856

Interation: 45

Running time: user system elapsed 1897.522 164.514 618.858

dimnames_conradV3 <- readRDS("../data/conradV3/V3_SDAmerged_mouse_V3_SDA_dimnames.rds")
rownames(nnmf_decomp$W) <- dimnames_conradV3[[1]]
colnames(nnmf_decomp$W) <- paste0("V",1:50)

colnames(nnmf_decomp$H) <- dimnames_conradV3[[2]]
rownames(nnmf_decomp$H) <- paste0("V",1:50)

ICA

ICA_decomp <- fastICA::fastICA(data, n.comp = 50, verbose = TRUE)

# a <- Sys.time()
# ICA_decomp <- fastICA::fastICA(data, n.comp = 50, verbose = TRUE, method="C")
# b <- Sys.time()

# Error in fastICA::fastICA(as.matrix(normalised_data), n.comp = 50, verbose = TRUE,  :
#   svd on 19262 x 197201408 exceeds Fortran indexing limits

saveRDS(ICA_decomp,"../data/conradV3/ICA_decomp_v1.rds")

PCA

library(flashpcaR)
pcaresult <- flashpca(as.matrix(data), ndim=50, stan="none", verbose=T, seed=42, do_loadings=T, divisor = "none")

rownames(pcaresult$loadings) <- dimnames_conradV3[[2]]
colnames(pcaresult$loadings) <- paste0("V",1:50)

rownames(pcaresult$vectors) <- dimnames_conradV3[[1]]
colnames(pcaresult$vectors) <- paste0("V",1:50)

rownames(pcaresult$projection) <- rownames(pcaresult$vectors)
colnames(pcaresult$projection) <- paste0("V",1:50)


saveRDS(pcaresult, "../data/conradV3/pcaresultV1.rds")

Load results

nnmf_decomp <- readRDS("../data/alt_factorisations/nnmf_decomp_v1.rds")

nnmf_decomp2 <- readRDS("../data/alt_factorisations/nnmf_decomp_v2.rds")

nnmf_decomp3 <- readRDS("../data/alt_factorisations/nnmf_decomp_v3.rds")

nnmf_decomp4 <- readRDS("../data/alt_factorisations/nnmf_decomp_v4.rds")

nnmf_decomp5 <- readRDS("../data/alt_factorisations/nnmf_decomp_v5.rds")


rownames(nnmf_decomp2$H) <- paste0("V",1:50)
rownames(nnmf_decomp3$H) <- paste0("V",1:50)
rownames(nnmf_decomp4$H) <- paste0("V",1:50)
rownames(nnmf_decomp5$H) <- paste0("V",1:100)


pcaresult <- readRDS("../data/alt_factorisations/pcaresultV1.rds")

# X = SA
ica_decomp <- readRDS("../data/alt_factorisations/ICA_decomp_v1.rds")
ica_decomp$X <- NULL
rownames(ica_decomp$K) <- colnames(ica_decomp$A) <- colnames(nnmf_decomp$H)
colnames(ica_decomp$K) <- rownames(ica_decomp$A) <- paste0("V",1:50)

compare sparsity of gene loadings

threshold = exp(seq(-10,0,0.1))

loading_sparsity <- data.table(threshold,
                               t(sapply(threshold, function(x) c("NNMF"=mean(abs(nnmf_decomp$H)/max(abs(nnmf_decomp$H)) < x),
                                                                "SDA"=mean(abs(SDAresults$loadings[[1]])/max(abs(SDAresults$loadings[[1]])) < x),
                                                                "PCA"=mean(abs(pcaresult$loadings)/max(abs(pcaresult$loadings)) < x),
                                                                "ICA"=mean(abs(ica_decomp$A)/max(abs(ica_decomp$A)) < x)))))


loading_sparsity <- melt(loading_sparsity, id.vars = "threshold", variable.name = "Method", value.name = "Fraction")

gene_loading_sparsity <- ggplot(loading_sparsity, aes(threshold, Fraction, colour=Method)) +
  geom_line() +
  ylab("Fraction of absolute gene\n loadings below threshold") + 
  xlab("Scaled Threshold \n(threshold * max(abs(loadings)))") +
  scale_x_continuous(trans = reverselog_trans())+
  scale_color_brewer(palette = "Set1") +
  theme_minimal() +
  theme(legend.position = "none") + 
  geom_text_repel(data=loading_sparsity[threshold==threshold[45]], aes(label = Method),
                  nudge_x = c(0.5,0.5,-0.5,0.5), nudge_y = 0.02,
                  na.rm = TRUE, segment.colour='darkgrey', colour="black")

gene_loading_sparsity

saveRDS(gene_loading_sparsity, "../data/plots/gene_loading_sparsity.rds")

pdf("../results/other_factorisations/gene_loading_sparsity.pdf", width = 5, height = 5)
gene_loading_sparsity
dev.off()

Non log scale version

gene_loading_sparsity + scale_x_continuous() + coord_flip()

Cell score sparcity

NMF is highest as expected

threshold = exp(seq(-12,0,0.1))

cellscore_sparsity <- data.table(threshold,
                               t(sapply(threshold, function(x) c("PCA"=mean(abs(pcaresult$projection)/max(abs(pcaresult$projection)) < x),
                                                                 "ICA"=mean(abs(ica_decomp$S)/max(abs(ica_decomp$S)) < x),
                                                                 "SDA"=mean(abs(SDAresults$scores)/max(abs(SDAresults$scores)) < x),
                                                                 "NNMF"=mean(abs(nnmf_decomp$W)/max(abs(nnmf_decomp$W)) < x)))))

cellscore_sparsity <- melt(cellscore_sparsity, id.vars = "threshold", variable.name = "Method", value.name = "Fraction")

cellscore_sparsity_p <- ggplot(cellscore_sparsity, aes(threshold, Fraction, colour=Method)) +
  geom_line() +
  ylab("Fraction of absolute cell scores below threshold") + 
  xlab("Scaled Threshold (t * max(abs(cell_score)))") +
  scale_x_continuous(trans = reverselog_trans())+
  scale_color_brewer(palette = "Set1") +
  theme_minimal() +
  theme(legend.position = "none")+
  geom_text_repel(data=cellscore_sparsity[threshold==threshold[70]], aes(label = Method),
                  nudge_x = c(-0.5,0.5,1,-1.5),
                  na.rm = TRUE, segment.colour='darkgrey', colour="black")

cellscore_sparsity_p

pdf("../results/other_factorisations/cell_score_sparsity.pdf", width = 5, height = 5)
cellscore_sparsity_p
dev.off()

Compare Prdm9 Component Loadings

Find which component is the Prdm9 one

sort(nnmf_decomp$H[,"Prdm9"])
sort(ica_decomp$A[,"Prdm9"])
sort(pcaresult$loadings["Prdm9",])

head(sort(-nnmf_decomp$H[50,]),20)
head(sort(ica_decomp$A[15,]),20)
head(sort(pcaresult$loadings[,"V14"], F),20)

Compare Leptotene Loadings

SDAtools::genome_loadings(SDAresults$loadings[[1]][5,], label_both = T, max.items = 20, label.size = 3,
                                     gene_locations = gene_annotations, hide_unknown = T) +
  ylab("Gene Loadings (SDA Component 5)") +
  scale_y_reverse()

genome_loadings(nnmf_decomp$H["V50",], label_both = FALSE, max.items = 20, label.size = 3,
                                     gene_locations = gene_annotations, hide_unknown = T) +
  ylab("Gene Loadings (NNMF Component 50)")

SDAtools::genome_loadings(pcaresult$loadings[,"V14"], label_both = T, max.items = 20, label.size = 3,
                                     gene_locations = gene_annotations, hide_unknown = T) +
  ylab("Gene Loadings (PCA Component 14)") +
  scale_y_reverse()

SDAtools::genome_loadings(ica_decomp$A[15,], label_both = F, max.items = 20, label.size = 3,
                                     gene_locations = gene_annotations, hide_unknown = T) +
  ylab("Gene Loadings (ICA Component 15)") +
  scale_y_reverse()

Compare X Chrom Activation Loadings

SDAtools::genome_loadings(SDAresults$loadings[[1]][38,], label_both = T, max.items = 20, label.size = 3,
                                     gene_locations = gene_annotations, hide_unknown = T) +
  ylab("Gene Loadings (SDA Component 38)")

genome_loadings(nnmf_decomp$H["V3",], label_both = FALSE, max.items = 20, label.size = 3,
                                     gene_locations = gene_annotations, hide_unknown = T) +
  ylab("Gene Loadings (NNMF Component 3)")

All NMF Loadings

To check if anything interesting looking

for(i in 1:50){
  print(genome_loadings(nnmf_decomp$H[i,], label_both = FALSE, max.items = 20, label.size = 3,
                                     gene_locations = gene_annotations, hide_unknown = T) +
  ylab(i))
}

compare NNMF L1 sparsity

Seems to just make whole components 0? not what I wanted

plot(rowSums(nnmf_decomp$H))
plot(rowSums(nnmf_decomp2$H))
plot(rowSums(nnmf_decomp3$H))

plot(nnmf_decomp$H[,"Prdm9"])
plot(nnmf_decomp3$H[,"Prdm9"])

plot(sort(nnmf_decomp$H[50,]), pch='.')
plot(sort(nnmf_decomp3$H[28,]), pch='.')

plot(sort(rowSums(nnmf_decomp$H)))
plot(sort(rowSums(nnmf_decomp3$H)))

Imputation

Very similar predictions

plot(sda_predict(c("Prdm9"))$Prdm9, nmf_predict(c("Prdm9"))$Prdm9, pch=".")
plot(sda_predict(c("Prdm9"))$Prdm9, pca_predict(c("Prdm9"))$Prdm9, pch=".")
plot(sda_predict(c("Prm1"))$Prm1, pca_predict(c("Prm1"))$Prm1, pch=".")
plot(sda_predict(c("Acrv1"))$Acrv1, pca_predict(c("Acrv1"))$Acrv1, pch=".")

ggplot(cell_data[nmf_predict(c("Prdm9"))], aes(-PseudoTime, Prdm9)) + geom_point(stroke=0, alpha=0.3) + ggtitle("NNMF")
ggplot(cell_data[sda_predict(c("Prdm9"))], aes(-PseudoTime, Prdm9)) + geom_point(stroke=0, alpha=0.3) + ggtitle("SDA")
ggplot(cell_data[pca_predict(c("Prdm9"))], aes(-PseudoTime, Prdm9)) + geom_point(stroke=0, alpha=0.3) + ggtitle("PCA")

test PCA implementaitons

C <- chol(S <- toeplitz(.9 ^ (0:31)))
set.seed(17)
X <- matrix(rnorm(32000), 1000, 32)
Z <- X %*% C
pZ <- prcomp(Z, tol = 0.1)


pZ2 <- flashpca(Z, ndim=14, stan="none", verbose=T, seed=42, do_loadings=T, divisor = "none")

plot(pZ$rotation[,1], -pZ2$loadings[,1]);abline(0,1,col='red')
all.equal(pZ$rotation[,1], -pZ2$loadings[,1])

plot(pZ$x[,1], -pZ2$projection[,1]);abline(0,1,col='red')
plot(pZ$x[,1], -pZ2$vectors[,1])
all.equal(pZ$x[,1], -pZ2$projection[,1])

Correlations

compare_factorisations(nnmf_decomp$H, SDAresults$loadings[[1]], names=c("NNMF","SDA"))
compare_factorisations(t(pcaresult$loadings), SDAresults$loadings[[1]], names=c("PCA","SDA"))
compare_factorisations(ica_decomp$A, SDAresults$loadings[[1]], names=c("ICA","SDA"))

100 componentns

compare_factorisations(SDAresults$loadings[[1]],nnmf_decomp5$H, names=c("SDA","NNMF"), method="pearson")

Pearson

named_loadings <- SDAresults$loadings[[1]]

rownames(named_loadings) <- paste0("Mix",1:50," ",names(sort(component_labels))) #paste0("Mix_",1:50)

pdf("../results/other_factorisations/NNMF_original.pdf", height=9, width=10)
compare_factorisations(nnmf_decomp$H, named_loadings, names=c("NNMF","SDA"), method = "pearson")
dev.off()

compare_factorisations(t(pcaresult$loadings), SDAresults$loadings[[1]], names=c("PCA","SDA"), method = "pearson")
compare_factorisations(ica_decomp$A, SDAresults$loadings[[1]], names=c("ICA","SDA"), method = "pearson")

Rotated NNMF

rot_NNMF <- vegan::procrustes(t(SDAresults$loadings[[1]]), t(nnmf_decomp$H))

colnames(rot_NNMF$Yrot) <- paste0("V",1:50)


pdf("../results/other_factorisations/NNMF_rotated.pdf", height=9, width=10)
compare_factorisations(t(rot_NNMF$Yrot),
                       named_loadings,
                       c("NNMF Rotated","SDA"),
                       method = "pearson")
dev.off()

Rotated SDA

rot_named_loadings <- vegan::procrustes(t(nnmf_decomp$H), t(named_loadings))

colnames(rot_named_loadings$Yrot) <- paste0("V",1:50)


pdf("../results/other_factorisations/SDA_rotated.pdf", height=9, width=10)
compare_factorisations(t(rot_named_loadings$Yrot),
                       nnmf_decomp$H,
                       c("SDA Rotated","NNMF"),
                       method = "pearson")
dev.off()

GGplot version

corrs <- compare_factorisations(t(rot_NNMF$Yrot),
                       named_loadings,
                       c("NNMF Rotated","SDA"),
                       method = "pearson", return="cor")

heatmapgrob <- ggplot(melt(corrs[50:1,], value.name="Pearson_correlation"), aes(`NNMF Rotated`,SDA)) +
  geom_tile(aes(fill=Pearson_correlation)) +
  scale_fill_viridis() +
  theme_bw(base_size = 12) +
  theme(axis.text.x = element_text(angle = 90), legend.position = "bottom")

saveRDS(heatmapgrob, "../data/plots/heatmapgrob.rds")

Compare Promax

sda_load_promax <- promax(t(SDAresults$loadings[[1]]))
nnmf_load_promax <- promax(t(nnmf_decomp$H))

##


compare_factorisations(t(nnmf_load_promax$loadings),
                       t(sda_load_promax$loadings),
                       c("NNMF","SDA"),
                       method = "pearson")

# compare promax to original

compare_factorisations(t(sda_load_promax$loadings),
                       named_loadings,
                       c("NNMF","SDA"),
                       method = "pearson")

compare_factorisations(t(nnmf_load_promax$loadings),
                       nnmf_decomp$H,
                       c("NNMF","SDA"),
                       method = "pearson")

plot(sda_load_promax$loadings[,"V5"],SDAresults$loadings[[1]]["V5",])
plot(nnmf_load_promax$loadings[,"V50"],nnmf_decomp$H["V50",])

plot(rot_NNMF$Yrot[,"V5"], SDAresults$loadings[[1]]["V5",])
plot(nnmf_decomp$H["V50",],SDAresults$loadings[[1]]["V5",])

Expression predictions

Correlate imputed expression NNMF vs SDA

gene_cor <- numeric(length = length(colnames(SDAresults$loadings[[1]])))

for(i in seq_along(gene_cor)){
  gene_cor[i] <- cor(nmf_predict(c(colnames(SDAresults$loadings[[1]])[i]))[,2][[1]],
                     sda_predict(c(colnames(SDAresults$loadings[[1]])[i]))[,2][[1]])
}

names(gene_cor) <- colnames(SDAresults$loadings[[1]])

saveRDS(gene_cor, "../data/imputation/NNMF_vs_SDA_Gene_correlations.rds")

gene_cor <- readRDS("../data/imputation/NNMF_vs_SDA_Gene_correlations.rds")

manhatten_plot(component_enrichment(names(head(sort(gene_cor),500))), topn = 14)
manhatten_plot(component_enrichment(names(head(sort(gene_cor),500)), loadings=nnmf_decomp$H), topn = 14)

Gene Correlation without single cell components

named_SDAresults <- SDAresults

rownames(named_SDAresults$loadings[[1]]) <- paste0("Mix",1:50," ",names(sort(component_labels)))
colnames(named_SDAresults$scores) <- paste0("Mix",1:50," ",names(sort(component_labels)))

sc_components <- grep("Single",rownames(named_SDAresults$loadings[[1]]))

named_SDAresults$loadings[[1]] <- named_SDAresults$loadings[[1]][-sc_components,]
named_SDAresults$scores <- named_SDAresults$scores[,-sc_components]

gene_cor <- numeric(length = length(colnames(SDAresults$loadings[[1]])))

for(i in seq_along(gene_cor)){
  gene_cor[i] <- cor(nmf_predict(c(colnames(SDAresults$loadings[[1]])[i]))[,2][[1]],
                     sda_predict(c(colnames(SDAresults$loadings[[1]])[i]), factorisation = named_SDAresults)[,2][[1]])
}

names(gene_cor) <- colnames(SDAresults$loadings[[1]])

saveRDS(gene_cor, "../data/imputation/NNMF_vs_SDA_Gene_correlations_sans_single_cell.rds")

gene_cor <- readRDS("../data/imputation/NNMF_vs_SDA_Gene_correlations_sans_single_cell.rds")

plot(sort(gene_cor))
head(sort(gene_cor))
str(rank(gene_cor)["Gfra1"])
manhatten_plot(component_enrichment(names(head(sort(gene_cor),500))), topn = 7)
manhatten_plot(component_enrichment(names(head(sort(gene_cor),500)),
                                    loadings=nnmf_decomp$H, orderSDA = F), topn = 0)

Blood

blood_SDA <- tricolour_tsne(c("Cd3g","Csf1r")) +
  facet_zoom(xlim=c(-10,0), ylim=c(-6,-1)) +
  ggtitle("SDA Imputed Expression") + labs(y="t-SNE 2", x="t-SNE 1")

blood_NNMF <- tricolour_tsne(c("Cd3g","Csf1r"), predict = "NNMF") +
  facet_zoom(xlim=c(-10,0), ylim=c(-6,-1)) +
  ggtitle("NNMF Imputed Expression") + labs(y="t-SNE 2", x="t-SNE 1")

blood_SDA
blood_NNMF

pdf("../results/other_factorisations/blood_SDA.pdf", width = 7, height = 5)
blood_SDA
dev.off()

pdf("../results/other_factorisations/blood_NNMF.pdf", width = 7, height = 5)
blood_NNMF
dev.off()

Scatter Plot

corss <- rbind(sda_predict(c("Cd3g","Csf1r")), nmf_predict(c("Cd3g","Csf1r")))
corss$Method <- "NNMF"
corss[1:(nrow(corss)/2)]$Method <- "SDA"

corss$rgb <- ternaryColours(corss, c("Cd3g","Csf1r"))

ggplot(corss, aes(Csf1r, Cd3g, colour=rgb)) + scale_colour_identity() + geom_point() + facet_wrap(~Method, scales = "free")

blood_SDA_vs_NNMF <- ggplot(corss, aes(Csf1r, Cd3g, colour=Method)) +
  geom_point(alpha=0.5) +
  scale_color_brewer(palette = "Set1") +
  theme_minimal() + theme(legend.position = "bottom")

blood_SDA_vs_NNMF

pdf("../results/other_factorisations/blood_SDA_vs_NNMF.pdf", width = 5, height = 5)
blood_SDA_vs_NNMF
dev.off()

Stem Cells

tricolour_tsne(c("Gfra1","Upp1","Crabp1")) +
  facet_zoom(xlim=c(-5,5), ylim=c(35,43), zoom.size = 2) +
  ggtitle("SDA Prediction")

tricolour_tsne(c("Gfra1","Upp1","Crabp1"), predict = "NNMF") +
  facet_zoom(xlim=c(-5,5), ylim=c(35,43), zoom.size = 2) +
  ggtitle("NNMF Prediction")

gonia_SDA <- tricolour_tsne(c("Gfra1","Lin28a","Nanos1")) +
  facet_zoom(xlim=c(-5,5), ylim=c(36,43), zoom.size = 2) +
  ggtitle("SDA Imputed Expression") + labs(y="t-SNE 2", x="t-SNE 1")

gonia_SDA_zoom <- tricolour_tsne(c("Gfra1","Lin28a","Nanos1")) +
  xlim(c(-4,5)) + ylim(c(36,44)) +
  ggtitle("SDA Imputed Expression") + labs(y="t-SNE 2", x="t-SNE 1")

gonia_NNMF <- tricolour_tsne(c("Gfra1","Lin28a","Nanos1"), predict = "NNMF") +
  facet_zoom(xlim=c(-4,5), ylim=c(36,44), zoom.size = 2) +
  ggtitle("NNMF Imputed Expression") + labs(y="t-SNE 2", x="t-SNE 1")
# ccnd2 or gfra1
# upp1 or foxf1
# ccna2 or crabp1

gonia_SDA
gonia_NNMF

pdf("../results/other_factorisations/Spermatogonia_SDA.pdf", width = 7, height = 5)
gonia_SDA
dev.off()

pdf("../results/other_factorisations/Spermatogonia_NNMF.pdf", width = 7, height = 5)
gonia_NNMF
dev.off()

Scatter plot

corss <- rbind(sda_predict(c("Gfra1","Upp1")), nmf_predict(c("Gfra1","Upp1")))
corss$Method <- "NNMF"
corss[1:(nrow(corss)/2)]$Method <- "SDA"

corss$rgb <- ternaryColours(corss, c("Gfra1","Upp1"))

ggplot(corss, aes(Upp1, Gfra1, colour=rgb)) + scale_colour_identity() + geom_point() + facet_wrap(~Method, scales = "free")

ggplot(corss, aes(Upp1, Gfra1, colour=Method)) +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  theme_minimal()

corss <- rbind(sda_predict(c("Igfbp5","Upp1")), nmf_predict(c("Igfbp5","Upp1")))
corss$Method <- "NNMF"
corss[1:(nrow(corss)/2)]$Method <- "SDA"

ggplot(corss, aes(Upp1, Igfbp5, colour=Method)) +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  theme_minimal()

corss <- rbind(sda_predict(c("Gm14019","Upp1")), nmf_predict(c("Gm14019","Upp1")))
corss$Method <- "NNMF"
corss[1:(nrow(corss)/2)]$Method <- "SDA"

ggplot(corss, aes(Upp1, Gm14019, colour=Method)) +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  theme_minimal()

100 component NNMF versions

tricolour_tsne(c("Cd3g","Csf1r"), predict = "NNMF2") +
  facet_zoom(xlim=c(-10,0), ylim=c(-6,-1)) +
  ggtitle("NNMF 100C Prediction")

tricolour_tsne(c("Gfra1","Upp1","Crabp1"), predict = "NNMF2") +
  facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) +
  ggtitle("NNMF 100C Prediction")

Compare components not expression

(with expression as colour)

#plot(nnmf_decomp$W[,"V4"][cell_data[order(experiment)]$cell], pch=".")

tmp <- tricolour_tsne(c("Cd3g","Csf1r"), returndf = T, predict = F)

ggplot(tmp, aes(V3,V11,colour=rgb)) +
  geom_point() +
  scale_colour_identity() +
  theme_dark() +
  annotate("text", Inf, Inf, hjust = 2, vjust = 2, label = "Cd3g", colour="red", fontface="bold") +
  annotate("text", Inf, Inf, hjust = 2, vjust = 4, label = "Csf1r", colour="blue", fontface="bold")

tmp$NNMF_V4 <- nnmf_decomp$W[,"V4"][cell_data$cell]
tmp$NNMF_V11 <- nnmf_decomp$W[,"V11"][cell_data$cell]
tmp$NNMF_V34 <- nnmf_decomp$W[,"V34"][cell_data$cell]

ggplot(tmp[order(Cd3g/2+Csf1r)], aes(NNMF_V4,NNMF_V11,colour=rgb)) +
  geom_point() +
  scale_colour_identity() +
  theme_dark() +
  annotate("text", Inf, Inf, hjust = 2, vjust = 2, label = "Cd3g", colour="red", fontface="bold") +
  annotate("text", Inf, Inf, hjust = 2, vjust = 4, label = "Csf1r", colour="blue", fontface="bold")

ggplot(tmp[order(Cd3g/2+Csf1r)], aes(NNMF_V4,NNMF_V11,colour=rgb)) +
  geom_point() +
  scale_colour_identity() +
  theme_dark() +
  annotate("text", Inf, Inf, hjust = 2, vjust = 2, label = "Cd3g", colour="red", fontface="bold") +
  annotate("text", Inf, Inf, hjust = 2, vjust = 4, label = "Csf1r", colour="blue", fontface="bold")

tmp$NNMF_Crgb <- ternaryColours(tmp, c("NNMF_V4","NNMF_V11","NNMF_V34"))

ggplot(tmp, aes(Tsne1_QC1, Tsne2_QC1)) +
  geom_point(aes(colour=NNMF_Crgb), stroke=0, size=1) +
  scale_color_identity()  +
  theme_dark() +
  facet_zoom(xlim=c(-10,0), ylim=c(-6,-1), zoom.size = 1)

ggplot(tmp[order(Cd3g)], aes(Tsne1_QC1, Tsne2_QC1)) +
  geom_point(aes(colour=Cd3g), stroke=0, size=1) +
  scale_color_viridis(direction = -1) +
  facet_zoom(xlim=c(-10,0), ylim=c(-6,-1), zoom.size = 1)

tmp$V3L <- asinh(tmp$V3)
tmp$V11L <- asinh(tmp$V11)
tmp$SDA_Crgb <- ternaryColours(tmp, c("V11L","V3L"))

ggplot(tmp, aes(Tsne1_QC1, Tsne2_QC1)) +
  geom_point(aes(colour=SDA_Crgb), stroke=0, size=1) +
  scale_color_identity()  +
  theme_dark() +
  facet_zoom(xlim=c(-9.5,0), ylim=c(-5.5,-1), zoom.size = 1) +
  annotate("text", Inf, Inf, hjust = 1.5, vjust = 2, label = "SDA_C11", colour="red", fontface="bold") +
  annotate("text", Inf, Inf, hjust = 1.5, vjust = 4, label = "SDA_C3", colour="blue", fontface="bold")

tricolour_tsne(c("Cd52","Tyrobp"), predict = F, ptsize = 1)  + facet_zoom(xlim=c(-10,0), ylim=c(-6,-1), zoom.size = 1)

Enrichments

ETC

etc_enrich_nnmf[order(p.value)][1:5]
etc_enrich[order(p.value)][1:5]

manhatten_plot(etc_enrich, legend_position = c(0.2,0.8))
manhatten_plot(etc_enrich_nnmf, legend_position = c(0.2,0.8))

plot(single_component_enrichment(nnmf_decomp$H["V46",], etc_genes_clean, threshold = 500)$ranks)
plot(single_component_enrichment(SDAresults$loadings[[1]][9,], etc_genes_clean, threshold = 500, pos = F)$ranks)

plot(single_component_enrichment(nnmf_decomp$H["V46",], etc_genes_clean, threshold = 500)$ranks, 
     single_component_enrichment(SDAresults$loadings[[1]][9,], etc_genes_clean, threshold = 500, pos = F)$ranks)

nnmf_ranks <- single_component_enrichment(nnmf_decomp$H["V46",], etc_genes_clean, threshold = 500)$ranks
sda_ranks <- single_component_enrichment(SDAresults$loadings[[1]][9,], etc_genes_clean, threshold = 500, pos = F)$ranks

plot(nnmf_ranks, sda_ranks)
abline(0,1)

plot(nnmf_ranks /
     sda_ranks,
     ylab="Ratio rank NNMF/SDA")
abline(h=1)

qplot(1:length(nnmf_ranks), nnmf_ranks[order(names(nnmf_ranks))] /
     sda_ranks[order(names(sda_ranks))],
     ylab="Ratio paired rank NNMF/SDA") + scale_y_log10() + geom_hline(yintercept = 1)

Stra8 Enrichment

kojima_stra8_enrich <- component_enrichment(kojima_stra8_chip)
kojima_stra8_enrich_nnmf <- component_enrichment(kojima_stra8_chip, loadings_matrix = nnmf_decomp$H, orderSDA = F)

kojima_stra8_enrich[order(p.value)][1:5]
kojima_stra8_enrich_nnmf[order(p.value)][1:5]

manhatten_plot(kojima_stra8_enrich, legend_position = c(0.2,0.8))
manhatten_plot(kojima_stra8_enrich_nnmf, legend_position = c(0.2,0.8))

plot(single_component_enrichment(SDAresults$loadings[[1]][5,], kojima_stra8_chip, threshold = 500, pos = F)$ranks)
plot(single_component_enrichment(nnmf_decomp$H["V50",], kojima_stra8_chip, threshold = 500)$ranks)

MSCA

Meiotic Sex Chromosome Activation in Hormad1 KO

In NNMF this effect bleeds into the WT cells

Compare Hormad1KO Cell Scores

# plot(SDAresults$scores[cell_data[order(-PseudoTime)]$cell,"V38"],col=rgb(0,0,0,1-as.double(grepl("Hormad",cell_data[order(-PseudoTime)]$experiment))),xlim=c(0,5000),ylim=c(-5,20))
# plot(nnmf_decomp$W[cell_data[order(-PseudoTime)]$cell,"V3"],col=rgb(0,0,0,1-as.double(grepl("Hormad",cell_data[order(-PseudoTime)]$experiment))),xlim=c(0,5000))
# 
# plot(SDAresults$scores[cell_data[order(-PseudoTime)]$cell,"V38"],col=1+as.double(grepl("Hormad",cell_data[order(-PseudoTime)]$experiment)),xlim=c(0,5000),ylim=c(-5,20))
# plot(nnmf_decomp$W[cell_data[order(-PseudoTime)]$cell,"V3"],col=1+as.double(grepl("Hormad",cell_data[order(-PseudoTime)]$experiment)),xlim=c(0,5000))

tmp <- data.table("NNMF"=nnmf_decomp$W[cell_data[order(-PseudoTime)]$cell,"V3"],
           "SDA"=SDAresults$scores[cell_data[order(-PseudoTime)]$cell,"V38"],
           "Hormad1KO"=grepl("Hormad",cell_data[order(-PseudoTime)]$experiment),
           "PseudoTime" = cell_data[order(-PseudoTime)]$PseudoTime,
           "cell" = cell_data[order(-PseudoTime)]$cell,
           "msci_ratio" = cell_data[order(-PseudoTime)]$msci_ratio)

tmp[Hormad1KO==TRUE, Hormad_Genotype := "Hormad -/-"]
tmp[Hormad1KO==FALSE, Hormad_Genotype := "Hormad WT"]

tmp <- melt(tmp[PseudoTime>14500], id.vars = c("cell","msci_ratio","PseudoTime","Hormad1KO","Hormad_Genotype"), variable.name = "Method", value.name = "Cell_Score")

hormad_cell_score <- ggplot(tmp, aes(-PseudoTime, Cell_Score, colour=Method)) +
  geom_point(alpha=0.5, stroke=0) +
  facet_grid(Hormad_Genotype~Method, scales="free_y") +
  scale_color_brewer(palette = "Set1", guide=FALSE) +
  geom_smooth(colour="black") +
  theme_bw()

pdf("../results/other_factorisations/MSCA_Cell_Score.pdf", width = 7)
hormad_cell_score
dev.off()

Compare Rhox2h Prediction

hormad_gene <- "Rhox2h"

cell_data[grepl("Hormad",experiment), Hormad_Genotype := "Hormad -/-"]
cell_data[!grepl("Hormad",experiment), Hormad_Genotype := "Hormad WT"]

tmp <- cell_data[nmf_predict(hormad_gene,name_extension = "_NNMF")][sda_predict(hormad_gene, name_extension = "_SDA")]

tmp <- melt(tmp[PseudoTime>14500],id.vars = c("cell","PseudoTime","Hormad_Genotype"), measure.vars = paste0(hormad_gene,"_",c("NNMF","SDA")), variable.name = "Method", value.name = hormad_gene)

tmp[,Method := gsub(paste0(hormad_gene,"_"),"",Method)]

Rhox2h_prediction <- ggplot(tmp, aes(-PseudoTime, get(hormad_gene), colour=Method)) +
  geom_point(alpha=0.5, stroke=0) +
  facet_grid(Hormad_Genotype~Method, scales="free_y") +
  scale_color_brewer(palette = "Set1", guide=FALSE) +
  geom_smooth(colour="black") +
  ylab(paste(hormad_gene,"Imputed Expression")) +
  scale_x_continuous(breaks = c(-15000,-16000)) +
  theme_bw()

Rhox2h_prediction

pdf("../results/other_factorisations/MSCA_Rhox2h.pdf", width = 7)
Rhox2h_prediction
dev.off()

With 100 NNMF components

Better

cell_data[grepl("Hormad1_3",experiment), Genotype := "2"]
cell_data[!grepl("Hormad1_3",experiment), Genotype := "1"]

tmp <- cell_data[nmf_predict(c("Rhox2h"),nnmf_decomp5, name_extension = "_NNMF")][sda_predict(c("Rhox2h"), name_extension = "_SDA")]

tmp <- melt(tmp[PseudoTime>14500],id.vars = c("cell","PseudoTime","Hormad_Genotype"), measure.vars = c("Rhox2h_NNMF","Rhox2h_SDA"), variable.name = "Method", value.name = "Rhox2h")

#tmp[,Method := gsub("Rhox2h_","",Method)]

Rhox2h_prediction <- ggplot(tmp, aes(-PseudoTime, Rhox2h, colour=Method)) +
  geom_point(alpha=0.5) +
  facet_grid(Hormad_Genotype~Method, scales="free_y") +
  scale_color_brewer(palette = "Set1") +
  geom_smooth(colour="black") +
  theme_bw()

Rhox2h_prediction

R FA

too slow didn't finish

library(data.table)
data <- fread("../data/conradV3/V3_SDAmerged_mouse_V3_SDA.data")

dimnames_conradV3 <- readRDS("../data/conradV3/V3_SDAmerged_mouse_V3_SDA_dimnames.rds")
data <- as.matrix(data)
rownames(data) <- dimnames_conradV3[[1]]
colnames(data) <- dimnames_conradV3[[2]]

rfa <- factanal(data, factors = 50, rotation = "promax")

MyersGroup/testisAtlas documentation built on Nov. 29, 2020, 9:21 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

MyersGroup/testisAtlas
testisAtlas: R functions and analysis scripts used in the analysis of testis single cell RNAseq

In MyersGroup/testisAtlas: testisAtlas: R functions and analysis scripts used in the analysis of testis single cell RNAseq

NNMF

Interation: 39

Interation: 45

ICA

PCA

Load results

compare sparsity of gene loadings

Cell score sparcity

Compare Prdm9 Component Loadings

Compare Leptotene Loadings

Compare X Chrom Activation Loadings

All NMF Loadings

compare NNMF L1 sparsity

Imputation

test PCA implementaitons

Correlations

100 componentns

Pearson

Rotated NNMF

Rotated SDA

GGplot version

Compare Promax

Expression predictions

Correlate imputed expression NNMF vs SDA

Blood

Stem Cells

Compare components not expression

Enrichments

ETC

Stra8 Enrichment

MSCA

Compare Hormad1KO Cell Scores

Compare Rhox2h Prediction

R FA

R Package Documentation

Browse R Packages

We want your feedback!

MyersGroup/testisAtlas testisAtlas: R functions and analysis scripts used in the analysis of testis single cell RNAseq

In MyersGroup/testisAtlas: testisAtlas: R functions and analysis scripts used in the analysis of testis single cell RNAseq

NNMF

Interation: 39

Interation: 45

ICA

PCA

Load results

compare sparsity of gene loadings

Cell score sparcity

Compare Prdm9 Component Loadings

Compare Leptotene Loadings

Compare X Chrom Activation Loadings

All NMF Loadings

compare NNMF L1 sparsity

Imputation

test PCA implementaitons

Correlations

100 componentns

Pearson

Rotated NNMF

Rotated SDA

GGplot version

Compare Promax

Expression predictions

Correlate imputed expression NNMF vs SDA

Blood

Stem Cells

Compare components not expression

Enrichments

ETC

Stra8 Enrichment

MSCA

Compare Hormad1KO Cell Scores

Compare Rhox2h Prediction

R FA

R Package Documentation

Browse R Packages

We want your feedback!

MyersGroup/testisAtlas
testisAtlas: R functions and analysis scripts used in the analysis of testis single cell RNAseq