detach("package:testisAtlas", unload=TRUE)
detach("package:ComplexHeatmap", unload=TRUE)
devtools::install_github("jokergoo/ComplexHeatmap", ref="5de60f87b")
pkgload::load_all()

This is running SDA only on WT cell (no Knock Outs). In general, we find many component are very similar across the different SDA decompositions.

library(Matrix)
library(data.table)
library(dropsim)

raw_data <- readRDS("../data/merged-mouse_counts_tsne-QC_low-mt.rds")
raw_data <- t(raw_data)

#cell_data <- readRDS("../data/tmp_cache/cell_data.rds")

wt_groups <- c("WT","mj","SPG","SPD","SPCII","SPCI")

str(wt_groups)

wt_cells <- cell_data[group %in% wt_groups]$cell



raw_data <- raw_data[,wt_cells]

data <- normaliseDGE(raw_data,
                     center = FALSE,
                     scale = TRUE,
                     threshold = 10,
                     min_library_size = 200,
                     gene_subset = (2/3))

library_size <- Matrix::colSums(raw_data[,rownames(data)])

# Issue warning if data changes
stopifnot(digest::digest(data) == "9996e625cd14c3387d0840442de901ba")
stopifnot(digest::digest(library_size) == "21c2f00d2a33fff3dab0082b78d179b2")

saveRDS(list(dge=data, library_size=library_size), "../data/merged_mouse_normalised_tsne-QC_V3_WT.rds") # save a cache for easy reopening

v10 Mix genes

normalie data

dataM <- normaliseDGE(raw_data[colnames(data),wt_cells],
                     center = FALSE,
                     scale = TRUE,
                     threshold = 10,
                     min_library_size = 200,
                     gene_subset = 1)

library_size <- Matrix::colSums(raw_data[,rownames(dataM)]) # across ALL genes

# Issue warning if data changes
stopifnot(digest::digest(dataM) == "8dac076792e424cde2cc6c308cfd6db9")
stopifnot(digest::digest(library_size) == "21c2f00d2a33fff3dab0082b78d179b2")

saveRDS(list(dge=dataM, library_size=library_size), "../data/merged_mouse_normalised_tsne-QC_V3_WT_MixGenes.rds") # save a cache for easy reopening

run sda

dataM <- readRDS("../data/merged_mouse_normalised_tsne-QC_V3_WT_MixGenes.rds")$dge
export_data(as.matrix(dataM), name = "merged_mouse_V3_WT_mixgenes_SDA", path="../data/conradV3/")

run_SDA(sda_location = "../../SDA/build/sda",
        out = "../results/conradV3_sda_10",
        data = "../data/conradV3/merged_mouse_V3_WT_mixgenes_SDA",
        num_comps = 50,
        max_iter = 10000,
        save_freq = 1000,
        set_seed = "79151 17351",
        N = 9936,
        eigen_parallel = TRUE,
        num_blocks = 10,
        num_openmp_threads = 10)

load results

results10_WT <- load_results(results_folder = "../data/SDA/conradV3_sda_10/", data_path = "../data/count_matrices/")
str(results10_WT)

rownames(results10_WT$loadings[[1]]) <- paste0("WT_",1:50)
colnames(results10_WT$scores) <- paste0("WT_",1:50)

check_convergence(results10_WT)

common_genes1_10 <- colnames(results1$loadings[[1]])[colnames(results1$loadings[[1]]) %in% colnames(results10_WT$loadings[[1]])]

v9, WT cells

run sda

data <- readRDS("../data/conradV3/merged_mouse_normalised_tsne-QC_V3_WT.rds")$dge
export_data(as.matrix(data), name = "merged_mouse_V3_WT_SDA", path="../data/conradV3/")

run_SDA(sda_location = "../../SDA/build/sda",
        out = "../results/conradV3_sda_9",
        data = "../data/conradV3/merged_mouse_V3_WT_SDA.data",
        num_comps = 50,
        max_iter = 10000,
        save_freq = 1000,
        set_seed = "79151 17351",
        N = 9936,
        eigen_parallel = TRUE,
        num_blocks = 5,
        num_openmp_threads = 8)

run_SDA(sda_location = "../../SDA/build/sda",
        out = "../results/conradV3_sda_15",
        data = "../data/conradV3/merged_mouse_V3_WT_SDA.data",
        num_comps = 50,
        max_iter = 10000,
        save_freq = 1000,
        set_seed = "77253 12763",
        N = 9936,
        eigen_parallel = TRUE,
        num_blocks = 6,
        num_openmp_threads = 6)

load results

results9_WT <- load_results(results_folder = "../data/SDA/conradV3_sda_9/", data_path = "../data/count_matrices/")
str(results9_WT)

rownames(results9_WT$loadings[[1]]) <- paste0("WT_",1:50)
colnames(results9_WT$scores) <- paste0("WT_",1:50)

check_convergence(results9_WT)
loading_distribution(results9_WT)
scores_distribution(results9_WT)
plot_maximums(results9_WT)
#plot_scree(results9_WT)
PIP_distribution(results9_WT)
PIP_component_distribution(results9_WT, 2)
PIP_threshold_distribution(results9_WT)
rna_locations <- load_gene_locations(colnames(results_WT$loadings[[1]]), name="merge_mouse_V3_WT", path = "../data/")
rna_locations[,.N,by=chromosome_name][order(-N)]

Overall Comparison

Generally correlations between any random compoents are low (good), with some pairs have high correlation (~0.8, good). Permuting the gene loadings gives a kind of null distribution of correlations which are all below 0.03. Most (11) WT components have one good match (>0.5) to the mix compoents, with 1 having two matches, and 38 no match.

results1 <- load_results(results_folder = "../data/SDA/conradV3_sda_1/", data_path = "../data/count_matrices/dimnames_archive/")
str(results1)

# add annotated component names
rownames(results1$loadings[[1]]) <- paste0("Mix",1:50," ",names(sort(component_labels))) #paste0("Mix_",1:50)
colnames(results1$scores) <- paste0("Mix",1:50," ",names(sort(component_labels))) #paste0("Mix_",1:50)

library(ComplexHeatmap)

common_genes <- colnames(results1$loadings[[1]])[colnames(results1$loadings[[1]]) %in% colnames(results9_WT$loadings[[1]])]

V9 WT vs Mix

To get a rough idea of what correlation to expect under a null, permute genes.

Max correlation over 100 permutations if 3.6%

max_permuted_correlation <- sapply(1:100,
function(x) range(compare_factorisations(results9_WT,
                       results1,
                       names= c("WT",x),
                       method = "pearson",
                       randomise = T,
                       return = "cor")))

plot(sort(max_permuted_correlation[2,]), main="Max Correlation for Permuted Genes", ylab="Max Pearson Correlation", xlab="Permutation Round (sorted)")

Replication of Preprint Figure (Spearman)

As noted by the reviewer the max correlation (spearman) for WT_42 is 0.42

cor_mix_vs_wt9 <- compare_factorisations(results1,
                       results9_WT,
                       names= c("Mixed","WT"),
                       method = "spearman", return = "cor")
max(cor_mix_vs_wt9["WT_42",])

compare_factorisations(results1,
                       results9_WT,
                       names= c("Mixed","WT"),
                       method = "spearman")

pdf("../results/WT/correlation_matrix_spearman.pdf", width = 12, height = 10)
compare_factorisations(results9_WT,
                       results1,
                       names= c("WT","Mixed"),
                       method = "spearman")
dev.off()

WT on the bottom, Pearson

Splitting components can help in some cases, e.g. Mix18 Late Spermiogenesis 1

compare_factorisations(results9_WT,
                       results1,
                       names= c("WT","Mixed"),
                       method = "pearson")

compare_factorisations(results9_WT,
                       results1,
                       names= c("WT","Mixed"),
                       method = "pearson",
                       split=c(T,T))

WT on the right

compare_factorisations(results1,
                       results9_WT,
                       names= c("Mixed","WT"),
                       method = "pearson")

compare_factorisations(results1,
                       results9_WT,
                       names= c("Mixed","WT"),
                       method = "pearson",
                       split=c(T,T))

Plot component metadata

df_cor <- compare_factorisations(results1,
                       results9_WT,
                       names= c("Mixed","WT"),
                       method = "pearson",
                       return = "df")

df_cor

ggplot(df_cor[51:100], aes(max_cell_score, max_cor_per_compoent, label=component, colour=sum_abs_cell_score)) +
    geom_point() +
    scale_color_viridis() +
    ggtitle("High Max Cell score have low Correlation")

ggplot(df_cor[51:100], aes(sum_abs_cell_score, max_cor_per_compoent, label=component, colour=max_cell_score)) +
    geom_point() +
    scale_color_viridis() + 
    ggtitle("More cells in a component, higher correlation?")

ggplot(df_cor[1:50], aes(mutant_contribution, max_cor_per_compoent, label=component, colour=max_cell_score)) +
    geom_point() +
    scale_color_viridis()

Predictions

If the WT only model gives the same predicted (imputed) expression for the WT cells as the mixed analysis, this suggests nothing completely crazy has happened.

compare_predictions <- function(gene){
    tmp <- sda_predict(gene,results1)[cell %in% rownames(results9_WT$scores)][sda_predict(gene,results9_WT)]
    # i is the 2nd one, (WT only)
    plot(tmp[,get(gene)], tmp[,get(paste0("i.",gene))], pch=".", xlab="Mixed Prediction", ylab="WT only Prediction",
         main= paste(gene, signif(cor(tmp[,get(gene)], tmp[,get(paste0("i.",gene))]), 3)))
    abline(0,1, col='red')

}

compare_predictions("Ssxb1")
compare_predictions("Piwil1")
compare_predictions("Prm1")
compare_predictions("Acrv1")
compare_predictions("Gfra1")
compare_predictions("Nanos3")
compare_predictions("Dcn")
compare_predictions("Ccna1")
compare_predictions("Esx1")
compare_predictions("Aard")

Some Mix components can be receated as a linear combination of WT only components

Indeed a linear combination of WT_49, WT_42 and WT_24 gives a new correlation of 93%

combolm42 <- lm(results1$loadings[[1]]["Mix42 Pachytene",common_genes] ~ t(results9_WT$loadings[[1]][,common_genes]))
summary(combolm42)
plot(predict.lm(combolm42), results1$loadings[[1]]["Mix42 Pachytene",common_genes], pch=".")
cor(predict.lm(combolm42), results1$loadings[[1]]["Mix42 Pachytene",common_genes])
cor(-results9_WT$loadings[[1]]["WT_24",common_genes]*0.911 + results9_WT$loadings[[1]]["WT_49",common_genes]*0.875 + results9_WT$loadings[[1]]["WT_42",common_genes]*0.629, results1$loadings[[1]]["Mix42 Pachytene",common_genes])

Procrustes Rotation

This linear combination thing has a more general case, that is the procrustes rotation of the loadings matrix to find all the linear combinations at once.

Similar result, 90% correlation.

row_9WT_list <- rotate_SDA(results9_WT, results1)

which.max(abs(cor(t(row_9WT_list$loadings[[1]]),results1$loadings[[1]]["Mix42 Pachytene",common_genes])))
plot(t(row_9WT_list$loadings[[1]])[,42],results1$loadings[[1]]["Mix42 Pachytene",common_genes], pch=".")
cor(t(row_9WT_list$loadings[[1]])[,42],results1$loadings[[1]]["Mix42 Pachytene",common_genes])

which.max(abs(cor(t(row_9WT_list$loadings[[1]]),results1$loadings[[1]]["Mix9 Respiration",common_genes])))
plot(t(row_9WT_list$loadings[[1]])[,9],results1$loadings[[1]]["Mix9 Respiration",common_genes], pch=".")
cor(t(row_9WT_list$loadings[[1]])[,9],results1$loadings[[1]]["Mix9 Respiration",common_genes])
pdf("../results/WT/correlation_matrix.pdf", width = 12, height = 10)
compare_factorisations(results9_WT,
                       results1,
                       c("WT","Mixed"),
                       method = "pearson")
dev.off()


pdf("../results/WT/correlation_matrix_rotated.pdf", width = 12, height = 10)
compare_factorisations(row_9WT_list,
                       results1,
                       c("WT","Mixed"),
                       method = "pearson")
dev.off()

rot_mix_cor <- compare_factorisations(row_9WT_list,
                       results1,
                       c("WT","Mixed"),
                       method = "pearson", return = "cor")

mix_cor <- compare_factorisations(results9_WT,
                       results1,
                       c("WT","Mixed"),
                       method = "pearson", return = "cor")
tmp <- data.table(
  Component=sort(rownames(mix_cor)),
  Rotated=apply(rot_mix_cor, 1, max)[sort(rownames(mix_cor))],
  Raw=apply(mix_cor, 1, max)[sort(rownames(mix_cor))]
)

#qplot(tmp$Raw, tmp$Rotated) + geom_abline(intercept = 0, slope = 1)

tmp[, Difference := Rotated-Raw]

tmp <- melt(tmp, id.vars = c("Component","Difference"), variable.name = "Type", value.name = "Correlation")

tmp[, Component := factor(Component, levels=tmp[Type=="Rotated"][order(Correlation)]$Component)]

rot_diff <- ggplot(tmp,
       aes(Component, Correlation, group=Component)) +
  geom_line(aes(colour=Difference>0), arrow = arrow(ends="first",type="closed", length=unit(0.25,"cm"))) +
  scale_color_manual(values=c("red","black")) +
  geom_point(shape=21, stroke=0, size=2, aes(fill=Type)) +
  scale_fill_brewer(palette = "Set1") +
  coord_flip()

pdf("../results/WT/rotation_correlation_difference.pdf", width=11, height = 8)
rot_diff
dev.off()

Factor Congruence

"Given two sets of factor loadings, report their degree of congruence (vector cosine). Although first reported by Burt (1948), this is frequently known as the Tucker index of factor congruence."

cong_9WT <- psych::factor.congruence(t(row_9WT_list$loadings[[1]]), t(results1$loadings[[1]][,common_genes]))
cong_9WT <- sort_matrix(cong_9WT, sort(apply(abs(cong_9WT), 2, max), decreasing = T))
Heatmap(cong_9WT,
        cluster_rows = FALSE, cluster_columns = FALSE)

The rotation matrix gives a read out of which components have been split into which others

For example Mix42 Pachytene (as we saw previously), is a mixture of WT 42, 24, and 49

Heatmap(sort_matrix(row_9WT_list$rotation, sort(apply(abs(row_9WT_list$rotation), 2, max), decreasing = T)),
        cluster_rows = FALSE, cluster_columns = FALSE)

Sparsity

But the rotation make it non sparse

Could threshold the rotation matrix

plot(sort(row_9WT_list$rotation)); abline(h=0); abline(h=1e-1, col="red"); abline(h=3.5e-1, col="blue")

reg_rot <- row_9WT_list$rotation
reg_rot[abs(reg_rot)<3.5e-1] <- 0

rot_9WTsparse <- t(results9_WT$loadings[[1]][,common_genes]) %*% reg_rot

colnames(rot_9WTsparse) <- paste0("WT_rot",1:50)

# scores
rot_9WTsparse_scoresrot <- results9_WT$scores %*% reg_rot
#colnames(rot_9WT_scoresrot) <- paste0("WT_rot",1:50)
colnames(rot_9WTsparse_scoresrot) <- paste0("WT_rot",1:50)

rot_9WTb_list <- list(loadings = list(t(rot_9WTsparse + rnorm(prod(dim(rot_9WTsparse)), 0, 0.00001))),
                      scores = rot_9WTsparse_scoresrot + rnorm(prod(dim(rot_9WTsparse_scoresrot)), 0, 0.00001))


#rownames(reg_rot) <- 1:50
#colnames(reg_rot) <- rownames(results1$loadings[[1]])
#rownames(reg_rot) <- paste("WT",1:50)
threshold = exp(seq(-10,0,0.1))

loading_sparsity <- data.table(threshold,
                               t(sapply(threshold, function(x) c("Rotated_WT_SDA"=mean(t(row_9WT_list$loadings[[1]])/max(abs(t(row_9WT_list$loadings[[1]]))) < x),
                                                                 "Rotated_WT_SDA_reg"=mean(rot_9WTb_list$loadings[[1]]/max(abs(rot_9WTb_list$loadings[[1]])) < x),
                                                                 "Mix_SDA"=mean(SDAresults$loadings[[1]]/max(abs(SDAresults$loadings[[1]])) < x),
                                                                 "WT_SDA"=mean(results9_WT$loadings[[1]]/max(abs(results9_WT$loadings[[1]])) < x)))))


loading_sparsity <- melt(loading_sparsity, id.vars = "threshold", variable.name = "Method", value.name = "Fraction")

gene_loading_sparsity <- ggplot(loading_sparsity, aes(threshold, Fraction, colour=Method)) +
  geom_line() +
  ylab("Fraction of absolute gene loadings below threshold") + 
  xlab("Scaled Threshold (t * max(abs(gene_loadings)))") +
  scale_x_continuous(trans = reverselog_trans())+
  scale_color_brewer(palette = "Set1") +
  theme_minimal() +
  theme(legend.position = "none") + 
  geom_text_repel(data=loading_sparsity[threshold==threshold[51]], aes(label = Method),
                  nudge_x = 1, nudge_y = 0.02,
                  na.rm = TRUE, segment.colour='darkgrey', colour="black")


gene_loading_sparsity

# plot(sort(abs(sample(rot_9WT$Yrot, 10000, replace = F))), pch=".", log='y')
# points(sort(abs(sample(results9_WT$loadings[[1]], 10000, replace = F))), col="red", pch=".", log='y')
# abline(h=c(1e-2,1e-3))
# 
# plot(sort(abs(sample(SDAresults$loadings[[1]]/max(abs(SDAresults$loadings[[1]])), 10000, replace = F))), pch=".")
# points(sort(sample(nnmf_decomp$H/max(abs(nnmf_decomp$H)), 10000, replace = F)), col="red", pch=".")

After thresholding we get a much cleaner read out

pdf("../results/WT/rotation_matrix.pdf", width = 10, height = 8)
Heatmap(sort_matrix(row_9WT_list$rotation, sort(apply(abs(row_9WT_list$rotation), 2, max), decreasing = T)),
        col=log_colour_scale(row_9WT_list$rotation, scale = 0.15),
        cluster_rows = FALSE, cluster_columns = FALSE)
dev.off()
pdf("../results/WT/rotation_matrix_sparse.pdf", width = 10, height = 8)
Heatmap(sort_matrix(reg_rot, sort(apply(abs(reg_rot), 2, max), decreasing = T)),
        cluster_rows = FALSE, cluster_columns = FALSE)
dev.off()
table(rowSums(reg_rot!=0))

9 missing, 21 same, 20 split

And similar correlations

cor(rot_9WTb[,42], results1$loadings[[1]]["Mix42 Pachytene",])
cor(rot_9WT$Yrot[,"WT_rot42"], results1$loadings[[1]]["Mix42 Pachytene",])

plot(rot_9WTb[,42], results1$loadings[[1]]["Mix42 Pachytene",], pch=".")
plot(rot_9WT$Yrot[,"WT_rot42"], results1$loadings[[1]]["Mix42 Pachytene",], pch=".")

Sparse vs Non

# some columns are zero so add a tiny bit of noise to avoid error
pdf("../results/WT/correlation_matrix_rotated_sparse.pdf", width = 12, height = 10)
compare_factorisations(row_9WTb_list,
                       results1,
                       names= c("WT","Mixed"),
                       method = "pearson")
dev.off()

compare_factorisations(row_9WT_list,
                       results1,
                       names= c("Mixed","WT"),
                       method = "pearson")
ggplot(data.table(apply(rot_mix_cor, 1, max), factor(rownames(rot_mix_cor), levels=rownames(rot_mix_cor))), aes(V2, V1)) +
    geom_point() +
    coord_flip()

cor(rot_9WT$Yrot[,"WT_rot2"], results1$loadings[[1]]["Mix2 Pre-leptotene",], method = "spearman")

mix_wt <- data.table(results1$loadings[[1]]["Mix50 Gfra1 stem cells",],
                                 rot_9WT$Yrot[,"WT_rot50"],
                                 gene=names(rot_9WT$Yrot[,"WT_rot5"]))

ggplot(mix_wt, aes(V1, V2, label=gene)) +
    geom_point(alpha=0.3, stroke=0) +
    theme_minimal() +
    geom_text_repel(data=mix_wt[abs(V1)>0.09 | abs(V2)>0.1], force = 2) +
    labs(x="Mixed Loadings",y="WT Loadings (Rotated)")

mix_wt <- data.table(results1$loadings[[1]]["Mix31 Undifferentiated Spermatogonia",],
                                 rot_9WT$Yrot[,"WT_rot31"],
                                 gene=names(rot_9WT$Yrot[,"WT_rot5"]))

ggplot(mix_wt, aes(V1, V2, label=gene)) +
    geom_point(alpha=0.3, stroke=0) +
    theme_minimal() +
    geom_text_repel(data=mix_wt[abs(V1)>0.25 | abs(V2)>0.25], force = 2) +
    labs(x="Mixed Loadings",y="WT Loadings (Rotated)")

mix_wt <- data.table(results1$loadings[[1]]["Mix35 Late Acrosomal",],
                                 rot_9WT$Yrot[,"WT_rot35"],
                                 gene=names(rot_9WT$Yrot[,"WT_rot5"]))

ggplot(mix_wt, aes(V1, V2, label=gene)) +
    geom_point(alpha=0.3, stroke=0) +
    theme_minimal() +
    geom_text_repel(data=mix_wt[V1>0.2 | V2>0.2], force = 2) +
    labs(x="Mixed Loadings",y="WT Loadings (Rotated)")

mix_wt <- data.table(results1$loadings[[1]]["Mix20 Meiotic Divisions",],
                                 rot_9WT$Yrot[,"WT_rot20"],
                                 gene=names(rot_9WT$Yrot[,"WT_rot5"]))

ggplot(mix_wt, aes(V1, V2, label=gene)) +
    geom_point(alpha=0.3, stroke=0) +
    theme_minimal() +
    geom_text_repel(data=mix_wt[abs(V1)>(0.15) | abs(V2)>(0.15)], force = 2) +
    labs(x="Mixed Loadings",y="WT Loadings (Rotated)")


mix_wt <- data.table(results1$loadings[[1]]["Mix42 Pachytene",common_genes],
                                 rot_9WT$Yrot[,"WT_rot42"],
                     results9_WT$loadings[[1]]["WT_42",common_genes],
                     rot_9WTb[,"WT_rot42"],
                                 gene=names(rot_9WT$Yrot[,"WT_rot5"]))

cor(mix_wt$V1, mix_wt$V3, method = "spearman")
cor(mix_wt$V1, mix_wt$V3)
cor(mix_wt$V1, mix_wt$V2)
cor(mix_wt$V1, mix_wt$V4)

cor(results1$loadings[[1]]["Mix42 Pachytene",common_genes],results9_WT$loadings[[1]]["WT_42",common_genes])

ggplot(mix_wt, aes(V3, V1, label=gene)) +
    geom_point(alpha=0.3, stroke=0) +
    theme_minimal() +
    geom_text_repel(data=mix_wt[abs(V1)>(1) | abs(V2)>(1)], force = 2) +
    labs(x="Mixed Loadings",y="WT Loadings (Rotated)")

ggplot(mix_wt, aes(V2, V1, label=gene)) +
    geom_point(alpha=1, stroke=0, size=0.5) +
    theme_minimal() +
    geom_text_repel(data=mix_wt[abs(V1)>(1) | abs(V2)>(1)], force = 2) +
    labs(x="Mixed Loadings",y="WT Loadings (Rotated)") + scale_y_reverse() + scale_x_reverse()

pdf("../results/WT/Pachytene_gene_scatterplot.pdf")
ggplot(mix_wt, aes(V4, V1, label=gene)) +
  geom_point(alpha=1, stroke=0, size=0.5) +
  theme_minimal() +
  geom_text_repel(data=mix_wt[abs(V1)>(1) | abs(V2)>(1)], force = 2) +
  labs(x="Mixed Loadings",y="WT Loadings (Rotated)") + scale_y_reverse() + scale_x_reverse()
dev.off()

# nover <- function(dt=mix_wt, i=1){
#     nrow(mix_wt[ V1 > mix_wt[i]$V1-0.01 &
#                 V1 < mix_wt[i]$V1+0.01 &
#                 V4 > mix_wt[i]$V4-0.01 &
#                 V4 < mix_wt[i]$V4+0.01 ])
# }
# 
# noverlaps <- sapply(1:nrow(mix_wt), function(x) nover(i=x))
# tmp <- cbind(mix_wt,V5=as.numeric(noverlaps))
mix_wt <- data.table(results1$loadings[[1]]["Mix31 Spermatogonial Stem Cells",common_genes],
                                 t(row_9WT_list$loadings[[1]])[,"WT_31rot"],
                     results9_WT$loadings[[1]]["WT_7",common_genes],
                     row_9WTb_list$loadings[[1]]["WT_rot31",],
                                 gene=names(t(row_9WT_list$loadings[[1]])[,"WT_5rot"]))

pdf("../results/WT/UdifSpg_gene_scatterplot.pdf")
ggplot(mix_wt, aes(V1, V4, label=gene)) +
  geom_point(alpha=0.5, stroke=0, size=0.7) +
  theme_minimal() +
  geom_text_repel(data=mix_wt[abs(V1)>(0.22) | abs(V4)>(0.22) | V4 < (-0.175) | V1 < (-0.15)], force = 2) +
  labs(y="Mixed C31 Loadings",x="WT C31 Loadings (Rotated)")
dev.off()

pdf("../results/WT/UdifSpg_cell_scatterplot.pdf")
ggplot(cell_data[data.table(rot_9WT_scoresrot[,"WT_rot31", drop=F], keep.rownames = T)],
        aes(WT_rot31, V31, colour=PseudoTime)) +
   geom_point(stroke=0, size=1.5) +
   scale_colour_viridis(direction = -1) +
   labs(y="WT: C31 (Rotated)", x="Mixed: C31 Pachytene")
dev.off()

Scores

WT only vs Mixed scores (V42)

plot(rot_9WTsparse_scoresrot[,"WT_rot42"], results1$scores[rownames(rot_9WTsparse_scoresrot),42], pch=".")
plot(rot_9WT_scoresrot[,"WT_rot42"], results1$scores[rownames(rot_9WT_scoresrot),42], pch=".")
abline(0,0.55,col='red')

ggplot(cell_data[data.table(rot_9WT_scoresrot_sparse[,"WT_rot42", drop=F], keep.rownames = T)], aes(WT_rot42, V42, colour=PseudoTime)) + geom_point(stroke=0, size=0.5) + scale_colour_viridis()

pdf("../results/WT/Pachytene_cellscore_scatter.pdf")
ggplot(cell_data[data.table(rot_9WT_scoresrot[,"WT_rot42", drop=F], keep.rownames = T)],
       aes(WT_rot42, V42, colour=PseudoTime)) +
  geom_point(stroke=0, size=0.5) +
  scale_colour_viridis(direction = -1) +
  scale_y_reverse() + scale_x_reverse() +
  labs(y="WT: C42 (Rotated)", x="Mixed: C42 Pachytene")
dev.off()

ggplot(cell_data[data.table(results9_WT$scores[,"WT_42", drop=F], keep.rownames = T)],
       aes(WT_42, V42, colour=PseudoTime)) +
  geom_point(stroke=0, size=0.5) +
  scale_colour_viridis(direction = -1) +
  scale_y_reverse() + scale_x_reverse() +
  labs(y="WT: C42 (Raw)", x="Mixed: C42 Pachytene")

ggplot(cell_data[data.table(rot_9WTsparse_scoresrot[,"WT_rot42", drop=F], keep.rownames = T)], 
       aes(Tsne1_QC1,Tsne2_QC1, colour=cell %in% names(which(rot_9WTsparse_scoresrot[,"WT_rot42"]*0.55 < results1$scores[rownames(rot_9WTsparse_scoresrot),42])))) + 
    geom_point() +
    theme(legend.position = "bottom")

Scores on tSNE (compare to previous score visualisation)

ggplot(cell_data[data.table(rot_9WT_scoresrot_sparse[,"Mix42 Pachytene", drop=F], keep.rownames = T)], 
       aes(Tsne1_QC1,Tsne2_QC1, colour=get("Mix42 Pachytene"))) + 
    geom_point() +
    scale_colour_viridis() +
    theme(legend.position = "bottom")

ggplot(cell_data[data.table(rot_9WT_scoresrot_sparse[,"Mix42 Pachytene", drop=F], keep.rownames = T)], 
       aes(Tsne1_QC1,Tsne2_QC1, colour=V42)) + 
    geom_point() +
    scale_colour_viridis() +
    theme(legend.position = "bottom")

Heatmap score correlations

Not clear this is better than using loadings, despite sparsity in loadings.

Perhaps would be best to combine the two somehow.

compare_factorisations(t(rot_9WT_scoresrot),
                       t(results1$scores),
                       names= c("Mixed","WT"),
                       method = "pearson")

compare_factorisations(t(rot_9WT_scoresrot_sparse+ rnorm(50*9936, 0, 0.00001)),
                       t(results1$scores),
                       names= c("Mixed","WT"),
                       method = "pearson")


MyersGroup/testisAtlas documentation built on Nov. 29, 2020, 9:21 p.m.