Load conos, pagoda2 and SecretUtils etc.

library(conos)
library(tidyverse)
devtools::load_all('/home/larsc/SecretUtils')
require(pagoda2)
library(pheatmap)
library(irlba)
library(igraph)
mouse_annot <- read.csv(file.path('/home/larsc/data/mouse_alzheimer/mouse_alzheimers_annotation_filtered_subtypes.csv'))
mouse_annot$subtype_condition <- paste0(mouse_annot$celltype, '.', mouse_annot$condition)

load conos object

mouse_con <- readRDS('/home/larsc/data/mouse_alzheimer/mouse_alzheimers_conos_procced_graphed.rds')

Rbind panels from conos objects

rbound_panel <- RbindPanel(mouse_con)
# sorting it just in case
rbound_panel <- rbound_panel[order(rbound_panel %>% rownames),]

Make groups for colorful tsne plots of the dataset

nr_annot <- setNames(mouse_annot$mouse_nr, mouse_annot$Well_ID)
batch_annot <- setNames(mouse_annot$Amp_batch_ID, mouse_annot$Well_ID)
condition_annot <- setNames(mouse_annot$condition, mouse_annot$Well_ID)
celltype_annot <- setNames(mouse_annot$celltype, mouse_annot$Well_ID)
sub_cond_annot <- setNames(mouse_annot$subtype_condition, mouse_annot$Well_ID)
table(nr_annot)
table(celltype_annot)

Plot graph with different annotations

mouse_con$plotGraph(groups=condition_annot, font.size=3, size=0.3, alpha=0.3, show.legend=T)
mouse_con$plotGraph(groups=celltype_annot, font.size=3, size=0.3, alpha=0.3, show.legend=T)
mouse_con$plotGraph(groups=sub_cond_annot, font.size=3, size=0.3, alpha=0.3, show.legend=T)
mouse_con$plotGraph(groups=nr_annot, font.size=3, size=0.3, alpha=0.3, show.legend=T)

Initiate some variables

od_genes = conos:::getOdGenesUniformly(mouse_con$samples, 3000)
state_split <- split(mouse_annot, mouse_annot$condition, drop=TRUE)
subtype_split <- state_split %>% lapply(function(x){split(x, x$celltype, drop=TRUE)})

Jensen Shannon between AD and WT, overall (microglia has by far the most cells so this will heavily skew the result due to dropout)

sub_mats_probs <- SecretUtils::GetSubMats(rbound_panel, mouse_annot$Well_ID, mouse_annot$celltype, mouse_annot$condition, 
                                          normalize=T, pseudo.prob=10^-8)

all_dists <- Map(JensenShannon, sub_mats_probs$AD, sub_mats_probs$WT) %>% as_tibble
all_dists_gathered <- gather(all_dists, key=subtype, value=js_distance)
ggplot(all_dists_gathered, aes(y=js_distance, x=subtype)) +geom_bar(stat='identity') +
  theme(axis.text.x = element_text(angle = -90, hjust = 1))

Violins plots of between condition distances(slightly older function, hence some not ideal practices regarding input variables, but gets the job done).

wtcellprobs <- IndividualCellProbs(state_split$WT, rbound_panel, 1, 7, 100, od_genes, 10^(-8))
adcellprobs <- IndividualCellProbs(state_split$AD, rbound_panel, 1, 7, 100, od_genes, 10^(-8))
all_singlecell_dists <- Map(CalculateAllJSD, wtcellprobs, adcellprobs)
all_sc_dists <- all_singlecell_dists %>% as_tibble
all_scd_gathered <- gather(all_sc_dists, key=subtype, value=jsd)
ggplot(all_scd_gathered, aes(y=jsd, x=subtype)) + geom_violin(aes(col=subtype))+
  theme(axis.text.x = element_text(angle = -90, hjust = 1))

PCA for correlation (correlation is very biased in gene expression space)

pca_cm <- prcomp_irlba(rbound_panel[, od_genes],n=100)
pca_cmat <- pca_cm$x
rownames(pca_cmat) <- rownames(rbound_panel)
pca_genes <- colnames(pca_cmat)
sub_mats_pca <- SecretUtils::GetSubMats(pca_cmat, mouse_annot$Well_ID, mouse_annot$celltype, mouse_annot$condition)

all_dists <- Map(function(x,y){1-cor(x,y)}, sub_mats_pca$AD, sub_mats_pca$WT) %>% as_tibble
all_dists_gathered <- gather(all_dists, key=subtype, value=correlation.distance)
ggplot(all_dists_gathered, aes(y=correlation.distance, x=subtype)) +geom_bar(stat='identity') +
  theme(axis.text.x = element_text(angle = -90, hjust = 1))

Plot showing which fractions belong to which celltype for the corresponding conditions.

FractionalPlot(mouse_annot$mouse_nr, mouse_annot$celltype, mouse_annot$condition)

PAGA using unaligned graph (KNN graph where edges are correlation distance in PCA space). Small value = less connected, i.e. a similarity metric, not distance. Note that in general we do not trust the PAGA metric as unbiased, see the simulation plots.

raw_mouse <- RbindRaw(mouse_con)
mouse_unaligned_adj <- GenerateUnalignedAdj(raw_mouse, cellid.vector=mouse_annot$Well_ID)[mouse_annot$Well_ID, mouse_annot$Well_ID]

subtype_order <- (paste0(mouse_annot$celltype) %>% unique)[order(paste0(mouse_annot$celltype) %>% unique)]
membership_vec <- as.numeric(factor(mouse_annot$subtype_condition))
connectivities <- GetPagaMatrix(mouse_unaligned_adj, membership_vec, scale=F)
linearized_stats <- seq(1, dim(connectivities)[1], 2) %>% sapply(function(i){connectivities[i,i+1]})

paga_df <- bind_cols(value=linearized_stats, subtype=subtype_order)
ggplot(paga_df, aes(y=linearized_stats, x=subtype)) +geom_point()+
  theme(axis.text.x = element_text(angle = -90, hjust = 1))


githubz0r/SecretUtils documentation built on May 15, 2021, 10:29 p.m.