Many of the genes in component 11 (Macrophages) are Alzheimer's disease (AD) GWAS hits or are otherwise well known in AD.

Microglia, the brain-resident macrophages, are heavily implicated in the development and progression of AD.

Key studies:

Full Literature: http://www.citeulike.org/user/danielwells/tag/alzheimers

# Attach the testisAtlas package, whose helpers are used throughout this document.
library(testisAtlas)
# Load cached objects from ../data/cache.
# NOTE(review): presumably this is what provides SDAresults and gene_annotations
# used by the chunks below - confirm against the cache contents.
load2("../data/cache")
# NOTE(review): presumably prepares the component orderings needed by the
# plotting helpers - confirm against the testisAtlas documentation.
load_component_orderings()

Notable hits in top 25 genes include: Apoe, Tyrobp, Trem2, and Ms4a7 (Part of MS4A cluster)

# Load the local development checkout of SDAtools.
devtools::load_all("~/Dropbox/Github/SDAtools/")
#rna_locations <- load_gene_locations(colnames(SDAresults$loadings[[1]]), name="merge_mouse_V3b", path = "../data/")

# Gene loadings for component 11 (row 11 of the first loadings matrix), with
# at most 25 labelled items, a custom y-axis title and no legend.
genome_loadings(
  SDAresults$loadings[[1]][11, ],
  label_both = FALSE,
  max.items = 25,
  label.size = 4,
  gene_locations = gene_annotations
) +
  ylab("Gene Loading (Component 11)") +
  theme_minimal() +
  theme(legend.position = "none")

Genes from Late Onset Alzheimer's Disease GWAS - expressed in macrophages (component 11)

# Late-onset AD GWAS genes that the text describes as expressed in macrophages.
macrophage_ad_genes <- c(
  "Apoe", "Trem2", "Bin1", "Ms4a6d", "Ms4a6b", "Ms4a6c",
  "Ms4a7", "Tyrobp", "Cd33", "Mef2c", "Spi1", "H2-Eb1"
)

# Build one print_tsne() panel per gene via create_grob_list().
macrophage_ad_panels <- create_grob_list(
  fn = function(gene) {
    print_tsne(gene, point_size = 0.5, predict = TRUE)
  },
  input = macrophage_ad_genes
)

# Lay the panels out on three rows of equal height.
grid.arrange(
  grobs = macrophage_ad_panels,
  nrow = 3,
  heights = c(1, 1, 1)
)

Genes from Late Onset Alzheimer's Disease GWAS - not specifically expressed in macrophages

# Late-onset AD GWAS genes that the text describes as not macrophage-specific.
non_macrophage_ad_genes <- c(
  "Clu", "Picalm", "Abca7", "Cd2ap", "Zcwpw1",
  "Nme8", "Inpp5d", "Sorl1", "Rin3", "Ptk2b"
)

# Build one print_tsne() panel per gene via create_grob_list().
non_macrophage_ad_panels <- create_grob_list(
  fn = function(gene) {
    print_tsne(gene, point_size = 0.5, predict = TRUE)
  },
  input = non_macrophage_ad_genes
)

# Lay the panels out on three rows of equal height.
grid.arrange(
  grobs = non_macrophage_ad_panels,
  nrow = 3,
  heights = c(1, 1, 1)
)

Other genes

# Additional genes of interest listed under "Other genes".
other_genes <- c("Lpl", "Itm2c", "Itm2b", "App")

# Build one print_tsne() panel per gene via create_grob_list().
other_gene_panels <- create_grob_list(
  fn = function(gene) {
    print_tsne(gene, point_size = 0.5, predict = TRUE)
  },
  input = other_genes
)

# Lay the panels out on two rows of equal height.
grid.arrange(
  grobs = other_gene_panels,
  nrow = 2,
  heights = c(1, 1)
)

Not in dataset: EPHA1, CASS4

There are 20 GWAS hits which have loadings. For component 11 (macrophages), 8 of them are in the top 200 genes. This gives a p-value of ~1e-11

# Read the table of Alzheimer's disease genes; column V2 is used below as the
# gene name that is matched against the loadings.
AD_genes <- read.table(
  "../data/previous_studies/alzhemiers/alzheimers_genes.txt",
  stringsAsFactors = FALSE
)
AD_genes
#DGS2?, APP. PSEN1, TAU, PLD3 - unconfirmed, Pld4...

# Names of the genes that have a loading in component 11.
genes_with_loadings <- names(SDAresults$loadings[[1]][11, ])
ad_gene_has_loading <- AD_genes$V2 %in% genes_with_loadings

# Show the AD genes missing from the loadings, then keep only those present.
str(AD_genes$V2[!ad_gene_has_loading])
AD_genes <- AD_genes$V2[ad_gene_has_loading]

#AD_genes <- c("Apoe","Trem2","Bin1","Ms4a6d","Tyrobp","Cd33","Mef2c","Spi1","H2-Eb1","Clu","Picalm","Abca7","Cd2ap","Zcwpw1","Nme8","Inpp5d","Sorl1","Rin3","Ptk2b")

str(AD_genes)

# Rank genes by decreasing component 11 loading (rank 1 = largest loading).
tmp <- rank(-SDAresults$loadings[[1]][11, ], na.last = FALSE)
names(tmp) <- genes_with_loadings

# Ranks of the AD genes, best-ranked first.
data.table(gene = AD_genes, rank = tmp[AD_genes])[order(rank), .(rank, gene)]

# Sorted loadings with the AD gene ranks marked in red along the x-axis.
# The plotted expression is left unchanged because base plot() derives its
# default axis label from it.
plot(sort(SDAresults$loadings[[1]][11, ], T))
rug(tmp[AD_genes], col = "red", lwd = 0.3, ticksize = 0.1)

# One-sided Fisher's exact test for over-representation of AD GWAS genes among
# the top genes of component 11. The counts are the ones quoted in the text:
# 20 GWAS hits with loadings, 8 of them within the top 200 genes.
# NOTE(review): 19262 is taken to be the total number of genes with loadings -
# confirm against the loadings matrix.
n_genes_total <- 19262
n_top_genes <- 200
n_ad_genes <- 20
n_ad_in_top <- 8

# 2x2 contingency table, filled column-wise:
#                  AD gene   other gene
#   in top 200        8         192
#   not in top       12       19050
# The last cell is total - |top OR AD| = total - (top + AD - overlap); the
# previous expression subtracted (top - AD + overlap), so the table summed to
# more than the total number of genes.
ad_top_table <- matrix(
  c(
    n_ad_in_top,
    n_ad_genes - n_ad_in_top,
    n_top_genes - n_ad_in_top,
    n_genes_total - (n_top_genes + n_ad_genes - n_ad_in_top)
  ),
  nrow = 2,
  ncol = 2
)

fisher.test(ad_top_table, alternative = "greater")

Repeating this one-sided Fisher's exact test for all components gives three results that are significant after Bonferroni correction. Component 8P has a single cell - in the macrophage cluster.

# Run the AD gene-set enrichment across components with a threshold of 200.
AZ_enrich <- component_enrichment(AD_genes, threshold = 200)

# Manhattan-style plot of the enrichment results.
AZ_enrich_plot <- manhatten_plot(
  AZ_enrich,
  topn = 2,
  repel_force = 20,
  legend_position = c(0.75, 0.75)
)

# Display the plot, then store it for reuse.
AZ_enrich_plot

saveRDS(AZ_enrich_plot, file = "../data/plots/AZ_enrich_plot.rds")

MQ Scores & Loadings

# Genes to highlight: the AD genes with loadings plus two extra MS4A genes.
v11_highlight_genes <- c(AD_genes, "Ms4a7", "Ms4a6c")
# Genes that receive a text label on the loadings plot.
v11_label_genes <- c(
  "Trem2", "Apoe", "Mef2c", "H2-Eb1", "Spi1", "Bin1", "Ms4a7", "Cd33"
)

# Component 11 gene loadings with the AD genes highlighted.
v11_loadings_plot <- genome_loadings(
  SDAresults$loadings[[1]][11, ],
  label_both = FALSE,
  max.items = 10,
  label.size = 4,
  hide_unknown = TRUE,
  gene_locations = gene_annotations,
  highlight_genes = v11_highlight_genes,
  label_genes = v11_label_genes
) +
  ylab("Gene Loading (Component 11)") +
  theme_minimal() +
  theme(legend.position = "none")

# Loadings (left, three times as wide) next to the V11 cell scores (right).
v11_scores_plot <- plot_cell_scores("V11", point_size = 2)
C11_MQ_plot <- plot_grid(
  v11_loadings_plot,
  v11_scores_plot,
  ncol = 2,
  rel_widths = c(3, 1)
)

saveRDS(C11_MQ_plot, file = "../data/plots/C11_MQ_plot.rds")

Max enriched GO term for each component (q<0.01)

# This chunk appears before the one that reads the GO enrichment table and
# defines the summary columns, so it fails when the document is run top to
# bottom. Define both here if they do not exist yet; anything already in the
# session is left untouched.
if (!exists("GO_data_dt")) {
  GO_data_dt <- readRDS("../data/go/GO_enrichment_dt.rds")
}
if (!exists("summarycols")) {
  summarycols <- c("Component","Description","qvalue","geneID","Enrichment","ID")
}

# For each component, keep the GO term with the largest Enrichment among terms
# with q < 0.01. Rows are first sorted by descending Description, which fixes
# the row which.max() picks when Enrichment values are tied.
GO_data_dt[qvalue < 0.01][order(-Description)][
  , .SD[which.max(Enrichment)], by = Component
][, ..summarycols]

Which components have amyloid enrichments q<0.01

A: 24N, 49N, 32N, 3N, 21N, 26N, 40P, 11P, 16N

aka 9 out of the 13 somatic components

The exceptions being 10, 19, 37, 45

# GO enrichment results and the columns shown in the summary tables.
GO_data_dt <- readRDS(file = "../data/go/GO_enrichment_dt.rds")
summarycols <- c(
  "Component", "Description", "qvalue", "geneID", "Enrichment", "ID"
)

# GO terms whose description mentions "amyloid" with q < 0.01, most
# significant first.
amyloid_terms <- GO_data_dt[grepl("amyloid", Description)]
amyloid_terms[qvalue < 0.01][, ..summarycols][order(qvalue)]

#GO_data_dt[grep("amyloid", Description)][, .SD[which.min(qvalue)],by=Component][order(qvalue)][,..summarycols][1:20]

Three components have amyloid as most enriched (for q<0.01)

26N, 49N, and 16N

# Most enriched GO term (q < 0.01) for every component ...
top_enriched_by_component <- GO_data_dt[qvalue < 0.01][order(-Description)][
  , .SD[which.max(Enrichment)], by = Component
]
# ... restricted to the components where that term mentions "amyloid".
top_enriched_by_component[grepl("amyloid", Description)][, ..summarycols]

2 components have amyloid as 2nd highest enrichment

V24N and 21N

# Significant GO terms (q < 0.01), shared by both queries below.
significant_go_terms <- GO_data_dt[qvalue < 0.01]

# Five most enriched significant terms for components V24N and V21N.
significant_go_terms[Component == "V24N"][order(-Enrichment)][, ..summarycols][1:5]
significant_go_terms[Component == "V21N"][order(-Enrichment)][, ..summarycols][1:5]

One component has amyloid as the most significant

Although mostly due to lack of anything else

# Most significant GO term (smallest q-value, q < 0.01) for every component ...
most_significant_by_component <- GO_data_dt[qvalue < 0.01][order(-Description)][
  , .SD[which.min(qvalue)], by = Component
]
# ... restricted to the components where that term mentions "amyloid".
most_significant_by_component[grepl("amyloid", Description)][, ..summarycols]


MyersGroup/testisAtlas documentation built on Nov. 29, 2020, 9:21 p.m.