#library(ggplot2) #library(testisAtlas) #library(SDAtools) #devtools::load_all("~/Dropbox/Github/ggplot2/") library(ggplot2) library(SDAtools) #devtools::load_all("~/Dropbox/Github/SDAtools/") library(MoitfFinder) #devtools::load_all("~/Dropbox/Github/MotifFinder/") pkgload::load_all() library(data.table) library(cowplot) library(gridExtra) library(ggforce) library(ggrepel) library(RColorBrewer) library(ggnewscale) load2("../data/cache") load_component_orderings()
#from https://science.sciencemag.org/content/sci/363/6425/eaau1043.full.pdf ## TABLE 5A, Coding & Splice site ## Inlcuded in our list: # Syce1, ok # Ccnb1ip1, ok, GWS MATERNAL & JOINT, Medium paternal # Meiob, ok, GWS MATERNAL, weak paternal, but expressed ok zyg # Fbxo47, ok GWS JOINT # H2bfm, ok, mainly Hormad / X, but a little in L&Z # Msh4, Ok, GWS MATERNAL & JOINT, weak paternal # Hfm1, ok # Rnf212, ok # C14orf39, ok, = 4930447C04Rik, aka SIX6OS1 # Rad21l1, ok, = RAD21l # Hormad1, ok # Prdm9, ok # Hus1b, ok, GWS joint & MATERNAL # Sycp3, ok # EAPP, ok, MATERNAL, - Supp says intergentic, main says ms -see below?, GWS MATERNAL, weak paternal # Syce2, ok, MATERNAL & JOINT, weak paternal # Ctcfl, ok, next to Spo11 # Hsf2bp, ok # Smc1b, ok ## TABLE 5A Not included in our list: # MSFD7 (aka Slc49a3) = Mfsd7a, no detected expression, Previously CPLX1?? ~ 100kb away # CT45A9, no ortohlogue, many similar genes nearby (inc. SAGE1 aka Ints6l -testis specific, sertoli/LZ) # MAPT, 4444 in LD, in inversion HsInv0573 (hg18:chr17:40928985-42139672, hg38:chr17:45495836-46707123 hg19:chr17:43573202-44784489) # ANHX, no mouse homologue?, MATERNAL & JOINT only, v. testis specific # Prame, one to many orthologue to mouse # Fancb, undetecteable expression ## TABLE 5B ## NON CODING # KLHL21, ok - 3'utr, GWS MATERNAL only, joint medium # intergenic / Rnf212, ok (also in coding list) # NFKBIL1 intergenic, DDX39B-AS1, MATERNAL # Seh1l upstream , GWS MATERNAL, joint medium # AP1M2, intergenic, GWS MATERNAL, joint medium # intergeic, MATERNAL gene desert (BMP2) # Rnf212, covered above # SMEK1, ok - 3' UTR, = Ppp4r3a, GWS MATERNAL & JOINT # C17orf104, ok - (aka Meioc) - paper "Meioc maintains an extended meiotic prophase I in mice." # ANXA9, intergenic, GWS MATERNAL, medium paternal # Acyp2, syn, GWS MATERNAL & JOINT, maybe next door gene mouse=Tspyl1? # Fam178b, intergenic, GWS MATERNAL & JOINT, medium paternal # C11orf80, ok 3'UTR = Gm960 in mouse, GWS MATERNAL & JOINT # Sycp2, ok - 3'UTR # One intergenic near Eapp # One intergenic near Fancb # PASD1 no orthologue gwas_genes <- c("Syce1","Ccnb1ip1","Meiob","Fbxo47","H2bfm","Msh4","Hfm1","Rnf212","4930447C04Rik","Rad21l","Hormad1","Prdm9","Hus1b","Sycp3","Eapp","Syce2","Ctcfl","Hsf2bp","Smc1b","Klhl21","Ppp4r3a","Gm960","Sycp2","Meioc") # Kong et al : Rnf212, Cplx, Ccnb1ip1, C14orf39 = 4930447C04Rik, aka SIX6OS1, Smek1 = Ppp4r3a, Rad21l, Mh4, Ccdc42, Prdm9 gwas_genes_rr_enrich <- component_enrichment(gwas_genes, threshold = 300) c5_enrich_plot <- manhatten_plot(gwas_genes_rr_enrich[!component %in% gsub("V","",half_exclusion_comps)], legend_position = c(0.20,0.7)) head(gwas_genes_rr_enrich[!component %in% gsub("V","",half_exclusion_comps)][order(p.value)]) tmp <- single_component_enrichment(SDAresults$loadings[[1]][5,], pos=F, genes=gwas_genes)$ranks ggplot(data.table(rank=tmp, gene=names(tmp)), aes(rank, 1:24, label=gene))+ geom_vline(xintercept = 0, col="red") + geom_point() + xlim(-4000,19264) + geom_label_repel(force = 10, nudge_x = -1000) + xlab("Rank in Component 5") + ylab("")
tmp <- data.table(loading = sort(-SDAresults$loadings[[1]][5,],T), gene = names(sort(-SDAresults$loadings[[1]][5,],T))) tmp$rank <- seq_along(tmp$loading) tmp[gene %in% gwas_genes, KO := "Recombination GWAS hits"] c5_rug_plot <- ggplot(tmp, aes(rank, -loading,colour=KO)) + geom_point(colour="black") + geom_rug(data=tmp[gene %in% gwas_genes], sides = "b", length=0.15) + scale_y_reverse() + ylab("Component\n 5 Gene Loading") + xlab("Rank of gene by loading") + scale_color_brewer(palette = "Set1")+ guides(colour = guide_legend(override.aes = list(size=5))) + theme(legend.position = c(0.4,0.6)) + labs(colour=NULL) c5_rug_plot
v5_loadings_plot <- genome_loadings(SDAresults$loadings[[1]][5,], label_both = FALSE, max.items = 20, label.size = 3.5, gene_locations = gene_annotations, hide_unknown = T, highlight_genes = gwas_genes,label_genes = c("Smc1b","Gm960","Hfm1","Sycp2","Meioc","Meiob","Sycp3")) + ylab("Gene Loadings (Component 5)") + scale_y_reverse() v5_loadings_plot
tsne_plot <- print_tsne(5, curve = T, stages = T, point_size = 0.75) + ggtitle("") tsne_plot
matrix_plot <- ggdraw() + draw_image("../../figures/matrix_decomp_cells.png", clip="on") sparsity_plot <- readRDS("../data/plots/sparsity_plot.rds") gene_loading_sparsity <- readRDS("../data/plots/gene_loading_sparsity.rds") left <- plot_grid(matrix_plot, tsne_plot, c5_enrich_plot, rel_heights = c(0.4,1.5,0.6), ncol=1, labels = c("A","D","F"), label_size = 25) sparsity <- plot_grid(sparsity_plot, gene_loading_sparsity, ncol=2, rel_widths = c(5,4), labels = c("B","C"), label_size = 25) right <- plot_grid(sparsity, v5_loadings_plot, rel_heights = c(0.6, 1), ncol=1, labels = c("","E"), label_size = 25) pdf(height=10, width=13, file="../results/Figure_SDA_Intro.pdf") plot_grid(left, right, ncol=2) dev.off() structure <- readRDS("../data/plots/structure_plot.rds") top_genes_plot <- readRDS("../data/plots/top_genes_plot.rds") GO_plot <- readRDS("../data/plots/GO_plot.rds") scores_pseudotime <- readRDS("../data/plots/scores_pseudotime.rds") left <- plot_grid(scores_pseudotime, top_genes_plot, rel_heights = c(1,2), ncol=1, labels = c("A","C"), label_size = 25) right <- plot_grid(structure, GO_plot, rel_heights = c(3,4), ncol=1, labels = c("B","D"), label_size = 25) pdf(height=16, width=15, file="../results/Figure_SDA_Overlaping_Scores.pdf") plot_grid(left, right, ncol=2, rel_widths = c(4,5)) dev.off()
leg <- ggplot(data.table(x=1,y=1,z="Recombination GWAS Hit (Halldorsson et al 2019)"), aes(x,y, colour=z)) + geom_point() + scale_colour_manual(values='red', name="") + theme_minimal() pdf("../results/C5_loadings.pdf", width = 8.3, height = 5) plot_grid(v5_loadings_plot, get_legend(leg), ncol=1, rel_heights = c(1,0.1)) dev.off() v5_loadings_plot + get_legend(leg) scale_colour_manual(name="", values=c(myline1="red", myline2="blue", myline3="purple"))
ggplot(data.table(Sparsity=apply(SDAresults$pips[[1]], 1, function(x) mean(x<0.5)), Component=factor(names(sort(component_labels)), levels=unique(names(sort(component_labels))[component_order_all]))), aes(Component, Sparsity)) + geom_point() + coord_flip()
v38_loadings_plot <- genome_loadings(SDAresults$loadings[[1]][38,], label_both = FALSE, max.items = 10, label.size = 3, hide_unknown = T, gene_locations = gene_annotations) + ylab("Gene Loading (Component 38)")
tmp <- gene_expression_pseudotime(c("Rhox2h")) tmp[group=="Hormad1", Hormad1_KO := TRUE] tmp[group!="Hormad1", Hormad1_KO := FALSE] Hormad1_gxe <- ggplot(tmp[PseudoTime>14500], aes(-PseudoTime, value, colour=Hormad1_KO)) + geom_point(size=0.5) + theme_minimal() + ylab("Rhox2h Gene Expression") + xlab("Pseudotime") + theme(legend.position = c(0.8,0.7), legend.background = element_rect(colour="black")) + scale_color_brewer(palette = "Set1",name="Hormad1 KO?") pdf(height=5, width=5*1.7, file="../results/Rhox2h.pdf") Hormad1_gxe dev.off() tmp <- gene_expression_pseudotime(c("A830018L16Rik","Rhox2h")) tmp[group=="Hormad1", Hormad1_KO := TRUE] tmp[group!="Hormad1", Hormad1_KO := FALSE] Hormad1_gxe2 <- ggplot(tmp[PseudoTime>14500], aes(-PseudoTime, value, colour=Hormad1_KO)) + geom_point(size=0.5) + theme_minimal() + ylab("Gene Expression") + xlab("Pseudotime") + theme(legend.position = c(0.8,0.3), legend.background = element_rect(colour="black"), strip.text.x = element_text(size = 12)) + scale_color_brewer(palette = "Set1",name="Hormad1 KO?") + facet_wrap(~Gene, ncol=1) pdf(height=5, width=6*1.7, file="../results/Hormad1_two.pdf") plot_grid(v38_loadings_plot, Hormad1_gxe2) dev.off()
v25_loadings_plot <- genome_loadings(SDAresults$loadings[[1]][25,], label_both = FALSE, max.items = 10, label.size = 3, hide_unknown = T, gene_locations = gene_annotations) + ylab("Gene Loading (Component 25)") + scale_y_reverse()
# from alzheimers.Rmd AZ_enrich_plot <- readRDS("../data/plots/AZ_enrich_plot.rds")
c38 <- plot_grid(v38_loadings_plot + scale_size_continuous(range = c(1, 3)), plot_cell_scores("V38", point_size = 1), ncol = 2, rel_widths = c(3,1)) c25 <- plot_grid(v25_loadings_plot + scale_size_continuous(range = c(1, 3)), plot_cell_scores("V25") + scale_x_reverse(), ncol = 2, rel_widths = c(3,1)) pdf(height=5, width=5*1.7, file="../results/C38.pdf") c38 dev.off() #c11 <- plot_grid(v11_loadings_plot, v11_scores, ncol = 2, rel_widths = c(3,1)) set.seed(87162) pdf(width = 12, height = 6, file = "../results/Figure_9_AtoD.pdf") plot_grid(plotlist = list(c38, Hormad1_gxe, c25, AZ_enrich_plot + theme(axis.text.x=element_text(size=4))), rel_widths = c(3,2), labels = "AUTO") dev.off()
median: 18,499
download.file("https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6946/E-MTAB-6946.processed.3.zip", destfile = "../data/previous_studies/Ernst/cellranger_metadata.zip") unzip("../data/previous_studies/Ernst/cellranger_metadata.zip") ernst <- fread("../data/previous_studies/Ernst/cell_metadata.txt") str(ernst) median(ernst$total_counts) mean(ernst$total_counts)
tmp <- data.table(SDAresults$scores, cell=rownames(SDAresults$scores)) tmp$Hormad <- grepl("Hormad",tmp$cell) tmp$WT <- grepl("WT",tmp$cell) tmp$experiment <- unlist(strsplit(tmp$cell, split = "\\."))[c(F,T)] tmp$cell_id <- 1:nrow(tmp) ggplot(tmp, aes(cell_id, V12, colour=experiment)) + geom_point() + theme(axis.text.x = element_blank()) ggplot(tmp[grepl("Hormad",experiment)], aes(cell_id, V12, colour=experiment)) + geom_point() + theme(axis.text.x = element_blank())
# MiSeq = SP*, WT 28117, MLH3-KO tmp[,MiSeq := FALSE] tmp[grepl("SPC.|SPD.|28117",experiment), MiSeq := TRUE] # By date jittered tmp[grepl("WT",experiment)] %>% mutate(experiment = factor(experiment, levels = unique(tmp[WT==T]$experiment)[c(9,1:8)])) %>% ggplot(aes(V22, experiment, colour=experiment)) + geom_jitter(size=1, alpha=0.5, stroke=0) + scale_color_brewer(palette = "Set1") + theme_minimal() + theme(legend.position = "none", axis.title.y=element_blank() ) + xlab(paste0("Component ","V22","\n Cell Score")) # MiSeq tmp %>% mutate(experiment = factor(experiment, levels = tmp[,mean(V22),by=experiment][order(V1)]$experiment)) %>% ggplot(aes(V22, experiment, colour=MiSeq)) + geom_jitter(size=1, alpha=0.5, stroke=0) + scale_color_brewer(palette = "Set1") + theme_minimal() + theme(axis.title.y=element_blank() ) + xlab(paste0("Component ","V22","\n Cell Score")) # MiSeq aggregated ggplot(tmp,aes(V22, MiSeq, colour=MiSeq)) + geom_jitter(size=1, alpha=0.5, stroke=0) + scale_color_brewer(palette = "Set1") + theme_minimal() + theme(axis.title.y=element_blank() ) + xlab(paste0("Component ","V22","\n Cell Score")) # MiSeq Density plot ggplot(tmp, aes(V22, colour=MiSeq)) + geom_density() + scale_color_brewer(palette = "Set1") + theme_minimal() + theme(axis.title.y=element_blank() ) + xlab(paste0("Component ","V22","\n Cell Score")) rpsl_genes <- grep("Rps|Rpl", names(SDAresults$loadings[[1]][22,]), value=T) rpsl_genes <- rpsl_genes[!grepl("-ps",rpsl_genes)] manhatten_plot(component_enrichment(rpsl_genes, threshold = 300), topn = 5)
v22_loadings_plot <- genome_loadings(SDAresults$loadings[[1]][22,], label_both = FALSE, max.items = 20, label.size = 4, hide_unknown = T, gene_locations = gene_annotations) + ylab("Gene Loading (Component 22)") + scale_y_reverse() + scale_size_continuous(range = c(1, 3)) v22_cell_scores <- tmp %>% mutate(experiment = factor(experiment, levels = tmp[,mean(V22),by=experiment][order(V1)]$experiment)) %>% ggplot(aes(V22, experiment, colour=MiSeq)) + geom_jitter(size=1, alpha=0.5, stroke=0) + scale_color_brewer(palette = "Set1") + theme_minimal() + theme(axis.title.y=element_blank() ) + xlab(paste0("Component ","V22","\n Cell Score")) v12_cell_scores <- ggplot(tmp,aes(V12, experiment, colour=grepl("Hormad",experiment))) + geom_jitter(size=1, alpha=0.5, stroke=0) + scale_color_brewer(palette = "Set1", guide=F) + theme_minimal() + theme(axis.title.y=element_blank()) + xlab(paste0("Component ","V12","\n Cell Score")) v12_loadings_plot <- genome_loadings(SDAresults$loadings[[1]][12,], label_both = FALSE, max.items = 20, label.size = 4, hide_unknown = T, gene_locations = gene_annotations) + ylab("Gene Loading (Component 12)") + scale_y_reverse() + scale_size_continuous(range = c(1, 3))
#GO_data <- readRDS("../data/GO_enrichment_dt.rds") # unique(ETC_genes$"MGI Gene/Marker ID")[unique(ETC_genes$"MGI Gene/Marker ID") %in% colnames(SDAresults$loadings[[1]])] # # ETC_genes <- fread("../data/GO_term_summary_20190318_211630.txt") # unique(ETC_genes$"MGI Gene/Marker ID") etc_genes <- grep("Uqc|Cox|Ndu",colnames(SDAresults$loadings[[1]]),value = T) etc_genes <- grep("-ps",etc_genes, invert = T,value = T) library(biomaRt) ensembl <- useMart("ENSEMBL_MART_ENSEMBL", host = "www.ensembl.org", dataset = "mmusculus_gene_ensembl") mapTab2 <- getBM(attributes = c("external_gene_name",'description'), filter = "external_gene_name", values = etc_genes, mart = ensembl, uniqueRows=TRUE) etc_genes_clean <- data.table(mapTab2)[!grepl("assembly|like",description)]$external_gene_name etc_genes_assemble <- data.table(mapTab2)[grepl("assembly",description)]$external_gene_name etc_enrich <- component_enrichment(etc_genes_clean) etc_enrich_plot <- manhatten_plot(etc_enrich[!component %in% gsub("V","",half_exclusion_comps)], topn=1, legend_position=c(0.25,0.6), repel_force = 10) + ggtitle("Enrichment of electron transport\n chain proton pump genes") set.seed(142) v9_loadings_plot <- genome_loadings(SDAresults$loadings[[1]][9,], label_both = FALSE, label.repel = 2, max.items = 20, label.size = 4, hide_unknown = T, gene_locations = gene_annotations, highlight_genes = etc_genes_clean) + ylab("Gene Loading (Component 9)") + scale_y_reverse() + scale_size_continuous(range = c(1, 3)) + expand_limits(y=-0.3) pdf("../results/S6B_NonMeiotic_Components.pdf", height = 12, width=15) plot_grid(v22_loadings_plot, v22_cell_scores, v12_loadings_plot, v12_cell_scores, v9_loadings_plot, etc_enrich_plot, rel_widths = c(2,1), ncol=2, labels = "AUTO") dev.off() pdf(height=6, width=6*(16/9), file="../results/MiSeq.pdf") plot_grid(v22_loadings_plot, v22_cell_scores, rel_widths = c(3,2)) dev.off() pdf(height=6, width=6*(16/9), file="../results/ETC.pdf") plot_grid(v9_loadings_plot, etc_enrich_plot, rel_widths = c(3,2)) dev.off()
tricolour_tsne(c("Gfra1","Upp1","Id4")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction") tricolour_tsne(c("Gfra1","Upp1","Id4")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction") tricolour_tsne(c("Gfra1","Upp1","Fli1")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction") tricolour_tsne(c("Gfra1","Upp1","Utf1")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction") tricolour_tsne(c("Gfra1","Upp1","Glis3")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction") tricolour_tsne(c("Gfra1","Upp1","Batf")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction") tricolour_tsne(c("Gfra1","Upp1","Foxf1")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction") tricolour_tsne(c("Gfra1","Upp1","Rhox10")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction") tricolour_tsne(c("Gfra1","Upp1","Lin28a")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction")
tricolour_tsne(c("Gfra1","Upp1","Zbtb12")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction") tricolour_tsne(c("Gfra1","Upp1","Fbxl14")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction") tricolour_tsne(c("Gfra1","Upp1","Furin")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction") tricolour_tsne(c("Gfra1","Upp1","Dvl3")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction") tricolour_tsne(c("Gfra1","Upp1","Csrp1")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction") tricolour_tsne(c("Gfra1","Upp1","Lmtk2")) + facet_zoom(xlim=c(-5,3), ylim=c(35,43), zoom.size = 2) + ggtitle("SDA Prediction")
mouse_ages <- data.table( experiment=c("CNP_2016-12-15", "CNP_2017-01-05", "Cul4a_2017-03-28", "Cul4a_2017-03-30", "Hormad1_1_A", "Hormad1_2_A", "Hormad1_3_A", "Mlh3_1190", "Mlh3_1191", "Mlh3_1192", "Mlh3_300", "Mlh3_800", "SPCII_1", "SPCI_1", "SPD_1", "SPG_1", "WT_2016-11-08", "WT_2016-11-10", "WT_2016-11-11", "WT_2016-11-15", "WT_2016-11-16", "WT_2016-11-17", "WT_2016-11-21", "WT_2016-11-22", "WT_28117", "mj_3"), mouse_age=c( # CNP 2 * month + 14, 2 * month + 16, # Cul4a 1 * month + 18, 1 * month + 21, # Hormad 4 * month + 3, 4 * month + 3, 4 * month + 0, # Mlh3 2 * month + 21, 2 * month + 22, 2 * month + 23, 8 * month + 30, 8 * month + 30, #SPC 2 * month + 21, 2 * month + 21, 2 * month + 21, 0 * month + 7, #WT 5 * month + 5, 5 * month + 7, 3 * month + 27, 4 * month + 0, 3 * month + 1, 3 * month + 2, 9 * month + 14, 7 * month + 21, 4 * month + 0, #MJ 4 * month + 2) ) tmp2 <- data.table(SDAresults$scores, cell=rownames(SDAresults$scores)) tmp2 <- melt(tmp2, id="cell") tmp2$experiment <- unlist(strsplit(tmp$cell,split = "\\."))[c(F,T)] tmp2 <- tmp2[mouse_ages, on="experiment"] tmp2[order(mouse_age), cell_id_byage:= 1:nrow(tmp2)] age_comp_means <- tmp2[,median(value),by=c("experiment","mouse_age","variable")] age_comp_means[,cor(mouse_age, V1, method='spearman'),by=variable][order(abs(V1))] qplot(rank(age_comp_means[variable=="V44"]$mouse_age), rank(age_comp_means[variable=="V44"]$V1))
for(i in 1:50){ print( ggplot(tmp, aes(cell_id_byage, get(paste0("V",i)), colour=experiment)) + geom_point() + theme(axis.text.x = element_blank(), legend.position = "none") ) }
sox30 <- readxl::read_xlsx("~/Downloads/TableS1.xlsx", sheet = "s1.4", range = "D2:D62") str(sox30$`Class 2 target genes (60)`) sox30_subset <- sox30$`Class 2 target genes (60)`[sox30$`Class 2 target genes (60)` %in% colnames(SDAresults$loadings[[1]])] sox30DEG <- readxl::read_xlsx("~/Downloads/TableS1.xlsx", sheet = "s1.1", skip = 1) sox30DEG <- data.table(sox30DEG)[order(p_value)][1:500]$Genes sox30DEG <- sox30DEG[sox30DEG %in% colnames(SDAresults$loadings[[1]])] sox30_enrich <- component_enrichment(sox30_subset) sox30DEG_enrich <- component_enrichment(sox30DEG) manhatten_plot(sox30_enrich, legend_position = c(0.2,0.6)) manhatten_plot(sox30DEG_enrich, legend_position = c(0.2,0.6))
tmp <- cell_data tmp[, group := factor(group, levels=c("WT","SPG","SPCI","SPCII","SPD","Hormad1","Mlh3","Cul4a","CNP"))] tnse_groups <- ggplot(tmp[group!="mj"], aes(Tsne1_QC1, Tsne2_QC1, colour=group)) + geom_point(data = transform(tmp, group = NULL), colour = "grey85", size=0.5, stroke=0) + geom_point(size=0.5, stroke=0) + scale_color_manual(values = c(brewer.pal(7,"Dark2"),"red", "black")) + facet_wrap(~group) + labs(x="t-SNE 1", y="t-SNE 2") + theme_minimal() + theme(legend.position = "none") pdf("../results/tsne_groups.pdf", width=16/2, height=9/2) tnse_groups dev.off()
marker_genes2 <- c("Dazl", "Dmrt1","Sycp3","Shcbp1l","Cdca8","Aurka","Nxt1","Acrv1","Lyzl1","Hemgn","Prm1","Tssk6","Oaz3","Aard","Clu","Defb19","Ptgds","Insl3","Lcn2","Col1a2") pdf("../results/markers_from_FIG1.pdf", width=6, height=9) plot_grid(plotlist = create_grob_list(fn = function(x){print_tsne(x,point_size = 0.1, predict = T)}, input = marker_genes2), ncol = 4, label_size=16) dev.off()
# from Meiotic_components.Rmd trip13_enrich_plot <- readRDS("../data/plots/trip13_enrich_plot.rds") c38_rug_plot <- readRDS("../data/plots/c83_rug_plot.rds") hormad <- plot_grid(c38_rug_plot, trip13_enrich_plot, labels = "AUTO", label_size = 25, nrow = 2) pdf(height=6.5, width=6*1.7, file="../results/hormad_enrich.pdf") hormad dev.off()
# from MSCI.Rmd msci_tsne <- readRDS("../data/plots/msci_tsne.rds") msci <- plot_grid(print_tsne(38, curve = T, point_size = 0.5) + ggtitle(""), msci_tsne+ ggtitle(""), labels=c("C","D"), label_size = 25) pdf(height=6, width=6*1.7, file="../results/msci_tsne.pdf") msci dev.off()
# Macrophages plot inc Hormad enrichment C11_MQ_plot <- readRDS("../data/plots/C11_MQ_plot.rds")
c40_loadings_plot <- genome_loadings(SDAresults$loadings[[1]][40,], label_both = FALSE, max.items = 10, label.size = 4, gene_locations = gene_annotations, hide_unknown = T, highlight_genes = c("Cyp17a1","Hsd3b6","Star","Cyp11a1","Hsd17b3"), label_genes = c("Cyp17a1","Hsd3b6","Star","Cyp11a1","Hsd17b3")) + ylab("Gene Loadings (Component 40)") + scale_size_continuous(range = c(1, 3)) mito_plot <- ggdraw() + draw_image("../results/testosterone-synth_wdoi.png", clip="on") leydig <- plot_grid(c40_loadings_plot, mito_plot, rel_widths = c(2,1))
pdf("../results/Fig_S6_top.pdf", height=13, width=20) plot_grid(hormad, msci, C11_MQ_plot, leydig, nrow = 2, labels = c("","","E","F"), label_size = 25, scale=0.9) dev.off()
loadings_plot <- genome_loadings(SDAresults$loadings[[1]][38,], label_both = FALSE, max.items = 10, label.size = 4, hide_unknown = T) + ylab("Gene Loadings (Component 38)") + theme_minimal() + theme(legend.position = "none") loadings_plot pdf(height=5, width=5*1.7, file="../results/C38.pdf") loadings_plot dev.off() pdf(height=5, width=5*1.7, file="../results/C5_tsne.pdf") tsne_plot + theme(legend.position = "right" ) + guides(colour = guide_legend(override.aes = list(ncol=1))) dev.off() pdf(height=5, width=5*1.7, file="../results/C5_loadings.pdf") v5_loadings_plot dev.off() pdf(height=5, width=5*1.7, file="../results/C5_enrich.pdf") c5_enrich_plot dev.off() pdf(height=5, width=5*1.7, file="../results/sparsity.pdf") sparsity_plot dev.off() jpeg("../results/component_tsne_all_presentation.jpeg", height=3000, width=3000*1.7, res = 200) component_tsne dev.off() pdf(height=5, width=5*1.7, file="../results/Prdm9_impute.pdf") imputed_vs_raw(c("Prdm9"), facet = F) + theme_minimal(base_size = 18) + theme(legend.position = "none") + ggtitle("Prdm9") dev.off() imputed_vs_raw(c("Piwil1"), facet = F) + theme_minimal(base_size = 18) + theme(legend.position = "none") + ggtitle("Piwil1") imputed_vs_raw(c("Ssxb1"), facet = F) + theme_minimal(base_size = 18) + theme(legend.position = "none") + ggtitle("Ssxb1") pdf(height=5, width=5*1.7, file="../results/Zfp37_impute.pdf") imputed_vs_raw(c("Zfp37"), facet = F) + theme_minimal(base_size = 18) + theme(legend.position = "none") + ggtitle("Zfp37") dev.off() pdf(height=5, width=5*1.7, file="../results/Gapdhs_impute.pdf") imputed_vs_raw(c("Gapdhs"), facet = F) + theme_minimal(base_size = 18) + theme(legend.position = "none") + ggtitle("Gapdhs") dev.off() pdf(height=5, width=5*1.7, file="../results/Lrrd1_impute.pdf") imputed_vs_raw(c("Lrrd1"), facet = F) + theme_minimal(base_size = 16) + theme(legend.position = "none") + ggtitle("Lrrd1") dev.off() GO_data <- readRDS("../data/GO_enrichment.rds") library(ggrepel) pdf(height=5, width=5*1.7, file="../results/V5N_enrich.pdf") go_volcano_plot(component = "V5N", label_size = 3, OR_threshold = 10) dev.off()
component_tsne <- plot_grid(plotlist = create_grob_list(fn = function(x) print_tsne(x) + theme(plot.title = element_text(size=24)), input = c(11,32,40,45,38,31,5,13,42,20,30,35,15,17,18)), nrow=3) pdf("../results/component_tsne_sel.pdf", width=24, height=13.5) component_tsne dev.off()
devtools::load_all("~/Dropbox/Github/SDAtools/") genome_loadings(SDAresults$loadings[[1]][5,], label_both = FALSE, max.items = 100, label.size = 4, hide_unknown = T, gene_locations = gene_annotations) + xlim(0,2e8)
"A Gene Regulatory Program for Meiotic Prophase in the Fetal Ovary" https://doi.org/10.1371/journal.pgen.1005531
Enriched in 5N
library(readxl) fetal_ovary <- read_excel("../data/previous_studies/Soh/journal.pgen.1005531.s008.XLSX", sheet = 1) union_ovary <- as.character(fetal_ovary$Row.names[fetal_ovary$Row.names %in% colnames(SDAresults$loadings[[1]])]) str(union_ovary) ovary_enrich <- component_enrichment(union_ovary) library(ggrepel) ggplot(ovary_enrich[!Component %in% gsub("V","",ordering[half_exclusions])], aes(Component, -log(p.value,10), label=Component)) + geom_point() + geom_label_repel(data=ovary_enrich[-log(p.value,10) > 15]) + geom_hline(yintercept = -log(0.05/100,10), col="red", size=0.2) + ggtitle("Manhattan Plot - Enrichment for Soh et al Meiotic Prophase genes in Ovary") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
Current schematic doesn't use real data? But it's hard to colour each column/row seperately to match the components across matrices in R. Also hard to represent both positive and negative with a single colour per comopnent..
library(ComplexHeatmap) tmp <- simulate_2D_data(n_individuals = 20, n_variables = 15, n_components = 5, sparsity = 0.7) Heatmap(tmp$Y, cluster_rows = F, cluster_columns = F, col = circlize::colorRamp2(c(-3, 0, 3), c("#E41A1C", "white", "#377EB8")), rect_gp = gpar(col = "grey", lty = 1, lwd = 2), show_heatmap_legend = F, column_title = "Genes", row_title = "Cells") Heatmap(tmp$A, cluster_rows = F, cluster_columns = F, col = circlize::colorRamp2(c(-3, 0, 3), c("#E41A1C", "white", "#377EB8")), rect_gp = gpar(col = "grey", lty = 1, lwd = 2), show_heatmap_legend = F, width = unit(3.5, "cm"), column_title = "Components", row_title = "Cells")
Heatmap(tmp$X, cluster_rows = F, cluster_columns = F, col = circlize::colorRamp2(c(-3, 0, 3), c("#E41A1C", "white", "#377EB8")), rect_gp = gpar(col = "grey", lty = 1, lwd = 2), show_heatmap_legend = F, column_title = "Genes", row_title = "Components")
sg_genes <- c("Gfra1","Id4","Nanos3","Glis3","Sipa1","Zbtb16","Ctcfl","Egr4","Upp1","Afp","Lin28a","Foxf1","Foxo1","Rhox10","Fbxo32","Morc1","Irf4","Dppa4","Dctd","Pramef12","Erap1","Ccdc141","Sohlh1","Nefm","Sbk1","Prtg") Spermatogonia_plot <- ggplot(melt_genes(sg_genes, predict = T)[PseudoTime>15500], aes(-PseudoTime, Expression, group=Gene, colour=Gene=="Ctcfl")) + geom_point(size=0.1) + geom_smooth(method = "gam",formula = y ~ s(x, k = 50), se=FALSE, colour="black") + ylab("Gene Expression") + xlab("Pseudotime") + theme_minimal() + facet_wrap(~Gene, scales = "free_y") + theme(legend.position ="none") + ggtitle("Early Spermatogonial Genes, with Pre-leptotene Ctcfl for reference") Spermatogonia_plot pdf(width = 8, file="../results/Spermatogonia_candidate_markers.pdf") Spermatogonia_plot plot_grid(plotlist = create_grob_list(fn = function(x){print_tsne(x,point_size = 0.1)}, input = sg_genes), ncol = 4, label_size=16) dev.off()
data <- readRDS("../data/merged_mouse_normalised_tsne-QC_V3.rds") library_size <- data$library_size data <- data$dge retina <- fread("../data/previous_studies/mccarroll/GSM1626798_P14Retina_6_gene_exon.dge.summary.txt") retina2 <- fread("../data/previous_studies/mccarroll/GSM1626794_P14Retina_2_gene_exon.dge.summary.txt") retina <- rbind(retina2, retina) bipolar <- fread("../data/previous_studies/biopolar/Bipolar_neurons_gene_exon.dge.summary.txt") pbmc10x <- fread("../data/previous_studies/10X_genomics/293t/293t_10X_gene_exon.dge.summary.txt") lib_sizes <- rbind(data.table(counts = library_size, exp="conrad"), data.table(counts = retina$NUM_TRANSCRIPTS, exp="mccarrol"), data.table(counts = bipolar$NUM_TRANSCRIPTS, exp="bipolar"), data.table(counts = pbmc10x$NUM_TRANSCRIPTS, exp="293t10x")) cell_sums <- lib_sizes[order(-counts)] cell_sums[, cumulative_reads := cumsum(counts), by=exp] cell_sums[, cell_index := seq_len(.N), by=exp] ggplot(cell_sums, aes(cell_index, cumulative_reads, colour = exp)) + geom_line() + theme(legend.position="bottom") ggplot(cell_sums, aes(counts)) + geom_histogram(bins=100) + facet_wrap(~exp, scales = "free_y") + xlim(0,5000) ggplot(cell_sums, aes(log(counts))) + geom_histogram(bins=100) + facet_wrap(~exp)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.