tss_seqs <- seqinr::read.fasta("../data/motifs/promoter_sequences/all_genes_tss_1000.fasta", forceDNAtolower = FALSE, as.string = TRUE) tss_seqs_s <- toupper(as.character(tss_seqs)) names(tss_seqs_s) <- gsub("\\(-\\)|\\(\\+\\)","",names(tss_seqs)) tss_seqs_s <- substr(tss_seqs_s,500,2000) #CG_count <- sapply(tss_seqs_s,function(x) stringr::str_count(x,"CG")) CG_locations <- sapply(tss_seqs_s,function(x) stringr::str_locate_all(x,"CG")[[1]][,1]) CG_count <- sapply(CG_locations, function(x) sum(x>450 & x<1000)) hist(CG_count, breaks=1000) abline(v=20,col="blue", lwd=3) abline(v=45,col="blue", lwd=3) hist(unlist(CG_locations), breaks=100) abline(v=500, col="blue", lwd=3)
best_denovo <- readRDS("../data/motifs/best_denovo_V3.rds") motif_matches <- readRDS("../data/motifs/motif_matches_v3.rds") # Load Regprobs HM11_regprobs <- readRDS("../data/motifs/HocomocoV11_regprobs_matrix_core_and_extra.rds") str(HM11_regprobs) # load motif categories motif_groups <- read.table("../data/motifs/motif_results_groups.csv", fill=T, sep = ",", stringsAsFactors=F) motif_groups$group <- c(1:(nrow(motif_groups))) #c(NA,1:(nrow(motif_groups)-1)) motif_groups <- data.table(melt(motif_groups, id.vars="group"))[value!=""] regprobs_matrix <- HM11_regprobs[,best_denovo$Target_ID] colnames(regprobs_matrix) <- sapply(strsplit(colnames(regprobs_matrix), "_"),function(x) x[1]) regprobs_matrix_all <- HM11_regprobs[,motif_matches[Target.Name %in% motif_groups[!is.na(group)]$value]$Target_ID] colnames(regprobs_matrix_all) <- sapply(strsplit(colnames(regprobs_matrix_all), "_"),function(x) x[1]) str(regprobs_matrix) str(regprobs_matrix_all)
add custom (new) motifs to matrix
results_custom <- readRDS("../data/motifs/MF_results_custom_V3b.rds") regprobs_matrix_custom <- sapply(results_custom, function(x) x$dt$regprob) rownames(regprobs_matrix_custom) <- results_custom[[1]]$dt$sequence regprobs_matrix_custom <- regprobs_matrix_custom[which(rownames(regprobs_matrix_custom) %in% names(tss_seqs_s)), ] # add CpG Motif to matrix regprobs_matrix_custom <- cbind("CpG"=sqrt(CG_count)[rownames(regprobs_matrix_custom)]/max(CG_count), regprobs_matrix_custom)
SDAresults$loadings_split <- split_loadings()#[rownames(HM11_regprobs),] loadings_subset <- t(SDAresults$loadings[[1]][,rownames(regprobs_matrix_custom)])
qplot(1:50,tapply(CG_count[names(sort(loadings_subset[,"V5"],T))], cut(seq_along(CG_count[names(loadings_subset[,"V5"])]), 50), function(x) mean(x,na.rm=T)), ylab="Mean CpG count", xlab="Binned Component 5 loadings") summary(lm(loadings_subset[,"V5"] ~ CG_count[rownames(loadings_subset)]*(loadings_subset[,"V5"]>0)))
# plot raw associations # regrank_matrix <- apply(regprobs_matrix_custom, 2, rank) plot(rank(SDAresults$loadings[[1]][5,rownames(regprobs_matrix)]),rank(regprobs_matrix[,"NRF1"])) plot(rank(SDAresults$loadings[[1]][5,rownames(regprobs_matrix)]),rank(regprobs_matrix[,"SP2"])) plot(rank(SDAresults$loadings[[1]][42,rownames(regprobs_matrix)]),rank(regprobs_matrix[,"MYBA"])) plot(rank(SDAresults$loadings[[1]][11,rownames(regprobs_matrix)]),rank(regprobs_matrix[,"SPI1"])) plot(rank(SDAresults$loadings[[1]][17,rownames(regprobs_matrix)]),rank(regprobs_matrix_custom[,"ATF1"]))
# For a give component, display binned associations library(testisAtlas) motif_order <- c("CpG","SP2","TAF1","GABPA","NRF1","ETV5","TYY1","NFYA","ZN143","MYBA","STRA8","RFX6","ATF1","RARA","TBP","SPI1","CREMT") binned_regprobs_custom <- binned_mean_regprob(regprobs = regprobs_matrix_custom[,motif_order]) plot_component_regprobs(11, binned_regprobs_custom) plot_component_regprobs(5, binned_regprobs_custom) plot_component_regprobs(42, binned_regprobs_custom) plot_component_regprobs(28, binned_regprobs_custom) plot_component_regprobs(30, binned_regprobs_custom) plot_component_regprobs(17, binned_regprobs_custom)
compare to the mean loadings per bin of loading
bin_factor <- cut(seq_along(regprobs_matrix[,1]), 50) tmp <- sort(SDAresults$loadings[[1]][30,]) tmp <- tmp[names(tmp) %in% rownames(regprobs_matrix)] plot(tapply(tmp, bin_factor, mean), main="30") tmp <- sort(SDAresults$loadings[[1]][5,]) tmp <- tmp[names(tmp) %in% rownames(regprobs_matrix)] plot(tapply(tmp, bin_factor, mean), main="5") tmp <- sort(SDAresults$loadings[[1]][17,]) tmp <- tmp[names(tmp) %in% rownames(regprobs_matrix)] plot(tapply(tmp, bin_factor, mean), main="17") tmp <- sort(SDAresults$loadings[[1]][42,]) tmp <- tmp[names(tmp) %in% rownames(regprobs_matrix)] plot(tapply(tmp, bin_factor, mean), main="42")
library(ComplexHeatmap) library(RColorBrewer) plot_meanprob_bycomp(binned_regprobs_custom) Heatmap(t(binned_regprobs_custom["SP2",,component_order]), col=rev(colorRampPalette(brewer.pal(11, "RdYlBu"))(100)), cluster_rows = FALSE, cluster_columns = FALSE, column_title = "SP2") Heatmap(t(binned_regprobs_custom["RFX6",,component_order]), col=rev(colorRampPalette(brewer.pal(11, "RdYlBu"))(100)), cluster_rows = FALSE, cluster_columns = FALSE, column_title = "RFX2") Heatmap(t(binned_regprobs_custom["GABPA",,component_order]), col=rev(colorRampPalette(brewer.pal(11, "RdYlBu"))(100)), cluster_rows = FALSE, cluster_columns = FALSE, column_title = "GABPA") Heatmap(t(binned_regprobs_custom["MYBA",,component_order]), col=rev(colorRampPalette(brewer.pal(11, "RdYlBu"))(100)), cluster_rows = FALSE, cluster_columns = FALSE, column_title = "MYBA") Heatmap(t(binned_regprobs_custom["ATF1",,component_order]), col=rev(colorRampPalette(brewer.pal(11, "RdYlBu"))(100)), cluster_rows = FALSE, cluster_columns = FALSE, column_title = "ATF1*") mean_prob_matrix <- cbind(binned_regprobs_custom[,50,],binned_regprobs_custom[,1,]) colnames(mean_prob_matrix) <- colnames(SDAresults$loadings_split) mean_prob_matrix <- t(mean_prob_matrix[,ordering[-half_exclusions]]) rownames(mean_prob_matrix) <- paste(rownames(mean_prob_matrix), component_names[-half_exclusions]) NMF::aheatmap(scale(mean_prob_matrix), Rowv = NA, hclustfun = "ward.D2")
regprob_enrich <- function(i,motif,n=500){ a=regprobs_matrix_custom[names(sort(SDAresults$loadings_split[rownames(regprobs_matrix_custom),i],T))[1:n],motif] b=regprobs_matrix_custom[names(sort(SDAresults$loadings_split[rownames(regprobs_matrix_custom),i],T))[(n+1):4512],motif] return(wilcox.test(a,b, alternative = "greater")$p.value) } comps <- 1:100 names(comps) <- colnames(SDAresults$loadings_split) combined <- sapply(comps, function(x) regprob_enrich(x, "STRA8", n=500)) #plot(-log10(combined)) tmp <- data.table( component = factor(names(combined), levels=names(combined)[c(rbind(component_order_all,component_order_all+50))]), p.value = combined ) tmp$name <- rep(component_order_dt$name,2) tmp[order(component), rank := 1:100] tmp[rank >= 41, Type := "Meiotic"] tmp[rank < 41, Type := "Somatic"] tmp$OR = 1 manhatten_plot(tmp[!component %in% half_exclusion_comps], topn = 4)
For selected motifs
motif_order <- c("CpG","SP2","TAF1","GABPA","NRF1","ETV5","TYY1","NFYA","ZN143","STRA8","MYBA","RFX6","ATF1","RARA","TBP","SPI1","CREMT") vvv3 <- correlate_regprobs(regprobs = regprobs_matrix_custom[,motif_order]) vvv3 <- vvv3[ordering[-half_exclusions],] rownames(vvv3) <- paste(rownames(vvv3), component_names[-half_exclusions]) #motif_order <- c("SP2","ETV5","CpG","TAF1","ZN143","NRF1","GABPA","NFYA","ZFP42","MYBA","MLX","CREM","RFX2","TBP","STAT1","SPIB") #vvv3 <- vvv3[,motif_order] #c("Sp2","Etv5","CpG","Taf1","Zfp143","Gabpa","Nrf1","Nfya","Yy1/Zfp42","Mybl1","Mlx","Crem","Rfx2","Tbp","Stat1","Spi1","Crem-t") motif_gene_names <- c("CpG","Sp2","Taf1","Gabpa","Nrf1","Etv5","Zfp42\n (Yy1)","Nfya","Zfp143","Stra8*","Mybl1","Rfx2\n (Rfx6)","Crem\n (Atf1)","Rara","Tbp","Spi1","Crem-t") colnames(vvv3) <- motif_gene_names
Create annotations
library(ComplexHeatmap) results_custom_motifs <- sapply(results_custom, function(x) x$scoremat) hascpg <- c("CpG"=NA,sapply(results_custom_motifs, function(x) MotifFinder::submotif(exp(x)))) #meth_sens = c("SP2"="No","ETV5"="Yes","CpG"="NA","TAF1"="No","ZN143"="No","NRF1"="Yes","GABPA"="Yes","NFYA"="Yes", # "ZFP42"="Yes","MYBA"="No","MLX"="Yes","CREM"="Yes","RFX2"="No","TBP"="No","STAT1"="No","SPI1"="Yes","ATF1"="NA") hascpg["CpG"] <- max(hascpg, na.rm = T) #hascpg2["CpG"] <- min(hascpg2, na.rm = T) dt1 <- data.table("Max(P(CpG)):" = hascpg[motif_order], "Max(P(CpG))>0.3:" = hascpg[motif_order] > 0.3) #"P(CpG2):" = 1-hascpg2[motif_order], #,"Meth Sensitive?:" = meth_sens[motif_order] ha1 = HeatmapAnnotation(df = dt1, simple_anno_size = unit(1.5, "mm"), show_annotation_name = FALSE, col = list("Has CpG?:"=c("Yes"="purple","No"="grey"), "Meth Sensitive?:"=c("Yes"="dark green","No"="grey","NA"="grey28"), "Max(P(CpG))>0.3:"=c("TRUE"="purple","FALSE"="grey"), "Max(P(CpG)):"=circlize::colorRamp2(c(min(hascpg), max(hascpg)), c("white", "red"))), #colorRamp2(seq(0, 1, by = 0.25), viridisLite::viridis(5, direction = -1)) annotation_legend_param = list(legend_direction="horizontal", ncol=2, labels_gp = gpar(fontsize = 7), title_position = "leftcenter") )
plot heatmap
pdf(height=6, width=8.5, file="../results/motifs/motifs_fig_DN.pdf") draw( Heatmap(vvv3, col = log_colour_scale(vvv3), cluster_rows = FALSE, cluster_columns = FALSE, column_order = motif_gene_names, row_order = 1:nrow(vvv3), top_annotation = ha1, heatmap_legend_param = list(legend_direction = "horizontal", title = "t-value"), row_names_gp = gpar(fontsize = 4), show_heatmap_legend = T), split = rep(c(" Somatic","", " Diploid", "Haploid"), c(18,4,32,18)), row_dend_gp = gpar(fontsize = 3), annotation_legend_side = "bottom", heatmap_legend_side = "bottom") dev.off()
# generic version of above (without annotation and custom ordering) save_regprobs_heatmap <- function(regprobs_matrix = regprobs_matrix_C2N, file="../results/motifs/motifs_fig_DN_C2N.pdf", width=30){ vvv3 <- correlate_regprobs(regprobs = regprobs_matrix) vvv3 <- vvv3[ordering[-half_exclusions],] rownames(vvv3) <- paste(rownames(vvv3), component_names[-half_exclusions]) pdf(height=5, width=width, file=file) draw(Heatmap(vvv3, col = log_colour_scale(vvv3), cluster_rows = FALSE, row_order = 1:nrow(vvv3), heatmap_legend_param = list(legend_direction = "horizontal", title = "t-value"), row_names_gp = gpar(fontsize = 3), column_names_gp = gpar(fontsize = 3), show_heatmap_legend = T), split = rep(c(" Somatic","", " Diploid", "Haploid"), c(18,4,32,18)), heatmap_legend_side = "bottom") dev.off() }
# select motifs, HCMC regprobs save_regprobs_heatmap(regprobs_matrix, "../results/motifs/motifs_fig_DN_HCMCregprobs_subset.pdf", width=5) # all denovo motifs, but using hocomoco regprobs save_regprobs_heatmap(regprobs_matrix_all, "../results/motifs/motifs_fig_DN_HCMCregprobs.pdf", width=10) # All denovo motifs, denovo regprobbs save_regprobs_heatmap(regprobs_matrix_custom, "../results/motifs/motifs_fig_DN_All.pdf", width=10)
regprobs_matrix_core <- sapply(results_core, function(x) x$dt$regprob) rownames(regprobs_matrix_core) <- results_core[[1]]$dt$sequence save_regprobs_heatmap(regprobs_matrix_core, "../results/motifs/motifs_fig_HM_core.pdf", width=100) save_regprobs_heatmap(HM11_regprobs[,1:300], "../results/motifs/motifs_fig_HM11_regprobsA.pdf", width=100) save_regprobs_heatmap(HM11_regprobs[,301:600], "../results/motifs/motifs_fig_HM11_regprobsB.pdf", width=100) save_regprobs_heatmap(HM11_regprobs[,601:900], "../results/motifs/motifs_fig_HM11_regprobsC.pdf", width=100) save_regprobs_heatmap(HM11_regprobs[,901:1276], "../results/motifs/motifs_fig_HM11_regprobsD.pdf", width=100)
results_C2N <- readRDS("../data/motifs/MF_results_C2N_V3.rds") regprobs_matrix_C2N <- sapply(results_C2N, function(x) x$dt$regprob) rownames(regprobs_matrix_C2N) <- results_C2N[[1]]$dt$sequence regprobs_matrix_C2N <- regprobs_matrix_C2N[which(rownames(regprobs_matrix_C2N) %in% names(tss_seqs_s)), ] save_regprobs_heatmap(regprobs_matrix_C2N, "../results/motifs/motifs_fig_DN_C2N.pdf", width = 7) ##### results_C5N <- readRDS("../data/motifs/MF_results_C5N_V3.rds") regprobs_matrix_C5N <- sapply(results_C5N, function(x) x$dt$regprob) rownames(regprobs_matrix_C5N) <- results_C5N[[1]]$dt$sequence regprobs_matrix_C5N <- regprobs_matrix_C5N[which(rownames(regprobs_matrix_C5N) %in% names(tss_seqs_s)), ] save_regprobs_heatmap(regprobs_matrix_C5N, "../results/motifs/motifs_fig_DN_C5N.pdf", width = 7) ###### results_C11P <- readRDS("../data/motifs/MF_results_C11P_V3.rds") regprobs_matrix_C11P <- sapply(results_C11P, function(x) x$dt$regprob) rownames(regprobs_matrix_C11P) <- results_C11P[[1]]$dt$sequence regprobs_matrix_C11P <- regprobs_matrix_C11P[which(rownames(regprobs_matrix_C11P) %in% names(tss_seqs_s)), ] save_regprobs_heatmap(regprobs_matrix_C11P, "../results/motifs/motifs_fig_DN_C11P.pdf", width = 7) ###### results_C3P <- readRDS("../data/motifs/MF_results_C3P_V3.rds") regprobs_matrix_C3P <- sapply(results_C3P, function(x) x$dt$regprob) rownames(regprobs_matrix_C3P) <- results_C3P[[1]]$dt$sequence regprobs_matrix_C3P <- regprobs_matrix_C3P[which(rownames(regprobs_matrix_C3P) %in% names(tss_seqs_s)), ] save_regprobs_heatmap(regprobs_matrix_C3P, "../results/motifs/motifs_fig_DN_C3P.pdf", width = 7) ###### results_novel_A <- readRDS("../data/motifs/MF_results_novel_A.rds") regprobs_matrix_novel_A <- sapply(results_novel_A, function(x) x$dt$regprob) rownames(regprobs_matrix_novel_A) <- results_novel_A[[1]]$dt$sequence save_regprobs_heatmap(regprobs_matrix_novel_A, "../results/motifs/motifs_fig_DN_novelA.pdf", width=15) results_novel_B <- readRDS("../data/motifs/MF_results_novel_B.rds") regprobs_matrix_novel_B <- sapply(results_novel_B, function(x) x$dt$regprob) rownames(regprobs_matrix_novel_B) <- results_novel_B[[1]]$dt$sequence save_regprobs_heatmap(regprobs_matrix_novel_B, "../results/motifs/motifs_fig_DN_novelB.pdf", width=15)
whichpos_matrix_custom <- sapply(results_custom, function(x) x$dt[order(-regprob)][1:3000]$whichpos) whichpos_dt <- melt(data.table(t(whichpos_matrix_custom[,motif_order[c(-1)]]), keep.rownames = T), id.vars = "rn") whichpos_dt[,rn := factor(rn, levels=motif_order[c(-1)])] ggplot(whichpos_dt, aes(value-500)) + geom_density() + facet_wrap(~rn, scales="free_y") + geom_vline(xintercept = 0, col='red') + xlab("Distance from TSS") + ylab("Relative Density")
common_genes <- rownames(regprobs_matrix_custom)[rownames(regprobs_matrix_custom) %in% colnames(data)] tmp <- data[,common_genes] * matrix(rep(t(regprobs_matrix_custom[common_genes,"CREMT",drop=F]),nrow(data)),nrow=nrow(data),byrow = T) tmp2 <- Matrix::rowMeans(tmp) ggplot(data.table(Stra8=tmp2, cell=names(tmp2))[cell_data,on="cell"], aes(Tsne1_QC1, Tsne2_QC1, colour=Stra8)) + geom_point(stroke=0, size=1) + scale_colour_viridis(direction = -1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.