knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, cache = TRUE, fig.width = 14, fig.width = 14, comment = "#>" )
library(scChIX) library(dplyr) library(ggplot2) library(data.table) library(hash) library(ggforce)
First, we load the count matrices that are used for running the scChIX snakemake workflow.
# download "CountMatsGastrulationInputs.RData" from external URL (or GEO) because github doesn't allow files >100MB. # The object can be downloaded from seafile.ist.ac.at: # wget https://seafile.ist.ac.at/f/915d8fde09d14c659ed3/?dl=1 -O CountMatsGastrulationInputs.RData # load("CountMatsGastrulationInputs.RData") # Loading objects: # countmats.gastru
Save these count matrices as .rds
files into a snakemake_inputs/countmats
directory where the Snakefile
, config.yaml
, and cluster.json
files are.
# copy snakemake workflow files into directory for outputs outdir <- "/tmp" copycmd=paste0("cp snakemake_workflow/Snakefile snakemake_workflow/cluster.json snakemake_workflow/config.yaml snakemake_workflow/run_snakemake.gastru_K9.sh", outdir) # system(cpycmd) # run this to move files # save matrices as .rds to snakemake_inputs mkdircmd=paste0("mkdir -p ", outdir, "/snakemake_inputs/countmats") # system(mkdircmd) # make directory before writing mats # run chunk to save countmats as rds into snakemake input directory # use countmat_var_filt.${mark}.rds as name to match expected filename in Snakemake workflow # jmarks <- names(countmats.gastru); names(jmarks) # for (jmark in jmarks){ # saveRDS(countmats.gastru[[jmark]], file.path(outdir, paste0("countmat_var_filt.", jmark, ".rds"))) # } print(outdir)
bashscript=file.path(outdir, paste0("run_snakemake.gastru_K9.sh")) # modify for your specific conda environment and HPC cluster settings runcmd=paste0("bash ", bashscript) # system(runcmd) # launch snakemake workflow
Let's load the outputs of the snakemake workflow and look at the cell type and heterochromatin relationships.
data(GastrulationScChIXOutputsK36K9m3) # Loading objects: # act.repress.coord.lst fits.out <- act.repress.coord.lst data(GastrulationLouvainCelltypeAnnotations) # Loading objects: # louvain.celltype.metadata # ctype.colcode.metadata louv2ctype.act <- hash::hash(louvain.celltype.metadata$K36$louv.act, louvain.celltype.metadata$K36$cluster) louv2ctype.repress <- hash::hash(louvain.celltype.metadata$K9m3$louv.repress, louvain.celltype.metadata$K9m3$cluster) ctype2colcode <- hash::hash(ctype.colcode.metadata$cluster, ctype.colcode.metadata$colorcode)
We wrangle the fits and then plot the celltype to heterochromatin relationship
# if louvains are now from clusters need eto rethink jcoord cell.vec <- names(fits.out) names(cell.vec) <- cell.vec coords.dbl <- lapply(cell.vec, function(jcell){ jfit <- fits.out[[jcell]] jweight <- fits.out[[jcell]]$w p.mat <- SoftMax(jfit$ll.mat) jcoord <- which(jfit$ll.mat == max(jfit$ll.mat), arr.ind = TRUE) jmax <- max(p.mat) jlouv.act <- rownames(p.mat)[[jcoord[[1]]]] jlouv.repress <- colnames(p.mat)[[jcoord[[2]]]] jcluster.act <- louv2ctype.act[[jlouv.act]] jcluster.repress <- louv2ctype.repress[[jlouv.repress]] colcode <- ctype2colcode[[jcluster.act]] if (grepl("_", jlouv.act)){ jlouv.act <- strsplit(jlouv.act, split = "_")[[1]][[2]] } if (grepl("_", jlouv.repress)){ jlouv.repress <- strsplit(jlouv.repress, split = "_")[[1]][[2]] } out.dat <- data.frame(cell = jcell, celltype.act = jcluster.act, celltype.repress = jcluster.repress, colcode = colcode, lnprob = jmax, w = jweight, stringsAsFactors = FALSE) return(out.dat) }) %>% bind_rows() clstrs.order.active <- c("Erythroid", "WhiteBloodCells", "Endothelial", "NeuralTubeNeuralProgs", "Neurons", "SchwannCellPrecursor", "Epithelial", "MesenchymalProgs", "Cardiomyocytes") clstrs.order.repress <- c("Erythroid", "WhiteBloodCells", "NonBlood") coords.dbl$celltype.act <- factor(coords.dbl$celltype.act, levels = clstrs.order.active) coords.dbl$celltype.repress <- factor(coords.dbl$celltype.repress, levels = clstrs.order.repress) m.grid <- ggplot(coords.dbl, aes(x = celltype.act, y = celltype.repress, color = colcode)) + geom_point(alpha = 0.25, position = ggforce::position_jitternormal(sd_x = 0.08, sd_y = 0.08)) + theme_bw() + scale_color_identity() + theme(aspect.ratio=0.6, axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + ggtitle("Each dot is a double stained cell,\nX-Y shows the cluster pair it is assigned") print(m.grid)
The scChIX output also reveals global ratios of H3K36me3 and H3K9me3 and how they are lower in erythroids versus other cell types.
m.ratios <- ggplot(coords.dbl, aes(x = celltype.act, y = log2(w / (1 - w)), fill = colcode)) + geom_boxplot() + theme_bw() + scale_fill_identity() + ylab("log2 H3K36me3 to H3K9me3 ratio") + theme(aspect.ratio=1, panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + ggtitle("log2 H3K36me3 to H3K9me3 ratio inferred from double-incubated cells") print(m.ratios)
This can be a UMAP.
m.ratios <- ggplot(coords.dbl, aes(x = celltype.act, y = log2(w / (1 - w)), fill = colcode)) + geom_boxplot() + theme_bw() + scale_fill_identity() + ylab("log2 H3K36me3 to H3K9me3 ratio") + theme(aspect.ratio=1, panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) + ggtitle("log2 H3K36me3 to H3K9me3 ratio inferred from double-incubated cells") print(m.ratios)
inf.meta.annot <- "/nfs/scistore12/hpcgrp/jyeung/data_from_Hubrecht/hpc_hub_oudenaarden/scChIX_source_data/Figure_04_scChIX_organo_H3K36me3_and_H3K9me3_metadata_final.txt.gz" dat.meta.annot <- fread(inf.meta.annot) cell2ctype <- hash::hash(dat.meta.annot$cell, dat.meta.annot$cluster) cell2col <- hash::hash(dat.meta.annot$cell, dat.meta.annot$clustercol) # inf.meta <- "/nfs/scistore12/hpcgrp/jyeung/data_from_Hubrecht/hpc_hub_oudenaarden/scChIX_gastrulation/from_analysis_macbook/objs_from_macbook/metadata_flipped.rds" inf.meta <- "/nfs/scistore12/hpcgrp/jyeung/data_from_Hubrecht/hpc_hub_oudenaarden/scChIX_gastrulation/from_cluster/metadata/metadata_cleaned.2021-10-04.rds" dat.meta <- readRDS(inf.meta) %>% rowwise() %>% mutate(clusterold = cluster, cluster = cell2ctype[[cell]], clustercol = cell2col[[cell]]) ggplot(dat.meta, aes(x = umap1, y = umap2, color = clustercol, group = cell)) + geom_line(alpha = 0.1) + geom_point() + theme_bw() + scale_color_identity() + theme(aspect.ratio=0.5, panel.grid.major = element_blank(), panel.grid.minor = element_blank()) dat.meta.ratios <- left_join(coords.dbl, dat.meta) m.umap <- ggplot(dat.meta.ratios, aes(x = umap1, y = umap2, color = log2(w / (1 - w)), group = cell)) + geom_line(alpha = 0.1) + geom_point() + theme_bw() + scale_color_viridis_c() + theme(aspect.ratio=0.4, panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "bottom") # dat.meta$plate <- sapply(dat.meta$cell, function(x) scchicFuncs::ClipLast(x = x, jsep = "_")) # dat.meta$cond <- interaction(dat.meta$type, dat.meta$mark, sep = "_") # dat.meta.split <- split(dat.meta, f = dat.meta$cond) # # dat.plates <- lapply(dat.meta.split, function(dat){ # unique(dat$plate) # }) # nplates <- sapply(dat.plates, function(jdat) length(jdat)) # # # inf.meta.mac <- "/nfs/scistore12/hpcgrp/jyeung/data_from_Hubrecht/hpc_hub_oudenaarden/scChIX_differentiation/for_databank/processed_data/scchix_macrophage_differentiation_dat_meta_merged_H3K4me1_and_H3K36me3.txt.gz" # dat.meta.mac <- fread(inf.meta.mac) # dat.meta.mac$plate <- sapply(dat.meta.mac$cell, function(x) scchicFuncs::ClipLast(x = x, jsep = "_")) # dat.meta.mac$cond <- interaction(dat.meta.mac$experi, dat.meta.mac$mark, sep = "_") # dat.meta.mac.split <- split(dat.meta.mac, f = dat.meta.mac$cond) # # dat.plates.mac <- lapply(dat.meta.mac.split, function(dat){ # unique(dat$plate) # }) # nplates <- sapply(dat.plates.mac, function(jdat) length(jdat)) # save outputs outpdf <- paste0("/nfs/scistore12/hpcgrp/jyeung/projects/scChIX/plots/organogenesis_umaps.", Sys.Date(), ".pdf") pdf(outpdf, useDingbats = FALSE) print(m.ratios) print(m.umap) dev.off()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.