knitr::opts_chunk$set( collapse = TRUE, warning = FALSE, echo = TRUE, cache = TRUE, fig.width = 14, fig.width = 14, comment = "#>" )
library(scChIX) library(dplyr) library(ggplot2) library(data.table) library(hash)
In this notebook, we will infer relationships between two correlated histone marks, H3K4me1 and H3K36me3, from double-incubated cells.
First, we load our training data.
# These objects are too large for github, so download them from seafile.ist.ac.at: # wget https://seafile.ist.ac.at/f/4c9902f7e77e4e78b02e/?dl=1 -O LowessFits_H3K4me1.RData # wget https://seafile.ist.ac.at/f/c1bdec80ab8943d790f3/?dl=1 -O LowessFits_H3K36me3.RData # load(LowessFits_H3K4me1, v=T) # Loading objects: # lowess.fits.k4me1 # load(LowessFits_H3K36me3, v=T) # Loading objects: # lowess.fits.k36me3
Next we load our count matrix. To improve scChIX inference, we sum counts across 25 nearest neighbors.
data(MacDiffDblMatNearestNeighbors) # Loading objects: # umap.out data(MacDiffDblMatForSplitting_H3K4me1xH3K36me3) # Loading objects: # mat.dbl.H3K4me1xH3K36me3 mat.dbl <- mat.dbl.H3K4me1xH3K36me3; rm(mat.dbl.H3K4me1xH3K36me3) nn <- 25 dbl.cells <- rownames(umap.out$knn$distances); names(dbl.cells) <- dbl.cells nearest.cells.lst <- lapply(dbl.cells, function(jcell){ indx.vec <- umap.out$knn$indexes[jcell, 1:nn] nearest.cells <- rownames(umap.out$knn$distances)[indx.vec] }) mat.dbl.knn <- lapply(dbl.cells, function(jcell){ jcells.keep <- nearest.cells.lst[[jcell]] rowSums(mat.dbl[, jcells.keep]) }) mat.dbl <- do.call(cbind, mat.dbl.knn)
Our training data takes a pseudotime value (number between 0 and 1) and outputs the predicted gene expression for every gene. We can therefore use this training data to infer the pseudotime of H3K4me1 ahd H3K36me3 for each double-incubated cell.
To speed things up, we will run scChIX on just a subset of cells.
set.seed(0) ncores <- 16 subset.fraction <- 1 w <- 0.77 jcells <- colnames(mat.dbl); names(jcells) <- jcells print(length(jcells)) jcells.subset <- sample(jcells, size = floor(subset.fraction * length(jcells)), replace = FALSE) print(length(jcells.subset)) print("Running fits...") # system.time( # dat.fits.raw <- parallel::mclapply(jcells.subset, function(jcell){ # cell.count.raw.merged <- mat.dbl[, jcell] # jfit.fixedw <- FitMixingWeightPtimeLinear.fixedw(cell.count.raw.merged = cell.count.raw.merged, jfits.lowess1 = lowess.fits.k4me1, jfits.lowess2 = lowess.fits.k36me3, w.fixed = w, p1.init = 0.5, p1.lower = 0.01, p1.upper = 0.99, p2.init = 0.5, p2.lower = 0.01, p2.upper = 0.99, jmethod = "L-BFGS-B", jhessian = TRUE) # }, mc.cores = ncores) # ) # print("Running fits... done") # or you can just load the fit outputs data(scChIXOutputs_H3K4me1xH3K36me3) # dat.fits.raw
We extract the inferred pseudotime and its standard errors from each cell.
dat.fits.clean.lst <- lapply(jcells.subset, function(jcell){ jdat <- data.frame(cell = jcell, ptime1 = dat.fits.raw[[jcell]]$par[[1]], ptime2 = dat.fits.raw[[jcell]]$par[[2]], stringsAsFactors = FALSE) # get se mathessian <- dat.fits.raw[[jcell]]$hessian inversemathessian <- solve(mathessian) res <- sqrt(diag(inversemathessian)) jdat$ptime1.se <- res[[1]] jdat$ptime2.se <- res[[2]] return(jdat) }) dat.fits.clean <- dat.fits.clean.lst %>% bind_rows() %>% rowwise() %>% mutate(daystr = paste0("day_", scChIX::GetDayFromCellByRow(cell)))
We plot the outputs to verify that the double-incubated cells show a pseudotime progression from 0 to 1. We can color each double-incubated cell by the day from which they were calculated during the 7-day time course to verify that later pseudotimes correspond to later days in the time course.
For each cell, we plot the maximum likelihood pair of pseudotimes for H3K4me1 (x-axis) and H3K36me3 (y-axis) as well as the 99% confidence intervals.
jfactor <- 2.6 m <- ggplot(dat.fits.clean, aes(x = ptime1, y = ptime2, xmin = ptime1 - ptime1.se * jfactor, xmax = ptime1 + ptime1.se * jfactor, ymin = ptime2 - ptime2.se * jfactor, ymax = ptime2 + ptime2.se * jfactor, color = daystr)) + geom_point(alpha = 1) + geom_errorbar(alpha = 0.25) + geom_errorbarh(alpha = 0.25) + theme_bw() + scale_color_viridis_d() + geom_abline(slope = 1, linetype = "dotted") + xlab("Pseudotime H3K4me1") + ylab("Pseudotime H3K36me3") + theme(aspect.ratio=1, panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "bottom") print(m)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.