# Jake Yeung
# Date of Creation: 2021-07-12
# File: ~/projects/scChIX/analysis_scripts/1-explore_raw_count_data_CountsAndTAFracOnly.R
#
rm(list=ls())
library(dplyr)
library(tidyr)
library(ggplot2)
library(data.table)
library(Matrix)
library(scchicFuncs)
outdir <- file.path(hubprefix, "jyeung/data/dblchic/gastrulation/from_analysis/filtered_count_tables_CountsAndTAFracOnly_RemoveBadDblPlate")
bad.plate.dbl <- "E9p5-CB6-K36-K9m3-190409-2"
# Load raw counts --------------------------------------------------------
hubprefix <- "/home/jyeung/hub_oudenaarden"
jmarks <- c("K36", "K9m3", "K36-K9m3")
names(jmarks) <- jmarks
jmark1 <- jmarks[[1]]
jmark2 <- jmarks[[2]]
jmarkdbl <- jmarks[[3]]
jsuffix <- "50000"
# load mine
infs.lst <- lapply(jmarks, function(jmark) file.path(hubprefix, paste0("jyeung/data/dblchic/gastrulation/count_tables/counts_tables_", jsuffix, "/", jmark, "/cbind_out/filtered_counts/count_mat.", jmark, ".countcutoffmin_1000.TAcutoff_0.5.rds")))
lapply(infs.lst, function(x) file.exists(x))
mats.lst <- lapply(infs.lst, function(inf) readRDS(inf))
print(lapply(mats.lst, dim))
# Get common rows --------------------------------------------------------
rows.lst <- lapply(mats.lst, function(jmat) rownames(jmat))
rows.common <- Reduce(intersect, x = rows.lst)
mats.lst.common <- lapply(mats.lst, function(jmat) jmat[rows.common, ])
mats.cbind.common <- do.call(cbind, mats.lst.common)
# Plot LSI all three together --------------------------------------------
library(JFuncs)
library(irlba)
library(igraph)
library(umap)
library(hash)
jcheck <- scchicFuncs::RunLSI(count.mat = as.matrix(mats.cbind.common), n.components = 50)
jcheck2 <- scchicFuncs::RunLSI(count.mat = as.matrix(mats.lst$K36), n.components = 50)
lapply(jcheck, dim)
lapply(jcheck2, dim)
jsettings <- umap.defaults
jsettings$n_neighbors <- 30
jsettings$min_dist <- 0.1
jsettings$random_state <- 123
dat.umap <- DoUmapAndLouvain(jcheck$u, jsettings = jsettings) %>%
rowwise() %>%
mutate(plate = ClipLast(cell, jsep = "_"),
experi = ClipLast(plate, jsep = "-"))
dat.umap2 <- DoUmapAndLouvain(jcheck2$u, jsettings = jsettings)
ggplot(dat.umap, aes(x = umap1, y = umap2, color = louvain)) +
geom_point() +
theme_bw() +
theme(aspect.ratio=1, panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggplot(dat.umap2, aes(x = umap1, y = umap2, color = louvain)) +
geom_point() +
theme_bw() +
theme(aspect.ratio=1, panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# Remove bad cells -------------------------------------------------------
dat.umap$mark <- sapply(dat.umap$cell, function(x){
if (grepl(jmarkdbl, x)){
jmarktmp <- jmarkdbl
} else if (grepl(jmark1, x)){
jmarktmp <- jmark1
} else if (grepl(jmark2, x)){
jmarktmp <- jmark2
}
return(jmarktmp)
})
dat.umap <- dat.umap %>%
rowwise() %>%
mutate(stage = strsplit(cell, split = "-")[[1]][[1]])
ggplot(dat.umap, aes(x = umap1, y = umap2, color = mark)) +
geom_point(alpha = 0.25) + theme_bw() +
theme(aspect.ratio=1, panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggplot(dat.umap, aes(x = umap1, y = umap2, color = mark)) +
geom_point() + theme_bw() + facet_wrap(~mark) +
theme(aspect.ratio=1, panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggplot(dat.umap, aes(x = umap1, y = umap2, color = louvain)) +
geom_point(alpha = 0.25) + theme_bw() +
theme(aspect.ratio=1, panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggplot(dat.umap %>% filter(mark == "K9m3"), aes(x = umap1, y = umap2, color = louvain)) +
geom_point(alpha = 0.25) + theme_bw() +
facet_wrap(~experi) +
theme(aspect.ratio=1, panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggplot(dat.umap %>% filter(mark == "K36-K9m3"), aes(x = umap1, y = umap2, color = louvain)) +
geom_point(alpha = 0.25) + theme_bw() +
facet_wrap(~plate) +
theme(aspect.ratio=1, panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggplot(dat.umap %>% filter(louvain != "3"), aes(x = umap1, y = umap2, color = mark)) +
geom_point(alpha = 0.25) + theme_bw() +
theme(aspect.ratio=1, panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggplot(dat.umap %>% filter(louvain != "3"), aes(x = umap1, y = umap2, color = mark)) +
geom_point(alpha = 0.25) + theme_bw() + facet_wrap(~mark) +
theme(aspect.ratio=1, panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# Remove K9m3s that cluster with K36 -------------------------------------
umap1min <- 0
umap2max <- 1
dat.umap.midfilt <- subset(dat.umap, louvain != "3")
dat.umap.midfilt2 <- dat.umap.midfilt %>% filter( ! ( grepl("K9m3", mark) & umap1 > umap1min & umap2 < umap2max) )
ggplot(dat.umap.midfilt2, aes(x = umap1, y = umap2, color = mark)) +
geom_point(alpha = 0.25) + theme_bw() + facet_wrap(~mark) +
coord_cartesian(xlim = c(-7, 7), ylim = c(-10, 10)) +
theme(aspect.ratio=1, panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# check k36me3 still OK ?
ggplot(dat.umap2 %>% filter(cell %in% dat.umap.midfilt2$cell), aes(x = umap1, y = umap2)) +
geom_point(alpha = 0.25) + theme_bw() +
theme(aspect.ratio=1, panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# Write to output --------------------------------------------------------
for (jmark in jmarks){
print(jmark)
outrds <- file.path(outdir, paste0("count_tables.", jsuffix, ".", jmark, ".", Sys.Date(), ".rds"))
jsub <- subset(dat.umap.midfilt2, mark == jmark)
cells.keep <- jsub$cell
jmat.sub <- mats.lst.common[[jmark]][rows.common, cells.keep]
print(dim(jmat.sub))
saveRDS(jmat.sub, file = outrds)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.