In FrietzeLabUVM/ssvQC: QC for enrichment based NGS

knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, results='hide')

Focus on Ikaros

Assess in detail the differences between DF4 and WT Ikaros binding.

Incorporate ATAC-seq data to verify and IGG cut&run to weed out artifacts.

options(mc.cores = 10)
library(seqqc)
library(seqsetvis)
library(BiocFileCache)
bfc = BiocFileCache()
bfcif = ssvRecipes::bfcif
wd = "/slipstream/home/joeboyd/R/seqqc/PR_selected/cutnrun"
wd
bam_dirs = wd
peak_dirs = file.path(wd, "peaks_deseq2")

stopifnot(all(dir.exists(wd)))
stopifnot(all(dir.exists(bam_dirs)))
stopifnot(all(dir.exists(peak_dirs)))

peak_files = dir(peak_dirs, 
                 pattern = "narrowPeak$", 
                 full.names = TRUE)
peak_files = peak_files[!duplicated(basename(peak_files))]

bam_files = dir(bam_dirs, pattern = "bam$", 
                full.names = TRUE)
bam_files = bam_files[!duplicated(basename(bam_files))]

fix_names = function(in_f){
  out_name = basename(in_f)
  out_name = toupper(out_name)

  out_name = sapply(strsplit(out_name, "[_\\.]"), function(x)paste(x[1:3], collapse = "_"))
  out_name = sub("MERGE$", "MERGED", out_name)

  out_name[out_name == "DF4_H3K9AC_REP1"] = c("DF4_H3K9AC_REP1", "DF4_H3K9AC_REP2")


  toupper(out_name)
}

names(bam_files) = fix_names(bam_files)
names(peak_files) = fix_names(peak_files)

if(any(duplicated(names(bam_files)))){
  stop()
  names(bam_files)[duplicated(names(bam_files))]  
}
if(any(duplicated(names(peak_files)))){
  stop()
  names(peak_files)[duplicated(names(peak_files))]  
}

filter_files = function(f){
  f #no filtering in this case
}
bam_files.f = filter_files(bam_files)
peak_files.f = filter_files(peak_files)

#using rep as treatment here becuase that looks more interesting
my_make_config_dt = function(files){
  config_dt = data.table(file = files, name = names(files))
  config_dt[, c("cell", "mark", "rep") := tstrsplit(name, "_", keep = 1:3)]

  config_dt[, treatment := mark]
  config_dt = config_dt[order(rep)][order(treatment)][order(cell)]

  config_dt$name = factor(config_dt$name, levels = config_dt$name)
  config_dt$name_split = factor(gsub("_", "\n", config_dt$name), levels = gsub("_", "\n", config_dt$name))
  config_dt
}
peak_config_dt = my_make_config_dt(peak_files.f)
bam_config_dt = my_make_config_dt(bam_files.f)

stopifnot(file.exists(peak_config_dt$file))
stopifnot(file.exists(bam_config_dt$file))

Read depth is comparable

get_mapped_reads = function(f){
  stats = Rsamtools::idxstatsBam(f)
  stats = subset(stats, grepl("chr[0-9XY]+$", seqnames ))
  sum(stats[,3])
}
bam_config_dt[, cell_simple := sub("X", "", cell)]

cell_colors = c("DF4" = "firebrick1", "WT" = "gray20")

bam_config_dt[, mapped_reads := get_mapped_reads(file), .(file)]
ggplot(bam_config_dt[mark %in% c("IKAROS")], aes(x = name, y = mapped_reads, fill = cell_simple)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = cell_colors) +
  scale_y_continuous(labels = function(x)x/1e6) +
  labs(y = "M mapped reads", fill = "Genotype") +
  theme(panel.background = element_blank())

ggplot(bam_config_dt[mark %in% c("IGG")], aes(x = name, y = mapped_reads, fill = cell_simple)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = cell_colors) +
  scale_y_continuous(labels = function(x)x/1e6) +
  labs(y = "M mapped reads", fill = "Genotype") +
  theme(panel.background = element_blank())

bam_igg_config_dt = bam_config_dt[mark == "IGG"]
peak_config_dt = peak_config_dt[mark == "IKAROS"]
bam_config_dt = bam_config_dt[mark == "IKAROS"]

m = "IKAROS"
message(m)
main_title = paste("Overlap of", m, "peaks")
olap_title = paste(m, "peaks by frequency")
i_peak_config_dt = peak_config_dt[mark == m]
i_bam_config_dt = bam_config_dt[mark == m]

cell_splits = split(i_peak_config_dt, ifelse(grepl("WT", i_peak_config_dt$cell), "WT", "DF4"))
WT_cols = safeBrew(length(cell_splits$WT$name)+2, "Greys")[seq(3, length(cell_splits$WT$name)+2)]
names(WT_cols) = cell_splits$WT$name
DF_cols = safeBrew(length(cell_splits$DF4$name)+2, "Reds")[seq(3, length(cell_splits$DF4$name)+2)]
names(DF_cols) = cell_splits$DF4$name
name_cols = c(WT_cols, DF_cols)

i_peak_config_dt$name = factor(i_peak_config_dt$name, levels = names(name_cols))
i_bam_config_dt$name = factor(i_bam_config_dt$name, levels = names(name_cols))

peak_grs = easyLoad_narrowPeak(i_peak_config_dt$file, file_names = i_peak_config_dt$name)
peak_grs = lapply(peak_grs, function(x)x[order(x$pValue, decreasing = TRUE)])
peak_grs = lapply(peak_grs, function(x){x[nchar(as.character(seqnames(x))) <= 5]})
olaps_gr = ssvOverlapIntervalSets(peak_grs)

WT rep1 has ~30k unique peaks. WT has many more peaks generally that are not called in DF4. 600+ peaks called in all 4.

p_freq_upset = ssvFeatureUpset(olaps_gr) + labs(title = olap_title)
p_freq_upset

cons2_gr = ssvConsensusIntervalSets(peak_grs, min_number = 2, min_fraction = 0)
p_cons2_upset = ssvFeatureUpset(cons2_gr) + labs(title = paste(m, "consensus in any 2"))
p_cons2_upset

I want to cast a wide net so I'll carry forward peaks called in any of the 4 samples.

qgr = sample(resize(olaps_gr, 3e3, fix = "center"), length(olaps_gr))
options(mc.cores = 30)

prof_dt = bfcif(bfc, digest::digest(list(i_bam_config_dt, qgr, "signal")), function(){
  ssvFetchBamPE(i_bam_config_dt, qgr, n_region_splits = 50, return_data.table = TRUE)
})

prof_dt.igg = bfcif(bfc, digest::digest(list(bam_igg_config_dt, qgr, "igg")), function(){
  ssvFetchBamPE(bam_igg_config_dt, qgr, n_region_splits = 50, return_data.table = TRUE)
})

set.seed(0)
norm_dt = append_ynorm(prof_dt, aggFUN2 = function(x)quantile(x, .99))

sel_id =names(subset(qgr, DF4_IKAROS_REP1 | DF4_IKAROS_REP2))

Keeping in mind that read death is very similar across these 4 samples. We have 1 very strong WT rep.

clust_dt = ssvSignalClustering(norm_dt[!id %in% sel_id], fill_ = "y", max_rows = 2e3)
ssvSignalHeatmap(clust_dt, facet_ = "name", fill_ = "y", max_rows = Inf, fill_limits = c(0, 80)) +
  labs(title = "no normalization", subtitle = "no DF4, only WT peaks")

clust_dt = ssvSignalClustering(norm_dt[id %in% sel_id], fill_ = "y", max_rows = 2e3)
ssvSignalHeatmap(clust_dt, facet_ = "name", fill_ = "y", max_rows = Inf, fill_limits = c(0, 80)) +
  labs(title = "no normalization", subtitle = "DF4 peaks, can be WT")

If we apply normalization, DF4 looks basically like WT.

clust_dt = ssvSignalClustering(norm_dt[!id %in% sel_id], fill_ = "y_norm", max_rows = 2e3)
ssvSignalHeatmap(clust_dt, facet_ = "name", fill_ = "y_norm", max_rows = Inf)+
  labs(title = "normalized", subtitle = "no DF4, only WT peaks")

agg_dt = clust_dt[, .(y = mean(y), y_norm = mean(y_norm)), .(name, x, cluster_id, cell_simple)]
ggplot(agg_dt, aes(x = x, y = y_norm, color = cell_simple)) +
  geom_path() +
  facet_grid(cluster_id~name) +
  scale_color_manual(values = cell_colors)+
  labs(title = "normalized", subtitle = "no DF4, only WT peaks")

In broad strokes, all 4 samples seem to have the same profile of enrichment, just at varying strengths.

clust_dt = ssvSignalClustering(norm_dt[id %in% sel_id], fill_ = "y_norm", max_rows = 2000)
ssvSignalHeatmap(clust_dt, facet_ = "name", fill_ = "y_norm", max_rows = Inf)

Let's investigate this idea of same profiles at varying strengths further.

First, let's compare the WT replicates by plotting the maximum pileup at each peak region in a scatterplot, rep2 vs rep1.

my_scatter = function(x_name, y_name, trim = .002, plot_linear = TRUE){
  xy_dt = ssvSignalScatterplot(prof_dt, x_name = x_name, y_name = y_name, xy_variable = "name", return_data = TRUE)

  xy_dt[, trimmed := xval < quantile(xval, trim) | xval > quantile(xval, 1-trim) | yval < quantile(yval, trim) | yval > quantile(yval, 1-trim)]

  xy_dt[, lg_xval := log2(xval + 1)]
  xy_dt[, lg_yval := log2(yval + 1)]

  lm_coef = coef(lm(yval~0+xval, xy_dt[trimmed == FALSE]))

  xs = c(1, max(xy_dt[trimmed == FALSE]$xval))
  if(length(lm_coef) == 1){
    ys = xs*lm_coef[1]
    slope = lm_coef[1]
  }else{
    ys = lm_coef[1] + lm_coef[2]*xs  
    slope = lm_coef[2]
  }

  lg_xs = log2(xs)
  lg_ys = log2(ys)

  p_wt_2_vs_1.linear = ggplot(xy_dt[trimmed == FALSE], aes(x = xval, y = yval)) +
    geom_point(alpha = .2, shape = 19) +
    labs(x = x_name, y = y_name) +
    annotate("line", x = xs, y = ys, color = "blue")+
    labs(subtitle = paste("slope is", round(slope, 3))) +
    theme(panel.background = element_blank())

  p_wt_2_vs_1 = ggplot(xy_dt, aes(x = lg_xval, y = lg_yval, color = trimmed)) +
    geom_point(shape = 19) +
    labs(x = x_name, y = y_name) +
    scale_y_continuous(labels = function(x)2^round(x)) +
    scale_x_continuous(labels = function(x)2^round(x))+
    annotate("line", x = lg_xs, y = lg_ys, color = "blue")+
    labs(subtitle = paste("slope is", round(slope, 3))) +
    scale_color_manual(values = c("TRUE" = "gray80", "FALSE" = "black"))+
    theme(panel.background = element_blank())
  # geom_smooth(method = "lm", se = FALSE)

  if(plot_linear){
    p_wt_2_vs_1.linear  
  }else{
    p_wt_2_vs_1
  }

}

my_scatter(
  x_name = "WT_IKAROS_REP1",
  y_name = "WT_IKAROS_REP2"
)

my_scatter(
  x_name = "WT_IKAROS_REP1",
  y_name = "DF4_IKAROS_REP1"
)

my_scatter(
  x_name = "WT_IKAROS_REP1",
  y_name = "DF4_IKAROS_REP2"
)

Same plots but in log scale

my_scatter(
  x_name = "WT_IKAROS_REP1",
  y_name = "WT_IKAROS_REP2", 
  plot_linear = FALSE
)

my_scatter(
  x_name = "WT_IKAROS_REP1",
  y_name = "DF4_IKAROS_REP1", 
  plot_linear = FALSE
)

my_scatter(
  x_name = "WT_IKAROS_REP1",
  y_name = "DF4_IKAROS_REP2", 
  plot_linear = FALSE
)

My interpretation of slope here is a that it represents the relative efficiency of these samples. The lower relative efficiency in the DF4 samples probably has a biological basis but there is a high degree of technical variation too.

avg_dt = prof_dt[, .(y = mean(y)), .(cell, mark, id, x)]

df_1 = data.frame(id = names(qgr), group = ifelse(qgr$DF4_IKAROS_REP1, "DF4_IKAROS_REP1", "-"))
df_2 = data.frame(id = names(qgr), group = ifelse(qgr$DF4_IKAROS_REP2, "DF4_IKAROS_REP2", "-"))
df_3 = data.frame(id = names(qgr), group = ifelse(qgr$WT_IKAROS_REP1, "WT_IKAROS_REP1", "-"))
df_4 = data.frame(id = names(qgr), group = ifelse(qgr$WT_IKAROS_REP2, "WT_IKAROS_REP2", "-"))

df_df4 = data.frame(id = names(qgr), group = ifelse(names(qgr) %in% sel_id, "DF4 peaks", "-"))

df_all = ssvFactorizeMembTable(qgr)

p_scatter = ssvSignalScatterplot(avg_dt, x_name = "WT", y_name = "DF4", xy_variable = "cell", fixed_coords = FALSE, color_table = df_df4) +
  labs(title = "Average DF4 vs WT max pileup")
p_scatter + #annotate("line", x = c(0, 3e3), y = c(0, 3e3), linetype = 2)
  coord_cartesian(xlim = c(0, 2e3), ylim = c(0, 2e3)) +
  labs(subtitle = "showing outliers")
p_scatter +
  coord_cartesian(xlim = c(0,150), ylim = c(0, 37.5)) +
  labs(subtitle = "zoom to main area")

Let's look at that group of peaks that are basically 0 in WT

xy_dt = ssvSignalScatterplot(avg_dt, x_name = "WT", y_name = "DF4", xy_variable = "cell", fixed_coords = FALSE, color_table = df_df4, return_data = TRUE)

low_wt_ids = xy_dt[xval < 3]$id


ggplot(xy_dt, aes(x = xval, y = yval, color = id %in% low_wt_ids)) + facet_wrap(~group) +
  geom_point()+
  coord_cartesian(xlim = c(0,150), ylim = c(0, 37.5))

ssvSignalHeatmap(rbind(prof_dt, prof_dt.igg)[id %in% low_wt_ids], facet_ = "name_split")

Signal only in 1 rep for DF4 Ikaros that matches IGG in WT (but not DF4 oddly. oh it's because peaks were called vs DF4 IGG, duh).

My verdict : artifacts

Next let's look at the really high pileup peaks. Remember, these were ignored in the slope analysis.

high_ids = xy_dt[xval > 350 | yval > 150]$id
ggplot(xy_dt, aes(x = xval, y = yval, color = id %in% high_ids)) + facet_wrap(~group) +
  geom_point() +
  labs(y = "avg DF4 pileup", x = "avg WT pileup")
ssvSignalHeatmap(rbind(prof_dt, prof_dt.igg)[id %in% high_ids], facet_ = "name_split", fill_limits = c(0, 2000))

The peaks consistently have signal in IGG comparable to Ikaros samples.

My verdict : artifacts.

Now we'll look at ATAC-seq after removing these artifact sites.

prof_dt.cleaned = prof_dt[id %in% names(qgr.cleaned)]
prof_dt.igg.cleaned = prof_dt.igg[id %in% names(qgr.cleaned)]

qgr.cleaned = qgr[setdiff(names(qgr), c(high_ids, low_wt_ids))]

atac_wd = "/slipstream/home/joeboyd/R/seqqc/PR_selected/atac"
atac_bam_files = dir(atac_wd, pattern = ".bam$", full.names = TRUE)
atac_config_dt = data.table(file = atac_bam_files)
atac_config_dt[, c("cell", "time", "treatment", "rep") := tstrsplit(basename(file), "_", keep = 1:4)]
atac_config_dt[, name := paste(cell, treatment, rep, sep = "_")]
atac_config_dt[, name_split := paste(cell, treatment, rep, sep = "\n")]

atac_config_dt$name = factor(atac_config_dt$name, levels = atac_config_dt$name)
atac_config_dt$name_split = factor(atac_config_dt$name_split, levels = atac_config_dt$name_split)

atac_config_dt$name_split = factor(atac_config_dt$name_split, levels = rev(levels(prof_dt.atac$name_split)))
atac_config_dt$name = factor(atac_config_dt$name, levels = rev(levels(prof_dt.atac$name)))

atac_config_dt[, mapped_reads := get_mapped_reads(file), .(file)]

sl_tab =table(seqnames(qgr.cleaned))
sl = names(sl_tab[sl_tab>0])
seqlevels(qgr.cleaned) = sl

prof_dt.atac = bfcif(bfc, digest::digest(list(atac_config_dt, qgr.cleaned, "igg")), function(){
  ssvFetchBamPE(atac_config_dt, qgr.cleaned, n_region_splits = 50, return_data.table = TRUE)
})

qgr.cleaned.centered = centerGRangesAtMax(prof_dt.atac, qgr.cleaned, width = 3e3)

prof_dt.atac = bfcif(bfc, digest::digest(list(atac_config_dt, qgr.cleaned.centered, "igg")), function(){
  ssvFetchBamPE(atac_config_dt, qgr.cleaned.centered, n_region_splits = 50, return_data.table = TRUE)
})

clust_dt.atac = ssvSignalClustering(prof_dt.atac, facet_ = "name_split")
ssvSignalHeatmap(clust_dt.atac, facet_ = "name_split", fill_limits = c(0, 500))

cap_dt = atac_config_dt[, .(name, mapped_reads)]
cap_dt[, y_cap_value := mapped_reads/1e6]

norm_dt.atac = append_ynorm(prof_dt.atac, cap_dt = cap_dt, by2 = "name", do_not_cap = TRUE)

ssvSignalHeatmap(norm_dt.atac, facet_ = "name_split", fill_ = "y_norm", fill_limits = c(0, 20))

norm_dt.atac[, y_relative := y_norm / max(y_norm, 1), .(id)]

ssvSignalHeatmap(norm_dt.atac, facet_ = "name_split", fill_ = "y_relative", fill_limits = c(0, 1))

Nothing convincing there. Let's look at the diff results.

diff_files = dir(dir(dir(atac_wd, full.names = TRUE), full.names = TRUE), pattern = "sig", full.names = TRUE)
names(diff_files) = sub(".res.+", "", basename(diff_files))
dff_dtl = lapply(diff_files, fread)
diff_grs = lapply(dff_dtl, GRanges)

ssvFeatureUpset(list(IKAROS = qgr.cleaned.centered, ATAC_diff = reduce(unlist(GRangesList(diff_grs)))))

memb.atac = ssvOverlapIntervalSets(c(list(IKAROS = qgr.cleaned.centered), diff_grs), use_first = TRUE)
any_diff = rowSums(as.data.frame(mcols(memb.atac))[,-1]) > 0
memb.atac.diff = memb.atac[any_diff]
any_diff_ids = names(subsetByOverlaps(qgr.cleaned.centered, memb.atac[any_diff]))
ssvFeatureUpset(memb.atac) + labs(title = "IKAROS sites with diff ATAC results")

So there are a few hundred Ikaros sites with diff ATAC.

Let's look at them.

set.seed(0)
clust_dt.diff = ssvSignalClustering(norm_dt.atac[id %in% any_diff_ids], nclust = 6,
                 facet_ = "name_split", 
                 fill_ = "y_relative")

agg_dt.diff = clust_dt.diff[, .(y = mean(y), y_norm = mean(y_norm), y_relative = mean(y_relative)), .(x, name, name_split, cluster_id, cell, treatment)]
p_cell = ggplot(agg_dt.diff, aes(x = x, y = y_relative, color = cell, group = name_split)) +
  geom_path() +
  facet_grid(cluster_id~treatment) +
  scale_color_manual(values = cell_colors)

p_treat = ggplot(agg_dt.diff, aes(x = x, y = y_relative, color = treatment, group = name_split)) +
  geom_path() +
  facet_grid(cluster_id~cell) +
  scale_color_brewer(palette = "Dark2")


assign_dt.diff = unique(clust_dt.diff[, .(id, cluster_id)])

diff_dt = ssvFetchGRanges(diff_grs, qgr.cleaned.centered[any_diff_ids], return_data.table = TRUE)

diff_dt = merge(diff_dt, assign_dt.diff, by = "id")
diff_dt$id = factor(diff_dt$id, levels = levels(assign_dt.diff$id))

p_heat_atac = ssvSignalHeatmap(clust_dt.diff, 
                 facet_ = "name_split", 
                 fill_ = "y_relative", 
                 fill_limits = c(0, 1), show_cluster_bars = TRUE) +
  labs(title = "ATAC reads at IKAROS sites that have diff ATAC")

p_heat_diff = ssvSignalHeatmap.ClusterBars(diff_dt, 
                 facet_ = "sample", 
                 fill_ = "y", 
                 fill_limits = c(0, 1))+
  labs(title = "diff ATAC call at IKAROS sites that have diff ATAC")

pg = cowplot::plot_grid(p_heat_atac, p_heat_diff, p_cell, p_treat)
ggsave("tmp.png", pg, width = 14, height = 12)

prof_dt.all = rbind(prof_dt, prof_dt.igg, prof_dt.atac, fill = TRUE)
norm_dt.all = append_ynorm(prof_dt.all)
norm_dt.all[, y_relative := y_norm / max(y_norm, 1), .(id)]
norm_dt.all$name_split = droplevels(norm_dt.all$name_split)
# clust_dt.all = ssvSignalClustering(norm_dt.all[id %in% any_diff_ids], facet_ = "name_split", fill_ = "y")

clust_dt.all = norm_dt.all[id %in% diff_dt$id]
clust_dt.all = merge(clust_dt.all, assign_dt.diff, by = "id")
clust_dt.all$id = factor(clust_dt.all$id, levels = levels(assign_dt.diff$id))

ssvSignalHeatmap(clust_dt.all, fill_ = "y", facet_ = "name_split", fill_limits = c(0, 50)) +
  labs(fill = "read pileup", title = "")

diff_olaps = ssvOverlapIntervalSets(diff_grs)
ssvFeatureUpset(diff_olaps)
qgr.no_ik = subsetByOverlaps(diff_olaps, qgr, invert = TRUE)

prof_dt.atac.no_ik.raw = bfcif(bfc, digest::digest(list(atac_config_dt, qgr.no_ik, "igg")), function(){
  ssvFetchBamPE(atac_config_dt, qgr.no_ik, n_region_splits = 50, return_data.table = TRUE)
})

qgr.no_ik.centered = centerGRangesAtMax(prof_dt.atac.no_ik.raw, qgr.no_ik, width = 3e3)

prof_dt.atac.no_ik.raw = bfcif(bfc, digest::digest(list(atac_config_dt, qgr.no_ik.centered, "igg")), function(){
  ssvFetchBamPE(atac_config_dt, qgr.no_ik.centered, n_region_splits = 50, return_data.table = TRUE)
})

prof_dt.no_ik = bfcif(bfc, digest::digest(list(i_bam_config_dt, qgr.no_ik.centered, "signal")), function(){
  ssvFetchBamPE(i_bam_config_dt, qgr.no_ik.centered, n_region_splits = 50, return_data.table = TRUE)
})

prof_dt.igg.no_id = bfcif(bfc, digest::digest(list(bam_igg_config_dt, qgr.no_ik.centered, "igg")), function(){
  ssvFetchBamPE(bam_igg_config_dt, qgr.no_ik.centered, n_region_splits = 50, return_data.table = TRUE)
})

prof_dt.atac.no_ik = append_ynorm(prof_dt.atac.no_ik.raw)
prof_dt.atac.no_ik[,y_relative := y_norm / max(y_norm, 1), .(id)]
atac_clust_dt.no_ik = ssvSignalClustering(prof_dt.atac.no_ik, fill_ = "y_relative", nclust = 4)

atac_assign_dt.no_ik = unique(atac_clust_dt.no_ik[, .(id, cluster_id)])

ssvSignalHeatmap.ClusterBars(prof_dt.atac.no_ik, fill_ = "y_relative", facet_ = "name_split")

prof_dt.no_ik = merge(prof_dt.no_ik, atac_assign_dt.no_ik, by = "id")
prof_dt.no_ik$id = factor(prof_dt.no_ik$id, levels = levels(atac_assign_dt.no_ik$id))

prof_dt.igg.no_id = merge(prof_dt.igg.no_id, atac_assign_dt.no_ik, by = "id")
prof_dt.igg.no_id$id = factor(prof_dt.igg.no_id$id, levels = levels(atac_assign_dt.no_ik$id))

ssvSignalHeatmap.ClusterBars(prof_dt.no_ik, fill_ = "y", facet_ = "name_split", fill_limits = c(0, 50))
ssvSignalHeatmap.ClusterBars(prof_dt.igg.no_id, fill_ = "y", facet_ = "name_split", fill_limits = c(0, 50))

agg_dt.no_ik = rbind(atac_clust_dt.no_ik, prof_dt.no_ik, prof_dt.igg.no_id, fill = TRUE)

agg_dt.no_ik = agg_dt.no_ik[, .(y = mean(y)), .(cluster_id, x, name_split, cell, mark, treatment)]
agg_dt.no_ik[is.na(mark), mark := paste("ATAC", treatment)]

agg_dt.no_ik[, cell := sub("X", "", cell)]

agg_dt.no_ik[, y_plot := y]
agg_dt.no_ik[!grepl("ATAC", mark), y_plot := y * 20]

ggplot(agg_dt.no_ik, aes(x = x, y = y_plot, color = cell, group = name_split)) + geom_path() +
  facet_grid(cluster_id~mark, scales = "free_y")