knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

R Markdown

options(mc.cores = 10)
library(seqqc)
library(seqsetvis)
library(BiocFileCache)
bfc = BiocFileCache()
bfcif = ssvRecipes::bfcif
fastq_dir = file.path("/slipstream/home/mmariani/projects/", 
                      "vzv_interactions/vzv_ctcf_cohrs_chip_data/raw_data/")
fastq_files = dir(fastq_dir, 
                  pattern = "fastq.gz$", 
                  full.names = TRUE)
peak_dir = file.path("/slipstream/home/mmariani/projects/", 
                     "vzv_interactions/vzv_cohrs_ctcf_chip/", 
                     "vzv_cohrs_ctcf_chip_macs2_output/hg38_pooled_inputs/narrow")
peak_files = dir(peak_dir, 
                 pattern = "Peak$", 
                 full.names = TRUE)
bam_dir = file.path("/slipstream/home/mmariani/projects/", 
                    "/vzv_interactions/vzv_cohrs_ctcf_chip/", 
                    "vzv_cohrs_ctcf_chip_marked_duplicates/hg38")
bam_files = dir(bam_dir, pattern = "bam$", 
                full.names = TRUE)
bw_dir = file.path("/slipstream/home/mmariani/projects/", 
                   "/vzv_interactions/vzv_cohrs_ctcf_chip/", 
                   "vzv_cohrs_ctcf_chip_macs2_output/hg38/fe")
bw_dir.pooled = file.path("/slipstream/home/mmariani/projects/", 
                          "/vzv_interactions/vzv_cohrs_ctcf_chip/", 
                          "vzv_cohrs_ctcf_chip_macs2_output/hg38_pooled/fe")

bw_files = dir(bw_dir, pattern = "bw$", 
               full.names = TRUE)
fix_names = function(in_f){
  f = basename(in_f)
  f = sub("Frietze_Cohrs_11497_180409_SNL128_0174_ACC3JAACXX_", "", f)
  f = sub("Control_HFL_AntiCTCF_S31_L005.+", "HFL_con_CTCF_set1", f)
  f = sub("Control_HFL_Input_S30_L005.+", "HFL_con_in_set1", f)
  f = sub("VZV_AntiCTCF_S33_L005.+", "HFL_VZV_CTCF_set1", f)
  f = sub("VZV_Input_S32_L005.+", "HFL_VZV_in_set1", f)
  #look behind (?<=set[0-9])_.+
  f = sub("(?<=set[0-9])_.+", "", f, perl = TRUE)
  f
}

names(bw_files) = fix_names(bw_files)
names(bam_files) = fix_names(bam_files)
names(peak_files) = fix_names(peak_files)
names(fastq_files) = fix_names(fastq_files)
#using rep as treatment here becuase that looks more interesting
my_make_config_dt = function(files){
  config_dt = data.table(file = files, name = names(files))
  config_dt[, c("cell", "virus", "mark", "rep") := tstrsplit(name, "_")]
  config_dt[,treatment := rep]
  config_dt = config_dt[order(rep)][order(virus)][order(mark)]
  config_dt$name = factor(config_dt$name, levels = config_dt$name)
  config_dt
}
fastq_config_dt = my_make_config_dt(fastq_files)
peak_config_dt = my_make_config_dt(peak_files)
bam_config_dt = my_make_config_dt(bam_files)
bw_config_dt = my_make_config_dt(bw_files)

stopifnot(file.exists(fastq_config_dt$file))
stopifnot(file.exists(peak_config_dt$file))
stopifnot(file.exists(bam_config_dt$file))
stopifnot(file.exists(bw_config_dt$file))
fq_dt = make_fq_dt(fastq_config_dt)

plot_fq_dt(fq_dt)
peak_grs = easyLoad_narrowPeak(peak_config_dt$file, file_names = peak_config_dt$name)
olaps_gr = ssvOverlapIntervalSets(peak_grs)
cons2_gr = ssvConsensusIntervalSets(peak_grs, min_number = 3, min_fraction = 0)
set.seed(0)
asses_gr = resize(sample(cons2_gr, 5e3), 600, fix = "center")
asses_gr = prepare_fetch_GRanges_names(asses_gr)
feature_plots = plot_feature_comparison(peak_grs)
cowplot::plot_grid(plotlist = feature_plots$all)
cowplot::plot_grid(plotlist = feature_plots$consensus)
scc_dt = make_scc_dt(bam_config_dt, query_gr = asses_gr)
fl = round(mean(scc_dt$fragment_length[grepl("CTCF", name)]$fragment_length))

scc_plots = plot_scc_dt(scc_dt)

scc_plots$scc_curves + facet_wrap("~name", ncol = 3)
scc_plots$scc_dots + facet_wrap("~name", ncol = 3)
view_size = 3e3
asses_gr.c = bfcif(bfc, rname = paste("center_qgr", digest::digest(list(bw_config_dt, asses_gr, view_size, fl))), function(){
  make_centered_query_gr(bw_config_dt, asses_gr, view_size = view_size)
})
frip_dt = bfcif(bfc, rname = paste("frip_dt", digest::digest(list(bam_config_dt, asses_gr.c))), function(){
  make_frip_dt(bam_config_dt, asses_gr.c)
})
frip_dt$name = factor(frip_dt$name, levels = levels(bam_config_dt$name))
plot_frip_dt(frip_dt, name_lev = levels(bam_config_dt$name))
anno_grs = make_anno_grs("/slipstream//home/joeboyd/gencode.v35.annotation.gtf.gz")
anno_dt = make_anno_dt(peak_grs, anno_grs)
plot_anno_overlap(anno_dt)
prof_dt.bam = ssvFetchBam(bam_config_dt, asses_gr.c, return_data.table = TRUE, n_region_splits = 20, fragLens = 120)
clust_dt.bam = ssvSignalClustering(prof_dt.bam, nclust = 6, max_cols = Inf)
assign_dt.bam = make_assign_dt(clust_dt.bam)

signal_plots.bam = plot_signals(prof_dt.bam, asses_gr.c, assign_dt = assign_dt.bam, frip_dt = frip_dt, scc_dt = scc_dt, anno_grs = anno_grs)
signal_plots.bam[-1]
prof_dt.bw = ssvFetchBigwig(bw_config_dt, asses_gr.c, return_data.table = TRUE, n_region_splits = 20)
# clust_dt.bw = ssvSignalClustering(prof_dt.bw, nclust = 6, max_cols = Inf)
# assign_dt.bw = make_assign_dt(clust_dt.bw)

signal_plots.bw = plot_signals(prof_dt.bw, assign_dt = assign_dt.bam)
signal_plots.bw[2]


FrietzeLabUVM/ssvQC documentation built on March 25, 2024, 12:24 a.m.