a.id1 <- "0009b464-b376-4fbc-8a56-da538269a02f"
all.dbs <- join_PCAWG_callers(
aliquot.id = a.id1,
indiv.vcf.dir = "~/collab.vcf/",
pcawg.vcf.dir = "~/pcawg.vcf/final_consensus_12aug_passonly/snv_mnv/")
all.dbs <- merge_PCAWG_callers(
aliquot.id = a.id1,
indiv.vcf.dir = "~/collab.vcf/",
pcawg.vcf.dir = "~/pcawg.vcf/final_consensus_12aug_passonly/snv_mnv/",
out.dir = "~/mvv/foo/")
all.dbs <- join_PCAWG_callers(
aliquot.id = "0009b464-b376-4fbc-8a56-da538269a02f",
indiv.vcf.dir = "~/collab.vcf/",
pcawg.vcf.dir = "~/pcawg.vcf/final_consensus_12aug_passonly/snv_mnv/")
# Original test files
files = c(
"~/collab.vcf/0009b464-b376-4fbc-8a56-da538269a02f.broad-mutect-v3.20160222.somatic.snv_mnv.vcf.gz",
"~/collab.vcf/0009b464-b376-4fbc-8a56-da538269a02f.dkfz-snvCalling_1-0-132-1-hpc.1510221050.somatic.snv_mnv.vcf.gz",
"~/collab.vcf/0009b464-b376-4fbc-8a56-da538269a02f.MUSE_1-0rc-vcf.20151216.somatic.snv_mnv.vcf.gz",
"~/collab.vcf/0009b464-b376-4fbc-8a56-da538269a02f.svcp_1-0-4.20150204.somatic.snv_mnv.vcf.gz",
"~/pcawg.vcf/final_consensus_12aug_passonly/snv_mnv//0009b464-b376-4fbc-8a56-da538269a02f.consensus.snv_mnv.vcf.gz"
)
## Stuff to give to Willie on 14 Jun
# one.sup.dbs <- VCF_to_BED(all.dbs[all.dbs$num.support == 1, ], "one.support.dbs.bed")
# two.sup.dbs <- VCF_to_BED(all.dbs[all.dbs$num.support == 2, ], "two.support.dbs.bed")
# high.sup.dbs <- VCF_to_BED(all.dbs[all.dbs$num.support > 2, ], "high.support.dbs.bed")
# test.case <- which(samp.sheet$aliquot_id == "0009b464-b376-4fbc-8a56-da538269a02f")
# samp.sheet[test.case, ]$icgc_donor_id
#"DO46416"
# from other .R file
donor.pairs[["DO46416"]]
# $SP101724
# Object ID File Name ICGC Donor Specimen ID Specimen Type Size (bytes)
# 1: 2c9d90f1-5c46-5f3f-8a7f-771b36414cf2 bf5b874240ec430239013f4292d4151c.bam DO46416 SP101724 Recurrent tumour - other 117118114996
# $SP101728
# Object ID File Name ICGC Donor Specimen ID Specimen Type Size (bytes)
# 1: 9ec1f43b-379c-58cd-85fe-3d09439058f1 7441677a3aed0864a23b17b84c0a28c9.bam DO46416 SP101728 Normal - blood derived 134788648263
## Test 3 kinds of unioned DBSs -- only one caller supported, two caller supported, and 3 or 4 callers supported
tmp.vcf <- all.dbs[all.dbs$num.support == 1, ]
# tmp.vcf <- tmp.vcf[1:10, ]
tmp2.vcf <- t(apply(tmp.vcf, MARGIN = 1, FUN = merge_DBS_calls_one_row, suffix.list = c("_mt", "_dk", "_ms", "_sa", "")))
colnames(tmp2.vcf) <- c("#CHROM", "POS", "REF", "ALT", "num.callers")
data.table::fwrite(tmp2.vcf, "new.tmp.vcf", sep = "\t")
nn.one.sup <-
DBSverify::Read_DBS_VCF_and_BAMs_to_verify_DBSs(
input.vcf = "new.tmp.vcf",
Nbam.name = "~/mvv/test.minibams/SP101728_oneSupport.bam",
Tbam.name = "~/mvv/test.minibams/SP101724_oneSupport.bam",
N.slice.dir = "one.support.N.slice.dir",
T.slice.dir = "one.support.T.slice.dir",
unlink.slice.dir = FALSE,
verbose = 1,
outfile = "new.one.support.eval.vcf"
)
# Test with something like
# fisher.test(matrix(c(40,0,35,3), ncol = 2), alternative = "g")
tmp.vcf <- all.dbs[all.dbs$num.support == 2, ]
tmp2.vcf <- t(apply(tmp.vcf, MARGIN = 1, FUN = merge_DBS_calls_one_row, suffix.list = c("_mt", "_dk", "_ms", "_sa", "")))
colnames(tmp2.vcf) <- c("#CHROM", "POS", "REF", "ALT", "num.callers")
data.table::fwrite(tmp2.vcf, "two.tmp.vcf", sep = "\t")
new.two.sup <-
DBSverify::Read_DBS_VCF_and_BAMs_to_verify_DBSs(
input.vcf = "two.tmp.vcf",
Nbam.name = "~/mvv/test.minibams/SP101728_twoSupport.bam",
Tbam.name = "~/mvv/test.minibams/SP101724_twoSupport.bam",
N.slice.dir = "two.support.N.slice.dir",
T.slice.dir = "two.support.T.slice.dir",
unlink.slice.dir = FALSE,
verbose = 1,
outfile = "new.two.support.eval.vcf"
)
# Investigate? 8:126314418
# " 4 67785957
tmp.vcf <- all.dbs[all.dbs$num.support > 2, ]
t(apply(tmp.vcf, MARGIN = 1, FUN = merge_DBS_calls_one_row, suffix.list = c("_mt", "_dk", "_ms", "_sa", "")))
colnames(tmp2.vcf) <- c("#CHROM", "POS", "REF", "ALT", "num.callers")
data.table::fwrite(tmp2.vcf, "new.high.tmp.vcf", sep = "\t")
new.high.sup <-
DBSverify::Read_DBS_VCF_and_BAMs_to_verify_DBSs(
input.vcf = "new.high.tmp.vcf",
Nbam.name = "SP101728_highSupport.bam",
Tbam.name = "SP101724_highSupport.bam",
N.slice.dir = "high.support.N.slice.dir",
T.slice.dir = "high.support.T.slice.dir",
unlink.slice.dir = FALSE,
verbose = 1,
outfile = "new.high.support.eval.vcf"
)
setwd("~/DBSverify/")
## From here down is an investigation of SBSs calls and their relationship to DBS calls.
## Very few odd cases, so we just went with DBSs calls (as synthesized from individual caller SBS calls; i.e.
## for each caller we generated DBS calls, and then proceeded from there.)
sbs.foo <- ICAMS::ReadAndSplitVCFs(files = files, variant.caller = "unknown", always.merge.SBS = FALSE )
sbs <- list(mutect=tibble(sbs.foo$SBS[[1]][ , c(1,2,4,5)]),
dkfz =tibble(sbs.foo$SBS[[2]][ , c(1,2,4,5)]),
muse =tibble(sbs.foo$SBS[[3]][ , c(1,2,4,5)]),
sanger=tibble(sbs.foo$SBS[[4]][ , c(1,2,4,5)]),
pcawg =tibble(sbs.foo$SBS[[5]][ , c(1,2,4:8)]))
colnames(sbs$mutect)[3:4] <- paste0(colnames(sbs$mutect)[3:4], "_SBS_mt")
colnames(sbs$dkfz)[3:4] <- paste0(colnames(sbs$dkfz)[3:4], "_SBS_dk")
colnames(sbs$muse)[3:4] <- paste0(colnames(sbs$muse)[3:4], "_SBS_ms")
colnames(sbs$sanger)[3:4] <- paste0(colnames(sbs$sanger)[3:4], "_SBS_sa")
colnames(sbs$pcawg)[3:ncol(sbs$pcawg)] <- paste0(colnames(sbs$pcawg)[3:ncol(sbs$pcawg)], "_SBS_pc")
all.sbs <- full_join(sbs$mutect,
full_join(sbs$dkfz,
full_join(sbs$muse,
full_join(sbs$sanger, sbs$pcawg))))
all <- full_join(all.dbs, all.sbs)
alls <- (all %>% arrange(CHROM, POS))
sup.2.and.pcawg <- which(is.na(alls$REF) & alls$num.support == 2)
sup.2.and.pcawg<- sort(c(sup.2.and.pcawg + 1, sup.2.and.pcawg))
dbsidx <- which(!is.na(alls$num.support))
dd <- c(dbsidx, (dbsidx + 1))
dd <- sort(dd)
alldd <- (alls[dd, ])
idx2 <- which(!is.na(alldd$num.support) & alldd$num.support > 1 & is.na(alldd$REF)) # NOT IN PCAWG BUT OTHER CALLERS HAVE
idd <- c(idx2, idx2 + 1)
idd <- sort(idd)
investigate <- alldd[idd, ]
View(investigate[53:54, ]) # why did't the PCAWG SBSs get merged? # They did get merged, see below
# 15 : 40527158
# 15 : 40527159
pfoo <- sbs.foo$SBS[[5]]
which(pfoo$CHROM == 15 & pfoo$POS == 40527158)
which(pfoo$CHROM == 15 & pfoo$POS == 40527159)
pfoo <- dbs.foo$SBS[[5]]
which(pfoo$CHROM == 15 & pfoo$POS == 40527158)
which(pfoo$CHROM == 15 & pfoo$POS == 40527159)
pfoo <- dbs.foo$DBS[[5]]
which(pfoo$CHROM == 15 & pfoo$POS == 40527158)
which(pfoo$CHROM == 15 & pfoo$POS == 40527159)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.