#' variantCalling
#'
#' @description
#' Aligned and co-cleaned BAM files are processed as tumor-normal pairs.
#' Variant calling is performed using six separate pipelines:
#'
#' \itemize{
#' \item \href{https://bioinformatics.mdanderson.org/public-software/muse/}{MuSE}
#' \item \href{https://gatk.broadinstitute.org/hc/en-us/articles/360035531132--How-to-Call-somatic-mutations-using-GATK4-Mutect2}{MuTect2}
#' \item \href{http://varscan.sourceforge.net/}{VarScan2}
#' \item \href{https://github.com/genome/somatic-sniper}{SomaticSniper}
#' \item \href{http://gmt.genome.wustl.edu/packages/pindel/}{Pindel}
#' \item \href{https://github.com/Illumina/strelka}{Strelka2}
#' \item \href{https://github.com/Illumina/manta}{Manta}
#' }
#'
#' @inheritParams somaticWgsAnalysis
#' @examples
#' \dontrun{
#' variantCalling(tumor_file = 'NET-10_TI_mkdup_sub_0005.bam',
#' normal_file = 'NET-10_BL_mkdup_sub_0005.bam',
#' sample_name = 'NET-10',
#' ref = 'hg38.fa',
#' out_path = 'processed/vcf',
#' threads = 2)
#'}
#' @export
#'
variantCalling <- function(tumor_file,
normal_file,
sample_name,
ref = '/imppc/labs/lplab/share/marc/refgen/hg38/hg38.fa',
out_path,
threads = 1,
gatk4 = '/imppc/labs/lplab/share/bin/gatk-4.1.3.0/gatk',
muse = '/imppc/labs/lplab/share/marc/repos/MuSE/MuSE',
bwa = 'bwa',
samblaster = 'samblaster',
sambamba = 'sambamba',
samtools = 'samtools',
somatic_sniper = '/imppc/labs/lplab/share/marc/repos/somatic-sniper/build/bin/bam-somaticsniper',
varscan2 = '/imppc/labs/lplab/share/marc/repos/varscan/VarScan.v2.4.4.jar',
manta = '/software/debian-8/bio/manta-1.4.0',
strelka2 = '/soft/bio/strelka-2.9.3',
af_only_gnomad = '/imppc/labs/lplab/share/marc/refgen/hg38/af-only-gnomad.hg38.vcf.gz',
pindel = '/soft/bio/pindel/pindel',
centromeres_telomeres = '/imppc/labs/lplab/share/marc/refgen/hg38/centromeres.bed',
perl = 'perl',
python_radia = '/imppc/labs/lplab/share/marc/repos/miniconda/bin/python2.7',
radia_path = '/imppc/labs/lplab/share/marc/repos/radia/scripts',
bcftools = 'bcftools',
tumor_vcf_id = 'TUMOR',
fpfilter = '/imppc/labs/lplab/share/marc/repos/fpfilter-tool/fpfilter.pl',
bam_readcount = 'bam-readcount'){
# check BAM indexes
sambambaIndex(bam = tumor_file,
threads = threads)
sambambaIndex(bam = normal_file,
threads = threads)
# MuSE calling
museCalling(tumor_file = tumor_file,
normal_file = normal_file,
sample_name = sample_name,
ref = ref,
out_path = out_path,
muse = muse,
af_only_gnomad = af_only_gnomad,
bcftools = bcftools)
# VarScan2 calling
varscan2Calling(tumor_file = tumor_file,
normal_file = normal_file,
sample_name = sample_name,
ref = ref,
out_path = out_path,
varscan2 = varscan2,
samtools = samtools,
fpfilter = fpfilter,
perl = perl,
bcftools = bcftools)
# Pindel calling
pindelCalling(tumor_file = tumor_file,
normal_file = normal_file,
sample_name = sample_name,
ref = ref,
out_path = out_path,
pindel = pindel,
sambamba = sambamba,
samtools = samtools,
threads = threads,
perl = perl,
gatk4 = gatk4,
centromeres_telomeres = centromeres_telomeres,
bcftools = bcftools)
# manta calling
mantaCalling(tumor_file = tumor_file,
normal_file = normal_file,
sample_name = sample_name,
ref = ref,
out_path = out_path,
manta = manta,
threads = threads)
# Strelka2 calling
strelka2Calling(tumor_file = tumor_file,
normal_file = normal_file,
sample_name = sample_name,
ref = ref,
out_path = out_path,
threads = threads,
indel_candidates = file.path(out_path, 'manta', paste0(sample_name, "_out_manta"),
'results/variants/candidateSmallIndels.vcf.gz'),
strelka2 = strelka2,
bcftools = bcftools)
# MuTect2 calling
mutect2Calling(tumor_file = tumor_file,
normal_file = normal_file,
sample_name = sample_name,
ref = ref,
out_path = out_path,
gatk4 = gatk4,
threads = threads,
af_only_gnomad = af_only_gnomad,
bcftools = bcftools)
# RADIA calling
radiaCalling(tumor_file = tumor_file,
normal_file = normal_file,
sample_name = sample_name,
ref = ref,
out_path = out_path,
radia_path = radia_path,
python_radia = python_radia,
samtools = samtools,
threads = threads,
bcftools = bcftools)
# Somatic Sniper calling
somaticsniperCalling(tumor_file = tumor_file,
normal_file = normal_file,
sample_name = sample_name,
ref = ref,
out_path = out_path,
somatic_sniper = somatic_sniper,
fpfilter = fpfilter,
perl = perl,
bcftools = bcftools)
}
#' museCalling
#'
#' @description
#' Tumor-normal pair somatic variant calling using MuSE
#'
#' @inheritParams variantCalling
#' @examples
#' \dontrun{
#' museCalling(tumor_file = 'raw/sample_tumor.bam',
#' normal_file = 'raw/sample_normal.bam',
#' sample_name = 'sample'
#' ref = 'ref/hg38.fa',
#' out_path = 'rst',
#' muse = '/bin/MuSE',
#' af_only_gnomad = 'hg38/af-only-gnomad.hg38.vcf.gz')
#' }
#'
#' @export
museCalling <- function(tumor_file,
normal_file,
sample_name,
ref,
out_path,
muse,
af_only_gnomad,
bcftools){
# define output
out_path <- file.path(out_path, "muse")
dir.create(out_path, showWarnings = F)
out_muse <- file.path(out_path, paste0(sample_name, "_muse"))
intermediate_muse_call <- file.path(out_path, paste0(sample_name))
message(paste(Sys.time(),"\n",
'Starting MuSE calling using:\n',
'>Tumor file:', tumor_file, '\n',
'>Normal file:', normal_file, '\n',
'>Sample name:', sample_name, '\n',
'>Reference genome:', ref, '\n',
'>Output path:', out_muse, '\n',
'>Genome aggregation database:', af_only_gnomad))
# generate intermediate muse call
system(paste(muse, 'call',
'-f', ref,
tumor_file,
normal_file,
'-O', intermediate_muse_call))
intermediate_muse_call <- file.path(out_path, paste0(sample_name, ".MuSE.txt"))
# muse calling
system(paste(muse, 'sump',
'-I', intermediate_muse_call,
'-G',
'-D', af_only_gnomad,
'-O', paste0(out_muse, '.vcf')))
unlink(intermediate_muse_call)
# PASS variants filtering
system(paste(bcftools, 'view',
'-f PASS', paste0(out_muse, '.vcf'),
'>', paste0(out_muse, '_PASS.vcf')))
message(paste(Sys.time(),"\n", 'Finished', sample_name, '\n'))
}
#' somaticsniperCalling
#'
#' @description
#' Tumor-normal pair somatic variant calling using SomaticSniper
#'
#' @inheritParams variantCalling
#' @examples
#' \dontrun{
#' somaticsniperCalling(tumor_file = 'raw/sample_tumor.bam',
#' normal_file = 'raw/sample_normal.bam',
#' sample_name = 'sample'
#' ref = 'ref/hg38.fa',
#' out_path = 'rst',
#' somatic_sniper = '/bin/bam-somaticsniper')
#'}
#'
#' @export
somaticsniperCalling <- function(tumor_file,
normal_file,
sample_name,
ref,
out_path,
somatic_sniper,
fpfilter,
perl,
bcftools){
# define output
out_path <- file.path(out_path, "somaticsniper")
dir.create(out_path, showWarnings = F)
out_somaticsniper <- file.path(out_path, paste0(sample_name, "_somaticsniper.vcf"))
message(paste(Sys.time(),"\n",
'Starting SomaticSniper calling using:\n',
'>Tumor file:', tumor_file, '\n',
'>Normal file:', normal_file, '\n',
'>Sample name:', sample_name, '\n',
'>Reference genome:', ref, '\n',
'>Output path:', out_somaticsniper, '\n',
'>Number of threads:', threads, '\n'))
# SomaticSniper calling
system(paste(somatic_sniper,
'-f', ref,
tumor_file,
normal_file,
out_somaticsniper,
'-q 1',
'-L',
'-G',
'-Q 15',
'-s 0.01',
'-T 0.85',
'-N 2',
'-r 0.001',
'-F vcf'))
sample_name <- gsub(".vcf", "", basename(out_somaticsniper))
fpfilterTool(vcf = out_somaticsniper,
tumor_file = tumor_file,
sample_name = sample_name,
ref = ref,
fpfilter = fpfilter,
perl = perl,
out_path = out_path,
bam_readcount = bam_readcount)
out_fpfilter <- file.path(out_path, "fpfilter")
vcf_filtered <- file.path(out_fpfilter, paste0(sample_name, ".vcf"))
# PASS variants filtering
system(paste(bcftools, 'view',
'-f PASS', vcf_filtered,
'>', paste0(file.path(out_fpfilter, paste0(sample_name, '_PASS.vcf')))))
message(paste(Sys.time(),"\n", 'Finished', sample_name, '\n'))
}
#' varscan2Calling
#'
#' @description
#' Tumor-normal pair somatic variant calling using VarScan2.
#'
#' @inheritParams variantCalling
#' @examples
#' \dontrun{
#' varscan2Calling(tumor_file = 'raw/sample_tumor.bam',
#' normal_file = 'raw/sample_normal.bam',
#' sample_name = 'sample'
#' ref = 'ref/hg38.fa',
#' out_path = 'rst',
#' varscan2 = '/bin/VarScan.v2.4.4.jar',
#' threads = 2,
#' samtools = '/bin/samtools')
#'}
#'
#' @export
varscan2Calling <- function(tumor_file,
normal_file,
sample_name,
ref,
out_path,
varscan2,
samtools,
fpfilter,
perl,
bcftools){
# define output
out_path <- file.path(out_path, "varscan2")
dir.create(out_path, showWarnings = F)
intermediate_mpileup <- file.path(out_path, paste0(sample_name, '_intermediate_mpileup.pileup'))
out_varscan2 <- file.path(out_path, paste0(sample_name, "_out_varscan2"))
message(paste(Sys.time(),"\n",
'Starting VarScan2 calling using:\n',
'>Tumor file:', tumor_file, '\n',
'>Normal file:', normal_file, '\n',
'>Sample name:', sample_name, '\n',
'>Reference genome:', ref, '\n',
'>Output path:', out_varscan2, '\n'))
# mpileup
system(paste(samtools, 'mpileup',
'-f', ref,
'-q 1',
'-B',
tumor_file,
normal_file,
">", intermediate_mpileup))
# VarScan2 calling
system(paste('java -jar', varscan2,
'somatic',
intermediate_mpileup,
out_varscan2,
'--mpileup 1',
'--min-coverage 8',
'--min-coverage-normal 8',
'--min-coverage-tumor 6',
'--min-var-freq 0.10',
'--min-freq-for-hom 0.75',
'--normal-purity 1.0',
'--tumor-purity 1.00',
'--p-value 0.99',
'--somatic-p-value 0.05',
'--strand-filter 0',
'--output-vcf'))
unlink(intermediate_mpileup)
# fpfilter
for(vcf in paste0(out_varscan2, c(".indel.vcf", ".snp.vcf"))){
sample_name <- gsub(".vcf", "", basename(vcf))
fpfilterTool(vcf = vcf,
tumor_file = tumor_file,
sample_name = sample_name,
ref = ref,
fpfilter = fpfilter,
perl = perl,
out_path = out_path,
bam_readcount = bam_readcount)
out_fpfilter <- file.path(out_path, "fpfilter")
vcf_filtered <- file.path(out_fpfilter, sample_name, ".vcf")
# PASS variants filtering
system(paste(bcftools, 'view',
'-f PASS', vcf_filtered,
'>', paste0(file.path(out_fpfilter, paste0(sample_name, '_PASS.vcf')))))
}
message(paste(Sys.time(),"\n", 'Finished', sample_name, '\n'))
}
#' pindelCalling
#'
#' @description
#' Tumor-normal pair somatic variant calling using Pindel
#'
#' @inheritParams variantCalling
#' @examples
#' \dontrun{
#' pindelCalling(tumor_file = 'raw/sample_tumor.bam',
#' normal_file = 'raw/sample_normal.bam',
#' sample_name = 'sample'
#' ref = 'ref/hg38.fa',
#' out_path = 'rst')
#'}
#'
#' @export
pindelCalling <- function(tumor_file,
normal_file,
sample_name,
ref,
out_path,
pindel,
sambamba,
samtools,
threads,
perl,
gatk4,
centromeres_telomeres,
bcftools){
# define output
out_path <- file.path(out_path, "pindel")
dir.create(out_path, showWarnings = F)
out_pindel <- file.path(out_path, paste0(sample_name, "_out_pindel"))
normal_file_filtered <- file.path(out_path, basename(normal_file))
pindel2vcf4tcga <- file.path(dirname(pindel), 'pindel2vcf4tcga')
somatic_indelfilter <- file.path(dirname(pindel), 'somatic_filter', 'somatic_indelfilter.pl')
message(paste(Sys.time(),"\n",
'Starting Pindel calling using:\n',
'>Tumor file:', tumor_file, '\n',
'>Normal file:', normal_file, '\n',
'>Sample name:', sample_name, '\n',
'>Reference genome:', ref, '\n',
'>Output path:', out_pindel, '\n',
'>Number of threads:', threads, '\n'))
# create conf file
ti_is <- insertSizePindel(out_path = out_path,
bam_file = normal_file,
sambamba = sambamba,
samtools = samtools)
nr_is <- insertSizePindel(out_path = out_path,
bam_file = tumor_file,
sambamba = sambamba,
samtools = samtools)
config_file <- file.path(out_path, 'config_file')
writeLines(c(ti_is, nr_is), config_file)
# Pindel calling
system(paste(pindel,
'-f', ref,
'-i', config_file,
'-o', out_pindel,
'--exclude', centromeres_telomeres,
'-T', threads))
unlink(file.path(out_path, basename(normal_file)))
unlink(file.path(out_path, basename(tumor_file)))
unlink(file.path(out_path, paste0(basename(normal_file), ".bai")))
unlink(file.path(out_path, paste0(basename(tumor_file), ".bai")))
# Pindel output to vcf
# merge deletions(D) and short insertions (SI)
pindel_files <- list.files(out_path,
full.names = T)
pindel_files <- c(pindel_files[grep("_SI", pindel_files)],
pindel_files[grep("_D", pindel_files)])
merged_pindel <- file.path(out_path, paste0(sample_name, '_merged'))
for(pindel_file in pindel_files){
system(paste('grep ChrID', pindel_file, '>>', merged_pindel))
}
# somatic filter
somatic_indel_filter_config <- file.path(out_path, 'somatic.indel.filter.config')
somatic_output <- file.path(out_path, paste0(sample_name, '_somatic.vcf'))
somatic_config <- paste0('indel.filter.input = ', merged_pindel, '\n',
'indel.filter.vaf = 0.08', '\n',
'indel.filter.cov = 20', '\n',
'indel.filter.hom = 6', '\n',
'indel.filter.pindel2vcf = ', pindel2vcf4tcga, '\n',
'indel.filter.reference = ', ref, '\n',
'indel.filter.referencename = ', gsub("\\..*$", "", basename(ref)), '\n',
'indel.filter.referencedate = ', Sys.time(), '\n',
'indel.filter.output = ', somatic_output)
writeLines(somatic_config, somatic_indel_filter_config)
# apply somatic filter and convert output to vcf
system(paste(perl, somatic_indelfilter, somatic_indel_filter_config))
# # sort vcf
# sorted_vcf <- paste0(gsub('\\..*$', '',somatic_output), '_sorted.vcf')
# system(paste(gatk4,
# 'SortVcf',
# '--CREATE_INDEX=true',
# '--SEQUENCE_DICTIONARY', gsub(".fa", ".dict", ref),
# '-I', somatic_output,
# '-O', sorted_vcf))
# vcf_old_names <- system(paste(bcftools,
# 'query -l', sorted_vcf), intern = T)
#
# vcf_new_names <- c('NORMAL', 'TUMOR')
#
# new_header_file <- file.path(out_path, paste0(sample_name, '.header'))
#
# new_header_vcf <- paste(c(vcf_old_names[1], " ",
# vcf_new_names[1], '\n',
# vcf_old_names[2], " ",
# vcf_new_names[2]),
# collapse = "")
#
# writeLines(new_header_vcf, new_header_file)
#
#
# reheadered_vcf <- file.path(out_path, paste0(sample_name, '_reheader.vcf'))
#
#
# system(paste(bcftools,
# 'reheader',
# '-s', new_header_file,
# '-o',reheadered_vcf,
# sorted_vcf))
# PASS variants filtering
system(paste(bcftools, 'view',
'-f PASS', somatic_output,
'>', paste0(out_pindel, '_PASS.vcf')))
message(paste(Sys.time(),"\n", 'Finished', sample_name, '\n'))
}
#' insertSizePindel
#'
#' @description
#' Calculates the mean insert size and returns the name of a bam-file,
#' the expected average insert size, and a label to indicate the identity of the sample.
#'
#' @inheritParams variantCalling
#' @param size_subsample Number of reads in the BAM file used for the insert size calculation.
#' @param bam_file BAM file where to calculate the insert size.
#' @examples
#' \dontrun{
#' insertSizePindel(bam_file = 'raw/sample_tumor.bam',
#' samtools = '/bin/samtools',
#' sambamba = '/bin/sambamba')
#'}
#'
#' @export
insertSizePindel <- function(out_path,
sambamba,
samtools,
bam_file,
size_subsample = 1000000){
out_bam <- file.path(out_path, basename(bam_file))
# filter reads
system(paste(sambamba, 'view', bam_file,
'--filter "not (unmapped or duplicate or secondary_alignment or failed_quality_control or supplementary)"',
'--format bam',
'--nthreads', threads,
'--output-filename', out_bam))
# calculate mean insert size
wr_in_size <- system(paste(samtools, 'view', out_bam, '| head -n', as.integer(size_subsample)), intern = T)
b_sum = 0
b_count = 0
numlines = 0
for(line in wr_in_size){
numlines <- numlines + 1
tmp <- unlist(strsplit(line, "\t"))
if(length(tmp) < 9) break
if(abs(as.integer(tmp[8])) < 10000){
b_sum <- b_sum + abs(as.integer(tmp[8]))
b_count <- b_count + 1
}
}
mean = b_sum / b_count
conf_tmp <- paste(tumor_file, mean, gsub("\\..*$", "", basename(tumor_file)), collapse = "\t")
return(conf_tmp)
}
#' sambambaIndex
#'
#' @description
#' Generate BAM index if it is not present using sambamba
#'
#' @inheritParams variantCalling
#' @examples
#' \dontrun{
#' sambambaIndex(bam = 'raw/sample_tumor.bam',
#' threads = 2,
#' sambamba = '/bin/sambamba')
#'}
#'
#' @export
sambambaIndex <- function(bam,
threads,
sambamba){
index <- paste0(bam, '.bai')
if(!file.exists(index)){
message(paste(Sys.time(),"\n",
'Index for', bam, 'not present.\n\n',
'Starting BAM index generation using:\n',
'>BAM file:', bam, '\n',
'>Number of threads:', threads, '\n'))
system(paste(sambamba, "index",
"-t", threads,
bam))
message(paste(Sys.time(),"\n", 'Finished', bam, '\n'))
}
}
#' sambambaIndex
#'
#' @description
#' Generate BAM index if it is not present using sambamba
#'
#' @inheritParams variantCalling
#' @examples
#' \dontrun{
#' sambambaIndex(bam = 'raw/sample_tumor.bam',
#' threads = 2,
#' sambamba = '/bin/sambamba')
#'}
#'
#' @export
samtoolsIndex <- function(bam,
threads,
samtools){
index <- paste0(bam, '.bai')
if(!file.exists(index)){
message(paste(Sys.time(),"\n",
'Index for', bam, 'not present.\n\n',
'Starting BAM index generation using:\n',
'>BAM file:', bam, '\n',
'>Number of threads:', threads, '\n'))
system(paste(samtools, "index",
"-@", threads,
bam))
message(paste(Sys.time(),"\n", 'Finished', bam, '\n'))
}
}
#' radia
#'
#' @description
#' Tumor-normal pair somatic variant calling using RADIA
#'
#' @inheritParams variantCalling
#' @examples
#' \dontrun{
#' radiaCalling(tumor_file = 'raw/sample_tumor.bam',
#' normal_file = 'raw/sample_normal.bam',
#' sample_name = 'sample'
#' ref = 'ref/hg38.fa',
#' out_path = 'rst',
#' radia_path = '/bin/radia/scripts',
#' python_radia = 'venv/radia/bin/python',
#' samtools = '/bin/samtools')
#'}
#'
#' @export
radiaCalling <- function(tumor_file,
normal_file,
sample_name,
ref,
out_path,
radia_path,
python_radia,
samtools,
threads,
bcftools){
# define output
out_path <- file.path(out_path, "radia")
dir.create(out_path, showWarnings = F)
out_radia <- file.path(out_path, paste0(sample_name, "_out_radia"))
out_radia_filtered <- file.path(out_path, 'filtered')
dir.create(out_radia_filtered, showWarnings = F)
radia <- file.path(radia_path, 'radia.py')
merge_chroms <- file.path(radia_path, 'mergeChroms.py')
filter_radia <- file.path(radia_path, 'filterRadia.py')
radia_data <- paste0(dirname(radia_path), '/data')
#TODO: advice same ref name and ref radia folder
ref_name <- gsub(".fa", "",basename(ref))
message(paste(Sys.time(),"\n",
'Starting RADIA calling using:\n',
'>Tumor file:', tumor_file, '\n',
'>Normal file:', normal_file, '\n',
'>Sample name:', sample_name, '\n',
'>Reference genome:', ref, '\n',
'>Output path:', out_radia, '\n',
'>Number of threads:', threads, '\n'))
# define chr in BAM file
chr_ti <- system(paste(samtools, 'idxstats', tumor_file, "| cut -f 1 | sed -e '$ d'"), intern = T)
chr_bl <- system(paste(samtools, 'idxstats', normal_file, "| cut -f 1 | sed -e '$ d'"), intern = T)
chr_list <- unique(c(chr_ti, chr_bl))
# set parallel parameters
cl <- parallel::makeCluster(threads)
doParallel::registerDoParallel(cl)
# radia calling in parallel
`%dopar%` <- foreach::`%dopar%`
foreach::foreach(chr=chr_list) %dopar% {
system(paste(python_radia, radia,
sample_name,
chr,
'-n', normal_file,
'-t', tumor_file,
'-f', ref,
'-o', paste0(out_radia, "_", chr, ".vcf")))
# system(paste(python_radia, filter_radia,
# sample_name,
# chr,
# paste0(out_radia, "_", chr, ".vcf"),
# radia_path,
# out_radia_filtered,
# '--dnaOnly',
# '--noSnpEff',
# '--noRadar',
# '--noDarned',
# '--ignoreScriptsDir',
# '-b', file.path(radia_data, ref_name, 'blacklists/1000Genomes/phase3'),
# '-d', file.path(radia_data, ref_name, 'snp151'),
# '-r', file.path(radia_data, ref_name, 'retroGenes'),
# '-t', file.path(radia_data, ref_name, 'gencode/basic'),
# '-p', file.path(radia_data, ref_name, 'pseudoGenes'),
# '-c', file.path(radia_data, ref_name, 'cosmic')))
}
# merge temporal vcfs
system(paste(python_radia, merge_chroms,
paste0(sample_name, "_out_radia"),
out_path,
out_path))
# remove temporal vcfs
tmp_vcf <- list.files(out_path,
full.names = T)
tmp_vcf <- c(tmp_vcf, file.path(out_path, paste0(sample_name, "_out_radia.vcf")))
tmp_vcf <- tmp_vcf[!(duplicated(tmp_vcf)|duplicated(tmp_vcf, fromLast=TRUE))]
lapply(tmp_vcf, unlink)
pass_vcf <- file.path(out_path, paste0(sample_name, '_radia_PASS.vcf'))
system(paste(bcftools, 'view',
'-f PASS', file.path(out_path, paste0(sample_name, "_out_radia.vcf")),
'>', pass_vcf))
# remove IUPAC ambiguity
tmp_vcf <- gsub('\\..*$', '_tmp.vcf', pass_vcf)
tmp_header <- gsub('\\..*$', '_tmp_h.vcf', pass_vcf)
system(paste(bcftools,
'view -H', pass_vcf,
'| awk \'($4=="A")||($4=="T")||($4=="G")||($4=="C") {print}\' >', tmp_vcf))
system(paste(bcftools,
'view -h', pass_vcf,
'>', tmp_header))
unlink(pass_vcf)
system(paste('cat', tmp_header, tmp_vcf, '>', pass_vcf))
unlink(tmp_vcf)
unlink(tmp_header)
message(paste(Sys.time(),"\n", 'Finished', sample_name, '\n'))
}
#' fpfilterTool
#'
#' @description
#' False postive filter parser for label low quality reads
#'
#' @inheritParams variantCalling
#' @examples
#' \dontrun{
#' fpfilterTool(vcf = 'vcf/sample.vcf',
#' tumor_file = 'raw/sample_tumor.bam',
#' sample_name = 'sample'
#' ref = 'ref/hg38.fa',
#' out_path = 'rst',
#' perl = 'perl',
#' fpfilter = 'fpfilter-tool/fpfilter.pl',
#' bam_readcount = 'bam_readcount')
#'}
#' @export
fpfilterTool <- function(vcf,
tumor_file,
sample_name,
ref,
fpfilter,
perl,
out_path,
bam_readcount,
tumor_vcf_id='TUMOR'){
# define output
out_path <- dirname(vcf)
out_fpfilter <- file.path(out_path, "fpfilter")
dir.create(out_fpfilter, showWarnings = F)
out_fpfilter_name <- file.path(out_fpfilter, sample_name)
snvs_var <- paste0(out_fpfilter_name, "_snvs.var")
snvs_readcount <- paste0(out_fpfilter_name, "_snvs.readcount")
vcf_filtered <- paste0(out_fpfilter_name, ".vcf")
message(paste(Sys.time(),"\n",
'Starting fpfilter filtering using:\n',
'>Tumor file:', tumor_file, '\n',
'>Vcf:', vcf, '\n',
'>Reference genome:', ref, '\n',
'>Output path:', out_fpfilter, '\n'))
# get vcf sample names
vcf_names <- system(paste(bcftools,
'query -l',
vcf), intern = T)
if(!tumor_vcf_id %in% vcf_names) {
message('Wrong tumor vcf sample name')
break()
}
# filter using fpfilter
system(paste(perl, fpfilter,
'--vcf-file', vcf,
'--bam-file', tumor_file,
'--reference', ref,
'--output', vcf_filtered,
'--sample', tumor_vcf_id))
message(paste(Sys.time(),"\n", 'Finished', sample_name, '\n'))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.