Nothing
#' run.varitas.pipeline.hybrid
#'
#' @description
#' Run VariTAS pipeline starting from both VCF files and BAM/ FASTQ files.
#' Useful for processing data from the Ion PGM or MiniSeq where variant calling has been done on the machine,
#' but you are interested in running more variant callers.
#'
#' @param vcf.specification
#' Data frame containing details of vcf files to be processed. Must contain columns sample.id, vcf, and caller
#' @param output.directory
#' Main directory where all files should be saved
#' @param run.name
#' Name of pipeline run. Will be added as a prefix to all LSF jobs.
#' @param fastq.specification
#' Data frame containing details of FASTQ files to be processed
#' @param bam.specification
#' Data frame containing details of BAM files to be processed
#' @param variant.callers
#' Vector specifying which variant callers should be run.
#' @param proton
#' Logical indicating if data was generated by proton sequencing. Used to set base quality
#' thresholds in variant calling steps.
#' @param quiet
#' Logical indicating whether to print commands to screen rather than submit jobs. Defaults to FALSE,
#' can be useful to set to TRUE for testing.
#' @param email
#' Email address that should be notified when pipeline finishes. If NULL or FALSE, no email is sent.
#' @param verify.options
#' Logical indicating whether to run verify.varitas.options
#' @param save.specification.files
#' Logical indicating if specification files should be saved to project directory
#'
#' @return None
#' @examples
#' run.varitas.pipeline.hybrid(
#' bam.specification = data.frame(sample.id = c('Z', 'Y'), tumour.bam = c('Z.bam', 'Y.bam')),
#' vcf.specification = data.frame(
#' sample.id = c('a', 'b'),
#' vcf = c('a.vcf', 'b.vcf'),
#' caller = c('pgm', 'pgm')
#' ),
#' output.directory = '.',
#' quiet = TRUE,
#' run.name = "Test",
#' variant.callers = c('mutect', 'varscan')
#' )
#'
#' @export
run.varitas.pipeline.hybrid <- function(
vcf.specification,
output.directory,
run.name = NULL,
fastq.specification = NULL,
bam.specification = NULL,
variant.callers = c('mutect', 'vardict', 'varscan', 'lofreq', 'muse'),
proton = FALSE,
quiet = FALSE,
email = NULL,
verify.options = !quiet,
save.specification.files = !quiet
) {
logo <- c("====================================================================",
"`7MMF' `7MF' db MMP\"\"MM\"\"YMM db .M\"\"\"bgd ",
" `MA ,V P' MM `7 ;MM: ,MI \"Y ",
" VM: ,V ,6\"Yb. `7Mb,od8 `7MM MM ,V^MM. `MMb. ",
" MM. M'8) MM MM' \"' MM MM ,M `MM `YMMNq. ",
" `MM A' ,pm9MM MM MM MM AbmmmqMA . `MM ",
" :MM; 8M MM MM MM MM A' VML Mb dM ",
" VF `Moo9^Yo..JMML. .JMML. .JMML..AMA. .AMMA.P\"Ybmmd\" ",
"===================================================================="
)
cat(logo, sep = '\n')
variant.callers <- match.arg(variant.callers, several.ok = TRUE);
old.vcf.specification <- vcf.specification;
### INPUT TESTS ###########################################################
if( is.null(fastq.specification) && is.null(bam.specification) ) {
stop('Must provide either fastq.specification or bam.specification');
}
if( !is.null(fastq.specification) && !is.null(bam.specification) ) {
stop('Can only handle one of fastq.specification and bam.specification');
}
if( !( 'caller' %in% names(vcf.specification)) ) {
stop("vcf.specification must contain a column 'caller'");
}
if( is.null(fastq.specification) ) {
stages.to.run <- c('qc', 'calling', 'annotation', 'merging');
} else {
stages.to.run <- c('alignment', 'calling', 'annotation', 'merging');
}
### MAIN ##################################################################
if( verify.options ) {
verify.varitas.options(
stages.to.run = stages.to.run,
variant.callers = variant.callers
);
}
# Determine if running on HPC
HPC <- TRUE
config <- read.yaml(save.config())
if (config[['cluster_scheduler']] == 'none') {
HPC <- FALSE
}
# STAGE 1: ALIGNMENT
if( !is.null(fastq.specification) ) {
paired.end <- FALSE;
if( 'mates' %in% names(fastq.specification) ) {
paired.end <- TRUE;
}
bam.specification <- run.alignment(
fastq.specification = fastq.specification,
output.directory = output.directory,
paired.end = paired.end,
job.group = 'alignment',
job.name.prefix = run.name,
quiet = quiet,
verify.options = FALSE
);
if( save.specification.files ) {
file.name <- date.stamp.file.name('bam_specification.txt');
utils::write.table(
bam.specification,
file.path(output.directory, file.name),
sep = '\t',
row.names = FALSE
);
}
}
# STAGE 2: QC
paired <- FALSE;
if( 'normal.bam' %in% names(bam.specification) ) {
paired <- TRUE;
}
if ('qc' %in% stages.to.run) {
bam.specification <- run.target.qc(
bam.specification,
project.directory = output.directory,
job.name.prefix = run.name,
paired = paired,
quiet = quiet,
verify.options = FALSE
);
}
# STAGE 3: VARIANT CALLING
# store VCF specification for new variant calls
new.vcf.specification <- run.variant.calling(
bam.specification = bam.specification,
output.directory = output.directory,
variant.callers = variant.callers,
job.name.prefix = run.name,
paired = paired,
proton = proton,
quiet = quiet,
verify.options = FALSE
);
# make sure VCF specifications match
missing.columns <- names(new.vcf.specification)[ !(names(new.vcf.specification) %in% names(old.vcf.specification)) ];
for(column in missing.columns) {
old.vcf.specification[, column] <- '';
}
# merge into a single vcf specification
vcf.specification <- rbind(new.vcf.specification, old.vcf.specification);
# Run scripts if not on HPC
if (!HPC) {
if (!is.null(fastq.specification)) {
stages.to.run = c('alignment', 'qc', 'calling')
} else {
stages.to.run = c('qc', 'calling')
}
run.all.scripts(
output.directory = output.directory,
stages.to.run = stages.to.run,
variant.callers = variant.callers,
quiet = quiet
)
}
# STAGE 4: REST OF PIPELINE
run.varitas.pipeline(
vcf.specification,
start.stage = 'annotation',
run.name = run.name,
output.directory = output.directory,
quiet = quiet,
verify.options = FALSE,
email = email
);
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.