Nothing
#' run.variant.calling
#'
#' @description
#' Run variant calling for all samples
#'
#' @details
#' Run VarDict on each sample, and annotate the results with ANNOVAR.
#' Files are output to a vardict/ subdirectory within each sample directory.
#'
#' @param bam.specification
#' Data frame containing details of BAM files to be processed, typically from \code{prepare.bam.specification}.
#' @param output.directory
#' Path to directory where output should be saved
#' @param variant.callers
#' Character vector of variant callers to be used
#' @param paired
#' Logical indicating whether to do variant calling with a matched normal
#' @param proton
#' Logical indicating whether data was generated by proton sequencing (ignored if running MuTect)
#' @param sample.directories
#' Logical indicating whether output for each sample should be put in its own directory (within output.directory)
#' @param job.name.prefix
#' Prefix for job names on the cluster
#' @param quiet
#' Logical indicating whether to print commands to screen rather than submit the job
#' @param verify.options
#' Logical indicating whether to run verify.varitas.options
#'
#' @return None
#'
#' @examples
#' run.variant.calling(
#' data.frame(sample.id = c('Z', 'Y'), tumour.bam = c('Z.bam', 'Y.bam')),
#' output.directory = '.',
#' variant.caller = c('lofreq', 'mutect'),
#' quiet = TRUE,
#' paired = FALSE
#' )
#'
#' @export
run.variant.calling <- function(
bam.specification,
output.directory,
variant.callers = c('vardict', 'mutect', 'varscan', 'lofreq', 'muse'),
paired = TRUE,
proton = FALSE,
sample.directories = TRUE,
job.name.prefix = NULL,
quiet = FALSE,
verify.options = !quiet
) {
### INPUT TESTS ###########################################################
# Check that output directory exists.
# In theory we could create the directory if it does not exist, but it would have to be created recursively.
# To be safe, I have opted to throw an error.
if( !quiet && !dir.exists(output.directory) ) {
error.message <- paste('Directory', output.directory, 'does not exist or is not a directory.');
stop(error.message);
}
variant.callers <- match.arg(variant.callers, several.ok = TRUE);
if( 'muse' %in% variant.callers && !paired ) {
error.message <- 'MuSE can only be run on paired data, provide matched normal samples or do not use MuSE';
stop(error.message);
}
### MAIN ##################################################################
# make sure all columns are characters, not factors!
bam.specification$sample.id <- as.character(bam.specification$sample.id);
bam.specification$tumour.bam <- as.character(bam.specification$tumour.bam);
if( 'normal.bam' %in% names(bam.specification) ) {
bam.specification$normal.bam <- as.character(bam.specification$normal.bam);
}
if( verify.options ) {
verify.varitas.options( stages.to.run = 'annotation' );
}
# create directories for log files and code
# - should this be parameterized?
if( !quiet ) {
create.directories(
directory.names = c('log', 'code'),
path = output.directory
);
}
# start assembling a data frame of
annovar.specification <- list();
# Loop over samples and run each one
for( caller in variant.callers ) {
# add a column to bam.specification data frame with path to
# output direcory for that specific sample
if( sample.directories ) {
bam.specification$output.path <- file.path(
output.directory,
bam.specification$sample.id,
caller
);
} else {
bam.specification$output.path <- file.path(
output.directory,
caller
);
}
for( i in seq_len( nrow(bam.specification) ) ) {
sample.id <- bam.specification$sample.id[i];
tumour.bam <- bam.specification$tumour.bam[i];
sample.output.directory <- bam.specification$output.path[i];
job.dependencies <- NULL;
if( 'job.dependency' %in% names(bam.specification) && '' != bam.specification$job.dependency[i] && !is.na(bam.specification$job.dependency[i]) ) {
job.dependencies <- stringr::str_split(
bam.specification$job.dependency[i],
pattern = '\\s+'
);
}
normal.bam <- NULL;
if(paired) {
normal.bam <- bam.specification$normal.bam[i];
}
# Sort out what will be the name of the job and the path to the final output file of interest
# this will be used as input to the next step in the pipeline – job dependency and file to run annotation on
job.name <- paste(caller, sample.id, sep = '_');
if( !is.null(job.name.prefix) && '' != job.name.prefix ) {
job.name <- paste(job.name.prefix, job.name, sep = '_');
}
output.filename <- paste0(sample.id, '.passed.ontarget.vcf');
output.vcf <- file.path(sample.output.directory, output.filename);
if( 'vardict' == caller ) {
run.vardict.sample(
sample.id = sample.id,
tumour.bam = tumour.bam,
normal.bam = normal.bam,
paired = paired,
proton = proton,
output.directory = sample.output.directory,
output.filename = output.filename,
code.directory = file.path(output.directory, 'code'),
log.directory = file.path(output.directory, 'log'),
job.dependencies = job.dependencies,
job.name = job.name,
job.group = caller,
verify.options = FALSE,
quiet = quiet
);
} else if( 'mutect' == caller ) {
run.mutect.sample(
sample.id = sample.id,
tumour.bam = tumour.bam,
normal.bam = normal.bam,
paired = paired,
output.directory = sample.output.directory,
output.filename = output.filename,
code.directory = file.path(output.directory, 'code'),
log.directory = file.path(output.directory, 'log'),
job.dependencies = job.dependencies,
job.group = caller,
job.name = job.name,
verify.options = FALSE,
quiet = quiet
);
} else if( 'varscan' == caller ) {
run.varscan.sample(
sample.id = sample.id,
tumour.bam = tumour.bam,
normal.bam = normal.bam,
paired = paired,
output.directory = sample.output.directory,
output.filename = output.filename,
code.directory = file.path(output.directory, 'code'),
log.directory = file.path(output.directory, 'log'),
job.dependencies = job.dependencies,
job.group = caller,
job.name = job.name,
verify.options = FALSE,
quiet = quiet
);
} else if( 'lofreq' == caller ) {
run.lofreq.sample(
sample.id = sample.id,
tumour.bam = tumour.bam,
normal.bam = normal.bam,
paired = paired,
output.directory = sample.output.directory,
output.filename = output.filename,
code.directory = file.path(output.directory, 'code'),
log.directory = file.path(output.directory, 'log'),
job.dependencies = job.dependencies,
job.group = caller,
job.name = job.name,
verify.options = FALSE,
quiet = quiet
);
} else if( 'muse' == caller ) {
run.muse.sample(
sample.id = sample.id,
tumour.bam = tumour.bam,
normal.bam = normal.bam,
paired = paired,
output.directory = sample.output.directory,
output.filename = output.filename,
code.directory = file.path(output.directory, 'code'),
log.directory = file.path(output.directory, 'log'),
job.dependencies = job.dependencies,
job.group = caller,
job.name = job.name,
verify.options = FALSE,
quiet = quiet
);
}
annovar.specification[[ paste(caller, sample.id, sep = '-') ]] <- data.frame(
'sample.id' = sample.id,
'vcf' = output.vcf,
'job.dependency' = job.name,
'caller' = caller,
stringsAsFactors = FALSE
);
}
}
annovar.specification <- do.call(rbind, annovar.specification);
# sanity check to make sure format is as expected
verify.vcf.specification(annovar.specification);
return(annovar.specification);
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.