R/run.varitas.pipeline.hybrid.R
In varitas: Variant Calling in Targeted Analysis Sequencing Data

Documented in run.varitas.pipeline.hybrid

#' run.varitas.pipeline.hybrid
#'
#' @description
#' 	Run VariTAS pipeline starting from both VCF files and BAM/ FASTQ files. 
#'	Useful for processing data from the Ion PGM or MiniSeq where variant calling has been done on the machine, 
#'	but you are interested in running more variant callers.
#' 
#' @param vcf.specification 
#' 	Data frame containing details of vcf files to be processed. Must contain columns sample.id, vcf, and caller
#' @param output.directory 
#'	Main directory where all files should be saved
#' @param run.name
#'	Name of pipeline run. Will be added as a prefix to all LSF jobs.
#' @param fastq.specification
#' 	Data frame containing details of FASTQ files to be processed
#' @param bam.specification	
#'	Data frame containing details of BAM files to be processed
#' @param variant.callers
#' 	Vector specifying which variant callers should be run.
#' @param proton
#' 	Logical indicating if data was generated by proton sequencing. Used to set base quality 
#'	thresholds in variant calling steps.
#' @param quiet
#' 	Logical indicating whether to print commands to screen rather than submit jobs. Defaults to FALSE,
#'	can be useful to set to TRUE for testing.
#' @param email 
#' 	Email address that should be notified when pipeline finishes. If NULL or FALSE, no email is sent. 
#' @param verify.options
#'	Logical indicating whether to run verify.varitas.options
#' @param save.specification.files
#'	Logical indicating if specification files should be saved to project directory
#'
#' @return None
#' @examples
#' run.varitas.pipeline.hybrid(
#'        bam.specification = data.frame(sample.id = c('Z', 'Y'), tumour.bam = c('Z.bam', 'Y.bam')),
#'        vcf.specification = data.frame(
#'          sample.id = c('a', 'b'),
#'          vcf = c('a.vcf', 'b.vcf'),
#'          caller = c('pgm', 'pgm')
#'        ),
#'        output.directory = '.',
#'        quiet = TRUE,
#'        run.name = "Test", 
#'        variant.callers = c('mutect', 'varscan')
#'      )
#'
#' @export
run.varitas.pipeline.hybrid <- function(
	vcf.specification,
	output.directory,
	run.name = NULL,
	fastq.specification = NULL,
	bam.specification = NULL,
	variant.callers = c('mutect', 'vardict', 'varscan', 'lofreq', 'muse'), 
	proton = FALSE, 
	quiet = FALSE, 
	email = NULL,
	verify.options = !quiet,
	save.specification.files = !quiet
	) { 

  logo <- c("====================================================================",
            "`7MMF'   `7MF'                 db MMP\"\"MM\"\"YMM   db       .M\"\"\"bgd ", 
            "  `MA     ,V                      P'   MM   `7  ;MM:     ,MI    \"Y ", 
            "   VM:   ,V ,6\"Yb.  `7Mb,od8 `7MM      MM      ,V^MM.    `MMb.     ", 
            "    MM.  M'8)   MM    MM' \"'   MM      MM     ,M  `MM      `YMMNq. ", 
            "    `MM A'  ,pm9MM    MM       MM      MM     AbmmmqMA   .     `MM ", 
            "     :MM;  8M   MM    MM       MM      MM    A'     VML  Mb     dM ", 
            "      VF   `Moo9^Yo..JMML.   .JMML.  .JMML..AMA.   .AMMA.P\"Ybmmd\"  ",
            "===================================================================="
  )
  
  cat(logo, sep = '\n')
  
	variant.callers <- match.arg(variant.callers, several.ok = TRUE);
	old.vcf.specification <- vcf.specification;

	### INPUT TESTS ###########################################################

	if( is.null(fastq.specification) && is.null(bam.specification) ) {
		stop('Must provide either fastq.specification or bam.specification');
	}

	if( !is.null(fastq.specification) && !is.null(bam.specification) ) {
		stop('Can only handle one of fastq.specification and bam.specification');
	}

	if( !( 'caller' %in% names(vcf.specification)) ) {
		stop("vcf.specification must contain a column 'caller'");
	}

	if( is.null(fastq.specification) ) {
		stages.to.run <- c('qc', 'calling', 'annotation', 'merging');
	} else {
		stages.to.run <- c('alignment', 'calling', 'annotation', 'merging');
	}

	### MAIN ##################################################################

	if( verify.options ) {
		verify.varitas.options(
			stages.to.run = stages.to.run, 
			variant.callers = variant.callers
			);
	}
	
	# Determine if running on HPC
	HPC <- TRUE
	config <- read.yaml(save.config())
	if (config[['cluster_scheduler']] == 'none') {
	  HPC <- FALSE
	}

	# STAGE 1: ALIGNMENT
	if( !is.null(fastq.specification) ) {

		paired.end <- FALSE;
		if( 'mates' %in% names(fastq.specification) ) {
			paired.end <- TRUE;
		}	

		bam.specification <- run.alignment(
			fastq.specification = fastq.specification, 
			output.directory = output.directory,
			paired.end = paired.end,
			job.group = 'alignment', 
			job.name.prefix = run.name,
			quiet = quiet, 
			verify.options = FALSE
			);

		if( save.specification.files ) {

			file.name <- date.stamp.file.name('bam_specification.txt');
			utils::write.table(
				bam.specification, 
				file.path(output.directory, file.name), 
				sep = '\t', 
				row.names = FALSE
				);
			}
	}


	# STAGE 2: QC
	paired <- FALSE;
	if( 'normal.bam' %in% names(bam.specification) ) {
	  paired <- TRUE;
	}
	if ('qc' %in% stages.to.run) {
	  bam.specification <- run.target.qc(
  		bam.specification,
  		project.directory = output.directory,
  		job.name.prefix = run.name,
  		paired = paired, 
  		quiet = quiet,
  		verify.options = FALSE
  		);
	}

	# STAGE 3: VARIANT CALLING

	# store VCF specification for new variant calls
	new.vcf.specification <- run.variant.calling(
		bam.specification = bam.specification, 
		output.directory = output.directory, 
		variant.callers = variant.callers, 
		job.name.prefix = run.name,
		paired = paired, 
		proton = proton, 
		quiet = quiet, 
		verify.options = FALSE
		);

	# make sure VCF specifications match
	missing.columns <- names(new.vcf.specification)[ !(names(new.vcf.specification) %in% names(old.vcf.specification)) ];
	for(column in missing.columns) {
	  old.vcf.specification[, column] <- '';
	}

	# merge into a single vcf specification
	vcf.specification <- rbind(new.vcf.specification, old.vcf.specification);

	# Run scripts if not on HPC
	
	if (!HPC) {
	  if (!is.null(fastq.specification)) {
	    stages.to.run = c('alignment', 'qc', 'calling')
	  } else {
	    stages.to.run = c('qc', 'calling')
	  }
	  run.all.scripts(
	    output.directory = output.directory,
	    stages.to.run = stages.to.run,
	    variant.callers = variant.callers,
	    quiet = quiet
	  )
	}
	
	# STAGE 4: REST OF PIPELINE
	run.varitas.pipeline(
		vcf.specification, 
		start.stage = 'annotation', 
		run.name = run.name,
		output.directory = output.directory,
		quiet = quiet,
		verify.options = FALSE,
		email = email
		);

}