# R/run.all.scripts.R

# Defines functions run.all.scripts

# Documented in run.all.scripts

# `i` is the iteration variable of the foreach() loops in run.all.scripts;
# declaring it here keeps R CMD check from reporting it as an undefined global variable.
globalVariables('i')
#' Run all the generated bash scripts without HPC commands
#'
#' @description
#'  Run all the scripts generated by previous parts of the pipeline, without using HPC commands.
#'  Scripts are located by file name pattern, run in parallel with \code{bash}, and their combined
#'  stdout/stderr is written to \code{<output.directory>/log/<script name>.out}.
#'
#' @param output.directory
#'  Main directory where all files should be saved
#' @param stages.to.run
#'  A character vector of all stages that need running
#' @param variant.callers
#'  A character vector of variant callers to run
#' @param quiet
#'  Logical indicating whether to print commands to screen rather than submit jobs. Defaults to FALSE,
#'  can be useful to set to TRUE for testing.
#'
#' @return None; called for its side effects. A warning is raised for any script that
#'  finishes with a non-zero exit status.
#'
#' @importFrom foreach '%dopar%'
#'
run.all.scripts <- function(
  output.directory,
  stages.to.run = c('alignment', 'qc', 'calling', 'annotation', 'merging'),
  variant.callers = NULL,
  quiet = FALSE
) {

  ### MAIN ####################################################################

  script.directory <- file.path(output.directory, 'code')
  log.directory <- file.path(output.directory, 'log')

  ### SET UP DOPARALLEL

  config <- read.yaml(save.config())
  num.cores <- config[['num_cpu']]
  doParallel::registerDoParallel(cores = num.cores)

  ### DESCRIBE STAGES

  # One entry per group of scripts, in the order they must be run.
  # `label` is used in the per-job completion message; NA means no such message is printed.
  run.calling <- 'calling' %in% stages.to.run

  stage.table <- list(
    list(
      enabled = 'alignment' %in% stages.to.run,
      message = 'Aligning...',
      pattern = '.*align.*\\.sh$',
      directory = script.directory,
      label = 'alignment'
    ),
    list(
      enabled = 'qc' %in% stages.to.run,
      message = 'Running QC...',
      pattern = '.*target_qc.*\\.sh$',
      directory = script.directory,
      label = 'QC'
    ),
    list(
      enabled = run.calling && 'mutect' %in% variant.callers,
      message = 'Running Mutect...',
      pattern = '.*mutect.*\\.sh$',
      directory = script.directory,
      label = 'MuTect'
    ),
    list(
      enabled = run.calling && 'vardict' %in% variant.callers,
      message = 'Running VarDict...',
      pattern = '.*vardict.*\\.sh$',
      directory = script.directory,
      label = 'VarDict'
    ),
    list(
      enabled = 'annotation' %in% stages.to.run,
      message = 'Annotating...',
      pattern = '.*annotate.*\\.sh$',
      directory = script.directory,
      label = 'annotation'
    ),
    list(
      # post-processing scripts are looked up one level above the code directory
      enabled = 'merging' %in% stages.to.run,
      message = 'Merging...',
      pattern = '.*post_processing.*\\.sh$',
      directory = file.path(script.directory, '..'),
      label = NA_character_
    )
  )

  ### RUN STAGES

  print('Note: due to the nature of using multiple cores, jobs may complete out of order')

  for (stage in stage.table) {

    if (!stage[['enabled']]) {
      next
    }

    print(stage[['message']])

    script.files <- list.files(
      path = stage[['directory']],
      pattern = stage[['pattern']],
      full.names = TRUE
    )

    # nothing to run for this stage (also avoids iterating over 1:0)
    if (length(script.files) == 0) {
      next
    }

    # derive log names from the very files being run so the two can never get out of sync
    log.files <- file.path(log.directory, paste0(basename(script.files), '.out'))

    # system() hands the command to sh, where the bash-only '&>' redirect is not portable;
    # '> file 2>&1' captures stdout and stderr in every POSIX shell.
    # Paths are quoted so directories containing spaces work.
    commands <- paste0('bash ', shQuote(script.files), ' > ', shQuote(log.files), ' 2>&1')

    n.jobs <- length(commands)
    job.label <- stage[['label']]

    exit.codes <- foreach::foreach(i = seq_along(commands)) %dopar% {
      if (quiet) {
        # dry run: show what would be executed
        print(commands[i])
        0L
      } else {
        exit.code <- system(commands[i])
        if (!is.na(job.label)) {
          print(paste0('Completed job ', i, ' of ', n.jobs, ' ', job.label, ' jobs'))
        }
        exit.code
      }
    }

    # report failures from the parent process rather than silently discarding exit statuses
    failed.jobs <- which(as.integer(unlist(exit.codes)) != 0L)
    if (length(failed.jobs) > 0) {
      warning(
        'Non-zero exit status for script(s): ',
        paste(script.files[failed.jobs], collapse = ', '),
        '. See log files in ', log.directory,
        call. = FALSE
      )
    }
  }

  print('All jobs executed')
}

# Try the varitas package in your browser

# Any scripts or data that you put into this service are public.

# varitas documentation built on Nov. 14, 2020, 1:07 a.m.