pipelineTools: Pipeline Tools for NGS Sequence Analysis Pipelines

Documented in run_bwa

#' Run the BWA aligner
#'
#' @description Runs the BWA aligner
#'
#' @import parallel
#'
#' @param command BWA command to run, at present can choose 'mem',
#'                which is most useful fo Illumina, PacBio and Nanopore reads, required
#' @param input1 List of the paths to files containing to the forward reads, required
#' @param input2 List of the paths to files containing to the reverse reads, required for paired end sequence data
#' @param index Path to the indexed reference genome, required
#' @param sample.name List of sample names, required
#' @param out.dir Name of the directory from the BWA output
#' @param threads Number of threads for BWA to use, default set to 10
#' @param seed.length Minimum seed length
#' @param parallel Run in parallel, default set to FALSE
#' @param cores Number of cores/threads to use for parallel processing, default set to 4
#' @param execute Whether to execute the commands or not, default set to TRUE
#' @param bwa Path to the bwa program, required
#' @param version Returns the version number
#'
#' @return A list with the BWA commands
#'
#' @examples
#'  \dontrun{
#'  path <- "/software/bwa/bwa"
#'
#' # Get version number
#' bwa.version <- run_bwa(bwa = path,
#'                        version = TRUE)
#' bwa.version[3] # print the line with version number
#'
#' reads.path <- "/path/to/fastqs"
#' reads.patt.1 <- "_S\\d{1,2}\\_L001_R1_001.fastq.gz"
#' reads.patt.2 <- "_S\\d{1,2}\\_L001_R2_001.fastq.gz"
#'
#' # Organise the samples
#' sample.dataframe <- prepare_samples(reads.path, c(reads.patt.1,reads.patt.2),trimmed.reads.dir)
#'
#' input1 <- as.character(sample.dataframe$reads.path.1)
#' input1.trim <- as.character(sample.dataframe$trimmed.reads.path.1)
#' # Paired end only
#'  input2 <- as.character(sample.dataframe$reads.path.2)
#'  input2.trim <- as.character(sample.dataframe$trimmed.reads.path.2)
#'
#' sample.names <- as.character(sample.dataframe$sample.names)
#'
#' genome <- "/path/to/reference/genome"
#'
#' command <- "mem"
#' out.dir <- "bwa_alignments"
#'
#' # Paired end
#' pe <- run_bwa(command = command,
#'               input1 = mate1.trim,
#'               input2 = mate2.trim,
#'               index = genome,
#'               threads = 20,
#'               out.dir = out.dir,
#'               sample.name = sample.names,
#'               seed.length = 21,
#'               bwa = path)
#'
#' # Single end
#' se <- run_bwa(command = command,
#'               input1 = mate1.trim,
#'               index = genome,
#'               threads = 20,
#'               out.dir = out.dir,
#'               sample.name = sample.names,
#'               seed.length = 21,
#'               bwa = path)
#' }
#'
#' @export
#'
run_bwa <- function(command = NULL,
                    input1 = NULL,
                    input2 = NULL,
                    index = NULL,
                    sample.name = NULL,
                    out.dir = NULL,
                    threads = 10,
                    seed.length = NULL,
                    parallel = FALSE,
                    cores = 4,
                    execute = TRUE,
                    bwa = NULL,
                    version = FALSE){
  # Check bwa program can be found
  sprintf("type -P %s &>//dev//null && echo 'Found' || echo 'Not Found'", bwa)

  # Version
  if (isTRUE(version)){
    bwa.run <- sprintf('%s  2>&1',
                      bwa)
    result <- try(system(bwa.run, intern = TRUE))
    return(result)
  }

  # Create the sample directories for the per sample bwa results
  lapply(paste(out.dir,sample.name, sep = "/"), function(cmd) dir.create(cmd, showWarnings = FALSE, recursive = TRUE))

  # Set the additional arguments
  args <- ""
  # Threads
  if (!is.null(threads)){
    args <- paste(args,paste("-t",threads,sep = " "),sep = " ")
  }
  if (!is.null(seed.length)){
    args <- paste(args,paste("-k",seed.length,sep = " "),sep = " ")
  }

  # bwa mem -M -R $read_group_info -t $threads $genome $trimmed_reads_dir/$cut1 $trimmed_reads_dir/$cut2
  # Set the names for the alignment
  sam.files <- paste(out.dir,sample.name,paste(sample.name,"sam",sep = "."),sep = "/")
  # Paired end
  if(!is.null(input2)){
    bwa.run <- sprintf('%s %s %s %s %s %s > %s',
                       bwa,command,args,index,input1,input2,sam.files)
  }else{
    bwa.run <- sprintf('%s %s %s %s %s > %s ',
                       bwa,command,args,index,input1,sam.files)
  }

  if (isTRUE(execute)){
    if (isTRUE(parallel)){
      cluster <- makeCluster(cores)
      parLapply(cluster, bwa.run, function (cmd)  system(cmd))
      stopCluster(cluster)
    }else{
      lapply(bwa.run, function (cmd)  system(cmd))
    }
  }

  return(bwa.run)
}