# R/header.R

#' Build the command line option parser
#'
#' Declares every command line option understood by the pipeline and wraps
#' them in an `optparse` parser. Despite its name, this function only builds
#' the parser; it does not read or parse the command line itself.
#'
#' All value-carrying options are declared with `type = "character"`, so
#' their parsed values are strings (including `--threads`, `--hash`, `--si`
#' and `--se`).
#'
#' @return The parser object created by `optparse::OptionParser()` from the
#'   option list declared here.
parse_arguments <- function() {
  option_list <- list(
    optparse::make_option(
      c("--input", "-i"), type = "character", default = NULL,
      help = "Input sequence file", metavar = "character"
    ),
    optparse::make_option(
      c("--ref", "-r"), type = "character", default = NULL,
      help = "Path to reference genome for filtering", metavar = "path"
    ),
    optparse::make_option(
      c("--nr"), type = "character", default = NULL,
      help = "Path to NR database", metavar = "path"
    ),
    optparse::make_option(
      c("--nt"), type = "character", default = NULL,
      help = "Path to ncbi NT database", metavar = "path"
    ),
    optparse::make_option(
      c("--bact"), type = "character", default = NULL,
      help = "Path to bacterial genomes for filtering", metavar = "path"
    ),
    optparse::make_option(
      c("--fasta"), action = "store_true", default = FALSE,
      help = "Input reads are in fasta format"
    ),
    optparse::make_option(
      c("--fastq"), action = "store_true", default = FALSE,
      help = "Input reads are in fastq format"
    ),
    optparse::make_option(
      c("--adapter"), type = "character", default = NULL,
      help = "Adapter sequences to be trimmed", metavar = "sequence"
    ),
    # The default is written as a string so it matches the declared type.
    optparse::make_option(
      c("--threads", "-t"), type = "character", default = "1",
      help = "Number of threads to be used [default = %default]",
      metavar = "number"
    ),
    optparse::make_option(
      c("-o", "--output"), type = "character", default = "data",
      help = "output directory [default = %default]", metavar = "path"
    ),
    optparse::make_option(
      c("--hash"), type = "character", default = "15",
      help = paste0(
        "Hash value to be used in velvet assembly ",
        "[default = %default]"
      ),
      metavar = "number"
    ),
    # --si and --se have no default: they stay unset unless given.
    optparse::make_option(
      c("--si"), type = "character",
      help = "Start range value to be used", metavar = "number"
    ),
    optparse::make_option(
      c("--se"), type = "character",
      help = "End range value to be used", metavar = "number"
    ),
    optparse::make_option(
      c("--nofilter"), action = "store_true", default = FALSE,
      help = "Run directly the assembly only with size filter"
    ),
    optparse::make_option(
      c("--deg"), action = "store_true", default = FALSE,
      help = "Samples are degraded, use broader range of small RNA length"
    ),
    optparse::make_option(
      c("--clean"), action = "store_true", default = FALSE,
      help = "Clean large intermediate files"
    ),
    optparse::make_option(
      c("--plot"), action = "store_true", default = FALSE,
      help = "Create plots for contig pattern"
    )
  )
  optparse::OptionParser(option_list = option_list)
}

#' CLI Parameter handling
#'
#' Validates the parsed command line options and stops with an explanatory
#' message when a mandatory option is missing. For the "missing option"
#' errors the parser help is printed before stopping.
#'
#' Checks, in order (only the first failure is reported):
#' `input`, `nt`, `nr`, then the read-length range (`si` and `se`).
#' Afterwards `ref` and `bact` are both required unless `nofilter` is `TRUE`,
#' and `adapter` is required whenever `fastq` is `TRUE`.
#'
#' @param opt Named list of parsed options. Entries are looked up by exact
#'   name.
#' @param opt_parser Parser whose help is printed before a usage error.
#'   Defaults to `parse_arguments()`; the default is evaluated lazily, i.e.
#'   only when a usage error is actually reported.
#' @return `opt`, with `fastq` set to `TRUE` when `sra` is `TRUE`.
handle_arguments <- function(opt, opt_parser = parse_arguments()) {
  # Print the parser help, then abort with `message`.
  usage_error <- function(message) {
    optparse::print_help(opt_parser)
    stop(message, call. = FALSE)
  }

  if (is.null(opt[["input"]])) {
    usage_error("Input file missing (-i, --input)")
  } else if (is.null(opt[["nt"]])) {
    usage_error("NCBI nt db missing (--nt)")
  } else if (is.null(opt[["nr"]])) {
    usage_error("NCBI nr db missing (--nr)")
  } else if (is.null(opt[["si"]]) || is.null(opt[["se"]])) {
    usage_error(
      "Give a range of reads length to be used e.g.: --si 21 --se 23 (--si AND --se)"
    )
  }

  # Reference and bacterial genomes are only mandatory when filtering is on.
  genomes_missing <- is.null(opt[["ref"]]) || is.null(opt[["bact"]])
  if (genomes_missing && !isTRUE(opt[["nofilter"]])) {
    stop(
      "if reference and bacterial genomes aren't available use parameter --nofilter",
      call. = FALSE
    )
  }

  # SRA input is handled as fastq from here on.
  # NOTE(review): parse_arguments() in this file declares no --sra option, so
  # this only triggers when the caller sets `opt$sra` itself -- confirm.
  if (isTRUE(opt[["sra"]])) {
    opt[["fastq"]] <- TRUE
  }

  # If fastq is selected an adapter sequence is mandatory.
  if (isTRUE(opt[["fastq"]]) && is.null(opt[["adapter"]])) {
    usage_error("Invalid adapter sequence (--adapter)")
  }

  opt
}

#' Create Analysis Infrastructure
#'
#' Creates the directory infrastructure for an analysis run and starts its
#' log files.
#'
#' Ten step folders (`step01` ... `step10`) are created under
#' `results/<opt$output>` relative to the project root reported by
#' `here::here()`. A header made of a blank separator, a rule line and the
#' current time plus time zone is then appended to the three log files
#' `<output>_log_call.txt`, `<output>_log_error.txt` and
#' `<output>_log_output.txt` inside that results folder.
#'
#' @param opt Named list of options; `opt$output` names the results folder.
#' @return `opt` with two entries added: `out_path` (the results folder) and
#'   `src_path` (the project's `inst` folder).
create_infrastructure <- function(opt) {
  # Path to result files
  opt$out_path <- here::here("results", opt$output)
  # Path with scripts
  opt$src_path <- here::here("inst")

  # One folder per analysis step, zero-padded to two digits.
  step_dirs <- fs::path(opt$out_path, sprintf("step%02d", seq_len(10)))
  fs::dir_create(step_dirs)

  # Setting log files: every run appends the same header to each of them.
  log_file <- fs::path(opt$out_path, paste0(opt$output, "_log"))
  run_header <- c(
    "\n",
    "========================================",
    paste0(Sys.time(), " ", Sys.timezone())
  )
  for (suffix in c("_call.txt", "_error.txt", "_output.txt")) {
    # The target file is passed positionally: the argument was renamed from
    # `path` to `file` across readr versions, and position works with both.
    readr::write_lines(run_header, paste0(log_file, suffix), append = TRUE)
  }

  opt
}


#' fc stands for Function Call
#'
#' Runs a command line program through `processx::run()` and appends the
#' call, its standard output and its standard error to three log files
#' (`<log_file>_call.txt`, `<log_file>_output.txt`, `<log_file>_error.txt`).
#' The logs are written before the exit status is checked, so a failing
#' command is still logged.
#'
#' @param program Program to run.
#' @param ... Arguments for `program`; they are combined with `c()` and
#'   converted to a character vector.
#' @param log_file Prefix of the log files. Defaults to
#'   `here::here("results", "log_file")`.
#' @return The result of `processx::run()`.
#'   Stops with the program's standard error as message when the exit status
#'   is not 0 or is missing.
fc <- function(program, ..., log_file = NULL) {
  # as.character() also turns "no arguments" (NULL) into character(0).
  args_vector <- as.character(c(...))
  if (is.null(log_file)) {
    log_file <- here::here("results", "log_file")
  }

  # error_on_status = FALSE: the status is checked below, after logging.
  run_output <- processx::run(
    program, args = args_vector,
    error_on_status = FALSE, spinner = TRUE, echo_cmd = TRUE
  )

  # Program followed by its arguments, as one space-separated string.
  call_output <- paste(c(program, args_vector), collapse = " ")

  # The target file is passed positionally: the argument was renamed from
  # `path` to `file` across readr versions, and position works with both.
  append_log <- function(lines, suffix) {
    readr::write_lines(lines, paste0(log_file, suffix), append = TRUE)
  }
  append_log(call_output, "_call.txt")
  append_log(
    c(paste0("Error for '", call_output, "':"), run_output$stderr),
    "_error.txt"
  )
  append_log(
    c(paste0("Output for '", call_output, "':"), run_output$stdout),
    "_output.txt"
  )

  # A missing (NA) status is treated as a failure, not as success.
  status <- run_output$status
  if (is.na(status) || status != 0L) {
    error_message <- run_output$stderr
    if (!isTRUE(nzchar(error_message))) {
      # Nothing on stderr: still report which call failed.
      error_message <- paste0(
        "'", call_output, "' failed with exit status ", status
      )
    }
    stop(error_message, call. = FALSE)
  }
  run_output
}
# luciorq/five documentation built on May 21, 2019, 2:30 a.m.