# R/preprocessing.R

#' Prepare the input reads for the downstream pipeline steps
#'
#' Depending on the flags in `opt`, dumps an SRA archive to FASTQ, runs the
#' FASTQ quality-filter / adapter-clipping / FASTA-conversion chain, and
#' stages the resulting (or user-supplied) FASTA file under `opt$out_path`.
#' Every external tool is launched through the `fc()` helper, which is
#' defined elsewhere.
#'
#' @param opt A list of options. Fields read here: `sra`, `fastq`, `fasta`,
#'   `nofilter` (logical flags), `input` (path of the input file),
#'   `out_path` (output prefix; it is concatenated directly with
#'   `"step1/..."`, so it presumably has to end with a path separator) and
#'   `adapter` (passed to `fastx_clipper -a`).
#' @return Called for its side effects. Changes made to `opt` are local to
#'   this function and are not returned.
preprocess_data <- function(opt) {
  # Handling SRA files
  if (isTRUE(opt$sra)) {
    fc(
      "",
      "fastq-dump --split-files ", opt$input
    )
    # Presumably fastq-dump writes "<input>_1.fastq" for the first mate;
    # from here on the run is treated as a FASTQ run.
    opt$input <- paste0(opt$input, "_1.fastq")
    opt$fastq <- TRUE
  }

  # Intermediate and final files produced by the FASTQ chain
  quality_fastq <- paste0(opt$out_path, "step1/trimming.quality.fastq")
  clipped_fastq <- paste0(opt$out_path, "step2/trimmed.quality.gt15.fastq")
  trimmed_fasta <- paste0(opt$out_path, "step2/trimmed_filtered_gt15.fasta")

  # FASTA file the staging step below starts from: the user's input unless
  # the FASTQ chain produces a trimmed FASTA first.
  fasta_source <- opt$input

  #  Handling FastQ files
  if (isTRUE(opt$fastq)) {
    message("# Processing FASTQ sequences... \n")
    message("Total read in fastq files: ")
    # NOTE(review): every other fc() call passes a label as its first
    # argument; here it is "grep". Confirm against fc() that this still
    # builds the intended `grep -c` command.
    fc(
      "grep",
      " -c \"^@\" ", opt$input
    )

    fc(
      "Running Quality Filter...\n",
      "fastq_quality_filter -Q 33 -q 15 -p 60 ",
      "-i ", opt$input,
      " -o ", quality_fastq
    )

    fc(
      "Trimming adaper sequences... \n",
      "fastx_clipper -Q 33 -l 17 -c ",
      "-i ", quality_fastq, " ",
      "-a ", opt$adapter,
      " -o ", clipped_fastq
    )

    fc(
      "Converting fastq to fasta... \n",
      "fastq_to_fasta -Q 33 ",
      "-i ", clipped_fastq, " ",
      "-o ", trimmed_fasta
    )

    fc(
      "Number of trimmed reads: ",
      "grep -c \">\" ", trimmed_fasta
    )

    # The FASTQ chain ends in a FASTA file, so continue with the FASTA
    # handling, but starting from that trimmed file. Copying `opt$input`
    # here would overwrite the trimmed FASTA with the raw FASTQ reads.
    fasta_source <- trimmed_fasta
    opt$fasta <- TRUE
  }

  #  Handling FASTA sequences
  if (isTRUE(opt$fasta)) {
    message("# Loading FASTA file ... \n")
    if (isTRUE(opt$nofilter)) {
      fc(
        "\n",
        "cp ", fasta_source, " ", opt$out_path,
        "step4/unmappedVectorBacters.fasta"
      )
    } else if (!identical(fasta_source, trimmed_fasta)) {
      # Skipped after a FASTQ run: the trimmed FASTA is already in place and
      # `cp` refuses to copy a file onto itself.
      fc(
        "\n",
        "cp ", fasta_source, " ", trimmed_fasta
      )
    }
  }
}
# luciorq/five documentation built on May 21, 2019, 2:30 a.m.