inst/master_script.R

# =============================================================================
# Header

# Loading libraries
devtools::load_all(here::here())
# library(five)

# CLI arguments
opt_parser <- five::parse_arguments()

# parse parameters
opt <- optparse::parse_args(opt_parser)

# Handle parameters
opt <- five::handle_arguments(opt)

# Check if docker is available
opt <- five::is_docker_available(opt)

# defining and cleaning variables
# for development
# =========DEVELOPMENT_start=================
if (isTRUE(Sys.getenv("RSTUDIO") == "1")) {
  # Interactive Development parameters
  opt <- five::assign_test(opt)
  # Sys.getenv("PATH")
}

# Print option values
five::print_opt(opt)
# =========DEVELOPMENT_end===================

# =============================================================================
# Start pipeline

message("Running FIVE\n")

# Create directory infrasctructure

opt <- five::create_infrastructure(opt)

# Maybe use step counter as opt$step_counter ??
step_count <- 0

# Initial parameters description
message(
  c(
    "#####################", "\n",
    "# Input file: ", opt$input, "\n",
    "# Reference: ", opt$ref, "\n",
    "# Hash: ", opt$hash, "\n",
    "# Start: ", opt$si, "\n",
    "# End: ", opt$se, "\n",
    "#####################", "\n"
  )
)

# Prepare bowtie indexes
# Checking if all bowtie indexes exist and blast dbs are formatted
#
# ================================ EXPERIMENTAL ================================
message("\nPreparation step - build indexes \n")
five::build_bowtie_indexes(opt)

build_blast_database <- function(opt) {
  fc("perl", "-c")
}
build_blast_database(opt)



# Preprocessing sequencing libraries
# If necessary download library from SRA
five::preprocess_data(opt)

# Mapping steps

# Map agains reference
five::map_against_reference_genome(opt)

five::map_against_bacterial_genomes(opt)


# selecting filtered sequences by size

# Filter reads by size range from FASTA file
filter_reads_by_size <- function(opt, min, max) {
  fc(
    "",
    "python ", fs::path(opt$src_path, "filter_fasta_by_size.py"),
      opt$out_path, "step4/unmappedVectorBacters.fasta ", min, " ", max, " ",
      opt$out_path, paste0("step4/unmapped_trimmed_filtered_", min, "_", max, ".fasta")
  )
}

 message("[FILTER UNMAPPED SEQUENCES BY SIZE (21NT)] \n")
 filter_reads_by_size(opt, opt$si, opt$se)

fc(
  "[FILTER UNMAPPED SEQUENCES BY SIZE (21NT)] \n",
  "python ", opt$src_path, "filter_fasta_by_size.py ",
  opt$out_path, "step4/unmappedVectorBacters.fasta ", opt$si, " ", opt$se, " ",
  opt$out_path, "step4/unmapped_trimmed_filtered.fasta"
)
message(paste0(
  "Reads ranging from ", opt$si, "nt to ", opt$se, "nt: ",
  system(paste0(
    "grep \"^>\" ",
    opt$out_path, "step4/unmapped_trimmed_filtered.fasta | wc -l"
  ),
  intern = TRUE
  )
), "\n")

fc(
  "[FILTER UNMAPPED SEQUENCES BY SIZE (20-23NT)]\n",
  "python ", opt$src_path, "filter_fasta_by_size.py ",
  opt$out_path, "step4/unmappedVectorBacters.fasta 20 23 ",
  opt$out_path, "step4/unmapped_trimmed_filtered.20to23.fasta"
)
message(paste0(
  "Reads ranging from 20nt to 23nt: ",
  system(paste0(
    "grep \"^>\" ", opt$out_path,
    "step4/unmapped_trimmed_filtered.20to23.fasta | wc -l"
  ),
  intern = TRUE
  )
), "\n")

fc(
  "[FILTER UNMAPPED SEQUENCES BY SIZE (21-29NT)]\n",
  "python ", opt$src_path, "filter_fasta_by_size.py ",
  opt$out_path, "step4/unmappedVectorBacters.fasta 24 30 ",
  opt$out_path, "step4/unmapped_trimmed_filtered.21to29.fasta"
)
message(paste0(
  "Reads ranging from 24nt to 30nt: ",
  system(paste0(
    "grep \"^>\" ",
    opt$out_path, "step4/unmapped_trimmed_filtered.21to29.fasta | wc -l"
  ),
  intern = TRUE
  )
), "\n")

if (isTRUE(opt$deg)) {
  fc(
    "[FILTER UNMAPPED SEQUENCES BY SIZE (16-30NT)]\n",
    "python ", opt$src_path, "filter_fasta_by_size.py ",
    opt$out_path, "step4/unmappedVectorBacters.fasta 15 30 ",
    opt$out_path, "step4/unmapped_trimmed_filtered.16to30.fasta"
  )
  message(paste0(
    "Reads ranging from 15nt to 30nt: ",
    system(paste0(
      "grep \"^>\" ",
      opt$out_path, "step4/unmapped_trimmed_filtered.16to30.fasta | wc -l"
    ),
    intern = TRUE
    )
  ), "\n")
}

######################################################
# Running velvet optmiser (automatically defined hash)#
######################################################

message("\n#[RUNNING VELVET OPTIMIZER]\n")

fc(
  "#Running step 6 [ Assemble unmapped 21 nt - velvetOptimser.pl ]\n",
  "VelvetOptimiser.pl ",
  "--d step5_opt_run1 ",
  "--t ", opt$threads,
  " --s 13 --e 19 ",
  "--f '-short -fasta ", opt$out_path, "step4/unmapped_trimmed_filtered.fasta' ",
  "--a "
)

fc(
  "\t#Running step 6_1 [ merging unmmaped 21nt reads and assembled contigs ]\n",
  "cat ", opt$out_path, "step4/unmapped_trimmed_filtered.fasta ",
  "step5_opt_run1/contigs.fa > ",
  "step5_opt_run1/unmapped21Andcontigs.fasta"
)

fc(
  "\t#Running step 6_4 [ Assemble run 1 + reads - VelvetOptmiser.pl ] \n",
  "VelvetOptimiser.pl ",
  "--d step5_opt_run2 ",
  "--t ", opt$threads,
  " --s 13 --e 19 ",
  "--f '-short -fasta step5_opt_run1/unmapped21Andcontigs.fasta' ",
  "--a "
)

fc(
  "\t#Running step 6_5 [ merging assembled contigs - mergeContigs.pl ] \n",
  "perl ", opt$src_path, "mergeContigsNew.pl ",
  "-contig1 step5_opt_run1/contigs.fa ",
  "-contig2 step5_opt_run2/contigs.fa ",
  "-output ", opt$out_path, "step5/contigs.opt.fasta "
)
message(paste0(
  "Number of assembled contigs: ",
  system(paste0(
    "grep \"^>\" ",
    opt$out_path, "step5/contigs.opt.fasta | wc -l"
  ),
  intern = TRUE
  )
), "\n")

if (isTRUE(opt$deg)) {
  fc(
    "\t#Running step 6_6.1 [ Assemble run 1 + reads - velvetOptmiser.pl  - 16-30 ] \n",
    "VelvetOptimiser.pl ",
    "--d step5_opt_run3 ",
    "--t ", opt$threads,
    " --s 15 --e 25 ",
    "--f '-short -fasta ", opt$out_path, "step4/unmapped_trimmed_filtered.16to30.fasta' ",
    "--a "
  )
}

###############################################
# Running velvet (fixed hash)                  #
###############################################
message("\n[RUNNING DEFAULT VELVET]\n")
fc(
  "\t#Running step 6 [ Assemble unmapped 21 nt - velvet hash ]\n",
  "velveth ",
  opt$out_path, "step5_fix_run1 ", opt$hash,
  " -fasta -short ", opt$out_path, "step4/unmapped_trimmed_filtered.fasta"
)

fc(
  "\n",
  "velvetg ",
  opt$out_path, "step5_fix_run1 -exp_cov auto -cov_cutoff auto"
)

fc(
  "\n\nRunning step 6_1 [ concatenando reads and contigs - cat ]\n",
  "cat ", opt$out_path, "step4/unmapped_trimmed_filtered.fasta ",
  opt$out_path, "step5_fix_run1/contigs.fa > ",
  opt$out_path, "step5_fix_run1/unmapped21Andcontigs.fasta"
)

fc(
  "\n",
  "velveth ",
  opt$out_path, "step5_fix_run2 ", opt$hash,
  " -fasta -short ", opt$out_path, "step5_fix_run1/unmapped21Andcontigs.fasta"
)

fc(
  "\n",
  "velvetg ",
  opt$out_path, "step5_fix_run2 -exp_cov 20 -cov_cutoff auto"
)

fc(
  "\t#Running step 6_5 [ merge assemblies - mergeContigs.pl ] \n",
  "perl ", opt$src_path, "mergeContigsNew.pl ",
  "-contig1 ", opt$out_path, "step5_fix_run1/contigs.fa ",
  "-contig2 ", opt$out_path, "step5_fix_run2/contigs.fa ",
  "-output ", opt$out_path, "step5/contigs.fix.fasta "
)
message(paste0(
  "Number of assembled contigs: ",
  system(paste0(
    "grep \"^>\" ",
    opt$out_path, "step5/contigs.fix.fasta | wc -l"
  ),
  intern = TRUE
  )
), "\n")

###############################################
# Running velvet optmiser (FIXED hash)         #
###############################################

message("\n[VELVET OPTIMISER HASH ONLY 15]\n")
fc(
  "\t#Running step 6_6 [ Assemble unmapped 21 nt - velvetOptimser.pl ]\n",
  "VelvetOptimiser.pl ",
  "--d step5_opt_fix_run1 ",
  "--t ", opt$threads, " --s ", opt$hash, " --e ", opt$hash, " ",
  "--f '-short -fasta ", opt$out_path, "step4/unmapped_trimmed_filtered.fasta' --a"
)

fc(
  "\t#Running step 6_7 [ concatenando reads and contigs - cat ]\n",
  "cat ", opt$out_path, "step4/unmapped_trimmed_filtered.fasta ",
  "step5_opt_fix_run1/contigs.fa > ",
  "step5_opt_fix_run1/unmapped21Andcontigs.fasta"
)

fc(
  "\t#Running step 6_8 [ Assemble run 1 + reads - velvetOptmiser.pl ] \n",
  "VelvetOptimiser.pl ",
  "--d step5_opt_fix_run2 ",
  "--t ", opt$threads, " --s ", opt$hash, " --e ", opt$hash, " ",
  " --f '-short -fasta step5_opt_fix_run1/unmapped21Andcontigs.fasta' --a"
)

fc(
  "\t#Running step 6_9 [ merge assemblies - mergeContigs.pl ] \n",
  "perl ", opt$src_path, "mergeContigsNew.pl ",
  "-contig1 step5_opt_fix_run1/contigs.fa ",
  "-contig2 step5_opt_fix_run2/contigs.fa ",
  "-output ", opt$out_path, "step5/contigs.fix_opt.fasta "
)
message(paste0(
  "Number of assembled contigs: ",
  system(paste0(
    "grep \"^>\" ",
    opt$out_path, "step5/contigs.fix_opt.fasta | wc -l"
  ),
  intern = TRUE
  )
), "\n")

##############################################
# Running velvet optmiser (FIXED hash) 20-23  #
##############################################

message("\n[VELVET OPTIMISER HASH ONLY 15 - 20-23nt]\n")
fc(
  "\t#Running step 6_10 [ Assemble unmapped 20-23nt nt - velvetOptimser.pl ]\n",
  "VelvetOptimiser.pl ",
  "--d step5_opt_20to23_run1 ",
  "--t ", opt$threads, " --s ", opt$hash, " --e ", opt$hash, " ",
  "--f '-short -fasta ", opt$out_path, "step4/unmapped_trimmed_filtered.20to23.fasta' --a "
)

fc(
  "\t#Running step 6_11 [ concatenando reads and contigs - cat ]\n",
  "cat ", opt$out_path, "step4/unmapped_trimmed_filtered.20to23.fasta ",
  "step5_opt_20to23_run1/contigs.fa > ",
  "step5_opt_20to23_run1/unmapped21Andcontigs.fasta"
)

fc(
  "\t#Running step 6_12 [ Assemble run 1 + reads - velvetOptmiser.pl ] \n",
  "VelvetOptimiser.pl ",
  "--d step5_opt_20to23_run2 ",
  "--t ", opt$threads, " --s ", opt$hash, " --e ", opt$hash, " ",
  "--f '-short -fasta step5_opt_20to23_run1/unmapped21Andcontigs.fasta' --a "
)

fc(
  "\t#Running step 6_13 [ merge assemblies - mergeContigs.pl ] \n",
  "perl ", opt$src_path, "mergeContigsNew.pl ",
  "-contig1 step5_opt_20to23_run1/contigs.fa ",
  "-contig2 step5_opt_20to23_run2/contigs.fa ",
  "-output ", opt$out_path, "step5/contigs.20to23.fasta "
)
fc(
  "\t#Running step 6_9 [ merge assemblies - mergeContigs.pl ] \n",
  "perl ", opt$src_path, "mergeContigsNew.pl ",
  "-contig1 step5_opt_fix_run1/contigs.fa ",
  "-contig2 step5_opt_fix_run2/contigs.fa ",
  "-output ", opt$out_path, "step5/contigs.fix_hash.fasta "
)
message(paste0(
  "Number of assembled contigs: ",
  system(paste0(
    "grep \"^>\" ",
    opt$out_path, "step5/contigs.fix_hash.fasta | wc -l"
  ),
  intern = TRUE
  )
), "\n")

if (isTRUE(opt$deg)) {
  ##############################################
  # Running velvet optmiser (hash 17) 21-29     #
  ##############################################
  message("\n[VELVET OPTIMISER - 21-29nt]\n")
  fc(
    "\t#Running step 6_10 [ Assemble unmapped 21-29nt nt - velvetOptimser.pl ]\n",
    "VelvetOptimiser.pl ",
    "--d step5_opt_21to29_run1 ",
    "--t ", opt$threads, " --s 17 --e 17 ",
    "--f '-short -fasta ", opt$out_path, "step4/unmapped_trimmed_filtered.21to29.fasta' --a "
  )

  fc(
    "\t#Running step 6_11 [ concatenando reads and contigs - cat ]\n",
    "cat ", opt$out_path, "step4/unmapped_trimmed_filtered.21to29.fasta ",
    "step5_opt_21to29_run1/contigs.fa > ",
    "step5_opt_21to29_run1/unmapped21Andcontigs.fasta"
  )

  fc(
    "\t#Running step 6_12 [ Assemble run 1 + reads - velvetOptmiser.pl ] \n",
    "VelvetOptimiser.pl ",
    "--d step5_opt_21to29_run2 ",
    "--t ", opt$threads, " --s 17 --e 17 ",
    "--f '-short -fasta step5_opt_21to29_run1/unmapped21Andcontigs.fasta' --a "
  )

  fc(
    "\t#Running step 6_13 [ merge assemblies - mergeContigs.pl ] \n",
    "perl ", opt$src_path, "mergeContigsNew.pl ",
    "-contig1 step5_opt_21to29_run1/contigs.fa ",
    "-contig2 step5_opt_21to29_run2/contigs.fa ",
    "-output ", opt$out_path, "step5/contigs.21to29.fasta "
  )
  message(paste0(
    "Number of assembled contigs: ",
    system(paste0(
      "grep \"^>\" ",
      opt$out_path, "step5/contigs.21to29.fasta | wc -l"
    ),
    intern = TRUE
    )
  ), "\n")
}



###############################
# Merging assemblies           #
###############################
if (!isTRUE(opt$deg)) {
  ## URGENT
  ## there still some other files missing, should check '$step5_opt/run3/contigs.fa'
  fc(
    "\n",
    "touch ", opt$out_path, "step5/contigs.21to29.fasta"
  )
  if (!dir.exists("step5_opt_run3")) {
    dir.create("step5_opt_run3")
  }
  fc(
    "\n",
    "touch step5_opt_run3/contigs.fa"
  )
}
if (!dir.exists("step5_opt_run3")) {
  dir.create("step5_opt_run3")
}
if (!file.exists("step5_opt_run3/contigs.fa")) {
  fc(
    "\n",
    "touch step5_opt_run3/contigs.fa"
  )
}


message("\n[MERGING CONTIGS AND RUNNING CAP3]\n")
fc(
  "\t#Running STEP [CAT] Concatenating contigs...\n",
  "cat ",
  "step5_opt_run3/contigs.fa ",
  opt$out_path, "step5/contigs.20to23.fasta ",
  opt$out_path, "step5/contigs.fix_hash.fasta ",
  opt$out_path, "step5/contigs.fix.fasta ",
  opt$out_path, "step5/contigs.opt.fasta ",
  opt$out_path, "step5/contigs.fix_opt.fasta ",
  opt$out_path, "step5/contigs.21to29.fasta > ",
  opt$out_path, "step5/all_contigs.fasta"
)
message(paste0(
  "Number of merged contigs: ",
  system(paste0(
    "grep \"^>\" ",
    opt$out_path, "step5/all_contigs.fasta | wc -l"
  ),
  intern = TRUE
  )
), "\n")

fc(
  "\t#Running step CAP3 [ assembly contigs from different assemblies ] \n",
  "cap3 ",
  opt$out_path, "step5/all_contigs.fasta  > ",
  opt$out_path, "step5/log.cap3"
)

fc(
  "\t#Running [ Merging contigs and singlets from CAP3]\n",
  "cat ",
  opt$out_path, "step5/all_contigs.fasta.cap.contigs ",
  opt$out_path, "step5/all_contigs.fasta.cap.singlets > ",
  opt$out_path, "step5/contigs_merged.final.fasta"
)
message(paste0(
  "Number of assembled contigs: ",
  system(paste0(
    "grep \"^>\" ",
    opt$out_path, "step5/contigs_merged.final.fasta | wc -l"
  ),
  intern = TRUE
  )
), "\n")

fc(
  "\t#Running step 6_7 [ selecting contigs larger than n50 - calcN50.pl ] \n",
  "perl ", opt$src_path, "fixIDcap3Contigs.pl ",
  "-i ", opt$out_path, "step5/contigs_merged.final.fasta ",
  "-s 50  ",
  "-p ", opt$out_path, "step5/contigs_merged.final"
)
message(paste0(
  "Number of assembled contigs larger than n50: ",
  system(paste0(
    "grep \"^>\" ",
    opt$out_path, "step5/contigs_merged.final.gt50.fasta | wc -l"
  ),
  intern = TRUE
  )
), "\n")

################################
# Blastn contigs               #
################################
message("\n[BlastN contigs gt N50]\n")
fc(
  "\t#Running step 10_111 [ Blast against NT - blastall -p blastn ]\n",
  "blastall -p blastn ",
  "-i ", opt$out_path, "step5/contigs_merged.final.gt50.fasta ",
  "-d ", opt$nt, " -v 5 -b 5 -e 1e-5 ",
  "-o ", opt$out_path, "step6/contigs_merged.final.gt50.1e5.blastn  ",
  "-a ", opt$threads, ""
)

fc(
  "\t#Running filterblast...\n",
  "perl ", opt$src_path, "filterblast.pl -b ",
  opt$out_path, "step6/contigs_merged.final.gt50.1e5.blastn ",
  "-evalue 1e-5 --best --desc > ",
  opt$out_path, "step7/contigs.blastn.1e5.report"
)
# system(paste0("cat ",opt$out_path,"step7/contigs.blastn.1e5.report | wc -l"))

fc(
  "\n",
  "perl ", opt$src_path, "analyzeContigsFilterBlastn.pl ",
  "-i ", opt$out_path, "step7/contigs.blastn.1e5.report  ",
  "-f ", opt$out_path, "step5/contigs_merged.final.gt50.fasta -q '' ",
  "-p ", opt$out_path, "step7/contigs.n.analyze --fasta > ",
  opt$out_path, "step7/contigs.blastn.analyze"
)

fc(
  "\n",
  "perl ", opt$src_path, "analyzeContigsFilterBlastn.pl ",
  "-i ", opt$out_path, "step7/contigs.blastn.1e5.report  ",
  "-f ", opt$out_path, "step5/contigs_merged.final.gt50.fasta -q 'virus' ",
  "-p ", opt$out_path, "step7/contigs.analyze --fasta > ",
  opt$out_path, "step7/contigs.blastn.virus.analyze"
)
message(paste0(
  "Number of contigs with hit to viral nucleotide sequences: ",
  system(paste0(
    "grep \"^>\" ",
    opt$out_path, "step7/contigs.analyze.virus.contigs.fasta | wc -l"
  ),
  intern = TRUE
  )
), "\n")

fc(
  "\n[Extracting contigs no hit blastn 1e-5]\n",
  "perl ", opt$src_path, "extractSequencesNoHitBlast.pl ",
  "-seq ", opt$out_path, "step5/contigs_merged.final.gt50.fasta ",
  "-blast ", opt$out_path, "step7/contigs.blastn.1e5.report ",
  "-out ", opt$out_path, "step6/seqNoHit.blastn.1e5.fasta"
)
message(paste0(
  "Number of contigs without hit to known nucleotide sequences: ",
  system(paste0(
    "grep \"^>\" ",
    opt$out_path, "step6/seqNoHit.blastn.1e5.fasta | wc -l"
  ),
  intern = TRUE
  )
), "\n")

################################
####  BLASTX                   #
################################
message("\n[BlastX contigs gt N50]\n")
fc(
  "\t#Running step 9 [ Blast against NR - blastall -p blastx ]\n",
  "blastall -p blastx ",
  "-i ", opt$out_path, "step6/seqNoHit.blastn.1e5.fasta ",
  "-d ", opt$nr, " -v 5 -b 5 -e 1 ",
  "-o ", opt$out_path, "step6/seqNoHit.blastn.blastx ",
  "-a ", opt$threads, ""
)

fc(
  "\t#Running filterblast...\n",
  "perl ", opt$src_path, "filterblast.pl ",
  "-b ", opt$out_path, "step6/seqNoHit.blastn.blastx -evalue 1e-2 --best --desc > ",
  opt$out_path, "step7/contigs.blastx.1e2.report"
)

fc(
  "\t#Running stats from blastn...\n",
  "perl ", opt$src_path, "analyzeReportFilterblast.pl ",
  "-i ", opt$out_path, "step7/contigs.blastx.1e2.report > ",
  opt$out_path, "step7/filterblastx.1e2.allHits.analyze"
)

fc(
  "\n",
  "perl ", opt$src_path, "filterBlastStats.pl ",
  "-i ", opt$out_path, "step7/contigs.blastx.1e2.report -q virus ",
  "-f ", opt$out_path, "step6/seqNoHit.blastn.1e5.fasta > ",
  opt$out_path, "step7/filterblast.1e2.allHits.stats"
)

fc(
  "\n",
  "perl ", opt$src_path, "analyzeContigsFilterBlastx.pl ",
  "-i ", opt$out_path, "step7/contigs.blastx.1e2.report  ",
  "-f ", opt$out_path, "step6/seqNoHit.blastn.1e5.fasta -q virus ",
  "-p ", opt$out_path, "step7/filterblast.virus --fasta > ",
  opt$out_path, "step7/contigs.blastx.virus.analyze"
)

fc(
  "\n",
  "perl ", opt$src_path, "analyzeContigsFilterBlastx.pl ",
  "-i ", opt$out_path, "step7/contigs.blastx.1e2.report  ",
  "-f ", opt$out_path, "step6/seqNoHit.blastn.1e5.fasta -q \"\" ",
  "-p  ", opt$out_path, "step7/contigs.x.analyze --fasta > ",
  opt$out_path, "step7/contigs.blastx.analyze"
)


fc(
  "\n[Extracting contigs no hit blastx 1e-2]\n",
  "perl ", opt$src_path, "extractSequencesNoHitBlast.pl ",
  "-seq ", opt$out_path, "step6/seqNoHit.blastn.1e5.fasta ",
  "-blast ", opt$out_path, "step7/contigs.blastx.1e2.report ",
  "-out ", opt$out_path, "step9/seqNoHit.blast.fasta"
)
message(paste0(
  "Number of contigs without homology to known protein sequences: ",
  system(paste0(
    "grep \"^>\" ",
    opt$out_path, "step9/seqNoHit.blast.fasta | wc -l"
  ),
  intern = TRUE
  )
), "\n")

####### FINAL OVERVIEW VIRAL SEQUENCES
if (!dir.exists(paste0(opt$out_path, "step_virus/"))) {
  dir.create(paste0(opt$out_path, "step_virus/"))
}
fc(
  "\n",
  "cat ",
  opt$out_path, "step7/contigs.blastx.1e2.report ",
  opt$out_path, "step7/contigs.blastn.1e5.report > ",
  opt$out_path, "step_virus/merge.blast.report"
)

fc(
  "\n",
  "grep 'virus' ", opt$out_path, "step_virus/merge.blast.report > ",
  opt$out_path, "step_virus/virus.merge.blast.report"
)

fc(
  "\n",
  "perl ", opt$src_path, "analyzeContigsFilterBlastx.pl ",
  "-i ", opt$out_path, "step_virus/merge.blast.report  ",
  "-f ", opt$out_path, "step5/contigs_merged.final.gt50.fasta -q \"\" ",
  "-p  ", opt$out_path, "step_virus/contigs.analyze --fasta > ",
  opt$out_path, "step_virus/contigs.blastn.analyze"
)
message(paste0(
  "????: ",
  system(paste0(
    "grep \"^>\" ",
    opt$out_path, "step_virus/contigs.analyze..contigs.fasta | wc -l"
  ),
  intern = TRUE
  )
), "\n")

###############################################
# Mapping contigs against filtered reads       #
###############################################

message("\n[Mapping filtered reads against all contigs]\n")

fc(
  "\t#Formatting contigs file...\n",
  "bowtie-build -f ", opt$out_path, "step9/seqNoHit.blast.fasta ",
  opt$out_path, "step9/seqNoHit.blast.fasta"
)

fc(
  "\t#Mapping reads against contigs...\n",
  "bowtie -f -S -p ", opt$threads, " -v 1 ",
  opt$out_path, "step9/seqNoHit.blast.fasta ",
  opt$out_path, "step4/unmappedVectorBacters.fasta | ",
  "awk -F'\t' '{if( $2 != 4) print $0}' > ",
  opt$out_path, "step9/contigs_no_hit_x_filtered_reads.v1.sam "
)

fc(
  "\t#Calculating statistics...\n",
  "perl ", opt$src_path, "samStatistics.pl ",
  "-sam ", opt$out_path, "step9/contigs_no_hit_x_filtered_reads.v1.sam --profile > ",
  opt$out_path, "step9/contigs_no_hit_x_filtered_reads.v1.sam.statistics"
)


## THIS IS COMPLETELY wrong
if (isTRUE(opt$clean)) {
  ## trimmed_filtered_gt15_X_vector.sam don't exist
  fc(
    "\n\n[ CLEAN LARGE INTERMEDIATE FILES ] \n\n",
    "perl ", opt$src_path, "samStatistics.pl ",
    "-sam ", opt$out_path, "step4/trimmed_filtered_gt15_X_vector.sam > ",
    opt$out_path, "step4/trimmed_filtered_gt15_X_vector.sam.stats"
  )

  ## readsXbacters.sam don't exist
  fc(
    "\n",
    "perl ", opt$src_path, "samStatistics.pl ",
    "-sam ", opt$out_path, "step4/readsXbacters.sam > ",
    opt$out_path, "step4/readsXbacters.sam.stats"
  )

  # fc("\n",
  #   "rm -rf ",opt$out_path,"step3/*.sam */run* ",opt$out_path,"step4/* ",opt$out_path,"*/*assemble*/run* ",opt$out_path,"step0/* ",opt$out_path,"step4/unmappedVectorReads.fasta ",opt$out_path,"step2/*.fasta step* error.log formatdb.log")
}


message("\n\n[[ Execution finished! ;P  ]]\n **Part I finished \n \n\n")


## pipeline_virus.classify_contigs_by_pattern
if (!dir.exists(paste0(opt$out_path, "step10/"))) {
  dir.create(paste0(opt$out_path, "step10/"))
}
### identifying origin of blast contigs
fc(
  "",
  "perl -pi -e 's/>/>viralBlastNucleotide_/g' ",
  opt$out_path, "step7/contigs.analyze.virus.contigs.fasta"
)
fc(
  "",
  "perl -pi -e 's/>/>viralBlastProtein_/g' ",
  opt$out_path, "step7/filterblast.virus.virus.contigs.fasta"
)

#### merge two files with contigs hit virus
fc(
  "",
  "cat ",
  opt$out_path, "step7/*.virus.*fasta > ",
  opt$out_path, "step10/all_contigs_hit_virus.fasta"
)

# creating reference table with identified contigs hit virus by sequence similarity
fc(
  "",
  "grep \">\" ", opt$out_path, "step10/all_contigs_hit_virus.fasta | ",
  "cut -f 2 -d '>'  > ",
  opt$out_path, "step10/all_contigs_hit_virus_sequence_similarity.tab"
)

## merge viruses by blast and no-hit contigs
fc(
  "",
  "cat ",
  opt$out_path, "step10/all_contigs_hit_virus.fasta ",
  opt$out_path, "step9/seqNoHit.blast.fasta > ",
  opt$out_path, "step10/contigs_virus_and_nohit.fasta"
)
## format for bowtie
fc(
  "",
  "bowtie-build ", opt$out_path, "step10/contigs_virus_and_nohit.fasta ",
  opt$out_path, "step10/contigs_virus_and_nohit.fasta"
)

### bowtie
fc(
  "",
  "bowtie -f -S -k 1 -p ", opt$threads, " -v 1  ",
  opt$out_path, "step10/contigs_virus_and_nohit.fasta ",
  opt$out_path, "step2/trimmed_filtered_gt15.fasta | ",
  "awk -F '\t' '{if( $2 != 4) print $0}'  > ",
  opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.sam"
)
### identifying contigs with siRNA and piRNA signature
fc(
  "identifying contigs with siRNA and piRNA signature\n",
  "perl ", opt$src_path, "samStatistics_v3.pl ",
  "-sam ", opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.sam ",
  "-fa ", opt$out_path, "step10/contigs_virus_and_nohit.fasta ",
  "-p ", opt$out_path, "step10/reads.VS.contigs_virus_and_nohit ",
  "--profile"
)


# formatting candidate contigs for bowtie
fc(
  "",
  "bowtie-build ",
 opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.withSiRNA_and_PiRNA.fasta ",
 opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.withSiRNA_and_PiRNA.fasta > /dev/null "
)
fc(
  "",
  "bowtie-build ",
 opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.withSiRNA.fasta ",
 opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.withSiRNA.fasta > /dev/null "
)


### bowtie
fc(
  "",
  "bowtie -f -S -k 1 -p ", opt$threads, " -v 1  ",
 opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.withSiRNA.fasta ",
 opt$out_path, "step2/trimmed_filtered_gt15.fasta | ",
  "awk -F '\t' '{if( $2 != 4) print $0}' > ",
 opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs.sam"
)
fc(
  "",
  "bowtie -f -S -k 1 -p ", opt$threads, " -v 1  ",
 opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.withSiRNA_and_PiRNA.fasta ",
 opt$out_path, "step2/trimmed_filtered_gt15.fasta | ",
  "awk -F '\t' '{if( $2 != 4) print $0}'  > ",
 opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.sam"
)


#############
#### PLOTS####
#############
if (isTRUE(opt$plot)) {
  ### creating folder to store plots
  if (!dir.exists(paste0(out_path, "step10/plots"))) {
    dir.create(paste0(out_path, "step10/plots"))
  }
  # plotting distribution and density plots
  # fc("",
  #   "perl ",opt$src_path,"plotMappingDataPerBasePreference.pl ",
  #     "-sam ", opt$out_path,"step10/reads.VS.contigs_virus_and_nohit.siRNAs.sam -s 15 -e 35 ",
  #     "-fa ", opt$out_path,"step10/reads.VS.contigs_virus_and_nohit.withSiRNA.fasta -pace 1 ",
  #     "-p ", opt$out_path,"step10/plots/reads.VS.contigs_virus_and_nohit.withSiRNA ",
  #     "--profile --pattern")
  # fc("",
  #   "perl ",opt$src_path,"plotMappingDataPerBasePreference.pl ",
  #     "-sam ", opt$out_path,"step10/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.sam -s 15 -e 35 ",
  #     "-fa ", opt$out_path,"step10/reads.VS.contigs_virus_and_nohit.withSiRNA_and_PiRNA.fasta -pace 1 ",
  #     "-p ", opt$out_path,"step10/plots/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs ",
  #     "--profile --pattern")


  ## calculating mean pattern
  fc(
    "",
    "perl ", opt$src_path, "calcPatternInSamFile.pl ",
    "-s ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs.sam ",
    "-o ",opt$out_path, "step10/plots/reads.VS.contigs_virus_and_nohit.siRNAs.pattern"
  )

  fc(
    "",
    "perl ", opt$src_path, "calcPatternInSamFile.pl ",
    "-s ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.sam ",
    "-o ",opt$out_path, "step10/plots/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.pattern"
  )

  # Plot geral distribution of reads in contigs with siRNA and siRNA+piRNAs
  fc(
    "",
    "perl ", opt$src_path, "plotGeralDistributionPerBaseByReads.pl ",
    "-sam ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs.sam -s 15 -e 35 ",
    "-p ",opt$out_path, "step10/plots/reads.VS.contigs_virus_and_nohit.siRNAs.geral_distribution ",
    "--plot"
  )

  fc(
    "",
    "perl ", opt$src_path, "plotGeralDistributionPerBaseByReads.pl ",
    "-sam ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.sam -s 15 -e 35 ",
    "-p ",opt$out_path, "step10/plots/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.geral_distribution ",
    "--plot"
  )


  # Generating clusterized heatmaps
  fc(
    "",
    "perl ", opt$src_path, "Z-score.bothstrands.pl ",
    "-sam ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs.sam ",
    "-p ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs"
  )

  fc(
    "",
    "perl ", opt$src_path, "Z-score.bothstrands.pl ",
    "-sam ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.sam ",
    "-p ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs"
  )

  fc(
    "",
    "R --no-save ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs.zscore.tab ",
   opt$out_path, "step10/plots/reads.VS.contigs_virus_and_nohit.siRNAs < ", opt$src_path, "heatmap_correlation_VISA.R"
  )
  fc(
    "",
    "R --no-save ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.zscore.tab ",
   opt$out_path, "step10/plots/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs < ", opt$src_path, "heatmap_correlation_VISA.R"
  )
}
### important table
# c("", opt$out_path,"step10/all_contigs_hit_virus_sequence_similarity.tab")

source(paste0(opt$opt$src_path, "build_final_table.R"))
luciorq/five documentation built on May 21, 2019, 2:30 a.m.