# =============================================================================
# Header
# Loading libraries
devtools::load_all(here::here())
# library(five)
# CLI arguments
opt_parser <- five::parse_arguments()
# parse parameters
opt <- optparse::parse_args(opt_parser)
# Handle parameters
opt <- five::handle_arguments(opt)
# Check if docker is available
opt <- five::is_docker_available(opt)
# defining and cleaning variables
# for development
# =========DEVELOPMENT_start=================
if (isTRUE(Sys.getenv("RSTUDIO") == "1")) {
# Interactive Development parameters
opt <- five::assign_test(opt)
# Sys.getenv("PATH")
}
# Print option values
five::print_opt(opt)
# =========DEVELOPMENT_end===================
# =============================================================================
# Start pipeline
message("Running FIVE\n")
# Create directory infrasctructure
opt <- five::create_infrastructure(opt)
# Maybe use step counter as opt$step_counter ??
step_count <- 0
# Initial parameters description
message(
c(
"#####################", "\n",
"# Input file: ", opt$input, "\n",
"# Reference: ", opt$ref, "\n",
"# Hash: ", opt$hash, "\n",
"# Start: ", opt$si, "\n",
"# End: ", opt$se, "\n",
"#####################", "\n"
)
)
# Prepare bowtie indexes
# Checking if all bowtie indexes exist and blast dbs are formatted
#
# ================================ EXPERIMENTAL ================================
message("\nPreparation step - build indexes \n")
five::build_bowtie_indexes(opt)
build_blast_database <- function(opt) {
fc("perl", "-c")
}
build_blast_database(opt)
# Preprocessing sequencing libraries
# If necessary download library from SRA
five::preprocess_data(opt)
# Mapping steps
# Map agains reference
five::map_against_reference_genome(opt)
five::map_against_bacterial_genomes(opt)
# selecting filtered sequences by size
# Filter reads by size range from FASTA file
filter_reads_by_size <- function(opt, min, max) {
fc(
"",
"python ", fs::path(opt$src_path, "filter_fasta_by_size.py"),
opt$out_path, "step4/unmappedVectorBacters.fasta ", min, " ", max, " ",
opt$out_path, paste0("step4/unmapped_trimmed_filtered_", min, "_", max, ".fasta")
)
}
message("[FILTER UNMAPPED SEQUENCES BY SIZE (21NT)] \n")
filter_reads_by_size(opt, opt$si, opt$se)
fc(
"[FILTER UNMAPPED SEQUENCES BY SIZE (21NT)] \n",
"python ", opt$src_path, "filter_fasta_by_size.py ",
opt$out_path, "step4/unmappedVectorBacters.fasta ", opt$si, " ", opt$se, " ",
opt$out_path, "step4/unmapped_trimmed_filtered.fasta"
)
message(paste0(
"Reads ranging from ", opt$si, "nt to ", opt$se, "nt: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step4/unmapped_trimmed_filtered.fasta | wc -l"
),
intern = TRUE
)
), "\n")
fc(
"[FILTER UNMAPPED SEQUENCES BY SIZE (20-23NT)]\n",
"python ", opt$src_path, "filter_fasta_by_size.py ",
opt$out_path, "step4/unmappedVectorBacters.fasta 20 23 ",
opt$out_path, "step4/unmapped_trimmed_filtered.20to23.fasta"
)
message(paste0(
"Reads ranging from 20nt to 23nt: ",
system(paste0(
"grep \"^>\" ", opt$out_path,
"step4/unmapped_trimmed_filtered.20to23.fasta | wc -l"
),
intern = TRUE
)
), "\n")
fc(
"[FILTER UNMAPPED SEQUENCES BY SIZE (21-29NT)]\n",
"python ", opt$src_path, "filter_fasta_by_size.py ",
opt$out_path, "step4/unmappedVectorBacters.fasta 24 30 ",
opt$out_path, "step4/unmapped_trimmed_filtered.21to29.fasta"
)
message(paste0(
"Reads ranging from 24nt to 30nt: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step4/unmapped_trimmed_filtered.21to29.fasta | wc -l"
),
intern = TRUE
)
), "\n")
if (isTRUE(opt$deg)) {
fc(
"[FILTER UNMAPPED SEQUENCES BY SIZE (16-30NT)]\n",
"python ", opt$src_path, "filter_fasta_by_size.py ",
opt$out_path, "step4/unmappedVectorBacters.fasta 15 30 ",
opt$out_path, "step4/unmapped_trimmed_filtered.16to30.fasta"
)
message(paste0(
"Reads ranging from 15nt to 30nt: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step4/unmapped_trimmed_filtered.16to30.fasta | wc -l"
),
intern = TRUE
)
), "\n")
}
######################################################
# Running velvet optmiser (automatically defined hash)#
######################################################
message("\n#[RUNNING VELVET OPTIMIZER]\n")
fc(
"#Running step 6 [ Assemble unmapped 21 nt - velvetOptimser.pl ]\n",
"VelvetOptimiser.pl ",
"--d step5_opt_run1 ",
"--t ", opt$threads,
" --s 13 --e 19 ",
"--f '-short -fasta ", opt$out_path, "step4/unmapped_trimmed_filtered.fasta' ",
"--a "
)
fc(
"\t#Running step 6_1 [ merging unmmaped 21nt reads and assembled contigs ]\n",
"cat ", opt$out_path, "step4/unmapped_trimmed_filtered.fasta ",
"step5_opt_run1/contigs.fa > ",
"step5_opt_run1/unmapped21Andcontigs.fasta"
)
fc(
"\t#Running step 6_4 [ Assemble run 1 + reads - VelvetOptmiser.pl ] \n",
"VelvetOptimiser.pl ",
"--d step5_opt_run2 ",
"--t ", opt$threads,
" --s 13 --e 19 ",
"--f '-short -fasta step5_opt_run1/unmapped21Andcontigs.fasta' ",
"--a "
)
fc(
"\t#Running step 6_5 [ merging assembled contigs - mergeContigs.pl ] \n",
"perl ", opt$src_path, "mergeContigsNew.pl ",
"-contig1 step5_opt_run1/contigs.fa ",
"-contig2 step5_opt_run2/contigs.fa ",
"-output ", opt$out_path, "step5/contigs.opt.fasta "
)
message(paste0(
"Number of assembled contigs: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step5/contigs.opt.fasta | wc -l"
),
intern = TRUE
)
), "\n")
if (isTRUE(opt$deg)) {
fc(
"\t#Running step 6_6.1 [ Assemble run 1 + reads - velvetOptmiser.pl - 16-30 ] \n",
"VelvetOptimiser.pl ",
"--d step5_opt_run3 ",
"--t ", opt$threads,
" --s 15 --e 25 ",
"--f '-short -fasta ", opt$out_path, "step4/unmapped_trimmed_filtered.16to30.fasta' ",
"--a "
)
}
###############################################
# Running velvet (fixed hash) #
###############################################
message("\n[RUNNING DEFAULT VELVET]\n")
fc(
"\t#Running step 6 [ Assemble unmapped 21 nt - velvet hash ]\n",
"velveth ",
opt$out_path, "step5_fix_run1 ", opt$hash,
" -fasta -short ", opt$out_path, "step4/unmapped_trimmed_filtered.fasta"
)
fc(
"\n",
"velvetg ",
opt$out_path, "step5_fix_run1 -exp_cov auto -cov_cutoff auto"
)
fc(
"\n\nRunning step 6_1 [ concatenando reads and contigs - cat ]\n",
"cat ", opt$out_path, "step4/unmapped_trimmed_filtered.fasta ",
opt$out_path, "step5_fix_run1/contigs.fa > ",
opt$out_path, "step5_fix_run1/unmapped21Andcontigs.fasta"
)
fc(
"\n",
"velveth ",
opt$out_path, "step5_fix_run2 ", opt$hash,
" -fasta -short ", opt$out_path, "step5_fix_run1/unmapped21Andcontigs.fasta"
)
fc(
"\n",
"velvetg ",
opt$out_path, "step5_fix_run2 -exp_cov 20 -cov_cutoff auto"
)
fc(
"\t#Running step 6_5 [ merge assemblies - mergeContigs.pl ] \n",
"perl ", opt$src_path, "mergeContigsNew.pl ",
"-contig1 ", opt$out_path, "step5_fix_run1/contigs.fa ",
"-contig2 ", opt$out_path, "step5_fix_run2/contigs.fa ",
"-output ", opt$out_path, "step5/contigs.fix.fasta "
)
message(paste0(
"Number of assembled contigs: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step5/contigs.fix.fasta | wc -l"
),
intern = TRUE
)
), "\n")
###############################################
# Running velvet optmiser (FIXED hash) #
###############################################
message("\n[VELVET OPTIMISER HASH ONLY 15]\n")
fc(
"\t#Running step 6_6 [ Assemble unmapped 21 nt - velvetOptimser.pl ]\n",
"VelvetOptimiser.pl ",
"--d step5_opt_fix_run1 ",
"--t ", opt$threads, " --s ", opt$hash, " --e ", opt$hash, " ",
"--f '-short -fasta ", opt$out_path, "step4/unmapped_trimmed_filtered.fasta' --a"
)
fc(
"\t#Running step 6_7 [ concatenando reads and contigs - cat ]\n",
"cat ", opt$out_path, "step4/unmapped_trimmed_filtered.fasta ",
"step5_opt_fix_run1/contigs.fa > ",
"step5_opt_fix_run1/unmapped21Andcontigs.fasta"
)
fc(
"\t#Running step 6_8 [ Assemble run 1 + reads - velvetOptmiser.pl ] \n",
"VelvetOptimiser.pl ",
"--d step5_opt_fix_run2 ",
"--t ", opt$threads, " --s ", opt$hash, " --e ", opt$hash, " ",
" --f '-short -fasta step5_opt_fix_run1/unmapped21Andcontigs.fasta' --a"
)
fc(
"\t#Running step 6_9 [ merge assemblies - mergeContigs.pl ] \n",
"perl ", opt$src_path, "mergeContigsNew.pl ",
"-contig1 step5_opt_fix_run1/contigs.fa ",
"-contig2 step5_opt_fix_run2/contigs.fa ",
"-output ", opt$out_path, "step5/contigs.fix_opt.fasta "
)
message(paste0(
"Number of assembled contigs: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step5/contigs.fix_opt.fasta | wc -l"
),
intern = TRUE
)
), "\n")
##############################################
# Running velvet optmiser (FIXED hash) 20-23 #
##############################################
message("\n[VELVET OPTIMISER HASH ONLY 15 - 20-23nt]\n")
fc(
"\t#Running step 6_10 [ Assemble unmapped 20-23nt nt - velvetOptimser.pl ]\n",
"VelvetOptimiser.pl ",
"--d step5_opt_20to23_run1 ",
"--t ", opt$threads, " --s ", opt$hash, " --e ", opt$hash, " ",
"--f '-short -fasta ", opt$out_path, "step4/unmapped_trimmed_filtered.20to23.fasta' --a "
)
fc(
"\t#Running step 6_11 [ concatenando reads and contigs - cat ]\n",
"cat ", opt$out_path, "step4/unmapped_trimmed_filtered.20to23.fasta ",
"step5_opt_20to23_run1/contigs.fa > ",
"step5_opt_20to23_run1/unmapped21Andcontigs.fasta"
)
fc(
"\t#Running step 6_12 [ Assemble run 1 + reads - velvetOptmiser.pl ] \n",
"VelvetOptimiser.pl ",
"--d step5_opt_20to23_run2 ",
"--t ", opt$threads, " --s ", opt$hash, " --e ", opt$hash, " ",
"--f '-short -fasta step5_opt_20to23_run1/unmapped21Andcontigs.fasta' --a "
)
fc(
"\t#Running step 6_13 [ merge assemblies - mergeContigs.pl ] \n",
"perl ", opt$src_path, "mergeContigsNew.pl ",
"-contig1 step5_opt_20to23_run1/contigs.fa ",
"-contig2 step5_opt_20to23_run2/contigs.fa ",
"-output ", opt$out_path, "step5/contigs.20to23.fasta "
)
fc(
"\t#Running step 6_9 [ merge assemblies - mergeContigs.pl ] \n",
"perl ", opt$src_path, "mergeContigsNew.pl ",
"-contig1 step5_opt_fix_run1/contigs.fa ",
"-contig2 step5_opt_fix_run2/contigs.fa ",
"-output ", opt$out_path, "step5/contigs.fix_hash.fasta "
)
message(paste0(
"Number of assembled contigs: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step5/contigs.fix_hash.fasta | wc -l"
),
intern = TRUE
)
), "\n")
if (isTRUE(opt$deg)) {
##############################################
# Running velvet optmiser (hash 17) 21-29 #
##############################################
message("\n[VELVET OPTIMISER - 21-29nt]\n")
fc(
"\t#Running step 6_10 [ Assemble unmapped 21-29nt nt - velvetOptimser.pl ]\n",
"VelvetOptimiser.pl ",
"--d step5_opt_21to29_run1 ",
"--t ", opt$threads, " --s 17 --e 17 ",
"--f '-short -fasta ", opt$out_path, "step4/unmapped_trimmed_filtered.21to29.fasta' --a "
)
fc(
"\t#Running step 6_11 [ concatenando reads and contigs - cat ]\n",
"cat ", opt$out_path, "step4/unmapped_trimmed_filtered.21to29.fasta ",
"step5_opt_21to29_run1/contigs.fa > ",
"step5_opt_21to29_run1/unmapped21Andcontigs.fasta"
)
fc(
"\t#Running step 6_12 [ Assemble run 1 + reads - velvetOptmiser.pl ] \n",
"VelvetOptimiser.pl ",
"--d step5_opt_21to29_run2 ",
"--t ", opt$threads, " --s 17 --e 17 ",
"--f '-short -fasta step5_opt_21to29_run1/unmapped21Andcontigs.fasta' --a "
)
fc(
"\t#Running step 6_13 [ merge assemblies - mergeContigs.pl ] \n",
"perl ", opt$src_path, "mergeContigsNew.pl ",
"-contig1 step5_opt_21to29_run1/contigs.fa ",
"-contig2 step5_opt_21to29_run2/contigs.fa ",
"-output ", opt$out_path, "step5/contigs.21to29.fasta "
)
message(paste0(
"Number of assembled contigs: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step5/contigs.21to29.fasta | wc -l"
),
intern = TRUE
)
), "\n")
}
###############################
# Merging assemblies #
###############################
if (!isTRUE(opt$deg)) {
## URGENT
## there still some other files missing, should check '$step5_opt/run3/contigs.fa'
fc(
"\n",
"touch ", opt$out_path, "step5/contigs.21to29.fasta"
)
if (!dir.exists("step5_opt_run3")) {
dir.create("step5_opt_run3")
}
fc(
"\n",
"touch step5_opt_run3/contigs.fa"
)
}
if (!dir.exists("step5_opt_run3")) {
dir.create("step5_opt_run3")
}
if (!file.exists("step5_opt_run3/contigs.fa")) {
fc(
"\n",
"touch step5_opt_run3/contigs.fa"
)
}
message("\n[MERGING CONTIGS AND RUNNING CAP3]\n")
fc(
"\t#Running STEP [CAT] Concatenating contigs...\n",
"cat ",
"step5_opt_run3/contigs.fa ",
opt$out_path, "step5/contigs.20to23.fasta ",
opt$out_path, "step5/contigs.fix_hash.fasta ",
opt$out_path, "step5/contigs.fix.fasta ",
opt$out_path, "step5/contigs.opt.fasta ",
opt$out_path, "step5/contigs.fix_opt.fasta ",
opt$out_path, "step5/contigs.21to29.fasta > ",
opt$out_path, "step5/all_contigs.fasta"
)
message(paste0(
"Number of merged contigs: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step5/all_contigs.fasta | wc -l"
),
intern = TRUE
)
), "\n")
fc(
"\t#Running step CAP3 [ assembly contigs from different assemblies ] \n",
"cap3 ",
opt$out_path, "step5/all_contigs.fasta > ",
opt$out_path, "step5/log.cap3"
)
fc(
"\t#Running [ Merging contigs and singlets from CAP3]\n",
"cat ",
opt$out_path, "step5/all_contigs.fasta.cap.contigs ",
opt$out_path, "step5/all_contigs.fasta.cap.singlets > ",
opt$out_path, "step5/contigs_merged.final.fasta"
)
message(paste0(
"Number of assembled contigs: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step5/contigs_merged.final.fasta | wc -l"
),
intern = TRUE
)
), "\n")
fc(
"\t#Running step 6_7 [ selecting contigs larger than n50 - calcN50.pl ] \n",
"perl ", opt$src_path, "fixIDcap3Contigs.pl ",
"-i ", opt$out_path, "step5/contigs_merged.final.fasta ",
"-s 50 ",
"-p ", opt$out_path, "step5/contigs_merged.final"
)
message(paste0(
"Number of assembled contigs larger than n50: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step5/contigs_merged.final.gt50.fasta | wc -l"
),
intern = TRUE
)
), "\n")
################################
# Blastn contigs #
################################
message("\n[BlastN contigs gt N50]\n")
fc(
"\t#Running step 10_111 [ Blast against NT - blastall -p blastn ]\n",
"blastall -p blastn ",
"-i ", opt$out_path, "step5/contigs_merged.final.gt50.fasta ",
"-d ", opt$nt, " -v 5 -b 5 -e 1e-5 ",
"-o ", opt$out_path, "step6/contigs_merged.final.gt50.1e5.blastn ",
"-a ", opt$threads, ""
)
fc(
"\t#Running filterblast...\n",
"perl ", opt$src_path, "filterblast.pl -b ",
opt$out_path, "step6/contigs_merged.final.gt50.1e5.blastn ",
"-evalue 1e-5 --best --desc > ",
opt$out_path, "step7/contigs.blastn.1e5.report"
)
# system(paste0("cat ",opt$out_path,"step7/contigs.blastn.1e5.report | wc -l"))
fc(
"\n",
"perl ", opt$src_path, "analyzeContigsFilterBlastn.pl ",
"-i ", opt$out_path, "step7/contigs.blastn.1e5.report ",
"-f ", opt$out_path, "step5/contigs_merged.final.gt50.fasta -q '' ",
"-p ", opt$out_path, "step7/contigs.n.analyze --fasta > ",
opt$out_path, "step7/contigs.blastn.analyze"
)
fc(
"\n",
"perl ", opt$src_path, "analyzeContigsFilterBlastn.pl ",
"-i ", opt$out_path, "step7/contigs.blastn.1e5.report ",
"-f ", opt$out_path, "step5/contigs_merged.final.gt50.fasta -q 'virus' ",
"-p ", opt$out_path, "step7/contigs.analyze --fasta > ",
opt$out_path, "step7/contigs.blastn.virus.analyze"
)
message(paste0(
"Number of contigs with hit to viral nucleotide sequences: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step7/contigs.analyze.virus.contigs.fasta | wc -l"
),
intern = TRUE
)
), "\n")
fc(
"\n[Extracting contigs no hit blastn 1e-5]\n",
"perl ", opt$src_path, "extractSequencesNoHitBlast.pl ",
"-seq ", opt$out_path, "step5/contigs_merged.final.gt50.fasta ",
"-blast ", opt$out_path, "step7/contigs.blastn.1e5.report ",
"-out ", opt$out_path, "step6/seqNoHit.blastn.1e5.fasta"
)
message(paste0(
"Number of contigs without hit to known nucleotide sequences: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step6/seqNoHit.blastn.1e5.fasta | wc -l"
),
intern = TRUE
)
), "\n")
################################
#### BLASTX #
################################
message("\n[BlastX contigs gt N50]\n")
fc(
"\t#Running step 9 [ Blast against NR - blastall -p blastx ]\n",
"blastall -p blastx ",
"-i ", opt$out_path, "step6/seqNoHit.blastn.1e5.fasta ",
"-d ", opt$nr, " -v 5 -b 5 -e 1 ",
"-o ", opt$out_path, "step6/seqNoHit.blastn.blastx ",
"-a ", opt$threads, ""
)
fc(
"\t#Running filterblast...\n",
"perl ", opt$src_path, "filterblast.pl ",
"-b ", opt$out_path, "step6/seqNoHit.blastn.blastx -evalue 1e-2 --best --desc > ",
opt$out_path, "step7/contigs.blastx.1e2.report"
)
fc(
"\t#Running stats from blastn...\n",
"perl ", opt$src_path, "analyzeReportFilterblast.pl ",
"-i ", opt$out_path, "step7/contigs.blastx.1e2.report > ",
opt$out_path, "step7/filterblastx.1e2.allHits.analyze"
)
fc(
"\n",
"perl ", opt$src_path, "filterBlastStats.pl ",
"-i ", opt$out_path, "step7/contigs.blastx.1e2.report -q virus ",
"-f ", opt$out_path, "step6/seqNoHit.blastn.1e5.fasta > ",
opt$out_path, "step7/filterblast.1e2.allHits.stats"
)
fc(
"\n",
"perl ", opt$src_path, "analyzeContigsFilterBlastx.pl ",
"-i ", opt$out_path, "step7/contigs.blastx.1e2.report ",
"-f ", opt$out_path, "step6/seqNoHit.blastn.1e5.fasta -q virus ",
"-p ", opt$out_path, "step7/filterblast.virus --fasta > ",
opt$out_path, "step7/contigs.blastx.virus.analyze"
)
fc(
"\n",
"perl ", opt$src_path, "analyzeContigsFilterBlastx.pl ",
"-i ", opt$out_path, "step7/contigs.blastx.1e2.report ",
"-f ", opt$out_path, "step6/seqNoHit.blastn.1e5.fasta -q \"\" ",
"-p ", opt$out_path, "step7/contigs.x.analyze --fasta > ",
opt$out_path, "step7/contigs.blastx.analyze"
)
fc(
"\n[Extracting contigs no hit blastx 1e-2]\n",
"perl ", opt$src_path, "extractSequencesNoHitBlast.pl ",
"-seq ", opt$out_path, "step6/seqNoHit.blastn.1e5.fasta ",
"-blast ", opt$out_path, "step7/contigs.blastx.1e2.report ",
"-out ", opt$out_path, "step9/seqNoHit.blast.fasta"
)
message(paste0(
"Number of contigs without homology to known protein sequences: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step9/seqNoHit.blast.fasta | wc -l"
),
intern = TRUE
)
), "\n")
####### FINAL OVERVIEW VIRAL SEQUENCES
if (!dir.exists(paste0(opt$out_path, "step_virus/"))) {
dir.create(paste0(opt$out_path, "step_virus/"))
}
fc(
"\n",
"cat ",
opt$out_path, "step7/contigs.blastx.1e2.report ",
opt$out_path, "step7/contigs.blastn.1e5.report > ",
opt$out_path, "step_virus/merge.blast.report"
)
fc(
"\n",
"grep 'virus' ", opt$out_path, "step_virus/merge.blast.report > ",
opt$out_path, "step_virus/virus.merge.blast.report"
)
fc(
"\n",
"perl ", opt$src_path, "analyzeContigsFilterBlastx.pl ",
"-i ", opt$out_path, "step_virus/merge.blast.report ",
"-f ", opt$out_path, "step5/contigs_merged.final.gt50.fasta -q \"\" ",
"-p ", opt$out_path, "step_virus/contigs.analyze --fasta > ",
opt$out_path, "step_virus/contigs.blastn.analyze"
)
message(paste0(
"????: ",
system(paste0(
"grep \"^>\" ",
opt$out_path, "step_virus/contigs.analyze..contigs.fasta | wc -l"
),
intern = TRUE
)
), "\n")
###############################################
# Mapping contigs against filtered reads #
###############################################
message("\n[Mapping filtered reads against all contigs]\n")
fc(
"\t#Formatting contigs file...\n",
"bowtie-build -f ", opt$out_path, "step9/seqNoHit.blast.fasta ",
opt$out_path, "step9/seqNoHit.blast.fasta"
)
fc(
"\t#Mapping reads against contigs...\n",
"bowtie -f -S -p ", opt$threads, " -v 1 ",
opt$out_path, "step9/seqNoHit.blast.fasta ",
opt$out_path, "step4/unmappedVectorBacters.fasta | ",
"awk -F'\t' '{if( $2 != 4) print $0}' > ",
opt$out_path, "step9/contigs_no_hit_x_filtered_reads.v1.sam "
)
fc(
"\t#Calculating statistics...\n",
"perl ", opt$src_path, "samStatistics.pl ",
"-sam ", opt$out_path, "step9/contigs_no_hit_x_filtered_reads.v1.sam --profile > ",
opt$out_path, "step9/contigs_no_hit_x_filtered_reads.v1.sam.statistics"
)
## THIS IS COMPLETELY wrong
if (isTRUE(opt$clean)) {
## trimmed_filtered_gt15_X_vector.sam don't exist
fc(
"\n\n[ CLEAN LARGE INTERMEDIATE FILES ] \n\n",
"perl ", opt$src_path, "samStatistics.pl ",
"-sam ", opt$out_path, "step4/trimmed_filtered_gt15_X_vector.sam > ",
opt$out_path, "step4/trimmed_filtered_gt15_X_vector.sam.stats"
)
## readsXbacters.sam don't exist
fc(
"\n",
"perl ", opt$src_path, "samStatistics.pl ",
"-sam ", opt$out_path, "step4/readsXbacters.sam > ",
opt$out_path, "step4/readsXbacters.sam.stats"
)
# fc("\n",
# "rm -rf ",opt$out_path,"step3/*.sam */run* ",opt$out_path,"step4/* ",opt$out_path,"*/*assemble*/run* ",opt$out_path,"step0/* ",opt$out_path,"step4/unmappedVectorReads.fasta ",opt$out_path,"step2/*.fasta step* error.log formatdb.log")
}
message("\n\n[[ Execution finished! ;P ]]\n **Part I finished \n \n\n")
## pipeline_virus.classify_contigs_by_pattern
if (!dir.exists(paste0(opt$out_path, "step10/"))) {
dir.create(paste0(opt$out_path, "step10/"))
}
### identifying origin of blast contigs
fc(
"",
"perl -pi -e 's/>/>viralBlastNucleotide_/g' ",
opt$out_path, "step7/contigs.analyze.virus.contigs.fasta"
)
fc(
"",
"perl -pi -e 's/>/>viralBlastProtein_/g' ",
opt$out_path, "step7/filterblast.virus.virus.contigs.fasta"
)
#### merge two files with contigs hit virus
fc(
"",
"cat ",
opt$out_path, "step7/*.virus.*fasta > ",
opt$out_path, "step10/all_contigs_hit_virus.fasta"
)
# creating reference table with identified contigs hit virus by sequence similarity
fc(
"",
"grep \">\" ", opt$out_path, "step10/all_contigs_hit_virus.fasta | ",
"cut -f 2 -d '>' > ",
opt$out_path, "step10/all_contigs_hit_virus_sequence_similarity.tab"
)
## merge viruses by blast and no-hit contigs
fc(
"",
"cat ",
opt$out_path, "step10/all_contigs_hit_virus.fasta ",
opt$out_path, "step9/seqNoHit.blast.fasta > ",
opt$out_path, "step10/contigs_virus_and_nohit.fasta"
)
## format for bowtie
fc(
"",
"bowtie-build ", opt$out_path, "step10/contigs_virus_and_nohit.fasta ",
opt$out_path, "step10/contigs_virus_and_nohit.fasta"
)
### bowtie
fc(
"",
"bowtie -f -S -k 1 -p ", opt$threads, " -v 1 ",
opt$out_path, "step10/contigs_virus_and_nohit.fasta ",
opt$out_path, "step2/trimmed_filtered_gt15.fasta | ",
"awk -F '\t' '{if( $2 != 4) print $0}' > ",
opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.sam"
)
### identifying contigs with siRNA and piRNA signature
fc(
"identifying contigs with siRNA and piRNA signature\n",
"perl ", opt$src_path, "samStatistics_v3.pl ",
"-sam ", opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.sam ",
"-fa ", opt$out_path, "step10/contigs_virus_and_nohit.fasta ",
"-p ", opt$out_path, "step10/reads.VS.contigs_virus_and_nohit ",
"--profile"
)
# formatting candidate contigs for bowtie
fc(
"",
"bowtie-build ",
opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.withSiRNA_and_PiRNA.fasta ",
opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.withSiRNA_and_PiRNA.fasta > /dev/null "
)
fc(
"",
"bowtie-build ",
opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.withSiRNA.fasta ",
opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.withSiRNA.fasta > /dev/null "
)
### bowtie
fc(
"",
"bowtie -f -S -k 1 -p ", opt$threads, " -v 1 ",
opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.withSiRNA.fasta ",
opt$out_path, "step2/trimmed_filtered_gt15.fasta | ",
"awk -F '\t' '{if( $2 != 4) print $0}' > ",
opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs.sam"
)
fc(
"",
"bowtie -f -S -k 1 -p ", opt$threads, " -v 1 ",
opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.withSiRNA_and_PiRNA.fasta ",
opt$out_path, "step2/trimmed_filtered_gt15.fasta | ",
"awk -F '\t' '{if( $2 != 4) print $0}' > ",
opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.sam"
)
#############
#### PLOTS####
#############
if (isTRUE(opt$plot)) {
### creating folder to store plots
if (!dir.exists(paste0(out_path, "step10/plots"))) {
dir.create(paste0(out_path, "step10/plots"))
}
# plotting distribution and density plots
# fc("",
# "perl ",opt$src_path,"plotMappingDataPerBasePreference.pl ",
# "-sam ", opt$out_path,"step10/reads.VS.contigs_virus_and_nohit.siRNAs.sam -s 15 -e 35 ",
# "-fa ", opt$out_path,"step10/reads.VS.contigs_virus_and_nohit.withSiRNA.fasta -pace 1 ",
# "-p ", opt$out_path,"step10/plots/reads.VS.contigs_virus_and_nohit.withSiRNA ",
# "--profile --pattern")
# fc("",
# "perl ",opt$src_path,"plotMappingDataPerBasePreference.pl ",
# "-sam ", opt$out_path,"step10/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.sam -s 15 -e 35 ",
# "-fa ", opt$out_path,"step10/reads.VS.contigs_virus_and_nohit.withSiRNA_and_PiRNA.fasta -pace 1 ",
# "-p ", opt$out_path,"step10/plots/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs ",
# "--profile --pattern")
## calculating mean pattern
fc(
"",
"perl ", opt$src_path, "calcPatternInSamFile.pl ",
"-s ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs.sam ",
"-o ",opt$out_path, "step10/plots/reads.VS.contigs_virus_and_nohit.siRNAs.pattern"
)
fc(
"",
"perl ", opt$src_path, "calcPatternInSamFile.pl ",
"-s ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.sam ",
"-o ",opt$out_path, "step10/plots/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.pattern"
)
# Plot geral distribution of reads in contigs with siRNA and siRNA+piRNAs
fc(
"",
"perl ", opt$src_path, "plotGeralDistributionPerBaseByReads.pl ",
"-sam ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs.sam -s 15 -e 35 ",
"-p ",opt$out_path, "step10/plots/reads.VS.contigs_virus_and_nohit.siRNAs.geral_distribution ",
"--plot"
)
fc(
"",
"perl ", opt$src_path, "plotGeralDistributionPerBaseByReads.pl ",
"-sam ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.sam -s 15 -e 35 ",
"-p ",opt$out_path, "step10/plots/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.geral_distribution ",
"--plot"
)
# Generating clusterized heatmaps
fc(
"",
"perl ", opt$src_path, "Z-score.bothstrands.pl ",
"-sam ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs.sam ",
"-p ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs"
)
fc(
"",
"perl ", opt$src_path, "Z-score.bothstrands.pl ",
"-sam ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.sam ",
"-p ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs"
)
fc(
"",
"R --no-save ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs.zscore.tab ",
opt$out_path, "step10/plots/reads.VS.contigs_virus_and_nohit.siRNAs < ", opt$src_path, "heatmap_correlation_VISA.R"
)
fc(
"",
"R --no-save ",opt$out_path, "step10/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs.zscore.tab ",
opt$out_path, "step10/plots/reads.VS.contigs_virus_and_nohit.siRNAs_and_piRNAs < ", opt$src_path, "heatmap_correlation_VISA.R"
)
}
### important table
# c("", opt$out_path,"step10/all_contigs_hit_virus_sequence_similarity.tab")
source(paste0(opt$opt$src_path, "build_final_table.R"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.