## R/oop_functions.R

##' Generates the S4 class of the parameter list
##'
##' The S4 class has some validity functions to check the input
##' parameters. Values are always accessed via
##' par_list["value"]. If no S4 class is wanted, the user could also
##' build their own list object with the given names.
##'
##' The \code{prot_info} and \code{species_info} slots are only passed
##' on when both \code{prot_info_df} and \code{species_info_df} are
##' supplied. If either one is left at its default \code{""} (or is
##' \code{NULL}, an empty character vector or an \code{NA} string),
##' neither of the two is handed to \code{new()}.
##' @title S4 class of the parameter list
##' @param index_genome_dir path to the index of the reference genome
##'   generated by Bowtie2 or Star
##' @param index_amino_dir path to the index of the amino acid
##'   reference genome generated by Pauda
##' @param qc should the quality trimming be used [true]
##' @param set_mapper which mapper should be used [bowtie]
##' @param ref_seq_file path to the reference sequence file in fasta
##'   format
##' @param prot_info_df sqlite3 database of the protein information,
##'   see \code{\link{setup_aa_info_sample_sqlite}} for setup
##' @param species_info_df sqlite3 database of the species/strain
##'   information, see \code{\link{setup_species_info_sqlite}} for
##'   setup
##' @return
##' An S4 class with the following slots [default values]
##' \describe{
##' \item{num_decoy_reads = "numeric"}{number of generated decoy reads, if decoy is true [500]}
##' \item{min_num_reads = "numeric"}{minimum number of reads per reference [50]}
##' \item{min_coverage = "numeric"}{minimum coverage of the reference by reads to keep the hit [0.05]}
##' \item{map_dna_in = "list"}{internal file storage for dna mapping}
##' \item{map_dna_stats = "list"}{internal list for DNA mapping statistics}
##' \item{map_pep_in = "list"}{internal file storage for amino mapping}
##' \item{decoy = "logical"}{should the decoy approach be run [false]}
##' \item{tax = "logical"}{should the tax trees be generated [true]}
##' \item{qc = "logical"}{should the quality control by trimmomatic be run [true]}
##' \item{clean = "logical"}{should intermediate files be deleted [true]}
##' \item{paired = "logical"}{should paired read infiles be assumed [false], but will be checked internally}
##' \item{plot = "logical"}{should the final visualization of the mapped reads be plotted [true]}
##' \item{black_list = "character"}{list of corrupt NCBI GenBank IDs [Null]}
##' \item{gen_prot_sep = "character"}{the separator, how the genebank_id is separated from the prot_id [_]}
##' \item{check_host = "logical"}{should the host be checked by blastn [true]}
##' \item{run_dna_mapping = "logical"}{should the DNA mapping be run [true]}
##' \item{run_pep_mapping = "logical"}{should the amino acid mapping be run? [true]}
##' \item{consensus = "logical"}{should the consensus of the reads be generated [true]}
##' \item{tmp_dir = "character"}{path to the temp dir [/home/temp]}
##' \item{sql_dir = "character"}{path to the sql dir [/home/sql]}
##' \item{index_genome_dir = "character"}{path to the genome index generated by Star or Bowtie2}
##' \item{index_amino_dir = "character"}{path to the amino index generated by Pauda}
##' \item{ref_seq_file = "character"}{path to the reference.fasta}
##' \item{prot_info = "tbl_dbi"}{sqlite3 database of the protein information}
##' \item{species_info = "tbl_dbi"}{sqlite3 database of the species/strain information}
##' \item{plot_id = "character"}{should only a subset of genebank_ids be plotted?}
##' \item{mapper = "character"}{should the Bowtie2 or Star mapper be used [bowtie]}
##' \item{num_plot = "numeric"}{how many plots of the TOP`num_plot` should be generated [25]}
##' \item{ncore = "numeric"}{how many cores should be used}
##' \item{pdf_file = "character"}{name of the final plot pdf file}
##' }
##' @author Jochen Kruppa
##' @export
set_par_list <- function(index_genome_dir,
                         index_amino_dir,
                         qc = TRUE,
                         set_mapper = "bowtie",
                         ref_seq_file = "",
                         prot_info_df = "",
                         species_info_df = "") {
  ## An info table counts as "not supplied" when it is NULL or still a
  ## character placeholder holding an empty or NA string (the default
  ## is ""). Only character input is inspected, so a database table
  ## object is never pushed through nchar()/nzchar().
  is_unset <- function(x) {
    is.null(x) ||
      (is.character(x) &&
         (length(x) == 0L || anyNA(x) || !all(nzchar(x))))
  }

  ## Arguments shared by both variants of the constructor call.
  slots <- list(Class = "par_list",
                index_genome_dir = index_genome_dir,
                index_amino_dir = index_amino_dir,
                ref_seq_file = ref_seq_file,
                mapper = set_mapper,
                qc = qc)

  ## Both info tables are needed together; if one is absent neither is
  ## forwarded to new().
  if (!(is_unset(prot_info_df) || is_unset(species_info_df))) {
    slots$prot_info <- prot_info_df
    slots$species_info <- species_info_df
  }

  do.call(new, slots)
}

##' Many programs are needed for the run of virDisco. Here the program
##' executables are defined.
##'
##' See \url{https://github.com/jkruppa/virDisco} for a list of
##' program sources.
##' @title Path to the external program executables
##' @param bowtie_dir Path to Bowtie2 DNA mapper
##' @param pauda_dir Path to PAUDA - a poor man's BLASTX
##' @param star_dir Path to Star mapper [optional]; when omitted, no
##'   value is passed for the \code{star} slot
##' @param bowtie_build_dir Path to the Bowtie2 build dir
##' @param pauda_build_dir Path to the Bowtie2 build dir for PAUDA
##' @param samtools_dir Path to Samtools
##' @param trimmomatic_dir Path to Trimmomatic
##' @param pandaseq_dir Path to PANDAseq
##' @param seqtk_dir Path to Seqtk
##' @param blastn_dir Path to the BLAST executables
##' @param ete3_dir Path to the ETE Toolkit
##' @return An S4 object of class \code{program_list} with the slots
##'   \code{bowtie2}, \code{pauda}, \code{star}, \code{bowtie2_build},
##'   \code{pauda_build}, \code{samtools}, \code{trimmomatic},
##'   \code{pandaseq}, \code{seqtk}, \code{blastn} and \code{ete3}
##'   filled from the corresponding arguments
##' @author Jochen Kruppa
##' @export
set_program_list <- function(bowtie_dir,
                             pauda_dir,
                             star_dir,
                             bowtie_build_dir,
                             pauda_build_dir,
                             samtools_dir,
                             trimmomatic_dir,
                             pandaseq_dir,
                             seqtk_dir,
                             blastn_dir,
                             ete3_dir) {
  ## Mandatory executables; evaluating the list raises the usual
  ## "argument is missing" error if one of them was not supplied.
  slots <- list(Class = "program_list",
                bowtie2 = bowtie_dir,
                pauda = pauda_dir,
                bowtie2_build = bowtie_build_dir,
                pauda_build = pauda_build_dir,
                samtools = samtools_dir,
                trimmomatic = trimmomatic_dir,
                pandaseq = pandaseq_dir,
                seqtk = seqtk_dir,
                blastn = blastn_dir,
                ete3 = ete3_dir)

  ## The Star mapper is documented as optional, so it is only forwarded
  ## when the caller actually supplied it. `[<-` with a list keeps an
  ## explicitly passed NULL instead of dropping the element.
  if (!missing(star_dir)) {
    slots["star"] <- list(star_dir)
  }

  do.call(new, slots)
}
## jkruppa/viralDetectTools documentation built on May 30, 2019, 3:41 p.m.