## This code is part of the megaptera package
## © C. Heibl 2014 (last update 2019-09-17)
#' @title Create an Object of Class "megapteraPars"
#' @description S4 Class for parameters of a megaptera project pipeline, as
#'   stored in \code{\link{megapteraProj}}.
#' @param ... Arguments in \code{tag = value} form. The tags must come from the
#'   names of the parameters described in the `Pipeline Parameters' section.
#'   Unnamed arguments and unknown tags raise an error.
#' @section Pipeline Parameters:
#' \describe{
#' \item{\code{data.path}}{A character string giving the path to the directory
#' where all data and results will be stored (see
#' \code{\link{megapteraInit}}). Must be given and must point to an existing
#' directory.}
#' \item{\code{gb.seq.download}}{A character string defining how sequences
#' should be downloaded from GenBank Nucleotide; can be \code{"eutils"} or
#' \code{"ftp"} (default: \code{"eutils"}).}
#' \item{\code{debug.level}}{Numeric, a number between 0 and 5, determining the
#' pipeline's verbosity (see Details). (default: 1)}
#' \item{\code{parallel}}{Logical: if \code{TRUE}, several steps in the
#' pipeline will be run in parallel, otherwise all steps are serial (default:
#' \code{FALSE}).}
#' \item{\code{cpus}}{Numeric, the number of CPUs to be used for parallel
#' execution. Must be set to a non-zero value if \code{parallel = TRUE}
#' (default: 0).}
#' \item{\code{cluster.type}}{A character string giving the type of cluster
#' to be used for parallel execution: \code{"SOCK"}, \code{"MPI"},
#' \code{"PVM"}, or \code{"NWS"}. Must be set if \code{parallel = TRUE}
#' (default: \code{"none"}).}
#' \item{\code{update.seqs}}{\emph{Currently unused}.}
#' \item{\code{retmax}}{Numeric, giving the batch size when downloading
#' sequences from the Entrez History server (default: 500).}
#' \item{\code{max.gi.per.spec}}{Numeric, giving the maximum number of
#' sequences that will be used per species. Can be used to avoid model
#' organisms (e.g., rice, \emph{Drosophila}, ...) cluttering up the pipeline
#' with thousands of sequences (default: 1000).}
#' \item{\code{max.bp}}{Numeric, the maximal length of DNA sequences in base
#' pairs to be included in the alignment. The upper limit is determined by the
#' alignment program and the specific alignment and can only be determined by
#' trial-and-error (default: 5000).}
#' \item{\code{reference.max.dist}}{\emph{Currently unused}.}
#' \item{\code{min.seqs.reference}}{\emph{Currently unused}.}
#' \item{\code{fract.miss}}{Numeric, ranging between 0 and 1. To avoid long
#' stretches of only a few sequences at the beginning and the ending of an
#' alignment block a minimum required number of sequences can be set as a
#' fraction of the total number of sequences in this alignment block. Has been
#' superseded by the \code{gb.*} parameters.}
#' \item{\code{block.max.dist}}{Numeric, ranging between 0 and 1.
#' \code{block.max.dist} gives the maximum genetic distance (measured as the
#' fraction of divergent nucleotide positions) allowed in a sequence alignment
#' block. The alignment of an individual marker is iteratively broken into
#' smaller blocks until this condition is met.}
#' \item{\code{min.n.seq}}{Numeric, the minimum number of sequences required
#' for an alignment block. Alignment blocks with fewer than \code{min.n.seq}
#' sequences are dropped from the output.}
#' \item{\code{max.mad}}{Numeric, giving the threshold value for the
#' assessment of saturation: alignments with a median average distance (MAD)
#' of \code{max.mad} or greater will be broken into blocks. The default value
#' has been estimated with simulation by Smith et al. (2009).}
#' \item{\code{gb1}}{Parameters for masking of alignment blocks with
#' \code{\link{gblocks}}.}
#' \item{\code{gb2}}{Parameters for masking of alignment blocks with
#' \code{\link{gblocks}}.}
#' \item{\code{gb3}}{Parameters for masking of alignment blocks with
#' \code{\link{gblocks}}.}
#' \item{\code{gb4}}{Parameters for masking of alignment blocks with
#' \code{\link{gblocks}}.}
#' \item{\code{gb5}}{Parameters for masking of alignment blocks with
#' \code{\link{gblocks}}.}
#' }
#' @return An object of class \code{"megapteraPars"} holding the default
#'   parameter values overridden by the values supplied in \code{...}.
#' @references Smith, S.A., J.M. Beaulieu, and M.J. Donoghue. 2009.
#'   Mega-phylogeny approach for comparative biology: an alternative to
#'   supertree and supermatrix approaches. \emph{BMC Evolutionary Biology}
#'   \bold{9}:37.
#' @details The pipeline's verbosity can be fine-tuned with \code{debug.level}:
#'   \tabular{ll}{ 0 \tab No progress or diagnostic messages\cr 1 \tab Messages
#'   on screen\cr 2 \tab Messages logged to file\cr 3 \tab Messages on screen
#'   and logged to file\cr 4 \tab Same as 3, in addition current data is saved
#'   as .rda object in case of a foreseeable error\cr 5 \tab Same as 4, in
#'   addition current data is always saved\cr }
#' @seealso \code{\link{megapteraProj}} for creating a megaptera project.
#' @include megapteraPars-class.R
#' @importFrom methods new slotNames
#' @export
megapteraPars <- function(...) {
  ## Default values; they double as the list of admissible parameter names.
  params <- list(
    data.path = "undefined",
    gb.seq.download = "eutils",
    debug.level = 1,
    parallel = FALSE,
    cpus = 0,
    cluster.type = "none",
    update.seqs = "all",
    retmax = 500,
    max.gi.per.spec = 1000,
    max.bp = 5000,
    reference.max.dist = 0.25,
    min.seqs.reference = 10,
    fract.miss = .25,
    block.max.dist = .5, # step G
    min.n.seq = 5,       # step G
    max.mad = .01,
    gb1 = .5,            # step F + G
    gb2 = .5,            # step F + G
    gb3 = 9999,          # step F + G
    gb4 = 2,             # step F + G
    gb5 = "a"            # step F + G
  )

  ## Override the defaults with the user-supplied values.
  args <- list(...)
  if (length(args) > 0) {
    arg_names <- names(args)
    ## Unnamed arguments cannot be matched to a parameter; reject them
    ## instead of dropping them silently.
    if (is.null(arg_names) || !all(nzchar(arg_names))) {
      stop("arguments must be given in 'tag = value' form")
    }
    notDef <- setdiff(arg_names, names(params))
    if (length(notDef) > 0) {
      stop("parameter '", notDef[1], "' is not defined")
    }
    ## `[<-` with a list value keeps the element even if a value is NULL;
    ## with duplicated tags the last value wins.
    params[arg_names] <- args
  }

  ## data.path must be a single string that leads to an existing directory.
  if (!is.character(params$data.path) || length(params$data.path) != 1L ||
      is.na(params$data.path)) {
    stop("data.path must be a single character string")
  }
  if (params$data.path == "undefined") {
    stop("data.path must be defined")
  }
  if (!dir.exists(params$data.path)) {
    stop("data.path '", params$data.path, "' does not exist")
  }

  ## Parallel execution needs both a CPU count and a cluster type.
  if (params$parallel && params$cpus == 0) {
    stop("number of CPUs must be given")
  }
  if (params$parallel && params$cluster.type == "none") {
    stop("type of cluster must be given: 'SOCK', 'MPI', 'PVM', or 'NWS'")
  }

  ## `params` holds exactly one entry per parameter, so it can be handed to
  ## new() as a whole instead of being repeated slot by slot.
  do.call("new", c(list("megapteraPars"), params))
}
## Display method for objects of class "megapteraPars": prints a header line
## followed by one `name = value` line per slot, with the slot names
## right-aligned so that the equal signs line up.
setMethod("show",
          signature(object = "megapteraPars"),
          function(object) {
            slot_names <- slotNames(object)
            ## vapply() guarantees exactly one string per slot; slots holding
            ## more than one value are collapsed into a comma-separated list.
            values <- vapply(
              slot_names,
              function(nm) {
                paste(as.character(slot(object, nm)), collapse = ", ")
              },
              character(1)
            )
            labels <- format(slot_names, justify = "right")
            ## writeLines() terminates every line, including the last one, so
            ## the console prompt starts on a fresh line afterwards.
            writeLines(c("MEGAPTERA pipeline parameters:",
                         paste0("  ", labels, " = ", values)))
          }
)
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.