R/generateBashScript.R

##' This function generates a bash script that runs RAxML on each of
##' the replicated dataset.
##'
##' For now, the bash script generated by this function is not very
##' elegant (to say the least), but it gets the job done.
##'
##' If you want to use this function by yourself, the recommended
##' usage is to call it from the directory above where your alignments
##' are stored.
##'
##' The seed provided or generated is used in all the calls.
##'
##' @title Generate a bash script
##' @param path character string indicating the path (i.e., directory)
##' where the simulated datasets on which to run the likelihood
##' searches are stored.
##' @param output character string indicating the file name (and path
##' if needed) of where the script should be written.
##' @param partfile character string indicating the name of the
##' partition file to be used by RAxML during the likelihood
##' estimations on the simulated data sets (WARNING: this is *NOT* the
##' same as the partition file you might have used in your original
##' anlysis, instead use the path for the file that was generated by
##' \code{\link{finalizeAlignments}}).
##' @param model character string indicating the model of molecular
##' evolution to be used during the likelihood estimation on the
##' replicated datasets (should be the same as the one used for the
##' original analyses).
##' @param prefix character string indicating the prefix used to name
##' the individual RAxML runs, for instance if
##' \code{prefix="best"}, the RAxML run on the first replicated
##' dataset will be named: \code{RAxML_info.best-rep001}.
##' @param tree If \code{NULL} or missing, RAxML will look for the
##' \sQuote{best} tree (using the \sQuote{\code{-f d}} RAxML algorithm),
##' otherwise a character string indicating the file name of a tree to
##' be used as a constraint during the likelihood search (using the
##' \sQuote{\code{-f d -g}} RAxML algorithm).
##' @param seed character string indicating the seed to be used to
##' initiate the likelihood search in RAxML. If missing a 7-digit
##' random seed is generated.
##' @param raxmlCmd character string used to call RAxML. Adjust it
##' depending on your installation, and the flavor of RAxML you want
##' to use, (default is \dQuote{raxmlHPC-PTHREADS-SSE3}).
##' @param raxmlArg character string for additional arguments to be
##' passed to RAxML (default \dQuote{-T7})
##' @param overwrite What to do when a file with the same name as the
##' one specified by \code{output} already exists? if \code{TRUE} it
##' will be overwritten, otherwise an error will be thrown (default
##' \code{FALSE}).
##' @return TRUE, but really used for its side effect of generating a
##' bash script that contains the appropriate command to run a
##' likelihood search on each of the replicate dataset.
##' @author Francois Michonneau
##' @export
generateBashScript <- function(path, output, partfile, model,
                               prefix, tree, seed,
                               raxmlCmd="raxmlHPC-PTHREADS-SSE3",
                               raxmlArg="-T7", overwrite=FALSE) {
    path <- gsub("\\/$", "", path)
    model <- match.arg(model, c("GTRGAMMA", "GTRGAMMAI"))
    lAlg <- list.files(pattern="rep[0-9]+\\.phy$", path=path)
    reps <- gsub(".+-(.+)\\.phy", "\\1", lAlg)

    if (file.exists(output) && !overwrite) {
        stop("File ", output, " already exists. If overwritting is OK ",
             "use option overwrite=TRUE.")
    }

    if (!length(lAlg)) {
        stop("No alignments found in this path.")
    }

    if (missing(seed)) {
        seed <- as.integer(runif(1) * 9999999)
    }

    if (missing(tree) || is.null(tree)) {
        treecmd <- "-f d" # basic unconstrained
    }
    else {
        treecmd <- paste("-f d -g", tree)
    }

    cmd <- character(length(lAlg))
    for (i in 1:length(lAlg)) {
        cmd[i] <- paste(raxmlCmd, "-s", file.path(path, lAlg[i]),
                        "--no-bfgs", "-m", model,
                        treecmd, "-p", seed, "-q", partfile, "-n",
                        paste(prefix, reps[i], sep="-"), raxmlArg)
    }
    cat(cmd, sep="\n", file=output)
    TRUE
}
fmichonneau/rsowh documentation built on May 16, 2019, 1:47 p.m.