R/Rhisat2.R

Defines functions hisat2_version hisat2_usage hisat2_build_usage hisat2 hisat2_build

Documented in hisat2 hisat2_build hisat2_build_usage hisat2_usage hisat2_version

#' Generate HISAT2 index
#'
#' This function can be used to call the \code{hisat2-build} binary.
#'
#' All additional arguments in \code{...} are interpreted as additional
#' arguments to the HISAT2 binaries. Any flags are supposed to be represented as
#' logical values (e.g., \code{quiet=TRUE} will be translated into
#' \code{--quiet}). Parameters with additional input are supposed to be
#' character or numeric vectors, and the individual elements are collapsed into
#' a single comma-separated string (e.g., \code{k=2} is translated into
#' \code{-k 2}, \code{bmax=100} into \code{--bmax 100}). Some arguments to the
#' HISAT2 binaries will be ignored if they are already handled as explicit
#' function arguments. See the output of \code{hisat2_build_usage()} for details
#' about available parameters.
#'
#' @param references Character vector. The path to the files containing the
#'   reference sequences from which to build the HISAT2 index. With
#'   \code{strict=TRUE}, the vector must be non-empty and all files must exist.
#' @param outdir Character scalar. The path to the output directory in which to
#'   store the HISAT2 index. With \code{strict=TRUE}, the function will throw an
#'   error if the directory already exists, unless \code{force=TRUE}. The
#'   directory is created (recursively) if it does not exist, regardless of the
#'   value of \code{execute}.
#' @param prefix Character scalar. The prefix to use for the HISAT2 index files.
#' @param force Logical scalar. Whether to force overwriting of \code{outdir}.
#' @param execute Logical scalar. Whether to execute the assembled shell
#'   command. If FALSE, return a string with the command.
#' @param strict Logical scalar. Whether strict checking of input arguments
#'   should be enforced.
#' @param ... Additional arguments passed to the binaries.
#'
#' @author Charlotte Soneson, based on code from Florian Hahne.
#'
#' @export
#'
#' @references
#' Kim D, Langmead B and Salzberg SL. HISAT: a fast spliced aligner with low
#' memory requirements. Nature Methods 12:357-360 (2015).
#'
#' @return If \code{execute} is TRUE, the output generated by calling the
#'   \code{hisat2-build} binary. If \code{execute} is FALSE, the
#'   \code{hisat2-build} command. In both cases the value is returned
#'   invisibly.
#'
#' @examples
#' tmp <- tempdir()
#' refs <- list.files(system.file(package="Rhisat2", "extdata/refs"),
#'                    full.names=TRUE, pattern="\\.fa$")
#' x <- hisat2_build(references=refs, outdir=file.path(tmp, "index"),
#'                   force=TRUE)
#' head(x)
#' list.files(file.path(tmp, "index"))
#'
hisat2_build <- function(references, outdir, ..., prefix="index",
                         force=FALSE, strict=TRUE, execute=TRUE) {
    ## 'all(file.exists(character(0)))' is TRUE, so an empty vector has to be
    ## rejected explicitly; otherwise a command without any reference file
    ## would be assembled.
    if (strict && (!is.character(references) || length(references) == 0 ||
                   !all(file.exists(references)))) {
        stop("Argument 'references' has to be a character vector of names ",
             "of existing fasta files for building the sequence index.")
    }
    if (strict && (!is.character(outdir) || length(outdir) != 1)) {
        stop("Argument 'outdir' must be a character scalar giving the output ",
             "directory to store the HISAT2 indices in.")
    }
    if (strict && (file.exists(outdir) && !force)) {
        stop("Directory '", outdir, "' exists. Use 'force=TRUE' to overwrite.")
    }

    ## Make sure the target directory is there before the binary (or the
    ## caller, if execute=FALSE) uses the assembled command.
    dir.create(outdir, recursive=TRUE, showWarnings=FALSE)
    indexPrefix <- shQuote(path.expand(file.path(outdir, prefix)))

    ## hisat2-build expects: <flags> <comma-separated references> <index base>
    referenceArg <- paste(shQuote(path.expand(references)), collapse=",")
    args <- sprintf("%s %s %s", .createFlags(list(...)), referenceArg,
                    indexPrefix)

    invisible(.hisat2Bin("hisat2-build", args, execute=execute))
}

#' Align reads with HISAT2
#'
#' The function can be used to call the \code{hisat2} binary.
#'
#' All additional arguments in \code{...} are interpreted as additional
#' arguments to the HISAT2 binaries. Any flags are supposed to be represented as
#' logical values (e.g., \code{quiet=TRUE} will be translated into
#' \code{--quiet}). Parameters with additional input are supposed to be
#' character or numeric vectors, and the individual elements are collapsed into
#' a single comma-separated string (e.g., \code{k=2} is translated into
#' \code{-k 2}, \code{bmax=100} into \code{--bmax 100}). Some arguments to the
#' HISAT2 binaries will be ignored if they are already handled as explicit
#' function arguments (\code{1}, \code{2}, \code{U} and \code{x} are always
#' dropped from \code{...}). See the output of \code{hisat2_usage()} for details
#' about available parameters.
#'
#' @param sequences If \code{type} is \code{single}, a character vector of file
#'   names if the additional argument \code{c} is FALSE, otherwise a vector of
#'   read sequences. If \code{type} is \code{paired}, a length-2 list of file
#'   names or sequences, where the first list item corresponds to the first mate
#'   pair sequences, and the second list item to the second mate pair sequences.
#'   If \code{strict=FALSE} and \code{sequences} is missing or empty, no read
#'   argument is added to the command.
#' @param index Character scalar. The path+prefix of the HISAT2 index to align
#'   against (in the form \code{<path/to/index>/<prefix>}).
#' @param type Character scalar, either "single" or "paired". If \code{single},
#'   the input sequences are interpreted as single-end reads. If \code{paired},
#'   they are supposed to be paired reads.
#' @param outfile (optional) Character scalar. The path to the output file. If
#'   missing, the alignments will be returned as an R character vector.
#' @param force Logical scalar. Whether to force overwriting of \code{outfile}.
#' @param execute Logical scalar. Whether to execute the assembled shell
#'   command. If FALSE, return a string with the command.
#' @param strict Logical scalar. Whether strict checking of input arguments
#'   should be enforced. If FALSE, the checks are skipped but the command is
#'   still assembled from the supplied arguments.
#' @param ... Additional arguments passed to the binaries.
#'
#' @author Charlotte Soneson, based on code from Florian Hahne.
#'
#' @export
#'
#' @references
#' Kim D, Langmead B and Salzberg SL. HISAT: a fast spliced aligner with low
#' memory requirements. Nature Methods 12:357-360 (2015).
#'
#' @return If \code{execute} is TRUE, the output generated by calling the
#'   \code{hisat2} binary. If \code{execute} is FALSE, the \code{hisat2}
#'   command. In both cases the value is returned invisibly.
#'
#' @examples
#' tmp <- tempdir()
#' refs <- list.files(system.file("extdata/refs", package="Rhisat2"),
#'                    full.names=TRUE, pattern="\\.fa$")
#' hisat2_build(references=refs, outdir=file.path(tmp, "index"),
#'              force=TRUE, prefix="index")
#' reads <- list.files(system.file("extdata/reads", package="Rhisat2"),
#'                     full.names=TRUE, pattern="\\.fastq$")
#' hisat2(sequences=as.list(reads), index=file.path(tmp, "index/index"),
#'        type="paired", outfile=file.path(tmp, "out.sam"), force=TRUE)
#'
hisat2 <- function(sequences, index, ...,
                   type=c("single", "paired"), outfile,
                   force=FALSE, strict=TRUE, execute=TRUE) {
    type <- match.arg(type)
    args <- list(...)

    ## The read files and the index are given by the 'sequences' and 'index'
    ## arguments, so don't specify them separately.
    args <- args[setdiff(names(args), c("1", "2", "U", "x"))]

    ## Input sequences given directly as string?
    seqIn <- !is.null(args[["c"]]) && args[["c"]]

    ## Quote each entry and join them into one comma-separated shell argument.
    collapsePaths <- function(x) {
        paste(shQuote(path.expand(x)), collapse=",")
    }

    ## The read argument is assembled whether or not 'strict' is set; 'strict'
    ## only controls the validation. Previously the whole construction was
    ## skipped for strict=FALSE, which silently dropped the reads. A missing
    ## or empty 'sequences' in non-strict mode still yields no read argument.
    seqArg <- ""
    if (strict || !(missing(sequences) || length(sequences) == 0)) {
        seqArg <- switch(
            type,
            single={
                ## all(file.exists(character(0))) is TRUE, hence the explicit
                ## length check.
                if (strict && (!is.character(sequences) ||
                               length(sequences) == 0 ||
                               (!seqIn && !all(file.exists(sequences))))) {
                    stop("Argument 'sequences' has to be a character vector ",
                         "of filenames to align against the HISAT2 index or ",
                         "a character of read sequences if the additional ",
                         "argument c==TRUE.")
                }
                paste("-U", collapsePaths(sequences))
            },
            paired={
                if (strict && (!is.list(sequences) ||
                               length(sequences) != 2)) {
                    stop("Argument 'sequences' must be a list of length 2.")
                }
                mates <- character(2)
                for (i in seq_len(2)) {
                    if (strict && (!is.character(sequences[[i]]) ||
                                   length(sequences[[i]]) == 0 ||
                                   (!seqIn &&
                                    !all(file.exists(sequences[[i]]))))) {
                        stop("Argument 'sequences[[", i, "]]' has to be a ",
                             "character vector of filenames to align ",
                             "against the HISAT2 index or a character of read ",
                             "sequences if the additional argument c==TRUE.")
                    }
                    mates[i] <- paste0("-", i, " ",
                                       collapsePaths(sequences[[i]]))
                }
                paste(mates, collapse=" ")
            })
    }

    ## The length check keeps '||' from being handed a vector condition (an
    ## error in recent R versions) and reports the intended message instead.
    if (strict && (!is.character(index) || length(index) != 1 ||
                   !file.exists(dirname(index)))) {
        stop("Argument 'index' has to be a character scalar giving ",
             "the path to the index directory.")
    }

    outfile <- if (!missing(outfile)) {
        if (strict && (!is.character(outfile) || length(outfile) != 1)) {
            stop("Argument 'outfile' must be a character scalar giving the ",
                 "output file name to store the HISAT2 alignments in.")
        }
        if (strict && (file.exists(outfile) && !force)) {
            stop("File '", outfile, "' exists. Use 'force=TRUE' to overwrite.")
        }
        sprintf("-S %s", shQuote(path.expand(outfile)))
    } else {
        ""
    }

    args <- sprintf("%s -x %s %s %s", .createFlags(args),
                    shQuote(path.expand(index)), seqArg, outfile)

    invisible(.hisat2Bin("hisat2", args, execute=execute))
}

#' Print usage of hisat2-build
#'
#' Calls the \code{hisat2-build} binary with \code{--help} and prints the
#' result.
#'
#' @author Charlotte Soneson
#'
#' @export
#'
#' @return The function is called for its side effect: the usage of
#'   \code{hisat2_build} is printed to the console. Its value is whatever the
#'   \code{print} call returns.
#'
#' @examples
#' hisat2_build_usage()
hisat2_build_usage <- function() {
    usageText <- .hisat2Bin(bin="hisat2-build", args="--help", execute=TRUE)
    print(usageText)
}

#' Print usage of hisat2
#'
#' Calls the \code{hisat2} binary with \code{--help} and prints the result.
#'
#' @author Charlotte Soneson
#'
#' @export
#'
#' @return The function is called for its side effect: the usage of
#'   \code{hisat2} is printed to the console. Its value is whatever the
#'   \code{print} call returns.
#'
#' @examples
#' hisat2_usage()
hisat2_usage <- function() {
    usageText <- .hisat2Bin(bin="hisat2", args="--help", execute=TRUE)
    print(usageText)
}

#' Print HISAT2 version
#'
#' Calls the \code{hisat2} binary with \code{--version} and prints the result.
#'
#' @author Charlotte Soneson
#'
#' @export
#'
#' @return The function is called for its side effect: the version information
#'   for \code{hisat2} is printed to the console. Its value is whatever the
#'   \code{print} call returns.
#'
#' @examples
#' hisat2_version()
hisat2_version <- function() {
    versionText <- .hisat2Bin(bin="hisat2", args="--version", execute=TRUE)
    print(versionText)
}

Try the Rhisat2 package in your browser

Any scripts or data that you put into this service are public.

Rhisat2 documentation built on Nov. 8, 2020, 5:49 p.m.