#' @title Cluster Sequences with VSEARCH
#' @description Cluster putative LTR transposons predicted by \code{\link{LTRpred}} using VSEARCH.
#' @param file path to predicted LTR transposons generated by \code{\link{LTRpred}} (in fasta format).
#' @param similarity reject if sequence similarity is lower than this threshold.
#' @param strand cluster using plus or both strands.
#' @param cores number of cores that shall be used for parallel computations.
#' @param out.name name of the output files (\code{*.uc}, \code{*.log}, \code{*.blast6out}).
#' @param output path to a folder in which output shall be stored.
#' @author Hajk-Georg Drost
#' @details
#' To be able to use this function the VSEARCH command line tool needs to be installed.
#' @return
#' First the following files generated by VSEARCH are stored in the output folder (default: \code{\link{getwd}}):
#'
#' \itemize{
#' \item \code{*.uc} USEARCH cluster format generated by VSEARCH storing the sequence cluster information of the input LTR transposons.
#' \item \code{*.log} a log file of the VSEARCH run.
#' \item \code{*.blast6out} BLAST output generated by VSEARCH storing the BLAST hit information of the input LTR transposons.
#' }
#'
#'
#' A USEARCH cluster format (*.uc file extension) table (see \code{\link{read.uc}} for specifications).
#' @references https://github.com/torognes/vsearch
#' @export
CLUSTpred <- function(file,
similarity = 0.9,
strand = "both",
cores = 1,
out.name = paste0(basename(file), "_CLUSTpred"),
output = NULL){
test_installation_vsearch()
message("Running CLUSTpred with ", similarity * 100, "% as sequence similarity threshold using ", cores, " cores ...")
if (!file.exists(file))
stop(
"CLUSTpred: The file '",
file,
"' does not exist! Please check the path to cluster input file.",
call. = FALSE
)
if (!is.element(strand, c("both", "plus")))
stop("Please specify either strand = 'both' or strand = 'plus'.")
if (is.null(output))
output <- getwd()
if (parallel::detectCores() < cores)
stop("You specified more cores than are available on this machine. Please fix...",
call. = FALSE)
system(
paste0(
"vsearch --cluster_fast ",
ws.wrap.path(file),
" \ ",
"--qmask none \ ",
"--uc ",
ws.wrap.path(file.path(output, paste0(out.name, ".uc"))),
" \ ",
"--id ",
similarity,
" \ ",
"--threads ",
cores,
" \ ",
"--clusterout_sort \ ",
"--clusterout_id \ ",
"--strand ",
strand,
" \ ",
"--log ",
ws.wrap.path(file.path(output, paste0(out.name, ".log"))),
" \ ",
"--blast6out ",
ws.wrap.path(file.path(output, paste0(
out.name, ".blast6out"
))),
" \ ",
"--sizeout"
)
)
message("CLUSTpred output has been stored in: ", output)
uc.file <-
read.uc(uc.file = file.path(output, paste0(out.name, ".uc")))
return(uc.file)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.