#' @title Compute all pairwise (global) alignments with VSEARCH
#' @description This function is a wrapper function to compute all pairwise (global) alignments using VSEARCH.
#' @param file path to fasta file storing sequences for which all possible pairwise alignments shall be computed.
#' @param cores number of cores that shall be used for parallel computations.
#' @param mask shall the aligned sequences be maksed? Options are: \code{mask = "none"}, \code{mask = "dust"}, \code{mask = "soft"}.
#' @param out.name name of the output files (\code{*.uc}, \code{*_userout.txt}, \code{*.blast6out}), \code{*_fasta_pairs.fasta}, \code{*.sam}.
#' @param output path to a folder in which output shall be stored.
#' @author Hajk-Georg Drost
#' @details
#' To be able to use this function the VSEARCH command line tool needs to be installed.
#' @return
#' A folder named \code{vsearch_pairwise_align} will be created and the following files will be stored in this output folder:
#'
#' \itemize{
#' \item \code{*.uc} USEARCH cluster format generated by VSEARCH storing the sequence cluster information of aligned sequences.
#' \item \code{*_userout.txt} a tab separated file storing the alignment information in the columns: \code{query}, \code{target}, \code{sequence identity (ID)}.
#' \item \code{*.blast6out} BLAST output format of the pairwise (global) sequence alignments generated by VSEARCH.
#' \item \code{*_fasta_pairs.fasta} fasta file storing the alignments.
#' \item \code{*.sam} SAM file storing the alignments.
#' }
#'
#'
#' @references https://github.com/torognes/vsearch
#' @export
AllPairwiseAlign <- function(file,
cores = 1,
mask = "none",
out.name = "AllPairwiseAlign",
output = NULL){
if (!file.exists(file))
stop ("AllPairwiseAlign: The file '",file,"' does not exist! Please check the path to cluster input file.", call. = FALSE)
if (!is.element(mask, c("none","dust","soft")))
stop ("Please specify either mask = 'none', mask = 'dust', or mask = 'soft'.")
if (is.null(output))
output <- getwd()
if (parallel::detectCores() < cores)
stop ("You specified more cores than are available on this machine. Please fix...", call. = FALSE)
if (!file.exists(file.path(output,"vsearch_pairwise_align")))
dir.create(file.path(output,"vsearch_pairwise_align"))
system(paste0("vsearch --allpairs_global ", ws.wrap.path(file), " \ ",
"--acceptall \ ",
"--dbmask ", mask ," \ ",
"--uc ", ws.wrap.path(file.path(output,"vsearch_pairwise_align", paste0(out.name,".uc"))), " \ ",
"--fastapairs ", ws.wrap.path(file.path(output,"vsearch_pairwise_align",paste0(out.name,"_fasta_pairs.fasta"))), " \ ",
"--userfields query+target+id \ ",
"--userout ", ws.wrap.path(file.path(output,"vsearch_pairwise_align",paste0(out.name,"_userout.txt"))), " \ ",
"--threads ", cores, " \ ",
"--samheader \ ",
"--samout ", ws.wrap.path(file.path(output,"vsearch_pairwise_align",paste0(out.name,".sam"))), " \ ",
"--blast6out ", ws.wrap.path(file.path(output,"vsearch_pairwise_align",paste0(out.name,".blast6out"))), " \ "))
cat("AllPairwiseAlign output has been stored in: ",file.path(output,"vsearch_pairwise_align"))
cat("\n")
cat("\n")
res.file <- readr::read_tsv(file.path(output,"vsearch_pairwise_align", paste0(out.name,".uc")), col_names = FALSE)
colnames(res.file) <- c("query", "target", "id")
return (res.file)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.