R/AllPairwiseAlign.R

Defines functions AllPairwiseAlign

Documented in AllPairwiseAlign

#' @title Compute all pairwise (global) alignments with VSEARCH
#' @description This function is a wrapper function to compute all pairwise (global) alignments using VSEARCH.
#' @param file path to fasta file storing sequences for which all possible pairwise alignments shall be computed.
#' @param cores number of cores that shall be used for parallel computations.
#' @param mask shall the aligned sequences be maksed? Options are: \code{mask = "none"}, \code{mask = "dust"}, \code{mask = "soft"}. 
#' @param out.name name of the output files (\code{*.uc}, \code{*_userout.txt}, \code{*.blast6out}), \code{*_fasta_pairs.fasta}, \code{*.sam}.
#' @param output path to a folder in which output shall be stored.
#' @author Hajk-Georg Drost
#' @details 
#' To be able to use this function the VSEARCH command line tool needs to be installed.
#' @return
#' A folder named \code{vsearch_pairwise_align} will be created and the following files will be stored in this output folder:
#' 
#' \itemize{
#' \item \code{*.uc} USEARCH cluster format generated by VSEARCH storing the sequence cluster information of aligned sequences.
#' \item \code{*_userout.txt} a tab separated file storing the alignment information in the columns: \code{query}, \code{target}, \code{sequence identity (ID)}.
#' \item \code{*.blast6out} BLAST output format of the pairwise (global) sequence alignments generated by VSEARCH.
#' \item \code{*_fasta_pairs.fasta} fasta file storing the alignments.
#' \item \code{*.sam} SAM file storing the alignments.
#' }
#' 
#'   
#' @references https://github.com/torognes/vsearch
#' @export

AllPairwiseAlign <- function(file, 
                      cores      = 1,
                      mask       = "none",
                      out.name   = "AllPairwiseAlign",
                      output     = NULL){
    
    if (!file.exists(file))
        stop ("AllPairwiseAlign: The file '",file,"' does not exist! Please check the path to cluster input file.", call. = FALSE)
    
    if (!is.element(mask, c("none","dust","soft")))
        stop ("Please specify either mask = 'none',  mask = 'dust', or mask = 'soft'.")
    
    if (is.null(output))
        output <- getwd()
    
    if (parallel::detectCores() < cores)
        stop ("You specified more cores than are available on this machine. Please fix...", call. = FALSE)
    
    if (!file.exists(file.path(output,"vsearch_pairwise_align")))
        dir.create(file.path(output,"vsearch_pairwise_align"))
    
    system(paste0("vsearch --allpairs_global ", ws.wrap.path(file), " \ ",
                  "--acceptall \ ",
                  "--dbmask ", mask ," \ ",
                  "--uc ", ws.wrap.path(file.path(output,"vsearch_pairwise_align", paste0(out.name,".uc"))), " \ ",
                  "--fastapairs ", ws.wrap.path(file.path(output,"vsearch_pairwise_align",paste0(out.name,"_fasta_pairs.fasta"))), " \ ",
                  "--userfields query+target+id \ ",
                  "--userout ", ws.wrap.path(file.path(output,"vsearch_pairwise_align",paste0(out.name,"_userout.txt"))), " \ ",
                  "--threads ", cores, " \ ",
                  "--samheader \ ",
                  "--samout ", ws.wrap.path(file.path(output,"vsearch_pairwise_align",paste0(out.name,".sam"))), " \ ",
                  "--blast6out ", ws.wrap.path(file.path(output,"vsearch_pairwise_align",paste0(out.name,".blast6out"))), " \ "))
    
    cat("AllPairwiseAlign output has been stored in: ",file.path(output,"vsearch_pairwise_align"))
    cat("\n")
    cat("\n")
    res.file <- readr::read_tsv(file.path(output,"vsearch_pairwise_align", paste0(out.name,".uc")), col_names = FALSE)
    colnames(res.file) <- c("query", "target", "id") 
    
    return (res.file)
}
HajkD/LTRpred documentation built on April 22, 2022, 4:35 p.m.