R/packAlign.R

Defines functions packAlign

Documented in packAlign

#' @title
#' Global Alignment with VSEARCH
#'
#' @description
#' A global pairwise alignment of pack-TYPE elements by 
#' sequence similarity. mIt may be useful to run 
#' \code{\link{packClust}} to identify groups of similar 
#' transposable elements, before generating alignments of 
#' each group.
#'
#' @param packMatches
#' A dataframe of potential Pack-TYPE transposable elements, 
#' in the format given by \code{\link{packSearch}}. This 
#' dataframe is in the format produced by coercing a 
#' \code{link[GenomicRanges:GRanges-class]{GRanges}} 
#' object to a dataframe: \code{data.frame(GRanges)}. 
#' Will be saved as a FASTA file for VSEARCH.
#'
#' @param Genome
#' A DNAStringSet object containing sequences referred to in 
#' \code{packMatches} (the object originally used to predict 
#' the transposons \code{\link{packSearch}}).
#'
#' @param identity
#' The sequence identity of two transposable elements in 
#' \code{packMatches} required to be grouped into a cluster.
#'
#' @param threads
#' The number of threads to be used by VSEARCH.
#'
#' @param identityDefinition 
#' The pairwise identity definition used by VSEARCH. 
#' Defaults to 2, the standard VSEARCH definition.
#'
#' @param saveFolder
#' The folder to save saveFolder files (uc, blast6out, FASTA)
#'
#' @param vSearchPath
#' When the package is run on windows systems, the 
#' location of the VSEARCH executable file must be 
#' given; this should be left as default on 
#' Linux/MacOS systems.
#'
#' @param maxWildcards
#' The maximal allowable proportion of wildcards in the 
#' sequence of each match (defaults to \code{0.05}).
#'
#' @return
#' Saves alignment information, including a \code{uc}, 
#' \code{blast6out} and a pairwise alignment \code{fasta} 
#' file, to the specified location. Returns the uc summary 
#' file generated by the alignment.
#' 
#' @note
#' In order to align sequences using VSEARCH, the 
#' executable file must first be installed.
#'
#' @seealso
#' \code{\link{tirClust}}, \code{\link{packClust}}, 
#' \code{\link{readBlast}}, \code{\link{readUc}},
#' \code{\link{filterWildcards}}, \code{\link{packSearch}}
#' 
#' 
#' @references
#' VSEARCH may be downloaded from 
#' \url{https://github.com/torognes/vsearch}, along with a 
#' manual documenting the program's parameters. See 
#' \url{https://www.ncbi.nlm.nih.gov/pubmed/27781170} for 
#' further information.
#' 
#' @examples 
#' data(arabidopsisThalianaRefseq)
#' data(packMatches)
#' 
#' # packAlign run on a Linux/MacOS system
#' \dontrun{
#'     packAlign(packMatches, Genome)
#' }
#' 
#' # packAlign run on a Windows system
#' \dontrun{
#'     packAlign(packMatches, Genome, 
#'             vSearchPath = "path/to/vsearch/vsearch.exe")
#' }
#' 
#' @author
#' Jack Gisby
#' 
#' @export

packAlign <- function(packMatches, Genome, identity = 0, threads = 1, 
                    identityDefinition = 2, maxWildcards = 0.05, saveFolder,
                    vSearchPath = "vsearch") {
    
    if (is.null(saveFolder)) {
        message("Results will be saved in the working directory: ", getwd())
        saveFolder <- getwd()
    }
    
    clustTest(saveFolder, threads, identity, strand = NULL, vSearchPath, 
            identityDefinition, type = "packAlign")

    packMatches <- 
        filterWildcards(packMatches, Genome, maxWildcards = maxWildcards)

    packMatchesFile <- file.path(saveFolder, "packMatches.fasta")
    packMatchesSet <- getPackSeqs(packMatches, Genome, output = "DNAStringSet")
    names(packMatchesSet) <- as.character(rownames(packMatches))
    Biostrings::writeXStringSet(packMatchesSet, packMatchesFile)

    system2(command = vSearchPath, args = paste0(
        "--allpairs_global ", packMatchesFile, " \ ", "--id ", identity, " \ ", 
        "--output_no_hits \ ", "--iddef ", identityDefinition, " \ ", 
        "--threads ", threads, " \ ",
        "--uc ", file.path(saveFolder, "vSearchPairwiseAlignment.uc"), " \ ",
        "--fastapairs ", file.path(saveFolder, "alignmentPairs.fasta"), " \ ",
        "--blast6out ", file.path(saveFolder, 
                                "vSearchPairwiseAlignment.blast6out"), " \ ")
    )

    return(readUc(file.path(saveFolder, "vSearchPairwiseAlignment.uc"), 
                            output = "alignment"))
}
jackgisby/packFinder documentation built on July 19, 2022, 2:25 a.m.