#' @title Pairwise Global Sequence Alignments
#' @description This function takes a FASTA file containing two DNA or amino acid sequences
#' that shall be aligned and computes a paiwise alignment using a defined alignment method.
#' @param file a character string specifying the path to the file storing the sequences in FASTA format.
#' @param tool a character string specifying the program/algorithm that should be used: \code{tool} = \code{"NW"}.
#' @param seq_type a character string specifying the sequence type stored within the given FASTA file. Options are:
#' \itemize{
#' \item \code{seq_type = "protein"} (default)
#' \item \code{seq_type = "cds"}
#' \item \code{seq_type = "dna"}
#' }
#' @param get_aln a logical value indicating whether the produced alignment should be returned.
#' @param pairwise_aln_name a character string specifying the name of the stored alignment file.
#' Default is \code{pairwise_aln_name} = \code{NULL} denoting a default name: 'toolname_seq_type.aln' .
#' @param path a character string specifying the path to the pairwise alignment program (in case you don't use the default path).
#' @param quiet a logical value specifying whether a successful interface call shall be printed out.
#' @param store_locally a logical value indicating whether or not alignment files shall be stored locally rather than in \code{tempdir()}.
#' @author Sarah Scharfenberg and Hajk-Georg Drost
#' @details This function provides an interface between R and common pairwise alignment computation methods.
#'
#' The current version of this function computes pairwise alignments based on the \code{\link[Biostrings]{pairwiseAlignment}}
#' function implemented in the \pkg{Biostrings} package.
#'
#' The default pairwise alignment method is based on the \emph{Needleman-Wunsch Algorithm}.
#'
#' @examples \dontrun{
#'
#' # Needleman-Wunsch Example:
#'
#' # in case Biostrings works properly
#' pairwise_aln( file = system.file('seqs/aa_seqs.fasta', package = 'homologr'),
#' tool = "NW",
#' get_aln = TRUE,
#' seq_type = "protein")
#'
#'
#'
#' }
#' @return In case the argument \code{get_aln} is set \code{TRUE}, an object of class alignment of the seqinr package is returned.
#' @import Biostrings
#' @export
pairwise_aln <- function(file,
tool = "NW",
seq_type = "protein",
get_aln = FALSE,
pairwise_aln_name = NULL,
path = NULL,
quiet = FALSE,
store_locally = FALSE) {
if (!is_pairwise_aln_tool(tool))
stop("Please choose a tool that is supported by this function.", call. = FALSE)
if (!is.element(seq_type, c("dna", "cds", "protein")))
stop("Please choose a seq_type that is supported by this function.",
call. = FALSE)
if (is.element(seq_type, c("dna", "cds")))
seqtype <- "DNA"
if (seq_type == "protein")
seqtype <- "AA"
if (!file.exists(file.path(
ifelse(store_locally, "homologr_alignment_files", tempdir()),
"_alignment"
))) {
dir.create(file.path(
ifelse(store_locally, "homologr_alignment_files", tempdir()),
"_alignment"
))
}
if (!file.exists(file.path(
ifelse(store_locally, "homologr_alignment_files", tempdir()),
"_alignment",
"pairwise_aln"
))) {
dir.create(file.path(
ifelse(store_locally, "homologr_alignment_files", tempdir()),
"_alignment",
"pairwise_aln"
))
}
if (is.null(pairwise_aln_name)) {
file.out <-
file.path(
ifelse(store_locally, "homologr_alignment_files", tempdir()),
"_alignment",
"pairwise_aln",
paste0(tool, "_", seqtype, ".aln")
)
}
if (!is.null(pairwise_aln_name)) {
file.out <-
file.path(
ifelse(store_locally, "homologr_alignment_files", tempdir()),
"_alignment",
"pairwise_aln",
paste0(pairwise_aln_name, "_", tool, "_", seqtype, ".aln")
)
}
# Needleman-Wunsch using Biostrings::pairwiseAlignment( type=global)
if (tool == "NW") {
#read file
if (is.element(seq_type, c("dna", "cds"))) {
# not possible as is sorts the input by name
# seqs <- read.cds(file=file, format=format)
# names <- dt[,geneids]
# seqs <- sapply(dt[,seqs] , Biostrings::DNAString)
input <- seqinr::read.fasta(file = file,
seqtype = seqtype)
names <- names(input)
seqs <-
lapply(input, function(x) {
return(Biostrings::DNAString(seqinr::c2s(x)))
})
}
if (seq_type == "protein") {
# dt <- read.proteome(file=file, format=format)
# names <- dt[,geneids]
# seqs <- sapply(dt[,seqs] , Biostrings::AAString)
input <-
seqinr::read.fasta(file = file,
seqtype = seqtype)
names <- names(input)
seqs <-
lapply(input, function(x) {
return(Biostrings::AAString(seqinr::c2s(x)))
})
}
tryCatch({
# align
aln <-
Biostrings::pairwiseAlignment(pattern = seqs[[1]],
subject = seqs[[2]],
type = "global")
}, error = function(e) {stop("Something went wrong during the NW global alignment step in pairwise_aln(), please check whether your input sequences can be aligned with Biostrings::pairwiseAlignment().", call. = FALSE)})
#write file -> comment Hajk: what does pattern() and subject() do in pattern(aln), subject(aln) ?
# pattern(aln) == get aligned sequence that was set as pattern input
# subject(aln) == get aligned sequence that was set as subject input
seqinr::write.fasta(
sequences = list(Biostrings::pattern(aln), IRanges::subject(aln)),
names = names,
file.out = file.out
)
if (store_locally) {
local_store_path <-
file.path("homologr_alignment_files",
"_pairwise_alignment_with_score")
Biostrings::writePairwiseAlignments(aln, file.path(
local_store_path,
paste0(pairwise_aln_name, "_", tool, "_", seqtype, ".aln")
))
} else {
local_store_path <-
file.path(tempdir(), "_pairwise_alignment_with_score")
Biostrings::writePairwiseAlignments(aln, file.path(
local_store_path,
paste0(pairwise_aln_name, "_", tool, "_", seqtype, ".aln")
))
}
if (!quiet) {
message("Pairwise alignment file was successfully written to ", file.out, ".")
}
if (get_aln)
return(aln)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.