R/pairwise_aln.R

Defines functions pairwise_aln

Documented in pairwise_aln

#' @title Pairwise Global Sequence Alignments
#' @description This function takes a FASTA file containing two DNA or amino acid sequences
#' that shall be aligned and computes a paiwise alignment using a defined alignment method.
#' @param file a character string specifying the path to the file storing the sequences in FASTA format.
#' @param tool a character string specifying the program/algorithm that should be used: \code{tool} = \code{"NW"}.
#' @param seq_type a character string specifying the sequence type stored within the given FASTA file. Options are:
#' \itemize{
#' \item \code{seq_type = "protein"} (default)
#' \item \code{seq_type = "cds"}
#' \item \code{seq_type = "dna"}
#' }
#' @param get_aln a logical value indicating whether the produced alignment should be returned.
#' @param pairwise_aln_name a character string specifying the name of the stored alignment file.
#' Default is \code{pairwise_aln_name} = \code{NULL} denoting a default name: 'toolname_seq_type.aln' .
#' @param path a character string specifying the path to the pairwise alignment program (in case you don't use the default path).
#' @param quiet a logical value specifying whether a successful interface call shall be printed out.
#' @param store_locally a logical value indicating whether or not alignment files shall be stored locally rather than in \code{tempdir()}.
#' @author Sarah Scharfenberg and Hajk-Georg Drost
#' @details This function provides an interface between R and common pairwise alignment computation methods.
#'
#' The current version of this function computes pairwise alignments based on the \code{\link[Biostrings]{pairwiseAlignment}}
#' function implemented in the \pkg{Biostrings} package.
#'
#' The default pairwise alignment method is based on the \emph{Needleman-Wunsch Algorithm}.
#'
#' @examples \dontrun{
#'
#' # Needleman-Wunsch Example:
#'
#' # in case Biostrings works properly
#' pairwise_aln( file     = system.file('seqs/aa_seqs.fasta', package = 'homologr'),
#'               tool     = "NW",
#'               get_aln  = TRUE,
#'               seq_type = "protein")
#'
#'
#'
#' }
#' @return In case the argument \code{get_aln} is set \code{TRUE}, an object of class alignment of the seqinr package is returned.
#' @import Biostrings
#' @export
pairwise_aln <- function(file,
                         tool              = "NW",
                         seq_type          = "protein",
                         get_aln           = FALSE,
                         pairwise_aln_name = NULL,
                         path              = NULL,
                         quiet             = FALSE,
                         store_locally     = FALSE) {
  if (!is_pairwise_aln_tool(tool))
    stop("Please choose a tool that is supported by this function.", call. = FALSE)

  if (!is.element(seq_type, c("dna", "cds", "protein")))
    stop("Please choose a seq_type that is supported by this function.",
         call. = FALSE)

  if (is.element(seq_type, c("dna", "cds")))
    seqtype <- "DNA"

  if (seq_type == "protein")
    seqtype <- "AA"


  if (!file.exists(file.path(
    ifelse(store_locally, "homologr_alignment_files", tempdir()),
    "_alignment"
  ))) {
    dir.create(file.path(
      ifelse(store_locally, "homologr_alignment_files", tempdir()),
      "_alignment"
    ))
  }

  if (!file.exists(file.path(
    ifelse(store_locally, "homologr_alignment_files", tempdir()),
    "_alignment",
    "pairwise_aln"
  ))) {
    dir.create(file.path(
      ifelse(store_locally, "homologr_alignment_files", tempdir()),
      "_alignment",
      "pairwise_aln"
    ))
  }

  if (is.null(pairwise_aln_name)) {
    file.out <-
      file.path(
        ifelse(store_locally, "homologr_alignment_files", tempdir()),
        "_alignment",
        "pairwise_aln",
        paste0(tool, "_", seqtype, ".aln")
      )
  }

  if (!is.null(pairwise_aln_name)) {
    file.out <-
      file.path(
        ifelse(store_locally, "homologr_alignment_files", tempdir()),
        "_alignment",
        "pairwise_aln",
        paste0(pairwise_aln_name, "_", tool, "_", seqtype, ".aln")
      )
  }

  # Needleman-Wunsch using Biostrings::pairwiseAlignment( type=global)
  if (tool == "NW") {
    #read file
    if (is.element(seq_type, c("dna", "cds"))) {
      #                         not possible as is sorts the input by name
      #                         seqs <- read.cds(file=file, format=format)
      #                         names <- dt[,geneids]
      #                         seqs <- sapply(dt[,seqs] , Biostrings::DNAString)

      input <- seqinr::read.fasta(file    = file,
                                  seqtype = seqtype)

      names <- names(input)
      seqs <-
        lapply(input, function(x) {
          return(Biostrings::DNAString(seqinr::c2s(x)))
        })
    }

    if (seq_type == "protein") {
      #                         dt <- read.proteome(file=file, format=format)
      #                         names <- dt[,geneids]
      #                         seqs <- sapply(dt[,seqs] , Biostrings::AAString)

      input <-
        seqinr::read.fasta(file    = file,
                           seqtype = seqtype)

      names <- names(input)
      seqs <-
        lapply(input, function(x) {
          return(Biostrings::AAString(seqinr::c2s(x)))
        })
    }

    tryCatch({
      # align
      aln  <-
        Biostrings::pairwiseAlignment(pattern = seqs[[1]],
                                      subject = seqs[[2]],
                                      type    = "global")
    }, error = function(e) {stop("Something went wrong during the NW global alignment step in pairwise_aln(), please check whether your input sequences can be aligned with Biostrings::pairwiseAlignment().", call. = FALSE)})


    #write file -> comment Hajk: what does pattern() and subject() do in pattern(aln), subject(aln) ?
    # pattern(aln) == get aligned sequence that was set as pattern input
    # subject(aln) == get aligned sequence that was set as subject input
    seqinr::write.fasta(
      sequences = list(Biostrings::pattern(aln), IRanges::subject(aln)),
      names     = names,
      file.out  = file.out
    )

    if (store_locally) {
      local_store_path <-
        file.path("homologr_alignment_files",
                  "_pairwise_alignment_with_score")
      Biostrings::writePairwiseAlignments(aln, file.path(
        local_store_path,
        paste0(pairwise_aln_name, "_", tool, "_", seqtype, ".aln")
      ))
    } else {
      local_store_path <-
        file.path(tempdir(), "_pairwise_alignment_with_score")
      Biostrings::writePairwiseAlignments(aln, file.path(
        local_store_path,
        paste0(pairwise_aln_name, "_", tool, "_", seqtype, ".aln")
      ))
    }

    if (!quiet) {
      message("Pairwise alignment file was successfully written to ", file.out, ".")
    }

    if (get_aln)
      return(aln)
  }
}
drostlab/homologr documentation built on Sept. 28, 2020, 12:44 a.m.