# R/stringdist_biostrings.R

#' Pairwise distances between sequences using Biostrings alignment scores
#'
#' Each sequence is collapsed into a single string and every unordered pair
#' of sequences is aligned with \code{Biostrings::pairwiseAlignment()}.
#'
#' @param sequences a list of character vectors, the sequences we are
#'   analysing; each element is pasted together into one string before it is
#'   aligned
#' @param similarity_percentage TRUE if we want a \% score (the "PID4" value
#'   from \code{Biostrings::pid()}), not an absolute score
#' @return A dist object of the pairwise distances between the sequences.
#'   With absolute scores the stored value is the negated alignment score;
#'   with \code{similarity_percentage = TRUE} the stored value is the percent
#'   identity itself (not negated). An empty \code{sequences} yields an empty
#'   dist object. As a special case, when exactly one sequence is supplied
#'   the bare score (or percent identity) of that sequence aligned against
#'   itself is returned instead of a dist object.
#' @export
stringdist_biostrings <- function(sequences, similarity_percentage = FALSE) {
  # Collapse every sequence exactly once instead of re-pasting it for each
  # pair inside the O(n^2) loop below.
  collapsed <- vapply(sequences, paste, character(1), collapse = "",
                      USE.NAMES = FALSE)
  n_seq <- length(collapsed)

  # Raw (un-negated) alignment score, or PID4 percent identity, of one pair.
  # NOTE(review): newer Bioconductor releases appear to have moved these
  # functions to the 'pwalign' package -- confirm before upgrading.
  align_pair <- function(pattern, subject) {
    if (similarity_percentage) {
      Biostrings::pid(Biostrings::pairwiseAlignment(pattern, subject), "PID4")
    } else {
      Biostrings::pairwiseAlignment(pattern, subject, scoreOnly = TRUE)
    }
  }

  # Single sequence: keep the historical behaviour of returning the raw
  # self-alignment value rather than a (necessarily empty) dist object.
  if (n_seq == 1) {
    return(align_pair(collapsed[[1]], collapsed[[1]]))
  }

  # Zero-filled square matrix labelled "1".."n", the same labels the result
  # carried when the matrix was seeded from dist(1:n).
  labels <- as.character(seq_len(n_seq))
  dist_matrix <- matrix(0, nrow = n_seq, ncol = n_seq,
                        dimnames = list(labels, labels))

  for (i in seq_len(n_seq)) {
    # Only j > i is visited: as.dist() drops the diagonal, so self-alignments
    # would be wasted work. seq_len() makes this empty when i == n_seq (and
    # the outer loop empty when there are no sequences at all).
    for (j in i + seq_len(n_seq - i)) {
      value <- align_pair(collapsed[[i]], collapsed[[j]])
      if (!similarity_percentage) {
        # A higher alignment score means more similar, so negate it to get
        # something that orders like a distance.
        value <- -value
      }
      # NOTE(review): in percentage mode the similarity is stored as-is, so
      # larger entries mean *more* similar -- confirm callers expect this.
      dist_matrix[i, j] <- value
      dist_matrix[j, i] <- value
    }
  }

  as.dist(dist_matrix)
}
# sams25/rcombinator_old documentation built on May 28, 2019, 8:40 a.m.