#' Pairwise alignment scores between sequences using Biostrings
#'
#' Each element of `sequences` is collapsed into a single string with
#' `paste(collapse = "")`, and every pair of collapsed strings is aligned with
#' `Biostrings::pairwiseAlignment()`.
#'
#' @param sequences A list of character vectors, the sequences we are
#'   analysing.
#' @param similarity_percentage `TRUE` if we want a \% score
#'   (`Biostrings::pid()` with type `"PID4"`), not an absolute alignment
#'   score. Defaults to `FALSE`.
#' @return For two or more sequences, a `dist` object labelled `"1"`, `"2"`,
#'   ... in input order. Each entry is the negated alignment score of the pair
#'   or, when `similarity_percentage` is `TRUE`, the (un-negated) percent
#'   identity of the pair. For a single sequence, the bare alignment score (or
#'   percent identity) of that sequence aligned with itself, not a `dist`
#'   object. For empty input, an empty `dist` object.
#' @export
stringdist_biostrings <- function(sequences, similarity_percentage = FALSE) {
  n <- length(sequences)

  # Nothing to align: return an empty dist instead of indexing out of bounds.
  if (n == 0L) {
    return(as.dist(matrix(0, nrow = 0L, ncol = 0L)))
  }

  # Collapse each sequence exactly once rather than once per pair.
  collapsed <- vapply(
    sequences,
    function(s) paste(s, collapse = ""),
    character(1),
    USE.NAMES = FALSE
  )

  # Raw value for one pair: percent identity or alignment score.
  pair_value <- function(a, b) {
    if (similarity_percentage) {
      Biostrings::pid(Biostrings::pairwiseAlignment(a, b), "PID4")
    } else {
      Biostrings::pairwiseAlignment(a, b, scoreOnly = TRUE)
    }
  }

  # Kept for backward compatibility: a single sequence yields the raw
  # self-alignment value (not negated, not wrapped in a dist).
  if (n == 1L) {
    return(pair_value(collapsed[[1L]], collapsed[[1L]]))
  }

  labels <- as.character(seq_len(n))
  dist_matrix <- matrix(0, nrow = n, ncol = n, dimnames = list(labels, labels))

  # Absolute scores are negated; percent identities are stored as-is.
  multiplier <- if (similarity_percentage) 1 else -1

  # as.dist() only reads the lower triangle, so the diagonal
  # (self-alignments) is never computed.
  for (i in seq_len(n - 1L)) {
    for (j in (i + 1L):n) {
      value_i_j <- multiplier * pair_value(collapsed[[i]], collapsed[[j]])
      dist_matrix[i, j] <- value_i_j
      dist_matrix[j, i] <- value_i_j
    }
  }

  as.dist(dist_matrix)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.