#' @title Import Sequences
#'
#' @description Imports a file containing ID and sequences to a dataframe.
#' The reader is chosen from the file extension, which is compared
#' case-insensitively.
#'
#' @details
#' NEXUS (.nex) files are parsed by this function itself. The file is split
#' into whitespace-separated tokens; everything between the MATRIX keyword
#' and the first ";" that follows it is read as alternating name/sequence
#' pairs. Every taxon must appear exactly once per block, in the same order
#' in every block (a non-interleaved matrix is simply one block). The number
#' of taxa is taken from the first "ntax=<n>" setting found in the file.
#'
#' FASTA (.fasta) and PHYLIP (.phy) files are delegated to
#' phylotools::read.fasta() and phylotools::read.phylip() with
#' clean_name = FALSE.
#'
#' After reading, apostrophes are removed from the sequence identifiers.
#'
#' @param file The name of the input file containing the sequence alignment.
#' Works with .nex, .fasta and .phy files.
#'
#' @return A dataframe containing the sequence identifiers as the first column
#' and the sequence data in the second column. For NEXUS input the columns are
#' named seq.name and seq.text.
#'
#' @export
import_sequences <- function(file) {
  if (!is.character(file) || length(file) != 1L || is.na(file)) {
    stop("`file` must be a single file name", call. = FALSE)
  }

  # Parse the MATRIX command of a NEXUS file into a two-column data frame.
  import_nex <- function(file) {
    tokens <- scan(file, what = character(), quiet = TRUE)

    # Match the keyword as a whole token so that names or sequence chunks
    # that merely contain "matrix" are not mistaken for it.
    matrix_at <- grep("^matrix$", tokens, ignore.case = TRUE)
    if (length(matrix_at) == 0L) {
      stop("No MATRIX command found in NEXUS file", call. = FALSE)
    }
    after_matrix <- tokens[-seq_len(matrix_at[1L])]

    # The matrix runs up to the first ';'. Searching for the terminator
    # rather than for "end" keeps taxa or protein chunks containing the
    # letters "end" from cutting the matrix short.
    stop_at <- grep(";", after_matrix, fixed = TRUE)
    if (length(stop_at) == 0L) {
      stop("MATRIX command in NEXUS file is not terminated by ';'",
           call. = FALSE)
    }
    # The terminator may be glued to the last sequence chunk ("ACGT;").
    last_cell <- sub(";.*$", "", after_matrix[stop_at[1L]])
    cells <- c(
      after_matrix[seq_len(stop_at[1L] - 1L)],
      if (nzchar(last_cell)) last_cell
    )

    # Number of taxa: first "ntax=<n>" in the file, spaces around '=' allowed.
    all_text <- paste(tokens, collapse = " ")
    ntax_hit <- regmatches(
      all_text,
      regexpr("ntax[[:space:]]*=[[:space:]]*[0-9]+", all_text,
              ignore.case = TRUE)
    )
    n_tax <- as.integer(sub("^.*=[[:space:]]*", "", ntax_hit))
    if (length(n_tax) != 1L || is.na(n_tax) || n_tax < 1L) {
      stop("Could not read a positive 'ntax=' value from NEXUS file",
           call. = FALSE)
    }

    # Each block holds one name token and one sequence token per taxon.
    per_block <- 2L * n_tax
    if (length(cells) == 0L || length(cells) %% per_block != 0L) {
      stop("NEXUS matrix does not consist of complete blocks of ", n_tax,
           " name/sequence pairs", call. = FALSE)
    }

    # One column per block: odd rows are names, even rows are sequence chunks.
    grid <- matrix(cells, nrow = per_block)
    name_rows <- seq.int(1L, per_block, by = 2L)
    chunk_rows <- seq.int(2L, per_block, by = 2L)

    seq_text <- vapply(
      chunk_rows,
      function(row) paste(grid[row, ], collapse = ""),
      character(1)
    )

    data.frame(
      seq.name = grid[name_rows, 1L],
      seq.text = seq_text,
      stringsAsFactors = FALSE
    )
  }

  # Supported formats, keyed by file extension (without the dot).
  readers <- list(
    nex = import_nex,
    fasta = function(file) phylotools::read.fasta(file, clean_name = FALSE),
    phy = function(file) phylotools::read.phylip(file, clean_name = FALSE)
  )

  # Require a literal dot before the extension and ignore case.
  is_format <- vapply(
    names(readers),
    function(ext) grepl(paste0("\\.", ext, "$"), file, ignore.case = TRUE),
    logical(1)
  )
  format <- names(readers)[is_format]
  if (length(format) != 1L) {
    stop("Unsupported file type: '", file,
         "'. Expected a .nex, .fasta or .phy file", call. = FALSE)
  }

  sequences <- readers[[format]](file)

  if (length(unique(nchar(as.vector(sequences[, 2])))) != 1)
    stop("Inconsistent sequence lengths")

  # remove apostrophes from names
  sequences[, 1] <- gsub("'", "", sequences[, 1])

  # The NEXUS parser above validates the matrix layout and never indexes past
  # the matrix, so it cannot produce "NA" padding; stripping there would only
  # truncate real sequences that end in the residues N, A.
  # NOTE(review): the strip is kept for the phylotools readers because their
  # output is not visible from here; it will also truncate genuine trailing
  # "NA" residues in those formats - confirm whether it is still needed.
  if (format != "nex") {
    sequences[, 2] <- gsub("NA$", "", sequences[, 2])
  }

  sequences
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.