#### author: Jinlong Zhang <jinlongzhang01@gmail.com>
#### institution: Kadoorie Farm and Botanic Garden, Hong Kong
#### package: phylotools
#### URL: http://github.com/helixcn/phylotools
#### date: 26 MAY 2015
read.fasta <- function(file = NULL, clean_name = FALSE) {
fasta.content <- readLines(file)
### find and process the names of the sequence.
### all the punctuated symbols will be replaced with _
seq.name.index <- grep(">", fasta.content)
seq.name <- gsub(">", "", fasta.content[seq.name.index])
if (clean_name) {
seq.name <- gsub("^[_]+|[_]+$| ", "", gsub(
"^[[:space:]]+|[[:space:]]+$",
"", gsub("[[:punct:]]", "_", seq.name)
))
}
### obtain the sequence
seq.start.index <- seq.name.index + 1
seq.end.index <- c(seq.name.index, length(fasta.content) + 1)[-1] - 1
seq.text <- rep(NA, length(seq.name.index))
### find the starting and ending of each sequence, and concantented the lines.
for (i in 1:length(seq.name.index)) {
seq.start <- seq.start.index[i]
seq.end <- seq.end.index[i]
seq.text[i] <- gsub(
"[[:space:]]", "",
paste(fasta.content[seq.start:seq.end],
collapse = ""
)
)
}
res <- data.frame(seq.name, seq.text)
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.