Nothing
#' Read FASTA amino acids file into a dataframe
#'
#' This function reads a FASTA amino acids file into a dataframe with one row
#' per sequence record.
#'
#' @details
#' A record starts at a line whose first character is `>`; the remainder of
#' that line is used as the sequence name. All following lines, up to the next
#' header line or the end of the file, are concatenated into the sequence and
#' every whitespace character is removed from the result. Lines that appear
#' before the first header are ignored. A header that is not followed by any
#' sequence line yields an empty string (`""`) as its sequence.
#'
#' @export read_faa
#'
#' @note This function was adapted from `read.fasta.R` by Jinlong Zhang (jinlongzhang01@@gmail.com) for the phylotools package (http://github.com/helixcn/phylotools)
#'
#' @param file file path to the FASTA format file containing the protein sequences
#'
#' @return Dataframe containing the sequence name (seq_name) and sequence (seq_aa) columns,
#' both of type character. The dataframe has zero rows when the file contains no header lines.
#'
#' @examples
#'
#' read_faa(system.file("extdata/bat_protein.fasta", package = "ampir"))
#'
#' ## Output
#' # seq_name seq_aa
#' # [1] G1P6H5_MYOLU MALTVRIQAACLLLLLLASLTSYSL....
read_faa <- function(file = NULL) {
  if (is.null(file)) {
    stop("`file` must be the path to a FASTA file, not NULL.", call. = FALSE)
  }

  faa_lines <- readLines(file)

  # Only lines that *start* with ">" are headers; a ">" elsewhere in a line
  # must neither open a new record nor be stripped from the sequence name.
  is_header <- startsWith(faa_lines, ">")
  seq_name <- sub("^>", "", faa_lines[is_header])

  # Number each line by the record it belongs to (0 = before the first header).
  record_id <- cumsum(is_header)
  is_seq_line <- !is_header & record_id > 0L

  # Splitting on a factor with one level per header keeps records that have no
  # sequence lines (they become empty groups) instead of borrowing the
  # neighbouring lines, which is what a reversed start:end index range did.
  seq_chunks <- split(
    faa_lines[is_seq_line],
    factor(record_id[is_seq_line], levels = seq_along(seq_name))
  )

  # Concatenate the lines of each record and drop all whitespace.
  seq_aa <- vapply(
    seq_chunks,
    function(chunk) gsub("[[:space:]]", "", paste(chunk, collapse = "")),
    character(1),
    USE.NAMES = FALSE
  )

  data.frame(seq_name, seq_aa, stringsAsFactors = FALSE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.