#' Conversion of (mothur) fasta files to a dataframe
#'
#' Reads a DNA fasta file and returns one row per sequence, holding a
#' shortened sequence identifier and the sequence itself as text.
#'
#' Each header is shortened with a single \code{sub()} call that keeps only
#' the part captured by the identifier pattern. A header that does not match
#' the pattern is returned unchanged.
#'
#' @param fastaFile standard format DNA (not RNA/protein) fasta file
#' @param mothurrepseqs boolean indicating whether or not the file contains
#'   mothur-formatted headers for each sequence as generated by get.oturep.
#'   Defaults to TRUE, in which case the identifier pattern is
#'   \code{Otu[0-9]+}.
#' @param striparg string to be kept as headers in more complex headers
#'   (allows for regex), ignored in case mothurrepseqs=TRUE (default). The
#'   default value \code{"Otu"} selects the same \code{Otu[0-9]+} pattern that
#'   is used for mothur headers.
#' @return A data frame with one row per sequence and two columns:
#'   \code{SeqIDs} (the shortened headers) and \code{readseq} (the sequences
#'   as character strings).
#' @importFrom Biostrings readDNAStringSet
#' @examples
#' ## Short example
#'
#' # Write a tiny fasta file whose header contains an Otu label and convert it
#' fasta_path <- tempfile(fileext = ".fasta")
#' writeLines(c(">read_1 Otu001|25", "ACGTACGT"), fasta_path)
#' fasta2dataframe(fasta_path)
#' #TODO: add export option
#'
#' @export
fasta2dataframe <- function(fastaFile, mothurrepseqs = TRUE, striparg = "Otu") {
  dna_set <- readDNAStringSet(fastaFile)
  headers <- names(dna_set)

  # The Otu pattern applies both to mothur headers and to the default
  # striparg; as.logical() keeps truthy values such as 1 working as before.
  use_otu_pattern <- isTRUE(as.logical(mothurrepseqs)) ||
    isTRUE(striparg == "Otu")

  if (use_otu_pattern) {
    id_pattern <- ".*(Otu[0-9]+).*"
  } else {
    stopifnot(
      is.character(striparg),
      length(striparg) == 1L,
      !is.na(striparg)
    )
    # Trailing ".*" (not ".+"): a header that ends with the requested
    # identifier must not lose its last character to backtracking.
    id_pattern <- paste0(".*(", striparg, ").*")
  }
  seq_ids <- sub(id_pattern, "\\1", headers)

  # Plain, unnamed character vector so the names do not become row names.
  sequences <- unname(as.character(dna_set))

  data.frame(SeqIDs = seq_ids, readseq = sequences)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.