#' A function to write multiple fasta files from a dataframe, grouping based on different variables.
#'
#' @param dataframe A DataFrame containing the \code{seq_col}, the \code{header_col} and \code{group_by_cols}.
#' @param group_by_cols A character vector with the names of the columns whose values are used to group by.
#' @param seq_col A string with the name of the column containing the fasta sequence.
#' @param header_col A string with the name of the column containing the fasta header.
#' @param toWrite A boolean, if \code{toWrite} is TRUE, fasta files are written.
#' @param path Path where files are written if \code{toWrite} is TRUE.
#'
#' @return A list of lists containing both the DataFrame grouped based on \code{group_by_cols} and the name of the file to write.
#' @export
#'
multiple_fasta_files <- function(dataframe, group_by_cols,
seq_col = "fasta_seq",
header_col = "fasta_header",
toWrite=FALSE,
path = "."){
## Check arguments
if(!is.character(group_by_cols))
stop("'group_by_cols' should be character")
if(!is.character(seq_col))
stop("'seq_col' should be character")
if(!is.character(header_col))
stop("'header_col' should be character")
if(!is.character(path))
stop("'path' should be character")
if(length(seq_col) != 1)
stop("'seq_col' should be length 1")
if(length(header_col) != 1)
stop("'header_col' should be length 1")
if(length(path) != 1)
stop("'path' should be length 1")
if(!all(group_by_cols %in% colnames(dataframe)))
stop("'group_by_cols' are not contained in dataframe")
seq_col <- rlang::sym(seq_col)
header_col <- rlang::sym(header_col)
#Comprobamos si existe el directorio principal: Por ejemplo, fascinas 1
if (toWrite & !dir.exists(file.path(path)))
{dir.create(file.path(path))}
# Iteramos a lo largo de las distintas columnas que serán subdirectorios
df_list <- purrr::map(group_by_cols, function(x){
#Comprobamos si existe el subdirectorio
if (toWrite & !dir.exists(file.path(path, x)))
{dir.create(file.path(path, x))}
#Destacamos los valores únicos de esa columna
dataframe %>%
tidyr::drop_na(x) %>%
dplyr::pull(x) %>% unique()%>%
#Creamos una lista con el dataframe y su path
purrr::map(function(y){
var_column <- rlang::sym(x)
dataframe %>%
dplyr::filter(!!var_column == y) %>%
dplyr::distinct(!!seq_col, .keep_all = TRUE)%>%
dplyr::select(!!seq_col, !!header_col) %>%
list(file.path(path, x, paste0(y, ".fasta"))) %>% return()
})}) %>% purrr::flatten() #Eliminamos un nivel de jerarquía
if (toWrite){
# Escribimos los archivos
df_list %>% purrr::walk(function(z){
seqinr::write.fasta(open = "w",
sequences = as.list(z[[1]] %>% dplyr::pull(seq_col)),
names = z[[1]] %>% dplyr::pull(header_col),
nbchar = 80, as.string = TRUE,
file.out = z[[2]])# filepath
})
}
return(df_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.