# data-raw/hmmer_Q16658.R

## Code to prepare the `hmmer_Q16658` dataset.
# We read all the .json and .fa files that we downloaded from HMMER. To do so
# we use `Biostrings::readAAStringSet` and the package's own function
# `create_hmmer_json_dataframe`.

library(FascinRSCA)
# Directory holding the files downloaded from HMMER.
path <- "data-raw/hmmer_Q16658/"

# `pattern` is a regular expression, not a shell glob: escape the dot and
# anchor the extension so only names ending in ".json" are picked up.
files_json <- purrr::map_chr(
  dir(path, pattern = "\\.json$"),
  function(file_name) paste0(path, file_name)
)

# Labels passed to `create_hmmer_json_dataframe()` alongside the JSON files.
# NOTE(review): presumably one label per JSON file, in the order returned by
# `dir()` -- confirm against the helper.
dataset_names <- c("Ensembl", "RefProteomes", "SwissProt")

# Same regex treatment for the FASTA files: names ending in "fullseq.fa".
files_fasta <- purrr::map_chr(
  dir(path, pattern = "fullseq\\.fa$"),
  function(file_name) paste0(path, file_name)
)

# Parse the HMMER JSON results into a single data frame.
# NOTE(review): the meaning of `desc_null = TRUE` is defined by
# `create_hmmer_json_dataframe()` and is not visible here -- confirm.
df <- FascinRSCA::create_hmmer_json_dataframe(
  files_json,
  dataset_names,
  desc_null = TRUE
)
# Drop the `domains`, `seqs` and `pdbs` columns, remove duplicated rows and
# sort the remaining rows by `file_label`.
df <- dplyr::select(df, -c(domains, seqs, pdbs))
df <- dplyr::distinct(df)
df <- dplyr::arrange(df, file_label)

# Load the sequences from the FASTA file(s) as an amino-acid string set.
fasta_seq <- Biostrings::readAAStringSet(files_fasta)

# Manually change the order of some entries: each row in `swap_from` trades
# places with the row at the same position in `swap_to`
# (38 <-> 39, 112 <-> 113, 313 <-> 314, 636 <-> 637).
# NOTE(review): presumably this lines the rows up with the order of the FASTA
# records compared further down -- confirm.
swap_from <- c(38, 112, 313, 636)
swap_to <- c(39, 113, 314, 637)

# Fail early instead of indexing past the end of the table.
stopifnot(max(swap_from, swap_to) <= nrow(df))

# The pairs do not overlap, so all swaps can be done in one assignment: the
# right-hand side is evaluated in full before any row is overwritten. This
# avoids mutating the global `df` from inside a closure with `<<-`.
df[c(swap_from, swap_to), ] <- df[c(swap_to, swap_from), ]

# The FASTA records must line up one-to-one, in the same order, with the rows
# of `df`; otherwise sequences would be attached to the wrong accession.
# Compare lengths first so that recycling (or missing FASTA names) cannot make
# the element-wise comparison pass by accident, and treat NA as a mismatch.
acc_in_sync <- nrow(df) == length(fasta_seq) &&
  isTRUE(all(df$acc == names(fasta_seq)))

# Fail loudly rather than silently skipping the save.
if (!acc_in_sync) {
  stop(
    "`df$acc` does not match the names of the FASTA records; ",
    "`hmmer_Q16658` was not saved.",
    call. = FALSE
  )
}

# Add a unique header per row ("acc|acc2", with "_<n>" appended to repeated
# values by `make.unique()`) and the sequence of each record as plain text.
df <- dplyr::mutate(
  df,
  fasta_header = make.unique(paste(acc, acc2, sep = "|"), sep = "_"),
  fasta_seq = as.character(fasta_seq)
)
hmmer_Q16658 <- df

# Store the dataset in the package's data directory, replacing any old copy.
usethis::use_data(hmmer_Q16658, overwrite = TRUE)
# currocam/FascinRSCA documentation built on March 21, 2022, 6:29 a.m.