## code to prepare `df_Q16658` dataset goes here
# NCBI taxonomic information ----
# Start from the HMMER hits shipped with the package and attach the taxonomy
# table built from their taxids (passed as character strings).
df <- FascinRSCA::hmmer_Q16658
hmmer_Q16658_tax <- dplyr::right_join(
  FascinRSCA::create_ncbi_basic_tax_dataframe(as.character(df$taxid)),
  df,
  by = "taxid"
) %>%
  # Both inputs carry a `species` column, so the join yields `species.x`
  # (taxonomy table) and `species.y` (hits). Keep only the value from the
  # hits, stored under the plain name `species` as the last column.
  dplyr::select(-species.x) %>%
  dplyr::mutate(species = species.y, species.y = NULL)
# EMBOSS Pepstats ----
# Export the homologous sequences as FASTA files so that EMBOSS pepstats can
# be run on them outside of R (see the manual steps at the end of this
# section).
path_pepstats <- "data-raw/pepstats_Q16658/"
fasta_seq <- hmmer_Q16658_tax %>%
  dplyr::pull(fasta_seq) %>%
  Biostrings::AAStringSet()
# Take the names from the same table the sequences were pulled from. The
# joined table is not guaranteed to have the same row order (or row count) as
# `df`, so `df$fasta_header` could attach headers to the wrong sequences.
# This also keeps the FASTA names aligned with id_pepstats.txt written below.
names(fasta_seq) <- hmmer_Q16658_tax$fasta_header

# Split the export into two files: the first 500 sequences and the remainder.
# Index vectors are built with seq_len() so the split stays valid when there
# are 500 or fewer sequences (the second file is then written empty).
# NOTE(review): the file names encode the 860-sequence set this script was
# written for; they are kept as-is because the manual steps below use them.
n_seq <- length(fasta_seq)
first_batch <- seq_len(min(500L, n_seq))
second_batch <- setdiff(seq_len(n_seq), first_batch)
fasta_seq[first_batch] %>%
  Biostrings::writeXStringSet(
    paste0(path_pepstats, "fascin_homologous_1_500.fasta")
  )
fasta_seq[second_batch] %>%
  Biostrings::writeXStringSet(
    paste0(path_pepstats, "fascin_homologous_501_860.fasta")
  )
# One FASTA header per line; this is the id list handed to the parser script.
write(hmmer_Q16658_tax$fasta_header, paste0(path_pepstats, "id_pepstats.txt"))

# Manual steps performed outside of R:
# 1. Run EMBOSS pepstats on the exported FASTA files.
# 2. Concatenate the pepstats outputs:
#      cat fascin_homologous_* > fascin_homologous_1_860.pepstats
# 3. Parse the concatenated output into a CSV with the Python script:
#      python3 parser_pepstats.py -i fascin_homologous_1_860.pepstats -n ../id_pepstats.txt -o fascin_homologous_1_860 --csv
# Attach the parsed pepstats table to the taxonomy-annotated hits, matching
# `fasta_header` in the hits against the `names` column of the CSV.
pepstats_csv <- readr::read_csv(
  paste0(path_pepstats, "EMBOSS/fascin_homologous_1_860.csv")
)
pepstats_Q16658 <- hmmer_Q16658_tax %>%
  dplyr::left_join(pepstats_csv, by = c("fasta_header" = "names"))

# Sanity check: the residue count reported in the CSV must equal the length
# of the sequence it was joined to. Rows without a pepstats match have an NA
# `residue_n`; they are treated as failures instead of breaking the `if`
# with an unrelated "missing value" error.
length_matches <-
  nchar(pepstats_Q16658$fasta_seq) == pepstats_Q16658$residue_n
if (!isTRUE(all(length_matches))) {
  stop(
    "pepstats residue counts do not match the sequence lengths for ",
    sum(!(length_matches %in% TRUE)),
    " sequence(s); check that the pepstats CSV was generated from the ",
    "exported FASTA files.",
    call. = FALSE
  )
}
# UniProt query ----
# Query UniProt with both accession columns and keep only the annotation
# fields of interest plus the two accession keys used for the join below.
UniProt_results <- FascinRSCA::annotation_uniprot_query(
  pepstats_Q16658$acc,
  pepstats_Q16658$acc2
) %>%
  dplyr::select(
    "Entry", "Entry name", "Status",
    "Protein names", "Gene names",
    "Organism", "Length", "acc", "acc2"
  )

# Keep the first annotation row per (acc, acc2) pair so the join cannot
# duplicate hits, then join onto the pepstats table; right_join() keeps every
# row of `pepstats_Q16658`, including hits without a UniProt annotation.
df_Q16658 <- UniProt_results %>%
  dplyr::distinct(acc, acc2, .keep_all = TRUE) %>%
  dplyr::right_join(pepstats_Q16658, by = c("acc", "acc2"))

# str() prints the structure itself and returns NULL invisibly, so wrapping
# it in print() only added a stray "NULL" line to the output.
str(df_Q16658)
usethis::use_data(df_Q16658, overwrite = TRUE)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.