# microhum/metaproteome/GNT+21/download_HOMD.R
# Download selected faa files from the Human Oral Microbiome Database (HOMD)
# 20221105 jmd
# REQUIRED FILE:
# TableS9-S12_Majority_Protein_IDs.txt
# - Majority protein IDs extracted from SI Tables of Granato et al. (2021)
# - Only lists IDs starting with SEQF (for HOMD database)
# Get bacterial protein IDs from SI Tables of GNT+21
# Each input line holds one or more protein IDs separated by ";"
dat <- readLines("TableS9-S12_Majority_Protein_IDs.txt")
## All IDs
#IDs <- unlist(strsplit(dat, ";"))
# First IDs
# vapply() always returns a character vector (sapply() returns list() for
# empty input, which would make the next strsplit() fail);
# a line without any ID (e.g. a blank line) yields NA here
IDs <- vapply(strsplit(dat, ";"), "[", character(1), 1)
# Drop missing or empty IDs so that no bogus "NA.faa" download is attempted
IDs <- IDs[!is.na(IDs) & nzchar(IDs)]
# Get unique organism IDs (the part of the protein ID before the first "_")
orgs <- unique(vapply(strsplit(IDs, "_"), "[", character(1), 1))
orgs <- orgs[!is.na(orgs) & nzchar(orgs)]
# Skip already downloaded files
# Only files in the working directory that end in a literal ".faa" count;
# in a regular expression an unescaped "." would match any character
downloaded <- sub("\\.faa$", "", dir(pattern = "\\.faa$"))
orgs <- orgs[!orgs %in% downloaded]
# Download faa files
for (org in orgs) {
  URL <- paste0(
    "https://homd.org/ftp/genomes/PROKKA/current/faa/", org, ".faa"
  )
  # Quote the URL so that unexpected characters in an ID are not
  # interpreted by the shell
  cmd <- paste("wget", shQuote(URL))
  print(cmd)
  # system() returns the exit status of the command (0 means success)
  status <- system(cmd)
  if (status != 0) {
    warning(
      "Download failed (exit status ", status, "): ", URL,
      call. = FALSE
    )
  }
}
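# NOTE: the next two lines are web-page boilerplate, not part of the script;
# they are kept as comments so that the file can be parsed by R.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.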