inst/extdata/microhum/metaproteome/GNT+21/download_HOMD.R

# microhum/metaproteome/GNT+21/download_HOMD.R
# Download selected faa files from the Human Oral Microbiome Database
# 20221105 jmd

# REQUIRED FILE:
# TableS9-S12_Majority_Protein_IDs.txt
#   - Majority protein IDs extracted from SI Tables of Granato et al. (2021)
#   - Only lists IDs starting with SEQF (for HOMD database)

# Get bacterial protein IDs from SI Tables of GNT+21
# Each line is split on ";" below, i.e. it may hold several IDs
id_file <- "TableS9-S12_Majority_Protein_IDs.txt"
if (!file.exists(id_file)) {
  stop("Required file not found: ", id_file, call. = FALSE)
}
dat <- readLines(id_file)
## All IDs
#IDs <- unlist(strsplit(dat, ";"))
# First IDs
# vapply() guarantees a character vector; a blank line yields NA here
IDs <- vapply(strsplit(dat, ";", fixed = TRUE), "[", character(1), 1)

# Get unique organism IDs (the part of each protein ID before the first "_")
orgs <- unique(
  vapply(strsplit(IDs, "_", fixed = TRUE), "[", character(1), 1)
)
# Drop missing or empty entries so we never request "NA.faa" or ".faa"
orgs <- orgs[!is.na(orgs) & nzchar(orgs)]

# Skip already downloaded files
# Only a literal ".faa" suffix is stripped; an unescaped "." would be a
# regex wildcard and could mangle unrelated file names
downloaded <- sub("\\.faa$", "", dir(pattern = "\\.faa$"))
orgs <- orgs[!orgs %in% downloaded]

# Download faa files into the current directory
# NOTE: download.file() honours getOption("timeout") (60 s by default);
# raise it before running this script if the connection is slow
base_url <- "https://homd.org/ftp/genomes/PROKKA/current/faa/"
for (org in orgs) {
  URL <- paste0(base_url, org, ".faa")
  destfile <- paste0(org, ".faa")
  message("Downloading ", URL)
  status <- tryCatch(
    download.file(URL, destfile, mode = "wb"),
    error = function(e) {
      warning(
        "Failed to download ", URL, ": ", conditionMessage(e),
        call. = FALSE
      )
      1L
    }
  )
  # Remove partial files so that a re-run retries this organism
  if (status != 0) {
    unlink(destfile)
  }
}
jedick/JMDplots documentation built on April 12, 2025, 1:35 p.m.