inst/scripts/make-data.R

#!/usr/bin/env Rscript
###############################################################################
#
# Script to generate Bioconductor resources for all EuPathDB databases
#
###############################################################################
source("config.R")

# Download the EuPathDB metadata table and report how many organisms will be
# built and how many are being skipped.
info("Initializing EuPathDB Package Creation...")
info("Downloading EuPathDB metadata...")
meta <- download_eupathdb_metadata(
  bioc_version = bioc_version,
  build_dir = build_dir,
  overwrite = TRUE,
  webservice = "eupathdb",
  eupathdb_version = eupathdb_version,
  write_csv = TRUE,
  verbose = TRUE
)

info("Finished downloading metadata.")

# `meta$valid` holds the rows that are built below; `meta$invalid` holds the
# rows that are only reported and then ignored.
num_total <- nrow(meta$valid)

# The format string previously ended in ".)" -- the stray parenthesis was part
# of the string literal and was printed verbatim.
info(sprintf("Found: %d valid organisms.", num_total))
warn(sprintf("Ignoring %d organisms with insufficient taxonomic information:",
             nrow(meta$invalid)))

# List every ignored organism by its unmodified taxon name.
for (x in meta$invalid$TaxonUnmodified) {
  warn("  ", x)
}

all_metadata <- meta$valid

# Main build loop: visit each row of the valid metadata, log a banner for the
# organism, and build the resources that are switched on.
#
# NOTE(review): `1:nrow(x)` evaluates to c(1, 0) when the table has no rows, so
# the body would still run; `seq_len(nrow(all_metadata))` avoids that.
# NOTE(review): the `if (bsgenome)` block opened at the end of this section has
# no closing brace in this copy of the script, and what follows looks like a
# second variant of the same loop -- confirm against the repository.
for (i in 1:nrow(all_metadata)) {
  entry <- all_metadata[i, ]
  species <- entry$Species

  # Banner marking the start of this organism in the log, with a timestamp.
  info("")
  info("================================================================================")
  info("=")
  info(sprintf("= Starting processing of %s %s (%d / %d)", species, entry$Strain, i, num_total))
  info(sprintf("= %s", Sys.time()))
  info("=")
  info("================================================================================")
  info("")

  # Package names for this entry; the orgdb/txdb/organismdbi elements are
  # compared against the builders' return values further down.
  pkgnames <- get_eupathdb_pkgnames(entry)

  # `bsgenome` is a flag not defined in this file -- presumably set by
  # config.R; verify.
  if (bsgenome) {
    info("Creating BSGenome for ", species)

    bsgenome_result <- make_eupathdb_bsgenome(entry, eupathdb_version = eupathdb_version, workdir = build_dir, copy_s3 = TRUE)


# NOTE(review): from here the text reads like a second variant of the build
# loop: it uses `download_eupath_metadata()` / `get_eupath_pkgnames()` /
# `make_eupath_bsgenome()` rather than the `*_eupathdb_*` helpers, and
# `message()` rather than `info()`. It reassigns `meta` and `all_metadata`
# that were already set earlier in the script. Confirm which variant is
# intended before editing.
library(testthat)
webservice <- "eupathdb"
meta <- download_eupath_metadata(overwrite=TRUE, webservice=webservice,
                                 verbose=TRUE, write_csv=TRUE)
all_metadata <- meta[["valid"]]
end <- nrow(all_metadata)

# Iterate over rows `start` through `end` of the valid metadata.
# NOTE(review): `start:end` counts downwards (1, 0) when `end` is 0.
start <- 1
for (it in start:end) {
  entry <- all_metadata[it, ]
  species <- entry[["Species"]]
  message("Starting generation of ", species, ", which is ", it, " of ", end, " species.")
  pkgnames <- get_eupath_pkgnames(entry)
  # NOTE(review): this `if` block is not closed in this copy of the script;
  # the next statement is the `if (orgdb)` section.
  if (isTRUE(bsgenome)) {
    bsgenome_result <- make_eupath_bsgenome(entry, copy_s3=TRUE)

  if (orgdb) {
    # Build the OrgDb package for the current entry.
    #
    # Two warnings are commonly seen during this step and are usually harmless:
    #
    #   * From AnnotationForge::makeOrgPackage(), raised in result_fetch():
    #       "SQL statements must be issued with dbExecute() or
    #        dbSendStatement() instead of dbGetQuery() or dbSendQuery()."
    #
    #   * From post_eupathdb_raw():
    #       "The provided species does not have a table of weights."
    #     This one appears whenever an API request fails, which is sometimes
    #     expected for organisms with insufficient data available on EuPathDB.
    orgdb_pkgname <- make_eupathdb_orgdb(
      entry,
      workdir = build_dir,
      copy_s3 = TRUE
    )

    if (!is.null(orgdb_pkgname)) {
      # The builder should hand back exactly the name predicted for this entry.
      expected <- pkgnames[["orgdb"]]
      testthat::test_that("Does make_eupathdb_orgdb return something sensible?", {
        testthat::expect_equal(expected, orgdb_pkgname)
      })
      results[["orgdb"]][[species]] <- orgdb_pkgname
    } else {
      # A NULL return means no package was produced; report it and move on.
      warn("Unable to create the OrgDB package: ", entry$OrgdbFile)
    }
  }

    ##
    ## Create TxDb
    ##
    if (txdb) {
      # Build the TxDb package for the current entry.
      txdb_pkgname <- make_eupathdb_txdb(
        entry,
        workdir = build_dir,
        eupathdb_version = eupathdb_version,
        copy_s3 = TRUE
      )

      if (!is.null(txdb_pkgname)) {
        # The builder should hand back exactly the name predicted for this entry.
        expected <- pkgnames[["txdb"]]
        testthat::test_that("Does make_eupathdb_txdb return something sensible?", {
          testthat::expect_equal(expected, txdb_pkgname)
        })
        results[["txdb"]][[species]] <- txdb_pkgname
      } else {
        # A NULL return means no package was produced; report it and move on.
        warn("Unable to create the txdb package: ", entry$TxdbFile)
      }
    }

  #
  # Create OrganismDb
  #
  if (organismdb) {
    # Build the OrganismDb package for the current entry and record the result.
    info("Creating OrganismDb for ", species)

    organ_result <- make_eupathdb_organismdbi(
      entry,
      workdir = build_dir,
      eupathdb_version = eupathdb_version,
      copy_s3 = TRUE
    )

    if (is.null(organ_result)) {
      # Mirror the OrgDb/TxDb sections: a missing result is reported rather
      # than allowed to fail the name comparison below.
      warn("Unable to create the OrganismDb package for ", species)
    } else {
      actual <- organ_result[["organismdb_name"]]
      expected <- pkgnames[["organismdbi"]]
      testthat::test_that("Does make_eupathdb_organismdbi return something sensible?", {
        testthat::expect_equal(expected, actual)
      })
      # Store per species, as the OrgDb/TxDb sections do. Assigning straight to
      # results[["organismdbi"]] replaced the previous organism's result on
      # every iteration, so only the last one survived.
      results[["organismdbi"]][[species]] <- organ_result
    }
  }
} ## End iterating over every entry in the eupathdb metadata.

info("Finished generating EuPathDB packages")

## check_csv checks each metadata csv file to see that the files exist.
## check_files checks the list of files in each directory to see that they all have
## entries in the csv.

#
# BSGenome (check)
#
if (bsgenome) {
  # Check the BSgenome metadata CSV and the built files against each other,
  # copy the CSV into the EuPathDB package's extdata directory, and save the
  # AnnotationHub metadata built from it.
  #
  # This step covers every organism, so the message no longer interpolates
  # `species`, which at this point only holds the last organism from the loop.
  info("Creating AnnotationHubMetadata (BSGenome)")

  bs_csv <- check_csv(build_dir, file_type = "BSgenome",
                      bioc_version = bioc_version,
                      eupathdb_version = eupathdb_version)
  bs_files <- check_files("BSgenome", bioc_version = bioc_version,
                          eupathdb_version = eupathdb_version)
  csv_copy_path <- file.path(path.package("EuPathDB"), "inst", "extdata", bs_csv)

  # file.copy() returns FALSE for an existing destination unless
  # overwrite = TRUE, which made the check below fail on every rerun.
  copied <- file.copy(bs_csv, csv_copy_path, overwrite = TRUE)
  testthat::expect_true(copied)

  bs_checked <- AnnotationHubData::makeAnnotationHubMetadata(path.package("EuPathDB"), bs_csv)
  save(list = c("bs_checked"), file = file.path(build_dir, "bsgenome_metadata.rda"))
}

#
# OrgDb (check)
#
if (orgdb) {
  # Check the OrgDb metadata CSV and the built files against each other, copy
  # the CSV into the EuPathDB package's extdata directory, and save the
  # AnnotationHub metadata built from it.
  #
  # This step covers every organism, so the message no longer interpolates
  # `species`, which at this point only holds the last organism from the loop.
  info("Creating AnnotationHubMetadata (OrgDb)")

  org_csv <- check_csv(build_dir, file_type = "OrgDb",
                       bioc_version = bioc_version,
                       eupathdb_version = eupathdb_version)
  org_files <- check_files("OrgDb", bioc_version = bioc_version,
                           eupathdb_version = eupathdb_version)

  csv_copy_path <- file.path(path.package("EuPathDB"), "inst", "extdata", org_csv)

  # file.copy() returns FALSE for an existing destination unless
  # overwrite = TRUE, which made the check below fail on every rerun.
  copied <- file.copy(org_csv, csv_copy_path, overwrite = TRUE)
  testthat::expect_true(copied)

  org_checked <- AnnotationHubData::makeAnnotationHubMetadata(path.package("EuPathDB"), org_csv)
  save(list = c("org_checked"), file = file.path(build_dir, "orgdb_metadata.rda"))
}

#
# TxDb (check)
#
if (txdb) {
  # Check the TxDb metadata CSV and the built files against each other, copy
  # the CSV into the EuPathDB package's extdata directory, and save the
  # AnnotationHub metadata built from it.
  #
  # This step covers every organism, so the message no longer interpolates
  # `species`, which at this point only holds the last organism from the loop.
  info("Creating AnnotationHubMetadata (TxDb)")

  txdb_csv <- check_csv(build_dir, file_type = "TxDb",
                        bioc_version = bioc_version,
                        eupathdb_version = eupathdb_version)
  tx_files <- check_files("TxDb", bioc_version = bioc_version,
                          eupathdb_version = eupathdb_version)
  csv_copy_path <- file.path(path.package("EuPathDB"), "inst", "extdata", txdb_csv)

  # file.copy() returns FALSE for an existing destination unless
  # overwrite = TRUE, which made the check below fail on every rerun.
  copied <- file.copy(txdb_csv, csv_copy_path, overwrite = TRUE)
  testthat::expect_true(copied)

  tx_checked <- AnnotationHubData::makeAnnotationHubMetadata(path.package("EuPathDB"), txdb_csv)
  save(list = c("tx_checked"), file = file.path(build_dir, "txdb_metadata.rda"))
}


#
# OrganismDb (check)
#
if (organismdb) {
  # Check the OrganismDbi metadata CSV and the built files against each other,
  # copy the CSV into the EuPathDB package's extdata directory, and save the
  # AnnotationHub metadata built from it.
  #
  # This step covers every organism, so the message no longer interpolates
  # `species`, which at this point only holds the last organism from the loop.
  info("Creating AnnotationHubMetadata (OrganismDb)")

  organ_csv <- check_csv(build_dir, file_type = "OrganismDbi",
                         bioc_version = bioc_version,
                         eupathdb_version = eupathdb_version)
  organ_files <- check_files("OrganismDbi", bioc_version = bioc_version,
                             eupathdb_version = eupathdb_version)
  csv_copy_path <- file.path(path.package("EuPathDB"), "inst", "extdata", organ_csv)

  # file.copy() returns FALSE for an existing destination unless
  # overwrite = TRUE, which made the check below fail on every rerun.
  copied <- file.copy(organ_csv, csv_copy_path, overwrite = TRUE)
  testthat::expect_true(copied)

  organ_checked <- AnnotationHubData::makeAnnotationHubMetadata(path.package("EuPathDB"), organ_csv)
  save(list = c("organ_checked"), file = file.path(build_dir, "organismdb_metadata.rda"))
}

#
# GRanges (check)
#
if (granges) {
  # Check the GRanges metadata CSV and the built files against each other,
  # copy the CSV into the EuPathDB package's extdata directory, and save the
  # AnnotationHub metadata built from it.
  #
  # This step covers every organism, so the message no longer interpolates
  # `species`, which at this point only holds the last organism from the loop.
  info("Creating AnnotationHubMetadata (GRanges)")

  grange_csv <- check_csv(build_dir, file_type = "GRanges",
                          bioc_version = bioc_version,
                          eupathdb_version = eupathdb_version)
  grange_files <- check_files("GRanges", bioc_version = bioc_version,
                              eupathdb_version = eupathdb_version)
  csv_copy_path <- file.path(path.package("EuPathDB"), "inst", "extdata", grange_csv)

  # file.copy() returns FALSE for an existing destination unless
  # overwrite = TRUE, which made the check below fail on every rerun.
  copied <- file.copy(grange_csv, csv_copy_path, overwrite = TRUE)
  testthat::expect_true(copied)

  grange_checked <- AnnotationHubData::makeAnnotationHubMetadata(path.package("EuPathDB"), grange_csv)
  save(list = c("grange_checked"), file = file.path(build_dir, "granges_metadata.rda"))
}

info("Finished EuPathDB Package Creation.")
khughitt/EuPathDB documentation built on Nov. 4, 2023, 4:19 a.m.