R/module-repository.R

utils::globalVariables(c(
  "actualFile", "checksum.x", "checksum.y", "expectedFile", "filesize.x", "filesize.y", "result"
))

defaultGitRepoToSpaDESModules <- "PredictiveEcology/SpaDES-modules"

#' Find the latest module version from a SpaDES module repository
#'
#' Modified from <https://stackoverflow.com/a/25485782/1380598>.
#'
#' @param name  Character string giving the module name.
#'
#' @param repo  GitHub repository name, specified as `"username/repo"`.
#'              Default is `"PredictiveEcology/SpaDES-modules"`, which is
#'              specified by the global option `spades.moduleRepo`.
#'              Only `master`/`main` branches can be used at this point.
#' @param moduleFiles Optional. List of files of the `name` and `repo`. If not
#'   supplied, this function will get that information by using `checkModule`.
#' @inheritParams reproducible::Cache
#'
#' @return `numeric_version`
#'
#' @details
#' `getModuleVersion` extracts a module's most recent version by
#' looking at the module \file{.zip} files contained in the module directory.
#' It takes the most recent version, based on the name of the zip file.
#'
#' See the modules vignette for details of module directory structure
#' (<https://spades-core.predictiveecology.org/articles/ii-modules.html#module-directory-structure-modulename>),
#' and see our SpaDES-modules repo for details of module repository structure
#' (<https://github.com/PredictiveEcology/SpaDES-modules>).
#'
#' @author Alex Chubaty
#' @export
#' @inheritParams checkModule
#' @rdname getModuleVersion
#' @seealso [zipModule()] for creating module \file{.zip} folders.
#'
setGeneric("getModuleVersion", function(name, repo, token, moduleFiles = NULL,
                                        verbose = getOption("reproducible.verbose")) {
  standardGeneric("getModuleVersion")
})

#' @rdname getModuleVersion
setMethod(
  "getModuleVersion",
  signature = c(name = "character", repo = "character", token = "ANY"),
  definition = function(name, repo, token, moduleFiles = NULL,
                        verbose = getOption("reproducible.verbose")) {
    if (length(name) > 1) {
      warning("name contains more than one module. Only the first will be used.")
      name <- name[1]
    }
    if (is.null(moduleFiles))
      moduleFiles <- checkModule(name, repo, token = token, verbose = verbose)
    zipFiles <- grep(paste0(name, "_+.+.zip"), moduleFiles, value = TRUE) # moduleName_....zip only
    zipFiles <- grep(file.path(name, "data"), zipFiles, invert = TRUE, value = TRUE) # remove any zip in data folder
    # all zip files is not correct behaviour, only
    versions <- strsplit(zipFiles, "_") |>
      unlist() |>
      grep("[.]zip$", x = _, value = TRUE) |>
      strsplit("[.]zip$") |>
      unlist() |>
      as.numeric_version()
    currentVersion <- sort(versions, decreasing = TRUE)[1]

    return(currentVersion)
})

#' @rdname getModuleVersion
setMethod("getModuleVersion",
          signature = c(name = "character", repo = "missing", token = "ANY"),
          definition = function(name, token, moduleFiles = NULL,
                                verbose = getOption("reproducible.verbose")) {
            v <- getModuleVersion(name, token = token,
                                  getOption("spades.moduleRepo", defaultGitRepoToSpaDESModules),
                                  moduleFiles = moduleFiles, verbose = verbose)
            return(v)
})

#' Check for the existence of a remote module
#'
#' Looks in the remote `repo` for a module named `name`.
#'
#' @param name  Character string giving the module name.
#'
#' @param repo  GitHub repository name.
#'              Default is `"PredictiveEcology/SpaDES-modules"`, which is
#'              specified by the global option `spades.moduleRepo`.
#'
#' @param token A github repository token as from `gitcreds::gitcreds_get()`
#'
#' @return a character vector of module file paths (invisibly).
#'
#' @author Eliot McIntire and Alex Chubaty
#' @export
#' @inheritParams paramCheckOtherMods
#' @importFrom cli col_magenta
#' @importFrom utils packageVersion
#' @rdname checkModule
setGeneric("checkModule", function(name, repo, token, verbose) {
  standardGeneric("checkModule")
})

#' @rdname checkModule
setMethod(
  "checkModule",
  signature = c(name = "character", repo = "character", token = "ANY"),
  definition = function(name, repo, token, verbose = getOption("reproducible.verbose")) {
    goAhead <- FALSE
    if (requireNamespace("httr", quietly = TRUE)) {
      if (packageVersion("httr") >= "1.2.1") {
        goAhead <- TRUE
      }}
    if (goAhead) {

      if (length(name) > 1) {
        warning("name contains more than one module. Only the first will be used.")
        name <- name[1]
      }
      apiurl <- paste0("https://api.github.com/repos/", repo, "/git/trees/master?recursive=1") # nolint

      ua <- httr::user_agent(getOption("spades.useragent"))
      if (missing(token))
        token <- getGitCredsToken()
      request <- if (!is.null(token)) {
        GETWauthThenNonAuth(apiurl, token = token, verbose = verbose)
        # httr::GET(apiurl, ua)
      } else {
        pat <- Sys.getenv("GITHUB_PAT")
        message(cli::col_magenta("Using GitHub PAT from envvar GITHUB_PAT", sep = ""))
        httr::GET(apiurl, ua, config = list(httr::config(token = pat)))
      }

      httr::stop_for_status(request)
      allFiles <- unlist(lapply(httr::content(request)$tree, "[", "path"), use.names = FALSE)
      moduleFiles <- grep(paste0("^modules/", name), allFiles, value = TRUE)
      if (length(moduleFiles) == 0) {
        moduleFiles <- grep(paste0("^", name), allFiles, value = TRUE)
        if (length(moduleFiles) == 0) {
          agrep(name, allFiles, max.distance = 0.25, value = TRUE,
                ignore.case = FALSE) |>
            strsplit(split = "/") |>
            lapply(function(x) x[2]) |>
            unique() |>
            unlist() |>
            paste(collapse = ", ") |>
            stop("Module ", name, " does not exist in the repository. ",
                 "Did you mean: ", ., "?")
        }
      }
    } else {
      stop("checkModule does not work without httr package: ",
              "install.packages('httr')")
    }
    return(invisible(moduleFiles))
})

#' @rdname checkModule
setMethod("checkModule",
          signature = c(name = "character", repo = "missing", token = "ANY"),
          definition = function(name, token, verbose = getOption("reproducible.verbose")) {
            v <- checkModule(name, getOption("spades.moduleRepo",
                                             defaultGitRepoToSpaDESModules),
                             verbose = verbose)
            return(v)
})

################################################################################
#' Check for the existence of a module locally
#'
#' Looks the module path for a module named `name`, and checks for existence
#' of all essential module files listed below.
#'
#' \itemize{
#'   \item \file{data/CHECKSUMS.txt}
#'   \item \file{name.R}
#' }
#'
#' @param name  Character string giving the module name.
#'
#' @param path  Local path to modules directory.
#'              Default is specified by the global option `spades.modulePath`.
#'
#' @param version Character specifying the desired module version.
#'
#' @return Logical indicating presence of the module (invisibly).
#'
#' @author Alex Chubaty
#' @export
#' @rdname checkModuleLocal
#'
setGeneric("checkModuleLocal", function(name, path, version) {
  standardGeneric("checkModuleLocal")
})

#' @rdname checkModuleLocal
setMethod(
  "checkModuleLocal",
  signature = c(name = "character", path = "character", version = "character"),
  definition = function(name, path, version) {
    if (length(name) > 1) {
      warning("name contains more than one module. Only the first will be used.")
      name <- name[1]
    }

    essentialFiles <- file.path(path, name, c(paste0(name, ".R")))

    moduleFiles <- file.path(path, name) |>
      list.files(full.names = TRUE, recursive = TRUE) |>
      unlist(use.names = FALSE)

    result <- FALSE
    # check whether any module files exist locally
    if (length(moduleFiles > 0)) {
      # check all essential files exist locally
      if (all(essentialFiles %in% moduleFiles)) {
        # check that local module version matches that desired
        # if desired version is NA then we need to download most recent version
        if (!is.na(version)) {
          v <- .parseModulePartial(filename = file.path(path, name, paste0(name, ".R")),
                                   defineModuleElement = "version")
          if (isTRUE(length(v) > length(name))) {
            v <- v[names(v) %in% name]
          }
          result <- ifelse(v == numeric_version(version), TRUE, FALSE)
        }
      }
    }

    return(invisible(result))
})

#' @rdname checkModuleLocal
setMethod(
  "checkModuleLocal",
  signature = c(name = "character", path = "ANY", version = "ANY"),
  definition = function(name, path, version) {
    if (missing(path)) path <- getOption("spades.modulePath")
    if (missing(version)) version <- NA_character_

    result <- checkModuleLocal(name, path, version)
    return(invisible(result))
})

################################################################################
#' Download a module from a SpaDES module GitHub repository
#'
#' Download a .zip file of the module and extract (unzip) it to a user-specified location.
#'
#' Currently only works with GitHub repositories where modules are located in
#' a `modules` directory in the root tree on the `master` branch.
#' Module .zip files' names should contain the version number and be inside their
#' respective module folders (see [zipModule()] for zip compression of modules).
#'
#' @note `downloadModule` uses the `GITHUB_PAT` environment variable
#' if a value is set. This alleviates 403 errors caused by too-frequent downloads.
#' Generate a GitHub personal access token with no additional permissions at
#' <https://github.com/settings/tokens>, and add this key to \file{.Renviron}
#' as `GITHUB_PAT=<your-github-pat-here>`.
#'
#' @note The default is to overwrite any existing files in the case of a conflict.
#'
#' @seealso [zipModule()] for creating module .zip folders.
#'
#' @inheritParams getModuleVersion
#'
#' @param path    Character string giving the location in which to save the
#'                downloaded module.
#'
#' @param version The module version to download. (If not specified, or `NA`,
#'                the most recent version will be retrieved.)
#'
#' @param data    Logical. If `TRUE`, then the data that is identified in the
#'                module metadata will be downloaded, if possible. Default `FALSE`.
#'
#' @param quiet   Logical. This is passed to `download.file` (default `FALSE`).
#'
#' @param quickCheck Logical. If `TRUE`, then the check with local data will only
#'                   use `file.size` instead of `digest::digest`.
#'                   This is faster, but potentially much less robust.
#'
#' @param overwrite Logical. Should local module files be overwritten in case they exist?
#'                  Default `FALSE`.
#'
#' @return A list of length 2. The first element is a character vector containing
#'    a character vector of extracted files for the module. The second element is
#'    a `tbl` with details about the data that is relevant for the function,
#'    including whether it was downloaded or not, and whether it was renamed
#'    (because there was a local copy that had the wrong file name).
#'
#' @author Alex Chubaty and Eliot McIntire
#' @export
#' @rdname downloadModule
setGeneric("downloadModule", function(name, path, version, repo, data, quiet,
                                      quickCheck = FALSE, overwrite = FALSE,
                                      verbose = getOption("reproducible.verbose")) {
  standardGeneric("downloadModule")
})

#' @rdname downloadModule
#' @importFrom reproducible checkPath
#' @importFrom utils unzip zip
#' @importFrom data.table setDF rbindlist
setMethod(
  "downloadModule",
  signature = c(name = "character", path = "character", version = "character",
                repo = "character", data = "logical", quiet = "logical",
                quickCheck = "ANY", overwrite = "logical"),
  definition = function(name, path, version, repo, data, quiet, quickCheck,
                        overwrite, verbose = getOption("reproducible.verbose")) {
    if (requireNamespace("httr", quietly = TRUE)) {
      path <- checkPath(path, create = TRUE)
      checkPath(file.path(path, name), create = TRUE)

      # check locally for module. only download if doesn't exist locally,
      # or if overwrite is wanted
      if (!checkModuleLocal(name, path, version) | overwrite) {
        # check remotely for module
        # Authentication
        token <- NULL
        usesGitCreds <- requireNamespace("gitcreds", quietly = TRUE) &&
          requireNamespace("httr", quietly = TRUE)
        if (usesGitCreds) {
          token <- getGitCredsToken()
        }

        moduleFiles <- checkModule(name, repo, token = token, verbose = verbose)
        if (is.na(version))
          version <- getModuleVersion(name, repo, token = token, moduleFiles = moduleFiles,
                                      verbose = verbose)

        innerPaths <- c(paste0("/master/modules/", name, "/"), "/master/")
        for (tries in 1:2) {
          innerPath <- innerPaths[tries]

          zip <- paste0("https://raw.githubusercontent.com/", repo,
                        innerPath, name, "_", version, ".zip") # nolint
          localzip <- file.path(path, basename(zip))

          ua <- httr::user_agent(getOption("spades.useragent"))
          request <- if (!is.null(token)) {
            message(cli::col_magenta("Using GitHub token stored with gitcreds", sep = ""))
            GETWauthThenNonAuth(zip, # ua, httr::write_disk(localzip, overwrite = overwrite),
                                          token = token)
          } else {
            pat <- Sys.getenv("GITHUB_PAT")
            message(cli::col_magenta("Using GitHub PAT from envvar GITHUB_PAT", sep = ""))
            httr::GET(zip, ua, config = list(httr::config(token = pat)),
                      httr::write_disk(localzip, overwrite = overwrite))
          }
          status1 <- try(httr::stop_for_status(request), silent = TRUE)
          if (!is(status1, "try-error")) break
          if (is(status1, "try-error") && tries == 2) stop(status1)
        }

        dataFromGET <- httr::content(request, "raw")
        zipfile <- tempfile(fileext = ".zip")
        writeBin(dataFromGET, zipfile)
        on.exit(unlink(zipfile))
        linkOrCopy(zipfile, localzip, symlink = FALSE, overwrite = overwrite)
        files <- unzip(localzip, exdir = file.path(path), overwrite = overwrite)
      } else {
        files <- list.files(file.path(path, name))
      }

      # after download, check for childModules that also require downloading
      files2 <- list()
      children <- .parseModulePartial(filename = file.path(path, name, paste0(name, ".R")),
                                      defineModuleElement = "childModules")
      childVersions <- .parseModulePartial(filename = file.path(path, name, paste0(name, ".R")),
                                           defineModuleElement = "version")

      dataList2 <- data.frame(result = character(0), expectedFile = character(0),
                              actualFile = character(0), checksum.x = character(0),
                              checksum.y = character(0), algorithm.x = character(0),
                              algorithm.y = character(0),
                              stringsAsFactors = FALSE)
      dataList3 <- dataList2
      if (!is.null(children)) {
        if (all(nzchar(children) & !is.na(children)) && length(children)) {
          tmp <- lapply(children, function(x) {
            f <- if (!is.null(childVersions[[x]])) {
              downloadModule(x, path = path, repo = repo, data = data, version = childVersions[[x]],
                             quickCheck = quickCheck, overwrite = overwrite)
            } else {
              downloadModule(x, path = path,  repo = repo, data = data, quickCheck = quickCheck,
                             overwrite = overwrite)
            }
            files2 <<- append(files2, f[[1]])
            dataList2 <<- setDF(rbindlist(list(dataList2, f[[2]]), use.names = TRUE, fill = TRUE))
          })
        }
      }

      if (data) {
        moduleFilename <- file.path(path, name, paste0(name, ".R"))
        inputs <- .parseModulePartial(filename = moduleFilename,
                                      defineModuleElement = "inputObjects")
        urls <- inputs$sourceURL
        objNames <- if (is.call(inputs$objectName)) {
          unlist(lapply(tail(parse(text = inputs$objectName), length(urls)), function(x) deparse(x)))
        } else {
          inputs$objectName
        }
        names(urls) <- objNames

        children <- .parseModulePartial(filename = moduleFilename,
                                        defineModuleElement = "childModules")

        dataList <- downloadData(module = name, path = path, quiet = quiet,
                                 quickCheck = quickCheck, urls = urls, children = children)
      } else {
        dataList <- checksums(module = name, path = path, quickCheck = quickCheck)
      }
      message(cli::col_magenta("Download complete for module ", name,
                               " (v", version, " at '", path,"').", sep = ""))
    } else{
      stop("downloadModule does not work without httr package: ",
           "install.package('httr')")
    }

    return(list(c(files, files2),
                setDF(rbindlist(list(dataList, dataList2), use.names = TRUE, fill = TRUE))))
  })

#' @rdname downloadModule
setMethod(
  "downloadModule",
  signature = c(name = "character", path = "missing", version = "missing",
                repo = "missing", data = "missing", quiet = "missing",
                quickCheck = "ANY", overwrite = "ANY"),
  definition = function(name, quickCheck, overwrite) {
    path <- checkModulePath()
    files <- downloadModule(name, path = path,
                            version = NA_character_,
                            repo = getOption("spades.moduleRepo",
                                             defaultGitRepoToSpaDESModules),
                            data = FALSE, quiet = FALSE,
                            quickCheck = quickCheck, overwrite = overwrite)
    return(invisible(files))
  })

#' @rdname downloadModule
setMethod(
  "downloadModule",
  signature = c(name = "character", path = "ANY", version = "ANY",
                repo = "ANY", data = "ANY", quiet = "ANY", quickCheck = "ANY",
                overwrite = "ANY"),
  definition = function(name, path, version, repo, data, quiet, quickCheck,
                        overwrite) {
    if (missing(path)) path <- getOption("spades.modulePath")
    if (missing(version)) version <- NA_character_
    if (missing(repo)) repo <- getOption("spades.moduleRepo",
                                         defaultGitRepoToSpaDESModules)
    if (missing(data)) data <- FALSE
    if (missing(quiet)) quiet <- FALSE
    if (missing(quickCheck)) quickCheck <- FALSE
    if (missing(overwrite)) overwrite <- FALSE

    files <- downloadModule(name, path, version, repo, data, quiet, quickCheck, overwrite)
    return(invisible(files))
})

Try the SpaDES.core package in your browser

Any scripts or data that you put into this service are public.

SpaDES.core documentation built on Jan. 11, 2026, 9:06 a.m.