# returns the environment variable that stores the auth token
get_authtoken_envvar <- function(host) {
stopifnot(is_non_empty_char(host))
token_mapping <- getOption("staged.dependencies.token_mapping")
if (!host %in% names(token_mapping)) {
stop("unknown host ", host, ", please set the package option staged.dependencies.token_mapping")
}
token_mapping[[host]]
}
# url for `git clone`
get_repo_url <- function(repo, host) {
stopifnot(
is_non_empty_char(repo),
is_non_empty_char(host)
)
file.path(host, paste0(repo, ".git"))
}
# gets the currently checked out branch
get_current_branch <- function(git_repo) {
git2r::repository_head(git_repo)$name
}
# checks that all branches start with "<<remote_name>>/" (or staged_dep_tag_ which is the name of a branch where
# staged_dep has previously checked out a version where ref = <<tag_name>>)
check_only_remote_branches <- function(git_repo, remote_name) {
all_branches <- names(git2r::branches(git_repo))
stopifnot(all(vapply(all_branches, function(x) startsWith(x, paste0(remote_name, "/")) || startsWith(x, "staged_dep_tag_"),
logical(1))))
}
# clones the repo and only keeps remote branches
# if repo is already there, fetches and prunes (removes) remote branches that are
# no longer there
# select_ref_rule is a function that is given the available refs
# and selects one of them
# if must_work is TRUE then error is thrown if repo is not accessible, if FALSE then warning is thrown
# verbose level: 0: none, 1: print high-level git operations, 2: print git clone detailed messages etc.
# returns: list of repo_dir and checked out branch/ref (according to branch rule)
checkout_repo <- function(repo_dir, repo_url, select_ref_rule, token_envvar = NULL, must_work = FALSE) {
stopifnot(
is.function(select_ref_rule),
endsWith(repo_url, ".git")
)
creds <- if (is.null(token_envvar)) {
NULL
} else {
git2r::cred_token(token = token_envvar)
}
if (!dir.exists(repo_dir)) {
stopifnot(is_non_empty_char(repo_url))
message_if_verbose("clone ", repo_url, " to directory ", repo_dir, required_verbose = 2)
message_if_verbose("clone ", repo_url, " to cache directory...",
required_verbose = 1, is_equal = TRUE
)
cloned_repo <- tryCatch(
{
git_repo <- git2r::clone(
url = repo_url, local_path = repo_dir,
credentials = creds, progress = verbose_sd_get() >= 2
)
},
error = function(e) {
# catch some common errors
# only do this when cloning because the API calls introduce quite some time overhead
host <- paste(utils::head(strsplit(repo_url, "/", fixed = TRUE)[[1]], -2), collapse = "/")
repo <- paste(utils::tail(strsplit(repo_url, "/", fixed = TRUE)[[1]], 2), collapse = "/")
repo <- substr(repo, start = 0, stop = nchar(repo) - nchar(".git"))
if (!identical(httr::status_code(httr::HEAD(host)), 200L)) {
stop("Host ", host, " not reachable")
}
notification_function <- if (must_work) stop else warning
resp <- get_repo_access(repo, host, token_envvar)
if (!is.null(resp) && httr::status_code(resp) > 200) {
notification_function(
paste0(
"You cannot access ", repo, " at host ", host,
". If you expect to be able to access this repo then ",
"check that repo and token in envvar '", token_envvar,
"' are correct.\n",
"The response's content was:\n", paste(httr::content(resp), collapse = "\n"),
if (!must_work) " Staged dependencies will continue, ignoring this repository. Some packages may ",
"not be able to be installed and its package name is assumed to match repository name."
)
)
} else {
notification_function(
paste0(
"Repo ", repo, " could ",
"not be cloned. The git2r::clone error is: ", e$message,
if (!must_work) "\nStaged dependencies will continue, ignoring this repository. Some packages may ",
"not be able to be installed and its package name is assumed to match repository name."
)
)
}
return(NULL)
}
)
if (is.null(cloned_repo)) {
return(list(dir = as.character(NA), ref = as.character(NA), sha = as.character(NA), accessible = FALSE))
}
# git automatically created local tracking branch (for master or main), checkout
# corresponding remote branch and delete local branch, so we only have remote
# branches
# note: git2r::clone seems to have an argument `checkout = FALSE`, but it does not
# seem to work (it still checks out the local branch)
local_branch <- git2r::repository_head(git_repo)
remote_branch <- git2r::branch_get_upstream(local_branch)
git2r::checkout(git_repo, branch = remote_branch$name)
git2r::branch_delete(local_branch)
rm(local_branch, remote_branch)
# todo: on.exit if unsuccessful
} else {
message_if_verbose("fetch ", git2r::remote_url(repo_dir), " in directory ", repo_dir, required_verbose = 2)
message_if_verbose("fetch ", git2r::remote_url(repo_dir), " in cache directory...",
required_verbose = 1, is_equal = TRUE
)
git_repo <- git2r::repository(repo_dir)
# prune (remove) remote branches that were deleted from remote
git2r::config(git_repo, remote.origin.prune = "true")
tryCatch(
{
git2r::fetch(git_repo,
name = get_remote_name(git_repo, repo_url),
credentials = creds,
verbose = verbose_sd_get() >= 2
)
},
error = function(cond) {
warning(
"Unable to fetch from remote for ", repo_dir, " using state of repo found in cache.\n",
"Error message when trying to fetch: ", cond$message
)
}
)
}
check_only_remote_branches(git_repo, remote_name = get_remote_name(git_repo, repo_url))
available_refs <- available_references(repo_dir, remote_name = get_remote_name(git_repo, repo_url))
selected_ref <- select_ref_rule(available_refs)
if (attr(selected_ref, "type") == "branch") {
if (!selected_ref %in% available_refs$ref[available_refs$type == "branch"]) {
stop("ref ", selected_ref, " is unavailable for this repo")
}
branch <- paste0(get_remote_name(git_repo, repo_url), "/", selected_ref)
message_if_verbose(" - checkout branch ", branch, " in directory ", repo_dir, required_verbose = 2)
message_if_verbose(" - checkout branch ", branch, " in cache directory...",
required_verbose = 1, is_equal = TRUE
)
git2r::checkout(git_repo, branch = branch, force = TRUE)
} else if (attr(selected_ref, "type") == "tag") {
message_if_verbose(" - checkout tag ", selected_ref, " in directory ", repo_dir, required_verbose = 2)
message_if_verbose(" - checkout tag ", selected_ref, " in cache directory...",
required_verbose = 1, is_equal = TRUE
)
git2r::branch_create(
commit = git2r::commits(repo = repo_dir, ref = selected_ref, n = 1)[[1]],
force = TRUE, name = paste0("staged_dep_tag_", selected_ref)
)
git2r::checkout(git_repo, branch = paste0("staged_dep_tag_", selected_ref), force = TRUE)
} else {
stop("The selected reference should have a type attribute as 'branch' or 'tag'")
}
return(list(
dir = repo_dir, ref = selected_ref,
sha = get_short_sha(repo_dir), accessible = TRUE
))
}
# Install the external deps required for a package
# does not install dependencies that appear in `internal_pkg_deps`
install_external_deps <- function(repo_dir, internal_pkg_deps, ...) {
# `remotes::install_deps` (and renv::install)
# only makes use of the package DESCRIPTION file
# So we create a temp directory containing this file and then call this function
repo_dir_external <- tempfile(paste0(basename(repo_dir), "_externalDeps"))
fs::dir_create(repo_dir_external)
fs::file_copy(
file.path(repo_dir, "DESCRIPTION"),
file.path(repo_dir_external, "DESCRIPTION")
)
# remove internal_pkg_deps from DESCRIPTION file
desc_obj <- desc::desc(file.path(repo_dir_external, "DESCRIPTION"))
new_deps <- desc_obj$get_deps()[!desc_obj$get_deps()$package %in% internal_pkg_deps, ]
desc_obj$set_deps(new_deps)
if (!is.null(Sys.getenv("RENV_PROJECT")) && Sys.getenv("RENV_PROJECT") != "" && requireNamespace("renv", quietly = TRUE)) {
# renv::install installs the package not just the dependencies
# in this case we do not want the package to be installed (as it overwrites
# the sha which is needed to make sure the internal dependencies have not changed)
temp_package_name <- paste0(desc_obj$get("Package"), ".dependencies")
desc_obj$set("Package", paste(temp_package_name))
desc_obj$write()
renv::install(repo_dir_external)
suppressMessages(utils::remove.packages(temp_package_name))
} else {
desc_obj$write()
remotes::install_deps(repo_dir_external, ...)
}
}
# function to get the remote name (e.g. origin) which matches
# the url given in the staged.deps yaml file
get_remote_name <- function(git_repo, repo_url) {
# remove the https:// and .git from repo_url
repo_url <- gsub("^.+://|.git$", "", repo_url, perl = TRUE)
remotes <- git2r::remotes(git_repo)
for (remote in remotes) {
target_url <- git2r::remote_url(git_repo, remote = remote)
target_url <- gsub("^(https://|git@ssh.|git@)|\\.git$", "", target_url)
target_url <- gsub(":", "/", target_url, fixed = TRUE)
if (repo_url == target_url) {
return(remote)
}
}
# by default return origin
return("origin")
}
#' Install a git repository
#'
#' It adds the git SHA to the DESCRIPTION file, so that the package
#' does not need to be installed again when the same commit is already
#' installed.
#'
#' @param repo_dir directory of repo
#' @param ... Additional args passed to `remotes::install_deps`. Note `upgrade`
#' is set to "never" and shouldn't be passed into this function.
install_repo_add_sha <- function(repo_dir, ...) {
checkmate::assert_directory_exists(repo_dir)
read_dcf <- function(path) {
fields <- colnames(read.dcf(path))
as.list(read.dcf(path, keep.white = fields)[1, ])
}
write_dcf <- function(path, desc) {
write.dcf(
rbind(unlist(desc)),
file = path,
keep.white = names(desc),
indent = 0
)
}
# returns the installed sha of a git package and NULL if package is not installed
# or sha was not saved in DESCRIPTION file
get_local_sha <- function(pkg_name) {
# see remotes:::package2remote
pkg_desc <- tryCatch(utils::packageDescription(pkg_name),
error = function(e) NA, warning = function(e) NA
)
if (identical(pkg_desc, NA)) {
return(NULL)
}
pkg_desc$RemoteSha
}
stopifnot(git2r::in_repository(repo_dir)) # should be a git repository
commit_sha <- git2r::sha(git2r::repository_head(repo_dir))
git_status <- git2r::status(repo_dir)
if ((length(git_status$staged) > 0) || (length(git_status$unstaged) > 0) ||
(length(git_status$untracked) > 0)) {
# check that there are no changes (so that sha is correct), there should be no
# untracked files (because the user might work on a local repo on a new file that is
# still untracked)
# a file is untracked if it was not part of the previous commit and was not staged yet
# when a file that was in the previous commit and is modified, it is unstaged
# until it is staged (with git add)
stop("The git directory ", repo_dir, " contains changes.")
}
# see remotes:::add_metadata
source_desc <- file.path(repo_dir, "DESCRIPTION")
desc <- read_dcf(source_desc)
# see https://github.com/r-lib/remotes/blob/055754a709314f325b254a6182820e1e6d9bea32/R/install-git.R#L128
# we use generic git2r remote type rather than github or gitlab because we cannot
# deduce this directly from the URL
metadata <- list(
RemoteType = "git2r",
RemoteUrl = git2r::remote_url(repo_dir),
# RemoteSubdir = NULL,
# RemoteRef = x$ref,
RemoteSha = commit_sha
)
# once we have installed or failed to install the package we reset the repo
# so that further operations on the repo do not fail by the above
# "the git directory contains changes" check
on.exit({
git2r::reset(git2r::commits(repo_dir)[[1]], reset_type = "hard")
# git clean
try(unlink(file.path(repo_dir, rlang::flatten_chr(git2r::status(repo_dir)$untracked)), recursive = TRUE))
})
desc <- utils::modifyList(desc, metadata)
write_dcf(source_desc, desc)
# only install if SHA differs
if (identical(commit_sha, get_local_sha(desc$Package))) {
message("Skipping installation of ", repo_dir, " since same commit sha already installed")
return(invisible(NULL))
}
utils::install.packages(repo_dir, repos = NULL, type = "source")
invisible(NULL)
}
get_short_sha <- function(repo_dir) {
substr(git2r::sha(git2r::repository_head(repo_dir)), 1, 7)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.