# Modified versions of piggyback functions that also work with GitHub Enterprise servers.
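# Minimal usage sketch (illustrative only; the Enterprise host, repo, and file names below are hypothetical):
# if (FALSE) {
#   api <- "https://github.example.org/api/v3"   # GitHub Enterprise API root; use "https://api.github.com" for github.com
#   pb_upload2("data.csv", repo = "me/myrepo", tag = "v0.0.0.1",
#              .api_url = api, .token = gh::gh_token(api))
#   pb_download2(repo = "me/myrepo", tag = "v0.0.0.1", dest = ".",
#                .api_url = api, .token = gh::gh_token(api))
# }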
parse_repo2 <- function(repo) {
r <- strsplit(repo, "/")[[1]]
if (length(r) != 2) {
cli::cli_abort(c("Could not parse {.val {repo}} as a GitHub repository.","Make sure you have used the format: {.val owner/repo}"))
}
return(r)
}
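# Example (illustrative): parse_repo2("owner/repo") returns c("owner", "repo");
# any input without exactly one "/" separator aborts with a formatting message.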
# used by pb_upload_file2, pb_upload2
pb_releases2 <- function(repo = paste(gh::gh_tree_remote(), collapse = "/"),
.api_url = gh:::default_api_url(),
.token = gh::gh_token(.api_url),
verbose = FALSE) {
r <- parse_repo2(repo)
releases <- tryCatch(
gh::gh("/repos/:owner/:repo/releases",owner = r[[1]], repo = r[[2]],
.limit = Inf,
.api_url = .api_url,
.token = .token),
error = function(cnd) {
cli::cli_abort(c(`!` = "Cannot access release data for repo {.val {repo}}.","Check that you have provided a {.code .token} and that the repo is correctly specified.",unlist(strsplit(cnd$message, "\\n"))))
}
)
if (length(releases) == 0) {
if (verbose) {
cli::cli_warn(c(`!` = "No GitHub releases found for {.val {repo}}!","You can make a new one with {.fun piggyback::pb_new_release}"))
}
return(invisible(data.frame()))
}
out <- data.frame(release_name = vapply(releases, `[[`, character(1),"name"),
release_id = vapply(releases, `[[`, integer(1),"id"),
release_body = vapply(releases, `[[`, character(1),"body"),
tag_name = vapply(releases, `[[`, character(1),"tag_name"),
draft = vapply(releases, `[[`, logical(1),"draft"),
created_at = vapply(releases, `[[`, character(1),"created_at"),
published_at = vapply(releases, `[[`,character(1), "published_at"),
html_url = vapply(releases,`[[`, character(1), "html_url"),
upload_url = vapply(releases,`[[`, character(1), "upload_url"),
n_assets = vapply(releases,function(x) length(x[["assets"]]), integer(1)))
return(out)
}
# used by pb_upload_file2
# uses get_release_assets2
pb_info2 <- function (repo = paste(gh::gh_tree_remote(), collapse = "/"),
tag = NULL,
.api_url = gh:::default_api_url(),
.token = gh::gh_token(.api_url)) {
r <- parse_repo2(repo)
releases <- pb_releases2(repo = repo, .token = .token,
.api_url = .api_url,
verbose = FALSE)
if (nrow(releases) == 0) {
return(data.frame(file_name = "", size = 0L, timestamp = lubridate::as_datetime(0),
tag = "", owner = r[[1]], repo = r[[2]], upload_url = "",
browser_download_url = "", id = "", state = "", stringsAsFactors = FALSE))
}
if (length(tag) == 1 && tag == "latest" && !"latest" %in% releases$tag_name)
tag <- releases$tag_name[[1]]
if (!is.null(tag))
releases <- releases[releases$tag_name %in% tag, ]
info <- get_release_assets2(releases = releases, r = r,
.api_url=.api_url,
.token = .token)
return(info)
}
# uses pb_info2, pb_releases2
pb_upload_file2 <- function(file,
repo = paste(gh::gh_tree_remote(), collapse = "/"),
tag = "latest", name = NULL,
overwrite = "use_timestamps",
use_timestamps = NULL,
show_progress = TRUE,
.api_url = gh:::default_api_url(),
.token = gh::gh_token(.api_url),
dir = NULL) {
file_path <- do.call(file.path, compact(list(dir, file)))
if (!file.exists(file_path)) {
cli::cli_warn("File {.file {file_path}} does not exist.")
return(NULL)
}
if (!is.null(use_timestamps)) {
cli::cli_warn("{.code use_timestamps} argument is deprecated, please set {.code overwrite = 'use_timestamps'} instead")
}
use_timestamps <- switch(as.character(overwrite),
`TRUE` = FALSE,
`FALSE` = FALSE,
use_timestamps = TRUE
)
overwrite <- switch(as.character(overwrite),
`TRUE` = TRUE,
`FALSE` = FALSE,
use_timestamps = TRUE
)
progress <- httr::progress("up")
if (!show_progress) {
progress <- NULL
}
if (is.null(name)) {
name <- basename(file_path)
}
df <- pb_info2(repo = repo, tag = tag,
.api_url = .api_url,
.token = .token)
i <- which(df$file_name == name)
if (length(i) > 0) {
if (use_timestamps) {
local_timestamp <- fs::file_info(file_path)$modification_time
no_update <- local_timestamp <= df[i, "timestamp"]
if (no_update) {
cli::cli_warn("Matching or more recent version of {.file {file_path}} found on GH, not uploading.")
return(invisible(NULL))
}
}
if (overwrite) {
gh::gh("DELETE /repos/:owner/:repo/releases/assets/:id",
owner = df$owner[[1]], repo = df$repo[[1]], id = df$id[i],
.api_url = .api_url,
.token = .token)
} else {
cli::cli_warn("Skipping upload of {.file {df$file_name[i]}} as file exists on GitHub and {.code overwrite = FALSE}")
return(invisible(NULL))
}
}
if (show_progress) {
cli::cli_alert_info("Uploading {.file {name}} ...")
}
releases <- pb_releases2(repo = repo,
.api_url = .api_url,
.token = .token)
upload_url <- releases$upload_url[releases$tag_name == tag]
r <- httr::RETRY(
verb = "POST", url = sub("\\{.+$", "", upload_url),
query = list(name = name),
httr::add_headers(Authorization = paste("token",.token)),
body = httr::upload_file(file_path),
progress,
terminate_on = c(400, 401, 403, 404, 422)
)
if (show_progress) {
httr::warn_for_status(r)
}
# .pb_cache_clear()
invisible(r)
}
# used by pb_info2
get_release_assets2 <- function(releases, r,
.api_url = gh:::default_api_url(),
.token = gh::gh_token(.api_url)) {
if (nrow(releases) == 0) {
return(data.frame())
}
asset_list <- vector("list", length = nrow(releases))
for (i in seq_along(releases$tag_name)) {
a <- gh::gh(
endpoint = "/repos/:owner/:repo/releases/:release_id/assets",
owner = r[[1]], repo = r[[2]],
release_id = releases$release_id[[i]],
.limit = Inf,
.api_url = .api_url,
.token = .token,
.progress = TRUE
)
if (length(a) == 0) {
next
}
if (!identical(a[[1]], "")) {
a_df <- data.frame(
file_name = vapply(a, `[[`, character(1),"name"),
size = vapply(a, `[[`, integer(1), "size"),
timestamp = lubridate::as_datetime(vapply(a,`[[`, character(1), "updated_at")),
tag = releases$tag_name[i],
owner = r[[1]],
repo = r[[2]],
upload_url = releases$upload_url[i],
browser_download_url = vapply(a, `[[`, character(1L),"browser_download_url"),
id = vapply(a, `[[`,integer(1L), "id"),
state = vapply(a, `[[`,character(1L), "state"), stringsAsFactors = FALSE)
asset_list[[i]] <- a_df
}
}
release_assets <- do.call(rbind, asset_list)
return(release_assets)
}
# uses pb_releases2, pb_release_create2
pb_upload2 <- function (file,
repo = paste(gh::gh_tree_remote(), collapse = "/"),
tag = "latest", name = NULL,
overwrite = "use_timestamps",
use_timestamps = NULL,
show_progress = TRUE,
.api_url = gh:::default_api_url(),
.token = gh::gh_token(.api_url),
dir = NULL) {
stopifnot(is.character(repo), is.character(tag), length(tag) == 1, length(repo) == 1)
releases <- pb_releases2(repo = repo,
.api_url = .api_url,
.token = .token)
if (tag == "latest" && length(releases$tag_name) > 0 &&
!"latest" %in% releases$tag_name) {
if (getOption("piggyback.verbose", default = interactive())) {
cli::cli_alert_info("Uploading to latest release: {.val {releases$tag_name[[1]]}}.")
}
tag <- releases$tag_name[[1]]
}
if (!tag %in% releases$tag_name && !interactive()) {
cli::cli_abort("Release {.val {tag}} not found in {.val {repo}}. No upload performed.")
}
if (!tag %in% releases$tag_name) {
cli::cli_alert_warning("Release {.val {tag}} not found in {.val {repo}}, creating.")
pb_release_create2(repo = repo, tag = tag,
.api_url = .api_url,
.token = .token)
# cli::cli_alert_warning("Release {.val {tag}} not found in {.val {repo}}.")
# run <- utils::menu(choices = c("Yes", "No"), title = glue::glue("Would you like to create a new release now?"))
# if (run == 2)
# return(invisible(NULL))
# if (run == 1)
# pb_release_create2(repo = repo, tag = tag,
# .api_url = .api_url,
# .token = .token)
Sys.sleep(2)
}
out <- lapply(file, function(f) pb_upload_file2(file=f, repo=repo, tag=tag, name=name,
overwrite=overwrite,
use_timestamps=use_timestamps, show_progress=show_progress,
.token=.token,
.api_url=.api_url,
dir=dir))
invisible(out)
}
# used by pb_upload2
pb_release_create2 <- function (repo = paste(gh::gh_tree_remote(), collapse = "/"),
tag, commit = NULL,
name = tag,
body = "Data release",
draft = FALSE,
prerelease = FALSE,
.api_url = gh:::default_api_url(),
.token = gh::gh_token(.api_url)) {
releases <- pb_releases2(repo = repo,
.token = .token,
.api_url=.api_url,
verbose = FALSE)
if (nrow(releases) > 0 && tag %in% releases$tag_name) {
cli::cli_warn("Failed to create release: {.val {tag}} already exists!")
return(invisible(releases[releases$tag_name == tag, ]))
}
r <- parse_repo2(repo)
payload <- compact(list(tag_name = tag, target_commitish = commit,
name = name, body = body, draft = draft, prerelease = prerelease))
# .api_url <- gh:::default_api_url()
resp <- httr::RETRY(verb = "POST", url = glue::glue("{.api_url}/repos/{r[[1]]}/{r[[2]]}/releases"),
httr::add_headers(Authorization = paste("token", .token)),
body = jsonlite::toJSON(payload, auto_unbox = TRUE),
terminate_on = c(400, 401, 403, 404, 422))
if (httr::http_error(resp)) {
cli::cli_warn(c(`!` = "Failed to create release: HTTP error {.val {httr::status_code(resp)}}.",
"See returned error messages for more details"))
return(httr::content(resp))
}
# .pb_cache_clear()
release <- httr::content(resp)
cli::cli_alert_success("Created new release {.val {name}}.")
return(invisible(release))
}
# 1. uses gh_download_asset2, pb_info2
pb_download2 <- function(file = NULL, dest = ".",
repo = paste(gh::gh_tree_remote(), collapse = "/"),
tag = "latest",
overwrite = TRUE, ignore = "manifest.json", use_timestamps = TRUE,
show_progress = TRUE,
.api_url = gh:::default_api_url(),
.token = gh::gh_token(.api_url)) {
progress <- httr::progress("down")
if (!show_progress) {
progress <- NULL
}
df <- pb_info2(repo=repo, tag=tag,
.api_url=.api_url,
.token=.token)
df <- df[df$state != "starter", ]
if (!is.null(file)) {
i <- which(df$file_name %in% file)
if (length(i) < 1) {
cli::cli_warn("file(s) {.file {file}} not found in repo {.val {repo}}")
}
df <- df[i, ]
} else {
i <- which(df$file_name %in% ignore)
if (length(i) >= 1) {
df <- df[-i, ]
}
file <- df$file_name
}
if (length(dest) == 1) {
i <- which(df$file_name %in% file)
dest <- file.path(dest, df$file_name[i])
}
df$dest <- dest
if (use_timestamps) {
local_timestamp <- fs::file_info(dest)$modification_time
update <- df$timestamp > local_timestamp
update[is.na(update)] <- TRUE
df <- df[update, ]
if (dim(df)[[1]] < 1) {
cli::cli_alert_info("All local files already up-to-date!")
return(invisible(NULL))
}
}
resp <- lapply(seq_along(df$id), function(i) {
gh_download_asset2(owner=df$owner[[1]],
repo=df$repo[[1]],
id = df$id[i], destfile = df$dest[i], overwrite = overwrite,
.api_url=.api_url,
.token = .token, progress = progress
)
})
return(invisible(resp))
}
# used by pb_download2
gh_download_asset2 <- function(owner, repo, id, destfile,
overwrite = TRUE,
.api_url = gh:::default_api_url(),
.token = gh::gh_token(.api_url),
progress = httr::progress("down")) {
if (fs::file_exists(destfile) && !overwrite) {
cli::cli_warn(c(
`!` = "{.val {destfile}} already exists, skipping download.",
"Set {.code overwrite = TRUE} to overwrite files."
))
return(NULL)
}
if (!is.null(progress)) {
cli::cli_alert_info("Downloading {.val {basename(destfile)}}...")
}
auth_token <- if (!is.null(.token) && .token != "") {
httr::add_headers(Authorization = paste("token", .token))
}
# .api_url <- gh:::default_api_url()
resp <- httr::RETRY(
verb = "GET",
url = paste0(.api_url,"/repos/", owner, "/", repo, "/", "releases/assets/",id),
httr::add_headers(Accept = "application/octet-stream"),
auth_token, httr::write_disk(destfile, overwrite = overwrite),
progress
)
if (resp$status_code == 400) {
resp <- httr::RETRY(verb = "GET",
url = resp$url, httr::add_headers(Accept = "application/octet-stream"),
auth_token, httr::write_disk(destfile, overwrite = TRUE),
progress)
}
# if (getOption("piggyback.verbose", default = TRUE)) { httr::warn_for_status(resp) }
httr::warn_for_status(resp)
invisible(resp)
}
get_gitrepo <- function(path=".") {
tryCatch({
paste(gh::gh_tree_remote(path), collapse = "/")
},error=function(e) {
stop("YTError: No Git repo detected.")
})
}
get_giturl <- function(path=".") {
tryCatch({
gert::git_remote_list(repo=path)$url
},error=function(e) {
stop("YTError: no git URL detected")
})
}
get_gitapi <- function(path=".") {
git.url <- get_giturl(path=path)
git.base <- str_extract(git.url,"https?://[^/]+")
if (git.base=="https://github.com") {
git.api <- "https://api.github.com"
} else {
git.api <- paste0(git.base,"/api/v3")
}
return(git.api)
}
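# Illustrative mapping (hypothetical URLs): a remote of "https://github.com/owner/repo.git" yields
# "https://api.github.com", while an Enterprise remote such as "https://github.example.org/owner/repo.git"
# yields "https://github.example.org/api/v3". Note this assumes an https remote; ssh-style remotes
# (git@host:owner/repo.git) would not match the str_extract() pattern above.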
get_gittoken <- function(path=".") {
tryCatch({
api <- get_gitapi(path=path)
gh::gh_token(api_url = api)
},error=function(e) {
NULL
})
}
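# Token lookup is delegated to gh::gh_token(), which typically reads the GITHUB_PAT / GITHUB_TOKEN
# environment variables (and, for non-github.com hosts, a host-specific variable such as
# GITHUB_PAT_GITHUB_EXAMPLE_ORG; the hostname here is hypothetical). If the git remote or API URL
# cannot be determined, NULL is returned and downstream requests are attempted unauthenticated.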
#' @export
#' @param files File(s) or folder(s) to be uploaded (absolute path or relative to `path`).
#' Default is to upload all files found in `<path>/release_data`.
#' @rdname git_release
upload_git_release <- function(files="release_data",
path = here::here(),
tag = "v0.0.0.1",
generate_load_script = "R/run_this_to_download_data.R",
repo = get_gitrepo(path=path),
api = get_gitapi(path=path),
token = get_gittoken(path=path)) {
# note to self: should probably check if in repo, and token works.
# linux doesn't seem to work if it is MSK git.. need token or something
# path="C:/Users/Ying/R/autofmt"
# files=c("data/fmt.study.compact.RData","data/fmt.tax.blast.full.rds")
# files="data/fmt.study.compact.RData"
# files=c("data","R")
# files="data"
# files="autofmt.Rproj"
# files="/"
# convert folders to files, make absolute paths
filelist <- map(files,~{
if (R.utils::isAbsolutePath(.x)) {
path <- .x
} else {
path <- file.path(path,.x)
}
if (dir.exists(path)) {
# remove subfolders
dirfiles <- setdiff(list.files(path, full.names = TRUE), list.dirs(path, full.names = TRUE))
return(dirfiles)
} else if (file.exists(path)) {
return(path)
} else {
cli::cli_abort("YTError: path does not exist: {.file {path}}")
}
}) %>% list_c()
if (length(filelist)==0) {
cli::cli_abort("No files found in {dir}")
}
base_files <- basename(filelist)
if (anyDuplicated(base_files)) {
dups <- filelist[base_files %in% base_files[duplicated(base_files)]]
cli::cli_abort("YTError: these files have duplicated basenames: {.file {dups}}")
}
cli::cli_alert_info("Uploading files as Git release: {.file {base_files}}")
pb_upload2(filelist,
tag=tag,
repo=repo,
.api_url = api,
.token=token)
if (!is.null(generate_load_script)) {
if (!file.exists(generate_load_script)) {
script_dir <- dirname(generate_load_script)
dir.create(script_dir, showWarnings = FALSE)
code <- str_glue('
# Data for this repo is stored as a GitHub release asset.
# To download the data from GitHub into the release_data folder, run the code below:
# Git API: {api}
# Repo: {repo}
# Data files: {paste(base_files, collapse = ", ")}
# Note that yingtools2 0.0.1.174 or higher is needed.
if (FALSE) {{
if (!require("yingtools2") || packageVersion("yingtools2") < "0.0.1.174") {{
remotes::install_github("ying14/yingtools2")
}}
yingtools2::download_git_release()
}}
')
writeLines(code, generate_load_script)
cli::cli_alert_info("Data loading script generated: {.path {generate_load_script}}")
cli::cli_alert_info("Consider adding these to {.path .gitignore}:\n{.path {files}}")
}
}
}
#' Download/Upload Git Release Data
#'
#' @description
#' Use these functions to store/access data on Git repositories, through release assets.
#' This is essentially what [`piggyback`](https://github.com/ropensci/piggyback) does, but with modifications to work with GitHub Enterprise servers.
#'
#' @details
#' Storing data in GitHub repositories is quite limited (roughly 50 MB per file). Also, each time you push a new version of the data,
#' multiple copies accumulate in the repository history over time. However, you can upload the data as a *release asset*, which has
#' much more generous size limits and can be overwritten each time, so as to avoid duplicate copies.
#'
#' The [`piggyback`](https://github.com/ropensci/piggyback) package has great functions for taking advantage of the *release asset* feature.
#' However, it currently only works with GitHub, and does not play well with GitHub Enterprise.
#' These functions are modifications that make it work, and that streamline the upload/download process.
#'
#' Some of the default settings help avoid potential issues.
#' The default `tag` of `"v0.0.0.1"` is useful in case the Git repo/project is an
#' R package with formal releases, where automated systems look for the latest version
#' and you would not want them to pick up the release data by accident.
#' Data is stored in the default `dest` location, `release_data`.
#'
#' You probably want to avoid storing in `data`, because if the repo/project is an
#' R package, it will try to include the release data in the package during
#' build (changing .Rbuildignore doesn't seem to work).
#'
#' @param tag The tag version to upload/download. Default is to use `"v0.0.0.1"`.
#' When downloading, can specify `"latest"` (i.e. latest version).
#' @param dest The folder where release data will be saved. Default is `release_data`.
#' @param path Path of the git repo. Default is current project's directory, `here::here()`.
#' @param repo string: GH repository name in format `"owner/repo"`. Default is to guess based on `path`.
#' @param api GitHub API URL. For standard GitHub this is `"https://api.github.com"`,
#' but for GitHub Enterprise it is something like `"https://github.XXXXX.org/api/v3"`.
#' Default is to guess based on info in the Git repo `path`.
#' @param token Personal access token (PAT). These credentials may be needed, depending on the operation and repo.
#' Default is to use [gh::gh_token()], if it exists, or `NULL` otherwise.
#' @export
#'
#' @examples
#' \dontrun{
#' mt1 <- mtcars %>% mutate(version=1)
#' iris1 <- iris %>% mutate(version=1)
#' # save files to the release_data folder (the default upload location)
#' write_csv(mt1, file="release_data/mt1.csv")
#' write_csv(iris1, file="release_data/iris1.csv")
#' # upload files in release_data to GitHub as a release
#' upload_git_release()
#'
#' # On a system that doesn't have the data, download the files from GitHub
#' download_git_release()
#' }
#' @rdname git_release
download_git_release <- function(tag = "v0.0.0.1",
dest = "release_data",
path = here::here(),
repo = get_gitrepo(path=path),
api = get_gitapi(path=path),
token = get_gittoken(path=path)) {
if (!dir.exists(dest)) {
cli::cli_alert_info("Creating folder {.path {dest}}.")
dir.create(dest, showWarnings = FALSE)
}
pb_download2(tag=tag,
dest=dest,
repo=repo,
.api_url = api,
.token=token)
}
# if (FALSE) {
# mt1 <- mtcars %>% mutate(version=1)
# iris1 <- iris %>% mutate(version=1)
# write_csv(mt1,file="mt1.csv")
# write_csv(iris1,file="iris1.csv")
# mt2 <- mtcars %>% mutate(version=2)
# iris2 <- iris %>% mutate(version=2)
# write_csv(mt2,file="mt2.csv")
# write_csv(iris2,file="iris2.csv")
#
# upload_git_release(c("mt1.csv","iris1.csv"))
#
# saved_files
# debugonce(upload_git_release)
#
# download_git_release()
# ytnotes
#
# upload_git_release(c("mt1.csv","iris1.csv"))
# download_git_release()
#
#
# }