R/build_targets.R

#' Set up Google Cloud Build to run a targets pipeline
#' @family Cloud Build functions
#' @export
#' @description Creates a Google Cloud Build yaml file to execute \link[targets]{tar_make} pipelines.
#'
#' Historical runs accumulate in the
#'   configured Google Cloud Storage bucket, and the latest output is downloaded before
#'   \link[targets]{tar_make} executes so up-to-date steps do not rerun.
#'
#' @details Steps to set up your target task in Cloud Build:
#'
#' \itemize{
#'   \item Create your `targets` workflow.
#'   \item Create a Dockerfile that holds the R and system dependencies for your workflow.  You can test the image using \link{cr_deploy_docker}.  Include \code{library(targets)} dependencies - a Docker image with \code{targets} installed is available at \code{gcr.io/gcer-public/targets}.
#'   \item Run \code{cr_build_targets} to create the cloudbuild yaml file.
#'   \item Run the build via \link{cr_build} or similar.  Each build should only recompute outdated targets.
#'   \item Optionally create a build trigger via \link{cr_buildtrigger} - see the sketch after this list.
#'   \item Trigger a build. The first triggered build runs the entire targets pipeline; subsequent builds only recompute the outdated targets.
#'  }
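#'
#' A minimal sketch of the workflow above (the repository and trigger name
#' are illustrative):
#'
#' \preformatted{
#' bs <- cr_buildstep_targets_multi()
#' cr_build_targets(bs, path = "cloudbuild_targets.yaml")
#' repo <- cr_buildtrigger_repo("your-github/your-repo")
#' cr_buildtrigger("cloudbuild_targets.yaml",
#'                 name = "targets-pipeline",
#'                 trigger = repo)
#' }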
#'
#' @section DAGs:
#'
#' If your targets workflow has parallel processing steps then leaving the \code{buildsteps} argument as the default \code{cr_buildstep_targets_multi()} will create a build that uses \code{waitFor} and build ids to construct a DAG.  Setting it to \code{cr_buildstep_targets_single()} will run single-threaded, but you can then customise the \code{targets::tar_make} script.  Alternatively, supply your own custom target buildsteps via \link{cr_buildstep_targets} - for example, you could build the Docker environment targets runs within before the main pipeline.
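#'
#' For example, a single-threaded build with a customised \code{tar_make}
#' script (a sketch; the script path is illustrative):
#'
#' \preformatted{
#' bs <- cr_buildstep_targets_single(
#'   tar_make = "targets::tar_make(script = 'pipelines/_targets.R')"
#' )
#' cr_build_targets(bs)
#' }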
#'
#' @return A Yaml object as generated by \link{cr_build_yaml} if \code{execute="trigger"}, or the built object if \code{execute="now"}
#' @param path File path to write the Google Cloud Build yaml workflow file. Set to NULL to write no file and just return the \code{Yaml} object.
#' @param task_image An existing Docker image that will be used to run your targets workflow after the targets meta has been downloaded from Google Cloud Storage
#' @param target_folder The folder within the Google Cloud Storage bucket where the target metadata will sit.  If NULL, defaults to the RStudio project name, or "targets_cloudbuild" if no RStudio project is found.
#' @param bucket The Google Cloud Storage bucket the target metadata will be saved to in folder `target_folder`
#' @param ... Other arguments passed to \link{cr_build_yaml}
#' @inheritDotParams cr_build_yaml
#' @param task_args A named list of additional arguments to send to \link{cr_buildstep_r} when it executes the \link[targets]{tar_make} command (such as environment arguments)
#' @param tar_make The R script that will run in the \code{tar_make()} step.  Modify to include custom settings such as the \code{script} argument.
#' @param buildsteps Generated buildsteps that create the targets build
#' @param execute Whether to run the Cloud Build now or to write to a file for use within triggers or otherwise
#' @param local If executing now, the local folder that will be uploaded as the context for the target build
#' @inheritParams cr_buildstep_targets
#' @inheritParams cr_buildstep_targets_setup
#' @inheritParams cr_buildstep_targets_teardown
#' @inheritParams cr_build_targets_artifacts
#' @inheritParams cr_build_upload_gcs
#' @seealso \link{cr_buildstep_targets} if you want to customise the build
#' @examples
#'
#' write.csv(mtcars, file = "mtcars.csv", row.names = FALSE)
#'
#' targets::tar_script(
#'   list(
#'     targets::tar_target(file1,
#'       "mtcars.csv", format = "file"),
#'     targets::tar_target(input1,
#'       read.csv(file1)),
#'     targets::tar_target(result1,
#'       sum(input1$mpg)),
#'     targets::tar_target(result2,
#'       mean(input1$mpg)),
#'     targets::tar_target(result3,
#'       max(input1$mpg)),
#'     targets::tar_target(result4,
#'       min(input1$mpg)),
#'     targets::tar_target(merge1,
#'       paste(result1, result2, result3, result4))
#'   ),
#'   ask = FALSE)
#'
#' bs <- cr_buildstep_targets_multi()
#'
#' # only create the yaml
#' par_build <- cr_build_targets(bs, path = NULL)
#' par_build
#'
#' # clean up example
#' unlink("mtcars.csv")
#' unlink("_targets.R")
#'
#' \dontrun{
#' # run it immediately in cloud
#' cr_build_targets(bs, execute="now")
#'
#' # create a yaml file for use in build triggers
#' cr_build_targets(bs)
#' }
#'
cr_build_targets <- function(
  buildsteps = cr_buildstep_targets_multi(),
  execute = c("trigger", "now"),
  path = "cloudbuild_targets.yaml",
  local = ".",
  predefinedAcl = "bucketLevel",
  bucket = cr_bucket_get(),
  download_folder = getwd(),
  ...) {

  execute <- match.arg(execute)

  if(execute == "trigger"){
    yaml <- cr_build_yaml(buildsteps, ...)

    if (!is.null(path)) cr_build_write(yaml, file = path)
    return(yaml)
  }

  target_folder <- basename(tempfile())

  store <- cr_build_upload_gcs(
    local,
    bucket = bucket,
    predefinedAcl = predefinedAcl,
    deploy_folder = target_folder)

  move_it <- cr_buildstep_source_move(target_folder)
  buildsteps <- c(move_it, buildsteps)

  yaml <- cr_build_yaml(buildsteps, ...)

  myMessage(
    paste("Running Cloud Build for targets workflow in",
          normalizePath(local)),
    level = 3)

  if (getOption("googleAuthR.verbose", default = 3) < 3) {
    print(yaml)
  }

  build <- cr_build(yaml, launch_browser = FALSE, source = store)
  built <- cr_build_wait(build)

  extract_upload <- strsplit(buildsteps[[length(buildsteps)]]$args[[2]], " ")[[1]]
  uploaded <- extract_upload[[length(extract_upload)]]

  artifact_download <- cr_build_targets_artifacts(
    built,
    bucket = bucket,
    target_folder = basename(uploaded),
    download_folder = download_folder)

  myMessage(
    sprintf("# Built targets on Cloud Build with status: %s", built$status),
    level = 3)

  if(!is.null(artifact_download)){
    myMessage(
      sprintf("Build artifacts downloaded to %s", artifact_download),
      level = 3)
  }

  built

}



resolve_bucket_folder <- function(target_folder, bucket){
  if (is.null(target_folder)) {
    # default to the active RStudio project name, if one is available
    project <- tryCatch(
      rstudioapi::getActiveProject(),
      error = function(err) NULL
    )
    if (is.null(project) || length(project) == 0) {
      target_folder <- "targets_cloudbuild"
    } else {
      target_folder <- basename(project)
    }
  }

  myMessage(sprintf("targets cloud location: gs://%s/%s",
                    bucket, target_folder),
            level = 3)

  # gs://bucket-name/target-folder
  sprintf("gs://%s/%s", bucket, target_folder)

}
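
# e.g. with no target_folder and no active RStudio project
# (the bucket name is illustrative):
# resolve_bucket_folder(NULL, "my-bucket")
# #> "gs://my-bucket/targets_cloudbuild"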


#' @rdname cr_build_targets
#' @export
#' @details
#'   Use \code{cr_build_targets_artifacts} to download the return values of a
#'   targets Cloud Build, then \link[targets]{tar_read} to read the results.
#'   You can set the downloaded files as the target store via
#'   \code{targets::tar_config_set(store = "_targets_cloudbuild")}.  Set
#'   \code{download_folder = NULL} to overwrite your local targets store.
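#'
#' A sketch of reading results back after a build, assuming a finished build
#' object \code{built}, the default \code{target_folder} resolution, and the
#' example pipeline's \code{result1} target:
#'
#' \preformatted{
#' store <- cr_build_targets_artifacts(built, bucket = cr_bucket_get())
#' targets::tar_config_set(store = store)
#' targets::tar_read(result1)
#' }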
#' @inheritParams cr_build_artifacts
#' @param download_folder Set to NULL to download to the local \code{_targets/} folder, overwriting \code{_targets/*}; otherwise files are written to \code{download_folder/_targets/*}
#' @param target_subfolder If you only want to download a specific subfolder of the \code{_targets/} folder on Cloud Build, specify it here.
#' @return \code{cr_build_targets_artifacts} returns the file path to where the download occurred.
cr_build_targets_artifacts <- function(
  build,
  bucket = cr_bucket_get(),
  target_folder = NULL,
  download_folder = NULL,
  target_subfolder = c("all", "meta", "objects", "user"),
  overwrite = TRUE) {

  target_subfolder <- match.arg(target_subfolder)

  target_bucket <- resolve_bucket_folder(target_folder, bucket)

  build_folder <- basename(target_bucket)

  prefix <- build_folder
  if (target_subfolder != "all") {
    prefix <- paste0(build_folder, "/", target_subfolder)
  }

  arts <- googleCloudStorageR::gcs_list_objects(
    bucket = bucket, prefix = prefix
  )

  if (nrow(arts) == 0) {
    myMessage("No build artifacts found in", target_bucket, level = 3)
    return(NULL)
  }

  if(!is.null(download_folder)){
    dir.create(download_folder, showWarnings = FALSE)
  } else {
    download_folder <- "."
  }

  df_bf <- normalizePath(download_folder)
  tar_store <- targets::tar_config_get("store")

  myMessage("Downloading to download_folder:", file.path(df_bf, tar_store),
            level = 3)

  # create targets folder structure
  dir.create(df_bf, showWarnings = FALSE)
  dir.create(file.path(df_bf, tar_store), showWarnings = FALSE)
  dir.create(file.path(df_bf, tar_store, "meta"), showWarnings = FALSE)
  dir.create(file.path(df_bf, tar_store, "objects"), showWarnings = FALSE)
  dir.create(file.path(df_bf, tar_store, "user"), showWarnings = FALSE)

  # download each artifact into the local _targets folder structure
  lapply(arts$name, function(x) {
    googleCloudStorageR::gcs_get_object(
      x,
      bucket = bucket,
      saveToDisk = file.path(df_bf, only_target_dir(x)),
      overwrite = overwrite
    )
  })

  file.path(df_bf, tar_store)
}

only_target_dir <- function(path){
  gsub("(.+)/(_targets/.+)", "\\2", path)
}
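
# e.g. strips the build source prefix so downloads land in the local store:
# only_target_dir("deploy_abc123/_targets/meta/meta")
# #> "_targets/meta/meta"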
