R/run_job.R

Defines functions run_job

Documented in run_job

#' Run an existing job on Databricks.
#'
#' Triggers a run of a job that already exists on Databricks.  The job is
#' identified either by its \code{job_id} or by its (unique) \code{name}; at
#' least one of the two must be supplied.  When \code{name} is supplied it
#' takes precedence and the job ID is looked up via \code{jobs_list()}.
#'
#' The API endpoint for running a job is '2.0/jobs/run-now'.  For all
#' details on API calls please see the official documentation at
#' \url{https://docs.databricks.com/dev-tools/api/latest/}.
#'
#' @param job_id A number.  Generated by \code{\link{create_job}} or via
#' the Databricks web interface.  Ignored when \code{name} is provided.
#' @param name Optional.  A string representing the name of the job.  The name
#' is compared literally (not as a regular expression).  If multiple
#' jobs share the same name, you'll need to rename the jobs or provide
#' the unique job ID.
#' @param workspace A string representing the web workspace of your Databricks
#' instance. E.g., "https://eastus2.azuredatabricks.net" or
#' "https://demo.cloud.databricks.com".
#' @param token A valid authentication token generated via User Settings in
#' Databricks or via the Databricks REST API 2.0.  If none is provided, netrc
#' will be used.
#' @param verbose If TRUE, prints the status of the request together with the
#' run ID and number in job on success, or the response body on failure.
#' Defaults to TRUE.
#' @return Invisibly, a list with three elements: \code{run_response} (the
#' raw \code{httr} response), \code{run_id} and \code{number_in_job}.  The
#' latter two are \code{NA} when the response status is not 200.  If
#' \code{name} matches more than one job, nothing is run and \code{NULL} is
#' returned invisibly.  An error is raised if \code{name} matches no job or if
#' neither \code{job_id} nor \code{name} is supplied.
#'
#' @examples
#' \dontrun{
#' # Run a job
#' run_job(job_id = 206,
#'         workspace = 'https://eastus2.azuredatabricks.net',
#'         token = 'dapi1903210980d9a0ads0g9fas')
#'
#' # Run a job by name
#' run_job(name = "My Unique Job Name", workspace = workspace, token = token)
#' }
#'
run_job <- function(job_id = NULL, name = NULL, workspace, token = NULL,
                    verbose = TRUE) {

  # Collapse `verbose` to a single TRUE/FALSE so later checks cannot fail on
  # NA or non-scalar input (values such as 1 are still treated as TRUE).
  verbose <- isTRUE(as.logical(verbose))

  # Fail early instead of sending a request with an empty job ID
  if (is.null(name) && is.null(job_id)) {
    stop("Must provide either `job_id` or `name`.", call. = FALSE)
  }

  # If name provided, call jobs_list to find the job ID
  if (!is.null(name)) {

    if (length(name) != 1 || is.na(name)) {
      stop("`name` must be a single, non-missing string.", call. = FALSE)
    }
    name <- as.character(name)

    jobs_tidy <- jobs_list(workspace = workspace,
                           token = token,
                           verbose = FALSE)$response_tidy

    # Literal comparison: job names may contain regex metacharacters such as
    # "(" or ".", and %in% never yields NA for jobs without a name.
    matches <- jobs_tidy[jobs_tidy$settings.name %in% name, , drop = FALSE]
    n_matches <- length(matches$settings.name)

    # If there is more than one job with the same name, show them and bail out
    if (n_matches > 1) {
      message(paste0("Found multiple jobs with name \"", name, "\":\n"))
      message(paste0(utils::capture.output(matches), collapse = "\n"))
      message(paste0("\n\nPlease use a job ID or give the job a unique name.\n"))
      return(invisible(NULL))
    }

    # If no matches found
    if (n_matches < 1) {
      message(paste0("No job with name \"", name,
                     "\" found.\n Please try a different name."))
      stop("Couldn't find a job with that name.")
    }

    # Exactly one match: use its job ID for the run config
    job_id <- matches$job_id
    message(paste0("Job \"", name, "\" found with ", job_id, "."))
  }

  if (length(job_id) != 1 || is.na(job_id)) {
    stop("`job_id` must be a single, non-missing value.", call. = FALSE)
  }

  # format(scientific = FALSE) keeps large numeric IDs as plain digits;
  # paste0(100000) would otherwise put "1e+05" into the request body.
  run_config <- paste0('{ "job_id": ',
                       format(job_id, scientific = FALSE, trim = TRUE),
                       ' }')

  # Drop trailing slashes so the endpoint URL never contains "//api"
  endpoint <- paste0(sub("/+$", "", workspace), "/api/2.0/jobs/run-now")

  if (is.null(token)) {

    # Make request with netrc by default
    use_netrc <- httr::config(netrc = 1)
    res <- httr::with_config(use_netrc, {
      httr::POST(url = endpoint,
                 httr::content_type_json(),
                 body = run_config)
    })

  } else {

    # Authenticate with token
    headers <- c(Authorization = paste("Bearer", token))

    res <- httr::POST(url = endpoint,
                      httr::add_headers(.headers = headers),
                      httr::content_type_json(),
                      body = run_config)
  }

  status <- res$status_code[1]

  if (status == 200) {

    # Handling successful API response: parse the body once
    parsed <- jsonlite::fromJSON(rawToChar(res$content))
    run_id <- parsed$run_id
    number_in_job <- parsed$number_in_job

    if (verbose) {
      message(paste0(
        "Status: ", status,
        "\nRun ID: ", run_id,
        "\nNumber in Job: ", number_in_job
      ))
    }

  } else {

    run_id <- NA
    number_in_job <- NA

    if (verbose) {
      # Pretty-print the response *body*; fall back to the raw text when the
      # body is not valid JSON so reporting a failure never raises an error.
      body_text <- rawToChar(res$content)
      body_pretty <- tryCatch(
        jsonlite::prettify(body_text),
        error = function(e) body_text
      )

      message(paste0(
        "Status: ", status,
        "\nThe request was not successful:\n\n", body_pretty
      ))
    }
  }

  # Return response (invisibly, so the result is not auto-printed)
  invisible(list(run_response = res,
                 run_id = run_id,
                 number_in_job = number_in_job))
}
RafiKurlansik/bricksteR documentation built on Oct. 13, 2022, 6:58 a.m.