knitr::opts_chunk$set(echo = TRUE)
This tutorial emulates, in R, the Vertex AI SDK for Python notebook tutorials.
Run the following chunk to install googleCloudVertexAIR and the other R packages required to complete this tutorial (it first checks which packages are already installed and installs only the missing ones):
required_packages <- c("remotes", "googleAuthR")
missing_packages <- required_packages[!(required_packages %in% installed.packages()[, "Package"])]
if (length(missing_packages)) install.packages(missing_packages)

# remotes::install_github("justinjm/googleCloudVertexAIR") # run first time
Create a file called .Renviron in your project's working directory and set the following environment variables:
- GAR_SERVICE_JSON - path to the service account (JSON) key file you downloaded and copied earlier
- GCVA_DEFAULT_PROJECT_ID - the ID of the GCP project you configured earlier
- GCVA_DEFAULT_REGION - the GCP region for your resources; one of "us-central1" or "eu"
For example, your .Renviron should look like:
# .Renviron
GAR_SERVICE_JSON="/Users/me/auth/auth.json"
GCVA_DEFAULT_PROJECT_ID="my-project"
GCVA_DEFAULT_REGION="us-central1"
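After saving the file, a quick check (a minimal sketch using only base R) confirms the variables are visible to your session; restart R or reload the file first:

```r
# Reload .Renviron in the current session (or simply restart R),
# then confirm the three variables are set.
readRenviron(".Renviron")
Sys.getenv(c("GAR_SERVICE_JSON", "GCVA_DEFAULT_PROJECT_ID", "GCVA_DEFAULT_REGION"))
```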
TODO: https://github.com/justinjm/googleCloudVertexAIR/issues/26
library(googleAuthR)
library(googleCloudVertexAIR)

options(googleAuthR.scopes.selected = "https://www.googleapis.com/auth/cloud-platform")

gar_auth_service(json_file = Sys.getenv("GAR_SERVICE_JSON"))
projectId <- Sys.getenv("GCVA_DEFAULT_PROJECT_ID")

gcva_region_set(region = "us-central1")
gcva_project_set(projectId = projectId)

timestamp <- strftime(Sys.time(), "%Y%m%d%H%M%S")
timestamp
datasetDisplayName <- sprintf("california-housing-%s", timestamp)
datasetDisplayName
Source dataset: gs://cloud-samples-data/ai-platform-unified/datasets/tabular/california-housing-tabular-regression.csv
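Optionally, you can preview the source data before importing it. This is a minimal sketch that assumes the cloud-samples-data bucket is publicly readable over HTTPS (the gs:// URI maps to a storage.googleapis.com URL):

```r
# Preview the first rows of the public source CSV (optional).
csv_url <- "https://storage.googleapis.com/cloud-samples-data/ai-platform-unified/datasets/tabular/california-housing-tabular-regression.csv"
housing_preview <- read.csv(csv_url, nrows = 5)
str(housing_preview)
```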
dataset <- gcva_create_tabluar_dataset(
  displayName = datasetDisplayName,
  gcsSource = "gs://cloud-samples-data/ai-platform-unified/datasets/tabular/california-housing-tabular-regression.csv")

dataset
job <- gcva_automl_tabluar_training_job(
  displayName = sprintf("california-housing-%s", timestamp),
  optimizationPredictionType = "regression",
  column_transformations = list(
    list(numeric = list(column_name = "longitude")),
    list(numeric = list(column_name = "latitude")),
    list(numeric = list(column_name = "housing_median_age")),
    list(numeric = list(column_name = "total_rooms")),
    list(numeric = list(column_name = "total_bedrooms")),
    list(numeric = list(column_name = "population")),
    list(numeric = list(column_name = "households")),
    list(numeric = list(column_name = "median_income"))
  )
)
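As an aside, column_transformations is a plain list of single-element lists, so the same specification can be built programmatically instead of written out by hand; a sketch, assuming the same eight numeric feature columns:

```r
# Build the same column_transformations list from a vector of column names.
numeric_columns <- c("longitude", "latitude", "housing_median_age", "total_rooms",
                     "total_bedrooms", "population", "households", "median_income")
column_transformations <- lapply(
  numeric_columns,
  function(col) list(numeric = list(column_name = col))
)
str(column_transformations, max.level = 3)
```

The resulting object can be passed as the column_transformations argument in the chunk above.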
model <- gcva_run_job(
  job = job,
  dataset = dataset,
  targetColumn = "median_house_value",
  modelDisplayName = sprintf("model-%s", datasetDisplayName))

model
Batch prediction reads its input from BigQuery: a dataset named california_housing containing the source table california_housing.source_data, plus a batch input table batch_02 created from the BQ table california_housing.source_data.
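A rough sketch of creating the batch input table with the bigrquery package (bigrquery is not used elsewhere in this tutorial, and the sketch assumes california_housing.source_data has already been loaded into BigQuery):

```r
library(bigrquery)

# Create the BigQuery dataset if it does not exist yet.
if (!bq_dataset_exists(bq_dataset(projectId, "california_housing"))) {
  bq_dataset_create(bq_dataset(projectId, "california_housing"))
}

# Build the batch input table from the source table, dropping the target column
# so the table holds only feature columns.
bq_project_query(
  projectId,
  "CREATE OR REPLACE TABLE california_housing.batch_02 AS
   SELECT * EXCEPT (median_house_value)
   FROM california_housing.source_data"
)
```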
bq_source_uri <- sprintf("bq://%s.california_housing.batch_02", projectId)
bq_destination_prefix <- sprintf("bq://%s.california_housing", projectId)
# hard code modelName for testing purposes, model state = completed
# model <- Sys.getenv("GCVA_TEST_MODEL_NAME_AUTOML")

batch_prediction_job <- gcva_batch_predict(
  jobDisplayName = sprintf("california-housing-%s", timestamp),
  model = model,
  bigquerySource = bq_source_uri,
  instancesFormat = "bigquery",
  predictionsFormat = "bigquery",
  bigqueryDestinationPrefix = bq_destination_prefix
)

batch_prediction_job
Once the batch prediction job has completed, you can view and use the predictions. Open the BigQuery console, navigate to the dataset where the predictions were saved, and then modify and run the query below:
SELECT
  predicted_TARGET_COLUMN_NAME.value,
  predicted_TARGET_COLUMN_NAME.lower_bound,
  predicted_TARGET_COLUMN_NAME.upper_bound
FROM BQ_DATASET_NAME.BQ_PREDICTIONS_TABLE_NAME
See more details here: https://cloud.google.com/vertex-ai/docs/tabular-data/classification-regression/get-batch-predictions#retrieve-batch-results
The predictions table is named in the form predictions_TIMESTAMP.
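The same query can also be run from R; a sketch using the bigrquery package, filling in the target column median_house_value and the california_housing destination dataset from this tutorial (replace predictions_TIMESTAMP with the table the job actually created):

```r
library(bigrquery)

predictions_table <- "predictions_TIMESTAMP"  # replace with the actual table name

sql <- sprintf(
  "SELECT
     predicted_median_house_value.value,
     predicted_median_house_value.lower_bound,
     predicted_median_house_value.upper_bound
   FROM `%s.california_housing.%s`",
  projectId, predictions_table
)

predictions <- bq_table_download(bq_project_query(projectId, sql))
head(predictions)
```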
Finally, clean up by deleting the Vertex AI dataset created in this tutorial:

gcva_delete_dataset(dataset = dataset)
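If you also created the BigQuery resources above, they can be removed as well; a sketch with bigrquery that deletes the california_housing dataset and everything in it:

```r
library(bigrquery)

# Delete the BigQuery dataset used for batch prediction, including its tables.
bq_dataset_delete(bq_dataset(projectId, "california_housing"), delete_contents = TRUE)
```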