Nothing
## Examples used in the R package documentation / help files
## Author: Fabio Sigrist
library(gpboost)
data(GPBoost_data, package = "gpboost")
# Add intercept column
X1 <- cbind(rep(1,dim(X)[1]),X)
X_test1 <- cbind(rep(1,dim(X_test)[1]),X_test)
########################################################
## Linear random effects and Gaussian process models
########################################################
#--------------------Grouped random effects model: single-level random effect----------------
gp_model <- GPModel(group_data = group_data[,1], likelihood="gaussian")
fit(gp_model, y = y, X = X1, params = list(std_dev = TRUE))
summary(gp_model)
# Alternatively, define and fit model directly using fitGPModel
gp_model <- fitGPModel(group_data = group_data[,1], y = y, X = X1,
likelihood="gaussian", params = list(std_dev = TRUE))
summary(gp_model)
# Make predictions
pred <- predict(gp_model, group_data_pred = group_data_test[,1],
X_pred = X_test1, predict_var = TRUE)
pred$mu # Predicted mean
pred$var # Predicted variances
# Also predict covariance matrix
pred <- predict(gp_model, group_data_pred = group_data_test[,1],
X_pred = X_test1, predict_cov_mat = TRUE)
pred$mu # Predicted mean
pred$cov # Predicted covariance
#--------------------Two crossed random effects and a random slope----------------
gp_model <- fitGPModel(group_data = group_data, likelihood="gaussian",
group_rand_coef_data = X[,2],
ind_effect_group_rand_coef = 1,
y = y, X = X1, params = list(std_dev = TRUE))
summary(gp_model)
#--------------------Gaussian process model----------------
gp_model <- GPModel(gp_coords = coords, cov_function = "exponential",
likelihood="gaussian")
fit(gp_model, y = y, X = X1, params = list(std_dev = TRUE))
summary(gp_model)
# Alternatively, define and fit model directly using fitGPModel
gp_model <- fitGPModel(gp_coords = coords, cov_function = "exponential",
likelihood="gaussian", y = y, X = X1, params = list(std_dev = TRUE))
summary(gp_model)
# Make predictions
pred <- predict(gp_model, gp_coords_pred = coords_test,
X_pred = X_test1, predict_cov_mat = TRUE)
pred$mu # Predicted (posterior) mean of GP
pred$cov # Predicted (posterior) covariance matrix of GP
#--------------------Gaussian process model with Vecchia approximation----------------
gp_model <- fitGPModel(gp_coords = coords, cov_function = "exponential",
vecchia_approx = TRUE, num_neighbors = 20,
likelihood="gaussian", y = y)
summary(gp_model)
#--------------------Gaussian process model with random coefficients----------------
gp_model <- fitGPModel(gp_coords = coords, cov_function = "exponential",
gp_rand_coef_data = X[,2], y = y, X = X1,
likelihood = "gaussian", params = list(std_dev = TRUE))
summary(gp_model)
#--------------------Combine Gaussian process with grouped random effects----------------
gp_model <- fitGPModel(group_data = group_data,
gp_coords = coords, cov_function = "exponential",
likelihood = "gaussian", y = y, X = X1, params = list(std_dev = TRUE))
summary(gp_model)
#--------------------Saving a GPModel and loading it from a file----------------
gp_model <- fitGPModel(group_data = group_data[,1], y = y, X = X1, likelihood="gaussian")
pred <- predict(gp_model, group_data_pred = group_data_test[,1],
X_pred = X_test1, predict_var = TRUE)
# Save model to file
filename <- tempfile(fileext = ".json")
saveGPModel(gp_model,filename = filename)
# Load from file and make predictions again
gp_model_loaded <- loadGPModel(filename = filename)
pred_loaded <- predict(gp_model_loaded, group_data_pred = group_data_test[,1],
X_pred = X_test1, predict_var = TRUE)
# Check equality
pred$mu - pred_loaded$mu
pred$var - pred_loaded$var
#######################
## GPBoost algorithm
#######################
#--------------------Combine tree-boosting and grouped random effects model----------------
# Create random effects model
gp_model <- GPModel(group_data = group_data[,1], likelihood = "gaussian")
# The default optimizer for covariance parameters (hyperparameters) is
# Nesterov-accelerated gradient descent.
# This can be changed to, e.g., Nelder-Mead as follows:
# re_params <- list(optimizer_cov = "nelder_mead")
# gp_model$set_optim_params(params=re_params)
# Use trace = TRUE to monitor convergence:
# re_params <- list(trace = TRUE)
# gp_model$set_optim_params(params=re_params)
# Train model
bst <- gpboost(data = X, label = y, gp_model = gp_model, nrounds = 16,
learning_rate = 0.05, max_depth = 6, min_data_in_leaf = 5,
verbose = 0)
# Same thing using the gpb.train function
dtrain <- gpb.Dataset(data = X, label = y)
bst <- gpb.train(data = dtrain, gp_model = gp_model, nrounds = 16,
learning_rate = 0.05, max_depth = 6, min_data_in_leaf = 5,
verbose = 0)
# Estimated random effects model
summary(gp_model)
# Make predictions
# Predict latent variables
pred <- predict(bst, data = X_test, group_data_pred = group_data_test[,1],
predict_var = TRUE, pred_latent = TRUE)
pred$random_effect_mean # Predicted latent random effects mean
pred$random_effect_cov # Predicted random effects variances
pred$fixed_effect # Predicted fixed effects from tree ensemble
# Predict response variable
pred_resp <- predict(bst, data = X_test, group_data_pred = group_data_test[,1],
predict_var = TRUE, pred_latent = FALSE)
pred_resp$response_mean # Predicted response mean
# For Gaussian data: pred$random_effect_mean + pred$fixed_effect = pred_resp$response_mean
pred$random_effect_mean + pred$fixed_effect - pred_resp$response_mean
#--------------------Combine tree-boosting and Gaussian process model----------------
# Create Gaussian process model
gp_model <- GPModel(gp_coords = coords, cov_function = "exponential",
likelihood = "gaussian")
# Train model
bst <- gpboost(data = X, label = y, gp_model = gp_model, nrounds = 8,
learning_rate = 0.1, max_depth = 6, min_data_in_leaf = 5,
verbose = 0)
dtrain <- gpb.Dataset(data = X, label = y)
bst <- gpb.train(data = dtrain, gp_model = gp_model, nrounds = 16,
learning_rate = 0.05, max_depth = 6, min_data_in_leaf = 5,
verbose = 0)
# Estimated random effects model
summary(gp_model)
# Make predictions
# Predict latent variables
pred <- predict(bst, data = X_test, gp_coords_pred = coords_test,
predict_var = TRUE, pred_latent = TRUE)
pred$random_effect_mean # Predicted latent random effects mean
pred$random_effect_cov # Predicted random effects variances
pred$fixed_effect # Predicted fixed effects from tree ensemble
# Predict response variable
pred_resp <- predict(bst, data = X_test, gp_coords_pred = coords_test,
predict_var = TRUE, pred_latent = FALSE)
pred_resp$response_mean # Predicted response mean
#--------------------Using validation data-------------------------
set.seed(1)
train_ind <- sample.int(length(y),size=250)
dtrain <- gpb.Dataset(data = X[train_ind,], label = y[train_ind])
dtest <- gpb.Dataset.create.valid(dtrain, data = X[-train_ind,], label = y[-train_ind])
valids <- list(test = dtest)
gp_model <- GPModel(group_data = group_data[train_ind,1], likelihood="gaussian")
# Need to set prediction data for gp_model
gp_model$set_prediction_data(group_data_pred = group_data[-train_ind,1])
# Training with validation data and use_gp_model_for_validation = TRUE
bst <- gpb.train(data = dtrain, gp_model = gp_model, nrounds = 100,
learning_rate = 0.05, max_depth = 6, min_data_in_leaf = 5,
verbose = 1, valids = valids,
early_stopping_rounds = 10, use_gp_model_for_validation = TRUE)
print(paste0("Optimal number of iterations: ", bst$best_iter,
", best test error: ", bst$best_score))
# Plot validation error
val_error <- unlist(bst$record_evals$test$l2$eval)
plot(1:length(val_error), val_error, type="l", lwd=2, col="blue",
xlab="iteration", ylab="Validation error", main="Validation error vs. boosting iteration")
#--------------------Do Newton updates for tree leaves---------------
# Note: run the above examples first
bst <- gpb.train(data = dtrain, gp_model = gp_model, nrounds = 100,
learning_rate = 0.05, max_depth = 6, min_data_in_leaf = 5,
verbose = 1, valids = valids,
early_stopping_rounds = 5, use_gp_model_for_validation = FALSE,
leaves_newton_update = TRUE)
print(paste0("Optimal number of iterations: ", bst$best_iter,
", best test error: ", bst$best_score))
# Plot validation error
val_error <- unlist(bst$record_evals$test$l2$eval)
plot(1:length(val_error), val_error, type="l", lwd=2, col="blue",
xlab="iteration", ylab="Validation error", main="Validation error vs. boosting iteration")
#--------------------GPBoostOOS algorithm: GP parameters estimated out-of-sample----------------
# Create random effects model and dataset
gp_model <- GPModel(group_data = group_data[,1], likelihood="gaussian")
dtrain <- gpb.Dataset(X, label = y)
params <- list(learning_rate = 0.05,
max_depth = 6,
min_data_in_leaf = 5)
# Stage 1: run cross-validation to (i) determine to optimal number of iterations
# and (ii) to estimate the GPModel on the out-of-sample data
cvbst <- gpb.cv(params = params,
data = dtrain,
gp_model = gp_model,
nrounds = 100,
nfold = 4,
eval = "l2",
early_stopping_rounds = 5,
use_gp_model_for_validation = TRUE,
fit_GP_cov_pars_OOS = TRUE)
print(paste0("Optimal number of iterations: ", cvbst$best_iter))
# Estimated random effects model
# Note: ideally, one would have to find the optimal combination of
# other tuning parameters such as the learning rate, tree depth, etc.)
summary(gp_model)
# Stage 2: Train tree-boosting model while holding the GPModel fix
bst <- gpb.train(data = dtrain,
gp_model = gp_model,
nrounds = cvbst$best_iter,
learning_rate = 0.05,
max_depth = 6,
min_data_in_leaf = 5,
verbose = 0,
train_gp_model_cov_pars = FALSE)
# The GPModel has not changed:
summary(gp_model)
#--------------------Parameter tuning-------------------------
# Create random effects model, dataset, and define parameter grif
gp_model <- GPModel(group_data = group_data[,1], likelihood="gaussian")
dtrain <- gpb.Dataset(X, label = y)
param_grid = list("learning_rate" = c(0.1,0.01), "min_data_in_leaf" = c(20),
"max_depth" = c(5,10), "num_leaves" = 2^17, "max_bin" = c(255,1000))
# Parameter tuning using cross-validation and deterministic grid search
set.seed(1)
opt_params <- gpb.grid.search.tune.parameters(param_grid = param_grid,
num_try_random = NULL,
nfold = 4,
data = dtrain,
gp_model = gp_model,
verbose_eval = 1,
nrounds = 1000,
early_stopping_rounds = 5,
eval = "l2")
# Parameter tuning using cross-validation and random grid search
set.seed(1)
opt_params <- gpb.grid.search.tune.parameters(param_grid = param_grid,
params = params,
num_try_random = 4,
nfold = 4,
data = dtrain,
gp_model = gp_model,
verbose_eval = 1,
nrounds = 1000,
early_stopping_rounds = 5,
eval = "l2")
#--------------------Cross validation-------------------------
# Create random effects model and dataset
gp_model <- GPModel(group_data = group_data[,1], likelihood="gaussian")
dtrain <- gpb.Dataset(X, label = y)
params <- list(learning_rate = 0.05,
max_depth = 6,
min_data_in_leaf = 5)
# Run CV
cvbst <- gpb.cv(params = params, data = dtrain, gp_model = gp_model,
nrounds = 100, nfold = 4, eval = "l2",
early_stopping_rounds = 5, use_gp_model_for_validation = TRUE)
print(paste0("Optimal number of iterations: ", cvbst$best_iter,
", best test error: ", cvbst$best_score))
#--------------------Saving a booster with a gp_model and loading it from a file----------------
# Train model and make prediction
gp_model <- GPModel(group_data = group_data[,1], likelihood = "gaussian")
bst <- gpboost(data = X, label = y, gp_model = gp_model, nrounds = 16,
learning_rate = 0.05, max_depth = 6, min_data_in_leaf = 5,
verbose = 0)
pred <- predict(bst, data = X_test, group_data_pred = group_data_test[,1],
predict_var= TRUE, pred_latent = TRUE)
# Save model to file
filename <- tempfile(fileext = ".json")
gpb.save(bst,filename = filename)
# Load from file and make predictions again
bst_loaded <- gpb.load(filename = filename)
pred_loaded <- predict(bst_loaded, data = X_test, group_data_pred = group_data_test[,1],
predict_var= TRUE, pred_latent = TRUE)
# Check equality
pred$fixed_effect - pred_loaded$fixed_effect
pred$random_effect_mean - pred_loaded$random_effect_mean
pred$random_effect_cov - pred_loaded$random_effect_cov
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.