knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
First we will simulate and visualize some data from a 2D regression.
## uncomment the following line if you used devtools::install_github("nategarton13/sparseRGPs") library(sparseRGPs) ## comment out the following line if you used devtools::install_github("nategarton13/sparseRGPs") # devtools::load_all() library(ggplot2) ## locations set.seed(1116) xy <- matrix(data = runif(n = 400, min = 0, max = 10), ncol = 2) ## make covariance matrix cov_par_gp <- list("sigma" = 3, "l" = 3, "tau" = 0) Sigma <- make_cov_matC(x = xy, x_pred = matrix(), cov_par = cov_par_gp, cov_fun = "sqexp", delta = 1e-6) ## sample the true latent GP mu <- rep(0, times = nrow(xy)) ff <- as.numeric(mvtnorm::rmvnorm(n = 1, mean = mu, sigma = Sigma)) ## sample the data tau <- 1 y <- ff + rnorm(n = length(ff), mean = 0, sd = tau) ## plot the real observations ggplot() + geom_point(mapping = aes(x = xy[,1], y = xy[,2], colour = y))
Let's fit a sparse GP using one-at-a-time knot selection using the log-likelihood as the objective function.
############################################### ## fit the sparse GP model using OAT ############################################### ## initialize covariance parameters cov_par_start <- list("sigma" = 3, "l" = 3, "tau" = 1) ## initial knots xu_init <- matrix(data = runif(n = 10, min = 0, max = 10), ncol = 2) ## maximum number of knots maxknot <- 20 ## number of candidate knot proposals explored TTmax <- maxknot ## minimum number of candidate knot proposals to explore TTmin <- 5 ## maximum number of gradient ascent iterations maxit <- 500 ## objective function tolerance obj_tol <- 1e-4 ## initial step size parameter in ADADELTA epsilon <- 1e-4 ## Fit the sparse GP ## Should take around a minute system.time(gp_fit_fic <- optimize_gp(y = y, # response/target values xy = xy, # matrix of observed input locations cov_fun = "sqexp", # covariance function cov_par_start = cov_par_start, # initial covariance parameter values mu = mu, # marginal means of the GP at xy family = "gaussian", # conditional distribution of Y|f nugget = TRUE, # do you want to estimate the noise variance/nugget? sparse = TRUE, # do you want to estimate a sparse model? xu_opt = "oat", # method of knot selection xu = xu_init, # initial knots muu = rep(0, times = nrow(xu_init)), # marginal GP means at the knots vi = FALSE, # use variational inference instead of maximum likelihood? opt = list(maxknot = maxknot, # set algorithmic parameters TTmax = TTmax, TTmin = TTmin, maxit = 10, obj_tol = obj_tol, epsilon = epsilon), verbose = FALSE)) ## look at estimated covariance parameters gp_fit_fic$results$cov_par ## selected knots gp_fit_fic$results$xu ## plot the optimized objective function values for each added knot plot(1:length(gp_fit_fic$results$obj_fun), gp_fit_fic$results$obj_fun) ## get predictions x_pred <- as.matrix(expand.grid(seq(from = 0, to = 10, by = 0.25), seq(from = 0, to = 10, by = 0.25))) preds_fic <- predict_gp(mod = gp_fit_fic, x_pred = x_pred, mu_pred = rep(0, times = nrow(x_pred)), full_cov = FALSE, vi = FALSE) ## plot the real observations ggplot() + geom_point(mapping = aes(x = xy[,1], y = xy[,2], colour = y)) ## plot predictions over a fine grid ggplot() + geom_tile(mapping = aes(x = x_pred[,1], y = x_pred[,2], fill = preds_fic$pred$pred_mean)) ## plot the knots ggplot() + geom_point(mapping = aes(x = gp_fit_fic$results$xu[,1], y = gp_fit_fic$results$xu[,2])) + geom_point(mapping = aes(x = gp_fit_fic$xu_init[,1], y = gp_fit_fic$xu_init[,2]), colour = "blue", shape = 2, size = 2)
Now we fit a sparse GP using the one-at-a-time knot selection algorithm but using variational inference and the evidence lower bound (ELBO) as the objective function.
## fit a sparse GP using a variational approximation ## This should take 5-6 minutes system.time(gp_fit_vi <- optimize_gp(y = y, xy = xy, cov_fun = "sqexp", cov_par_start = cov_par_start, mu = mu, family = "gaussian", nugget = TRUE, sparse = TRUE, xu_opt = "oat", xu = xu_init, muu = rep(0, times = nrow(xu_init)), vi = TRUE, opt = list(maxknot = maxknot, TTmax = TTmax, TTmin = TTmin, maxit = maxit, obj_tol = obj_tol, epsilon = epsilon), verbose = FALSE)) gp_fit_vi$results$cov_par ## get predictions from a full GP preds_vi <- predict_gp(mod = gp_fit_vi, x_pred = x_pred, mu_pred = rep(0, times = nrow(x_pred)), full_cov = FALSE, vi = TRUE) ## plot the real observations ggplot() + geom_point(mapping = aes(x = xy[,1], y = xy[,2], colour = y)) ## plot the predictions #sparse vi model vi_plot <- ggplot() + geom_tile(mapping = aes(x = x_pred[,1], y = x_pred[,2], fill = preds_vi$pred$pred_mean))
Now we estimate the full GP and compare the predictions made from each model in a scatterplot matrix.
## fit a full GP ## This should take 1-2 minutes system.time(gp_fit_full <- optimize_gp(y = y, xy = xy, cov_fun = "sqexp", cov_par_start = cov_par_start, mu = mu, family = "gaussian", nugget = TRUE, sparse = FALSE, vi = FALSE, opt = list( maxit = maxit, obj_tol = obj_tol, epsilon = epsilon), verbose = FALSE)) gp_fit_full$results$cov_par ## get predictions from a full GP preds_full <- predict_gp(mod = gp_fit_full, x_pred = x_pred, mu_pred = rep(0, times = nrow(x_pred)), full_cov = FALSE, vi = FALSE) ## plot the real observations ggplot() + geom_point(mapping = aes(x = xy[,1], y = xy[,2], colour = y)) ## plot the predictions #full full_plot <- ggplot() + geom_tile(mapping = aes(x = x_pred[,1], y = x_pred[,2], fill = preds_full$pred$pred_mean)) ## plot predictions against each other to see how they match pairs(cbind(preds_full$pred$pred_mean, preds_fic$pred$pred_mean, preds_vi$pred$pred_mean))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.