knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
First we will simulate and visualize some data from a 2D classification.
## uncomment the following line if you used devtools::install_github("nategarton13/sparseRGPs") library(sparseRGPs) ## comments out the following line if you used devtools::install_github("nategarton13/sparseRGPs") # devtools::load_all() library(ggplot2) ## locations set.seed(1119) xy <- matrix(data = runif(n = 400, min = 0, max = 10), ncol = 2) ## make covariance matrix cov_par_gp <- list("sigma" = 4, "l" = 2, "tau" = 0) Sigma <- make_cov_matC(x = xy, x_pred = matrix(), cov_par = cov_par_gp, cov_fun = "sqexp", delta = 1e-6) ## sample the true latent GP mu <- rep(0, times = nrow(xy)) ff <- as.numeric(mvtnorm::rmvnorm(n = 1, mean = mu, sigma = Sigma)) ## sample the data y <- rbinom(n = 200, size = 1, prob = 1 / (1 + exp(-ff))) ## plot the observations ggplot() + geom_point(mapping = aes(x = xy[,1], y = xy[,2], colour = y ))
Let's fit a sparse GP using one-at-a-time knot selection using the log-likelihood as the objective function.
############################################### ## fit the sparse GP model using OAT ############################################### ## initialize covariance parameters cov_par_start <- list("sigma" = 5, "l" = 2, "tau" = 1e-1) ## initial knots xu_init <- matrix(data = runif(n = 10, min = 0, max = 10), ncol = 2) ## maximum number of knots maxknot <- 20 ## number of candidate knot proposals explored TTmax <- maxknot ## minimum number of candidate knot proposals to explore TTmin <- 5 ## maximum number of gradient ascent iterations maxit <- 500 ## objective function tolerance obj_tol <- 1e-4 ## initial step size parameter in ADADELTA epsilon <- 1e-4 ## set a lower bound on the nugget of the latent GP to ensure stability ## As a rough rule of thumb, sigma^2 / (tau^2 + delta) > 1e6 starts to ## enter territory where certain matrix inverses may be at risk of failing. delta <- 1e-3 ## Fit the sparse GP ## Should take < 10 minutes system.time(gp_fit_fic <- optimize_gp(y = y, # response/target values xy = xy, # matrix of observed input locations cov_fun = "sqexp", # covariance function cov_par_start = cov_par_start, # initial covariance parameter values mu = mu, # marginal means of the GP at xy family = "bernoulli", # conditional distribution of Y|f nugget = TRUE, # do you want to estimate the noise variance/nugget? sparse = TRUE, # do you want to estimate a sparse model? xu_opt = "oat", # method of knot selection xu = xu_init, # initial knots muu = rep(0, times = nrow(xu_init)), # marginal GP means at the knots vi = FALSE, # note variational inference cannot be used with binary data opt = list(maxknot = maxknot, # set algorithmic parameters TTmax = TTmax, TTmin = TTmin, maxit = maxit, obj_tol = obj_tol, epsilon = epsilon, delta = delta), verbose = FALSE)) ## look at estimated covariance parameters gp_fit_fic$results$cov_par ## selected knots gp_fit_fic$results$xu ## plot the optimized objective function values for each added knot plot(1:length(gp_fit_fic$results$obj_fun), gp_fit_fic$results$obj_fun) ## get predictions x_pred <- as.matrix(expand.grid(seq(from = 0, to = 10, by = 0.25), seq(from = 0, to = 10, by = 0.25))) preds_fic <- predict_gp(mod = gp_fit_fic, x_pred = x_pred, mu_pred = rep(0, times = nrow(x_pred)), full_cov = FALSE, vi = FALSE) ## plot the real observations ggplot() + geom_point(mapping = aes(x = xy[,1], y = xy[,2], colour = y)) ## plot predictions over a fine grid probs <- 1 / (1 + exp(-preds_fic$pred$pred_mean)) ggplot() + geom_tile(mapping = aes(x = x_pred[,1], y = x_pred[,2], fill = probs )) ## plot the knots ggplot() + geom_point(mapping = aes(x = gp_fit_fic$results$xu[,1], y = gp_fit_fic$results$xu[,2])) + geom_point(mapping = aes(x = gp_fit_fic$xu_init[,1], y = gp_fit_fic$xu_init[,2]), colour = "blue", shape = 2, size = 2)
## fit a full GP ## This should take a few minutes (especially if you increase maxit) system.time(gp_fit_full <- optimize_gp(y = y, xy = xy, cov_fun = "sqexp", cov_par_start = cov_par_gp, mu = mu, family = "bernoulli", nugget = TRUE, sparse = FALSE, vi = FALSE, opt = list( maxit = 10, obj_tol = obj_tol, epsilon = epsilon, delta = delta), verbose = FALSE)) gp_fit_full$results$cov_par ## get predictions from a full GP preds_full <- predict_gp(mod = gp_fit_full, x_pred = x_pred, mu_pred = rep(0, times = nrow(x_pred)), full_cov = FALSE, vi = FALSE) ## plot the real observations ggplot() + geom_point(mapping = aes(x = xy[,1], y = xy[,2], colour = y)) ## plot the predictions #full probs_full <- 1 / (1 + exp(-preds_full$pred$pred_mean)) full_plot <- ggplot() + geom_tile(mapping = aes(x = x_pred[,1], y = x_pred[,2], fill = probs_full)) full_plot ## plot predictions against each other to see how they match plot(x = preds_full$pred$pred_mean, y = preds_fic$pred$pred_mean)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.