inst/doc/quickstart.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  fig.width = 6,
  fig.height = 4,
  fig.retina = 2
)

## ---- results='hide', message=F-----------------------------------------------
library(gplite)
library(ggplot2)
theme_set(theme_classic())

## -----------------------------------------------------------------------------
# create some toy 1d regression data
set.seed(32004)
n <- 200
sigma <- 0.1
x <- rnorm(n)
y <- sin(3*x)*exp(-abs(x)) +  rnorm(n)*sigma 
ggplot() + 
  geom_point(aes(x=x,y=y), size=0.5) + 
  xlab('x') + ylab('y') + xlim(-4,4)

## -----------------------------------------------------------------------------
gp <- gp_init(cfs = cf_sexp(), lik = lik_gaussian())

## -----------------------------------------------------------------------------
print(gp)

## ---- results='hide'----------------------------------------------------------
gp <- gp_optim(gp, x, y)

## -----------------------------------------------------------------------------
print(gp)

## -----------------------------------------------------------------------------
# compute the predictive mean and variance in a grid of points
xt <- seq(-4,4,len=150)
pred <- gp_pred(gp, xt, var=T)

# visualize
mu <- pred$mean
lb <- pred$mean - 2*sqrt(pred$var)
ub <- pred$mean + 2*sqrt(pred$var)
ggplot() + 
  geom_ribbon(aes(x=xt, ymin=lb, ymax=ub), fill='lightgray') +
  geom_line(aes(x=xt, y=mu), size=1) +
  geom_point(aes(x=x, y=y), size=0.5) +
  xlab('x') + ylab('y')

## -----------------------------------------------------------------------------
# predict in a grid of points
xt <- seq(-4,4,len=150)
ndraws <- 30
draws <- gp_draw(gp, xt, draws=ndraws)


pp <- ggplot() + xlab('x') + ylab('y')
for (i in 1:ndraws) {
  pp <- pp + geom_line(data=data.frame(x=xt, y=draws[,i]), aes(x=x,y=y), color='darkgray')
}
pp <- pp + geom_point(aes(x=x,y=y), color='black', size=0.5)
pp
  

## -----------------------------------------------------------------------------
# create 1d toy binary classification data
set.seed(32004)
n <- 150
sigma <- 0.1
x <- rnorm(n)
ycont <- sin(3*x)*exp(-abs(x)) +  rnorm(n)*sigma
y <- rep(0,n)
y[ycont > 0] <- 1

ggplot() + 
  geom_point(aes(x=x,y=y), size=0.5) + 
  xlab('x') + ylab('y')

## ---- results = 'hide'--------------------------------------------------------
gp <- gp_init(cfs = cf_sexp(), lik = lik_bernoulli())
gp <- gp_optim(gp, x, y)

## -----------------------------------------------------------------------------
# predict in a grid of points
xt <- seq(-4,4,len=150)
pred <- gp_pred(gp, xt, quantiles=c(0.05, 0.95), transform=T)
pred_mean <- pred$mean
pred_lb <- pred$quantiles[,1]
pred_ub <- pred$quantiles[,2]


ggplot() + 
  geom_ribbon(aes(x=xt, ymin=pred_lb, ymax=pred_ub), fill='lightgray') +
  geom_line(aes(x=xt, y=pred_mean), color='black', size=1) +
  geom_point(aes(x=x,y=y), color='black', size=0.5) +
  xlab('x') + ylab('y')


## ---- results='hide'----------------------------------------------------------
gp <- gp_init(cfs = cf_sexp(), lik = lik_bernoulli(), approx = approx_ep())
gp <- gp_optim(gp, x, y)

## -----------------------------------------------------------------------------
# predict in a grid of points
xt <- seq(-4,4,len=150)
pred <- gp_pred(gp, xt, quantiles=c(0.05, 0.95), transform=T)
pred_mean <- pred$mean
pred_lb <- pred$quantiles[,1]
pred_ub <- pred$quantiles[,2]

ggplot() + 
  geom_ribbon(aes(x=xt,ymin=pred_lb,ymax=pred_ub), fill='lightgray') +
  geom_line(aes(x=xt,y=pred_mean), color='black', size=1) +
  geom_point(aes(x=x,y=y), color='black', size=0.5) +
  xlab('x') + ylab('y')


## -----------------------------------------------------------------------------
x <- as.numeric(time(datasets::discoveries))
y <- as.numeric(datasets::discoveries)

ggplot() + 
  geom_point(aes(x=x,y=y), size=0.5) + 
  xlab('Year') + ylab('Number of discoveries')

## ---- results='hide'----------------------------------------------------------
gp <- gp_init(cfs = cf_matern32(), lik = lik_poisson())
gp <- gp_optim(gp, x, y)

## -----------------------------------------------------------------------------

# predict in a grid of points
xt <- seq(1860, 1968, len=150)
pred <- gp_pred(gp, xt, quantiles=c(0.05, 0.95), transform=T)
pred_mean <- pred$mean
pred_lb <- pred$quantiles[,1]
pred_ub <- pred$quantiles[,2]

ggplot() + 
  geom_ribbon(aes(x=xt,ymin=pred_lb,ymax=pred_ub), fill='lightgray') +
  geom_line(aes(x=xt,y=pred_mean), color='black', size=1) +
  geom_point(aes(x=x,y=y), color='black', size=0.5) +
  xlab('Year') + ylab('Number of discoveries')


## -----------------------------------------------------------------------------
# create synthetic 2d binary classification data
set.seed(2)
n <- 6000
n_per_cluster <- n/3
d <- 2
x <- 0.5*cbind( matrix(rnorm(n_per_cluster*d), nrow=d) + c(3,3), 
                matrix(rnorm(n_per_cluster*d), nrow=d) - c(0,0),
                matrix(rnorm(n_per_cluster*d), nrow=d) + c(-3,3))
x <- t(x)
y <- c(rep(0,n_per_cluster), rep(1,n_per_cluster), rep(0,n_per_cluster))

# plot 
ggplot() +
  geom_point(data=data.frame(x=x[,1],y=x[,2]), aes(x=x,y=y), color=y+2, size=1) +
  xlab('x1') + ylab('x2')

## ---- results='hide'----------------------------------------------------------
# fit the model
gp <- gp_init(
  lik = lik_bernoulli(), 
  cfs = cf_sexp(), 
  method = method_fitc(num_inducing=50)
)
gp <- gp_optim(gp, x, y)

## -----------------------------------------------------------------------------
# predict
ng <- 20
x1g <- seq(-4,4,len=ng)
x2g <- seq(-2,4,len=ng)

xnew <- cbind( rep(x1g,each=ng), rep(x2g,ng) )
pred <- gp_pred(gp, xnew, transform=T)
prob <- pred$mean

# visualize
ggplot() + 
  geom_contour(data=data.frame(x=xnew[,1], y=xnew[,2], prob=prob),
                aes(x=x, y=y, z=prob, colour=..level..) ) +
  scale_colour_gradient(low = "red", high = "green", guide='none') +
  geom_point(data=data.frame(x=x[,1],y=x[,2]), aes(x=x,y=y), color=y+2, size=1) +
  xlab('x1') + ylab('x2')


## ---- results='hide'----------------------------------------------------------
# fit the model
gp <- gp_init(
  cfs = cf_sexp(), 
  lik = lik_bernoulli(), 
  method = method_rf(num_basis=100)
)
gp <- gp_optim(gp, x, y, tol=1e-5)

## -----------------------------------------------------------------------------
# predict
ng <- 20
x1g <- seq(-4,4,len=ng)
x2g <- seq(-2,4,len=ng)

xnew <- cbind( rep(x1g,each=ng), rep(x2g,ng) )
pred <- gp_pred(gp, xnew, transform=T)
prob <- pred$mean

# visualize
ggplot() + 
  geom_contour(data=data.frame(x=xnew[,1], y=xnew[,2], prob=prob),
                aes(x=x, y=y, z=prob, colour=..level..) ) +
  scale_colour_gradient(low = "red", high = "green", guide='none') +
  geom_point(data=data.frame(x=x[,1],y=x[,2]), aes(x=x,y=y), color=y+2, size=1) +
  xlab('x1') + ylab('x2')


## -----------------------------------------------------------------------------
y_all <- datasets::AirPassengers
x_all <- seq_along(y_all)

# hold out 2 years as a test set
nt <- 24
n <- length(x_all) - nt
x <- x_all[1:n]
y <- y_all[1:n]
xt <- tail(x_all, nt)
yt <- tail(y_all, nt)

ggplot() + 
  geom_line(aes(x=x,y=y), color='black') +
  geom_line(aes(x=xt,y=yt), color='red') +
  xlab('Time (months)') + ylab('Num. of passengers (thousands)')

## -----------------------------------------------------------------------------

# take a log transformation to get a more stationary process
yscaled <- log(y)

# set up the model
cf0 <- cf_const(magn=5)
cf1 <- cf_lin(magn=0.01)
cf2 <- cf_periodic(
  period=12, 
  prior_period = prior_fixed(),
) * cf_sexp(
  lscale=100, 
  magn=1,
  prior_magn = prior_fixed()
)
cfs <- list(cf0, cf1, cf2)
gp <- gp_init(cfs, lik=lik_gaussian(sigma=0.05))

## ---- results='hide'----------------------------------------------------------
gp <- gp_optim(gp, x, yscaled)

## -----------------------------------------------------------------------------

pred_scaled <- gp_pred(gp, xt, var=T)

pred <- exp(pred_scaled$mean)
pred_lb <- exp(pred_scaled$mean - 2*sqrt(pred_scaled$var))
pred_ub <- exp(pred_scaled$mean + 2*sqrt(pred_scaled$var))

ggplot() + 
  geom_ribbon(aes(x=xt, ymin=pred_lb, ymax=pred_ub), fill='lightgray') +
  geom_line(aes(x=xt, y=pred), color='black', alpha=0.5) +
  geom_line(aes(x=x, y=y), color='black') +
  geom_line(aes(x=xt, y=yt), color='red', alpha=0.5) +
  xlab('Time (months)') + ylab('Num. of passengers (thousands)')

Try the gplite package in your browser

Any scripts or data that you put into this service are public.

gplite documentation built on Aug. 24, 2022, 9:07 a.m.