In CollinErickson/CGGP: Composite Grid Gaussian Processes

knitr::opts_chunk$set(echo = TRUE)
library(magrittr)
library(ggplot2)
library(dplyr)

We are going to run an experiment using our SGGP code to see how the various options affect the prediction ability of the model.

Internal options

Selection method in append: UCB, TS, Greedy
Correlation function: Cauchy, CauchySQ, CauchySQT, Gaussian, Power exponential, Matern 3/2, Matern 5/2
Sampling method in fit: Laplace approx, MCMC
Grid size: fast (1 2 4 4 4 6 8 32), slow (1 2 2 2 4 4 4 4 4 6 32)
Batch size
Prediction: use MAP, full Bayesian using samples
Append blocks using RIMSE or RIMSE/point
Point location

Internal options for our experiment

(3) Selection method: UCB, TS, Greedy
(3) Corr func: CauchySQT, Gaussian, Power exp
(2) Sampling method: Laplace, MCMC
(2) Grid size: fast (1 2 4 4 4 6 8 32), slow (1 2 2 2 4 4 4 4 4 6 32)
(2) Batch size: 64 or 256 up to 1024, then 512 for both
(2) Prediction: MAP, full Bayesian
(2) Append using RIMSE, RIMSE/point

This is $2^5 3^2 = 288$ options. We can do a fractional factorial experiment to save time.

First experiment (Comparer2)

I saw that MCMC and full Bayesian prediction were far slower than their alternatives. I ran a smaller experiment on my computer with the following

(3) Selection method: UCB, TS, Greedy
(7) Corr func: CauchySQT, CauchySQ, Cauchy, Gaussian, Power exp, Matern 3/2, and Matern 5/2
(1) Sampling method: Laplace, ~~MCMC~~
(2) Grid size: fast (1 2 4 4 4 6 8 32), slow (1 2 2 2 4 4 4 4 4 6 32)
(2) Batch size: 64 or 256 up to 1024, then 512 for both
(1) Prediction: MAP, ~~full Bayesian~~
(2) Append using RIMSE, RIMSE/point

This took over a day on my laptop using 3 cores.

e2 <- readRDS("ComparerRun2_completed.rds")

e2$completed_runs %>% table

I look at the results and saw something weird going on. It turns out that some of the runs became awful after large number of points.

stripchart(RMSE ~ nallotted, e2$outcleandf)
stripchart(RMSE ~ nallotted + corr, e2$outcleandf, las=1)

ggplot(data=e2$outcleandf, mapping=aes(x=nallotted,y=RMSE, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(corr ~ .) + ggplot2::scale_y_log10()

ggplot(data=e2$outcleandf, mapping=aes(x=nallotted,y=score, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(corr ~ .)

ggplot(data=e2$outcleandf[e2$outcleandf$nallotted!=16384,], mapping=aes(x=nallotted,y=score, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(corr ~ .)

sapply(unique(e2$outcleandf$nallotted), function(nn) lm(score ~ corr + sel.method + factor(batchsize) + append.rimseperpoint + grid_size, e2$outcleandf %>% filter(nallotted==nn, corr!="gaussian", corr!="m52"))$coeff) %>% rbind(nallotted=unique(e2$outcleandf$nallotted)) %>% round(4)

sapply(unique(e2$outcleandf$nallotted), function(nn) lm(log(RMSE) ~ corr + sel.method + factor(batchsize) + append.rimseperpoint + grid_size, e2$outcleandf %>% filter(nallotted==nn, corr!="gaussian", corr!="m52"))$coeff) %>% rbind(nallotted=unique(e2$outcleandf$nallotted)) %>% round(4)

sapply(unique(e2$outcleandf$nallotted), function(nn) lm(log(elapsedtime,2) ~ corr + sel.method + factor(batchsize) + append.rimseperpoint + grid_size, e2$outcleandf %>% filter(nallotted==nn, corr!="gaussian", corr!="m52"))$coeff) %>% rbind(nallotted=unique(e2$outcleandf$nallotted)) %>% round(4)

Comparison 3

This is an experiment that only goes up to sample size 1024, taking stats at 512 and 1024. It does do fully Bayesian prediction (vs using the MAP) and MCMC (vs Laplace approx).

e3 <- readRDS("Comparison3_completed.rds")

e3$completed_runs %>% table

e3$calculate_effects()

ggplot(data=e3$outcleandf, mapping=aes(x=nallotted,y=RMSE, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(corr ~ .) + ggplot2::scale_y_log10()

ggplot(data=e3$outcleandf, mapping=aes(x=nallotted,y=score, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(corr ~ .)

ggplot(data=e3$outcleandf[e3$outcleandf$nallotted!=16384,], mapping=aes(x=nallotted,y=score, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(corr ~ .)

sapply(unique(e3$outcleandf$nallotted), function(nn) lm(score ~ sel.method + factor(batchsize) + append.rimseperpoint + grid_size + pred.fullBayes + use_laplaceapprox, e3$outcleandf %>% filter(nallotted==nn, corr!="gaussian", corr!="powerexp"))$coeff) %>% rbind(nallotted=unique(e3$outcleandf$nallotted)) %>% round(4)

sapply(unique(e3$outcleandf$nallotted), function(nn) lm(log(RMSE) ~ sel.method + factor(batchsize) + append.rimseperpoint + grid_size + pred.fullBayes + use_laplaceapprox, e3$outcleandf %>% filter(nallotted==nn, corr!="gaussian", corr!="powerexp"))$coeff) %>% rbind(nallotted=unique(e3$outcleandf$nallotted)) %>% round(4)

sapply(unique(e3$outcleandf$nallotted), function(nn) lm(log(elapsedtime,2) ~ sel.method + factor(batchsize) + append.rimseperpoint + grid_size + pred.fullBayes + use_laplaceapprox, e3$outcleandf %>% filter(nallotted==nn, corr!="gaussian", corr!="powerexp"))$coeff) %>% rbind(nallotted=unique(e3$outcleandf$nallotted)) %>% round(4)

Comparison 4

I ran experiments of five different functions, only using the fast prediction and fit options. I also changed the fast grid to make it faster, this is also what we currently have the default set to in our package. Two of the correlation functions were removed.

(3) Selection method: UCB, TS, Greedy
(5+2) Corr func: CauchySQT, CauchySQ, Cauchy, ~~Gaussian~~, Power exp, ~~Matern 3/2~~, and Matern 5/2 (I ran Gaussian and Matern 3/2 after, so all were done)
(1) Sampling method: Laplace, ~~MCMC~~
(2) Grid size: fast ~~(1 2 4 4 4 6 8 32)~~ (1,2,4,4,8,12,32), slow (1 2 2 2 4 4 4 4 4 6 32)
(2) Batch size: 64 or 256 up to 1024, then 512 for both
(1) Prediction: MAP, ~~full Bayesian~~
(2) Append using RIMSE, RIMSE/point
(5) Functions: beam, OTL, piston, borehole, wing weight

e4a <- readRDS("ComparerRun4_completed_SQTfixed.rds")
e4a$completed_runs %>% table

e4b <- readRDS("ComparerRun4b_completed.rds")
e4b$completed_runs %>% table

e4dfwg <- rbind(e4a$outcleandf, e4b$outcleandf)
summary(e4dfwg)

Each function having different scale is troublesome. Need to normalize RMSEs. Scores should be fine, maybe?

Gaussian is bad

Score should be function invariant? Not really.

ggplot(data=e4dfwg, mapping=aes(x=nallotted,y=score, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(f ~ corr, scales="free_y")

I had to rerun CauchySQT on wing weight since it had NA scores.

plyr::ddply(e4dfwg, c("corr", "f"), function(x) sum(is.na(x$score))) %>% filter(V1>0)

RMSE on linear scale

ggplot(data=e4dfwg, mapping=aes(x=nallotted,y=RMSE, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(f ~ corr, scales="free_y")

With log scale

ggplot(data=e4dfwg, mapping=aes(x=nallotted,y=RMSE, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(f ~ corr, scales="free_y") + scale_y_log10()

Remove Gaussian, Matern 5/2, Beam bending

e4df <- e4dfwg %>% filter(corr != "gaussian", corr != "m52", f!= "beambending")

e4df$score %>% summary

ggplot(data=e4df, mapping=aes(x=nallotted,y=RMSE, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(f ~ corr, scale="free_y") + ggplot2::scale_y_log10()

ggplot(data=e4df, mapping=aes(x=nallotted,y=score, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(f ~ corr, scale="free_y")

ggplot(data=e4df, mapping=aes(x=nallotted,y=CRPscore, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(f ~ corr, scale="free_y") + scale_y_log10()

Regression coefficients

sapply(unique(e4df$nallotted), function(nn) lm(score ~ corr + sel.method + factor(batchsize) + append.rimseperpoint + grid_size, e4df %>% filter(nallotted==nn))$coeff) %>% rbind(nallotted=unique(e4df$nallotted)) %>% round(4)

sapply(unique(e4df$nallotted), function(nn) lm(log(RMSE) ~ corr + sel.method + factor(batchsize) + append.rimseperpoint + grid_size, e4df %>% filter(nallotted==nn))$coeff) %>% rbind(nallotted=unique(e4df$nallotted)) %>% round(4)

sapply(unique(e4df$nallotted), function(nn) lm(log(elapsedtime,2) ~ corr + sel.method + factor(batchsize) + append.rimseperpoint + grid_size, e4df %>% filter(nallotted==nn, corr!="gaussian", corr!="m52"))$coeff) %>% rbind(nallotted=unique(e4df$nallotted)) %>% round(4)

Comparison 5

(2) correlation: Cauchy SQ, PowerExp
(3) selection method: TS, UCB, Greedy
Rimseperpoint TRUE, grid size set to medium
(5) functions: beam, OTL, piston, borehole, wing
10 replicates

e5 <- readRDS("ComparerRun5_completed.rds")
e5df <- e5$outcleandf

e5$completed_runs %>% summary

ggplot(data=e5df, mapping=aes(x=nallotted,y=RMSE, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(f ~ corr, scale="free_y") + ggplot2::scale_y_log10()

ggplot(data=e5df, mapping=aes(x=nallotted,y=RMSE, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(f ~ corr + sel.method, scale="free_y") + ggplot2::scale_y_log10()

ggplot(data=e5df, mapping=aes(x=nallotted,y=score, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(f ~ corr, scale="free_y")

ggplot(data=e5df, mapping=aes(x=nallotted,y=CRPscore, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(f ~ corr, scale="free_y") + scale_y_log10()

ggplot(data=e5df, mapping=aes(x=nallotted,y=CRPscore, shape=factor(batchsize), color=sel.method)) + geom_point() +
  facet_grid(f ~ corr+sel.method, scale="free_y") + scale_y_log10()

Regression coefficients

sapply(unique(e5df$nallotted), function(nn) lm(score ~ corr + sel.method, e5df %>% filter(nallotted==nn))$coeff) %>% rbind(nallotted=unique(e5df$nallotted)) %>% round(4)

sapply(unique(e5df$nallotted), function(nn) lm(log(RMSE) ~ corr + sel.method, e5df %>% filter(nallotted==nn))$coeff) %>% rbind(nallotted=unique(e5df$nallotted)) %>% round(4)

sapply(unique(e5df$nallotted), function(nn) lm(log(elapsedtime,2) ~ corr + sel.method, e5df %>% filter(nallotted==nn))$coeff) %>% rbind(nallotted=unique(e5df$nallotted)) %>% round(4)

Regression coefficients by function

c1 <- plyr::ddply(e5df, c("f", "nallotted"), 
            function(x) {
              lm(score ~ corr + sel.method, x)$coeff %>% round(4)
})
ggplot(data=reshape2::melt(c1, c("f", "nallotted")) %>% filter(variable!="(Intercept)"), mapping=aes(nallotted, value)) + geom_point() +facet_grid(f ~ variable)

c2 <- plyr::ddply(e5df, c("f", "nallotted"), 
            function(x) {
              lm(log(RMSE) ~ corr + sel.method, x)$coeff %>% round(4)
})
ggplot(data=reshape2::melt(c2, c("f", "nallotted")) %>% filter(variable!="(Intercept)"), mapping=aes(nallotted, value)) + geom_point() +facet_grid(f ~ variable)

c3 <- plyr::ddply(e5df, c("f", "nallotted"), 
            function(x) {
              lm(log(elapsedtime,2) ~ corr + sel.method, x)$coeff %>% round(4)
})
ggplot(data=reshape2::melt(c3, c("f", "nallotted")) %>% filter(variable!="(Intercept)"), mapping=aes(nallotted, value)) + geom_point() +facet_grid(f ~ variable)