base_name = paste(knitr::current_input(), '_figs/', sep='') knitr::opts_chunk$set( cache.path=paste('_knitr_cache/', base_name, sep='/'), fig.path=paste('figure/', base_name, sep='/'), dpi=300 ) suppressPackageStartupMessages(library(tidyverse)) suppressPackageStartupMessages(library(mvtnorm)) suppressPackageStartupMessages(library(viridis)) suppressPackageStartupMessages(library(lhs))
f = function(theta) { theta = matrix(theta, ncol=2) x = cbind(theta[,1], theta[,2]+0.03*theta[,1]^2-3) dmvnorm(x, c(0,0), diag(c(100, 1))) } df = crossing(theta1 = seq(-22, 22, .5), theta2 = seq(-12,7,.5)) %>% mutate(f = f(cbind(theta1,theta2))) ggplot(df) + geom_raster(aes(x=theta1, y=theta2, fill=f)) + geom_contour(aes(x=theta1, y=theta2, z=f), col='black', bins=10) + scale_fill_viridis()
set.seed(123) lhs = maximinLHS(n=100, k=2) %>% as_data_frame %>% setNames(c('theta1', 'theta2')) %>% mutate(theta1 = theta1 * 40 - 20, theta2 = theta2 * 15 - 10) design = lhs %>% mutate(f = f(cbind(theta1, theta2))) ggplot(design) + geom_point(aes(x=theta1, y=theta2, colour=f), cex=4) + scale_colour_viridis()
doit = function(design, sigma2=NULL) { m = nrow(design) theta = design %>% select(-f) %>% as.matrix ff = design %>% select(f) %>% pull GGfun = function(xx, yy, sigma2) { drop(exp(-0.5/sigma2[1] * outer(xx[,1], yy[,1], '-')^2 -0.5/sigma2[2] * outer(xx[,2], yy[,2], '-')^2)) } # leave-one-out MSE as function of the kernel width wmscv = function(sigma2) { GGinv_ = solve(GGfun(theta, theta, sigma2)) ee_ = drop(1/diag(GGinv_) * GGinv_ %*% sqrt(ff)) wmscv = 1/m * drop(ee_ %*% diag(diag(GGinv_)) %*% ee_) return(wmscv) } # minimise wmscv wrt sigma2 if (is.null(sigma2)) { opt = optim(c(1,1), wmscv) sigma2 = opt$par } # apply approximation GG = GGfun(theta, theta, sigma2) GGinv = solve(GG) bb = drop(solve(GG, sqrt(ff))) ee = drop(1/diag(GGinv) * GGinv %*% sqrt(ff)) ans = list(GGfun=GGfun, theta=theta, sigma2=sigma2, ff=ff, bb=bb, GG=GG, GG2=sqrt(GG), ee=ee, GGinv=GGinv) class(ans) = 'doit' return(ans) } approx.doit = function(obj, r) { # approximate target function at input points r with(obj, { bGG2b_ = drop(bb %*% GG2 %*% bb) post_r = apply(r, 1, function(rr) { gg = GGfun(matrix(rr, nrow=1), theta, sigma2) (sum(bb * gg))^2 / (sqrt(pi^2*prod(sigma2)) * bGG2b_) }) return(post_r) }) }
DOIT = doit(design) post_r_approx = df %>% mutate(fhat = approx.doit(DOIT, cbind(theta1, theta2)))
ggplot(post_r_approx, aes(x=theta1, y=theta2)) + geom_raster(aes(fill=f)) + geom_contour(aes(z=fhat, colour=..level..)) + geom_point(data=design, mapping=aes(x=theta1, y=theta2), pch=1, cex=4, colour='white') + scale_colour_viridis() + scale_fill_viridis()
pv_df = df %>% select(theta1, theta2) %>% mutate(predvar = sapply(1:n(), function(ii) { theta_ = c(theta1[ii], theta2[ii]) gg_ = with(DOIT, GGfun(matrix(theta_, nrow=1), theta, sigma2)) return(with(DOIT, drop(sum(bb*gg_)^2*(1-gg_ %*% GGinv %*% gg_)))) })) ggplot(pv_df) + geom_raster(aes(x=theta1, y=theta2, fill=predvar)) + scale_fill_viridis() + geom_point(data=design, aes(x=theta1, y=theta2), pch=1, cex=3, col='white')
... at locations with maximum predictive variance. Since the predictive variance surface is multimodal, the starting point matters. We use the design point with highest leave-one-out predictive variance as the starting point.
propose_new = function(obj) with(obj, { fn = function(r) { gg = GGfun(matrix(r, nrow=1), theta, sigma2) sum(bb * gg)^2 * drop(1 - gg %*% GGinv %*% gg) } vv = diag(1/diag(GGinv)) %*% (sqrt(ff) - ee)^2 optim(theta[which.max(vv), ], fn, control=list(fnscale=-1))$par })
theta_n = propose_new(DOIT) ggplot(pv_df) + geom_raster(aes(x=theta1, y=theta2, fill=predvar)) + scale_fill_viridis() + geom_point(data=design, aes(x=theta1, y=theta2), pch=1, cex=3, col='white') + geom_point(data=as.data.frame(as.list(theta_n)), aes(x=theta1, y=theta2), pch=1, cex=3, col='red')
Add 75 points sequentially (only re-estimating sigma2
on the last iteration):
n_new = 75 design_update = design DOIT_update = DOIT for (jj in 1:n_new) { theta_n = propose_new(DOIT_update) design_update = bind_rows( design_update, data_frame(theta1 = theta_n['theta1'], theta2 = theta_n['theta2'], f = f(theta_n))) if (jj %% 10 == 0) { sigma2_ = NULL } else { sigma2_ = DOIT_update$sigma2 } DOIT_update = doit(design_update, sigma2=sigma2_) }
post_r_approx_update = df %>% as_data_frame %>% mutate(fhat = approx.doit(DOIT_update, cbind(theta1, theta2))) ggplot(post_r_approx_update, aes(x=theta1, y=theta2)) + geom_raster(aes(fill=f)) + geom_contour(aes(z=fhat)) + geom_point(data=design_update %>% mutate(type=rep(c('old','new'), c(100, n()-100))), mapping=aes(x=theta1, y=theta2, colour=type), cex=2) + scale_fill_viridis()
DOIT2 = DOIT_update
d_ij = with(DOIT2, tcrossprod(bb) * GG2) sum_d_ij = sum(d_ij) nu_ij = map(list(theta1=1, theta2=2), ~with(DOIT2, 0.5 * outer(theta[,.], theta[,.], '+')))
theta1_eval = sort(unique(df$theta1)) theta1_design = DOIT2$theta[,1] sigma1 = DOIT2$sigma2[1] post_theta1 = sapply(theta1_eval, function(tt) { phi_ij = dnorm(tt, nu_ij[[1]], sqrt(sigma1/2)) sum(d_ij * phi_ij) / sum_d_ij }) data_frame(theta1 = theta1_eval, posterior = post_theta1) %>% ggplot(aes(x=theta1, y=posterior)) + geom_point() + geom_line()
theta2_eval = sort(unique(df$theta2)) theta2_design = DOIT2$theta[,2] sigma2 = DOIT2$sigma2[2] post_theta2 = sapply(theta2_eval, function(tt) { phi_ij = dnorm(tt, nu_ij[[2]], sqrt(sigma2/2)) sum(d_ij * phi_ij) / sum_d_ij }) data_frame(theta2 = theta2_eval, posterior = post_theta2) %>% ggplot(aes(x=theta2, y=posterior)) + geom_point() + geom_line()
e_theta = map_dbl(nu_ij, ~sum(d_ij * .) / sum(d_ij)) print(e_theta)
v_theta = matrix(0, 2, 2) v_theta[1,1] = sum(d_ij * nu_ij[[1]]^2) / sum_d_ij v_theta[1,2] = v_theta[2,1] = sum(d_ij * nu_ij[[1]] * nu_ij[[2]]) / sum_d_ij v_theta[2,2] = sum(d_ij * nu_ij[[2]]^2) / sum_d_ij rownames(v_theta) = colnames(v_theta) = names(e_theta) v_theta = v_theta + diag(DOIT2$sigma2)/2 - tcrossprod(e_theta) print(v_theta)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.