Interpolates between known points using Bayesian estimation
Description
Calculates the posterior distribution of results at a point using the
techniques outlined by Oakley. Function interpolant()
is the
primary function of the package. Function interpolant.quick()
gives the expectation of the emulator at a set of points, and function
interpolant()
gives the expectation and other information (such
as the variance) at a single point. Function int.qq()
gives a
quickquick vectorized interpolant using certain timesaving
assumptions.
Usage
1 2 3 4 5 6 7  interpolant(x, d, xold, Ainv=NULL, A=NULL, use.Ainv=TRUE,
scales=NULL, pos.def.matrix=NULL, func=regressor.basis,
give.full.list = FALSE, distance.function=corr, ...)
interpolant.quick(x, d, xold, Ainv=NULL, scales=NULL,
pos.def.matrix=NULL, func=regressor.basis, give.Z = FALSE,
distance.function=corr, ...)
int.qq(x, d, xold, Ainv, pos.def.matrix, func=regressor.basis)

Arguments
x 
Point(s) at which estimation is desired. For

d 
vector of observations, one for each row of 
xold 
Matrix with rows corresponding to points at which the function is known 
A 
Correlation matrix 
Ainv 
Inverse of correlation matrix 
use.Ainv 
Boolean, with default If If Note: if 
func 
Function used to determine basis vectors, defaulting
to 
give.full.list 
In 
scales 
Vector of “roughness” lengths used to calculate
Note that It's probably worth restating here that the elements of

pos.def.matrix 
A positive definite matrix that is used if

give.Z 
In function 
distance.function 
Function to compute distances between
points, defaulting to 
... 
Further arguments passed to the distance function,
usually 
Value
In function interpolant()
, if give.full.list
is
TRUE
, a list is returned with components
betahat 
Standard MLE of the (linear) fit, given the observations 
prior 
Estimate for the prior 
sigmahat.square 
Posterior estimate for variance 
mstar.star 
Posterior expectation 
cstar 
Prior correlation of a point with itself 
cstar.star 
Posterior correlation of a point with itself 
Z 
Standard deviation (although the distribution is actually a tdistribution with nq degrees of freedom) 
Author(s)
Robin K. S. Hankin
References

J. Oakley 2004. “Estimating percentiles of uncertain computer code outputs”. Applied Statistics, 53(1), pp8993.

J. Oakley 1999. “Bayesian uncertainty analysis for complex computer codes”, PhD thesis, University of Sheffield.

J. Oakley and A. O'Hagan, 2002. “Bayesian Inference for the Uncertainty Distribution of Computer Model Outputs”, Biometrika 89(4), pp769784

R. K. S. Hankin 2005. “Introducing BACCO, an R bundle for Bayesian analysis of computer code output”, Journal of Statistical Software, 14(16)
See Also
makeinputfiles
,corr
Examples
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176  # example has 10 observations on 6 dimensions.
# function is just sum( (1:6)*x) where x=c(x_1, ... , x_2)
data(toy)
val < toy
real.relation < function(x){sum( (0:6)*x )}
H < regressor.multi(val)
d < apply(H,1,real.relation)
d < jitter(d,amount=1e5) # to prevent numerical problems
fish < rep(1,6)
fish[6] < 4
A < corr.matrix(val,scales=fish)
Ainv < solve(A)
# now add some suitably correlated noise to d:
d.noisy < as.vector(rmvnorm(n=1, mean=d, 0.1*A))
names(d.noisy) < names(d)
# First try a value at which we know the answer (the first row of val):
x.known < as.vector(val[1,])
bayes.known < interpolant(x.known, d, val, Ainv=Ainv, scales=fish, g=FALSE)
print("error:")
print(d[1]bayes.known)
# Now try the same value, but with noisy data:
print("error:")
print(d.noisy[1]interpolant(x.known, d.noisy, val, Ainv=Ainv, scales=fish, g=FALSE))
#And now one we don't know:
x.unknown < rep(0.5 , 6)
bayes.unknown < interpolant(x.unknown, d.noisy, val, scales=fish, Ainv=Ainv,g=TRUE)
## [ compare with the "true" value of sum(0.5*0:6) = 10.5 ]
# Just a quickie for int.qq():
int.qq(x=rbind(x.unknown,x.unknown+0.1),d.noisy,val,Ainv,pos.def.matrix=diag(fish))
## (To find the best correlation lengths, use optimal.scales())
# Now we use the SAME dataset but a different set of basis functions.
# Here, we use the functional dependence of
# "A+B*(x[1]>0.5)+C*(x[2]>0.5)+...+F*(x[6]>0.5)".
# Thus the basis functions will be c(1,x>0.5).
# The coefficients will again be 1:6.
# Basis functions:
f < function(x){c(1,x>0.5)}
# (other examples might be
# something like "f < function(x){c(1,x>0.5,x[1]^2)}"
# now create the data
real.relation2 < function(x){sum( (0:6)*f(x) )}
d2 < apply(val,1,real.relation2)
# Define a point at which the function's behaviour is not known:
x.unknown2 < rep(1,6)
# Thus real.relation2(x.unknown2) is sum(1:6)=21
# Now try the emulator:
interpolant(x.unknown2, d2, val, Ainv=Ainv, scales=fish, g=TRUE)$mstar.star
# Heh, it got it wrong! (we know that it should be 21)
# Now try it with the correct basis functions:
interpolant(x.unknown2, d2, val, Ainv=Ainv,scales=fish, func=f,g=TRUE)$mstar.star
# That's more like it.
# We can tell that the coefficients are right by:
betahat.fun(val,Ainv,d2,func=f)
# Giving c(0:6), as expected.
# It's interesting to note that using the *wrong* basis functions
# gives the *correct* answer when evaluated at a known point:
interpolant(val[1,], d2, val, Ainv=Ainv,scales=fish, g=TRUE)$mstar.star
real.relation2(val[1,])
# Which should agree.
# Now look at Z. Define a function Z() which determines the
# standard deviation at a point near a known point.
Z < function(o) {
x < x.known
x[1] < x[1]+ o
interpolant(x, d.noisy, val, Ainv=Ainv, scales=fish, g=TRUE)$Z
}
Z(0) #should be zero because we know the answer (this is just Z at x.known)
Z(0.1) #nonzero error.
## interpolant.quick() should give the same results faster, but one
## needs a matrix:
u < rbind(x.known,x.unknown)
interpolant.quick(u, d.noisy, val, scales=fish, Ainv=Ainv,g=TRUE)
# Now an example from climate science. "results.table" is a dataframe
# of goldstein (a climate model) results. Each of its 100 rows shows a
# point in parameter space together with certain key outputs from the
# goldstein program. The following R code shows how we can set up an
# emulator based on the first 27 goldstein runs, and use the emulator to
# predict the output for the remaining 73 goldstein runs. The results
# of the emulator are then plotted on a scattergraph showing that the
# emulator is producing estimates that are close to the "real" goldstein
# runs.
data(results.table)
data(expert.estimates)
# Decide which column we are interested in:
output.col < 26
# extract the "important" columns:
wanted.cols < c(2:9,12:19)
# Decide how many to keep;
# 3040 is about the most we can handle:
wanted.row < 1:27
# Values to use are the ones that appear in goin.test2.comments:
val < results.table[wanted.row , wanted.cols]
# Now normalize val so that 0<results.table[,i]<1 is
# approximately true for all i:
normalize < function(x){(xmins)/(maxesmins)}
unnormalize < function(x){mins + (maxesmins)*x}
mins < expert.estimates$low
maxes < expert.estimates$high
jj < t(apply(val,1,normalize))
jj < as.data.frame(jj)
names(jj) < names(val)
val < jj
## The value we are interested in is the 19th (or 20th or ... or 26th) column.
d < results.table[wanted.row , output.col]
## Now some scales, estimated earlier from the data using
## optimal.scales():
scales.optim < exp(c( 2.917, 4.954, 3.354, 2.377, 2.457, 1.934, 3.395,
0.444, 1.448, 3.075, 0.052, 2.890, 2.832, 2.322, 3.092, 1.786))
A < corr.matrix(val,scales=scales.optim, method=2)
Ainv < solve(A)
print("and plot points used in optimization:")
d.observed < results.table[ , output.col]
A < corr.matrix(val,scales=scales.optim, method=2)
Ainv < solve(A)
print("now plot all points:")
design.normalized < as.matrix(t(apply(results.table[,wanted.cols],1,normalize)))
d.predicted < interpolant.quick(design.normalized , d , val , Ainv=Ainv,
scales=scales.optim)
jj < range(c(d.observed,d.predicted))
par(pty="s")
plot(d.observed, d.predicted, pch=16, asp=1,
xlim=jj,ylim=jj,
xlab=expression(paste(temperature," (",{}^o,C,"), model" )),
ylab=expression(paste(temperature," (",{}^o,C,"), emulator"))
)
abline(0,1)
