tests/testthat/testpred.R

context("testpred")

test_that("Prediction matches exact on small samples", {
  
  # Use borehole
  borehole <- function(x) {
    rw <- x[, 1] * (0.15 - 0.05) + 0.05
    r <-  x[, 2] * (50000 - 100) + 100
    Tu <- x[, 3] * (115600 - 63070) + 63070
    Hu <- x[, 4] * (1110 - 990) + 990
    Tl <- x[, 5] * (116 - 63.1) + 63.1
    Hl <- x[, 6] * (820 - 700) + 700
    L <-  x[, 7] * (1680 - 1120) + 1120
    Kw <- x[, 8] * (12045 - 9855) + 9855
    
    m1 <- 2 * pi * Tu * (Hu - Hl)
    m2 <- log(r / rw)
    m3 <- 1 + 2 * L * Tu / (m2 * rw ^ 2 * Kw) + Tu / Tl
    return(m1 / m2 / m3)
  }
  d = 8
  testf<-function (x) {  return(borehole(x))} 
  
  # Create covariance function for vectors
  CorrMatCauchySQTPts <- function(x1, x2, theta) {
    
    expTILT = exp((theta[3]))
    expLS = exp(3*(theta[1]))
    x1t = (x1+10^(-2))^expTILT
    x2t = (x2+10^(-2))^expTILT
    x1ts = x1t/expLS
    x2ts = x2t/expLS
    
    diffmat =abs(outer(x1ts,x2ts,'-')); 
    expHE = exp(3*(theta[2]))
    h = diffmat
    alpha = 2*exp(5)/(1+exp(5))
    halpha = h^alpha
    pow = -expHE/alpha
    
    (1+halpha)^pow
    
  }
  CorrMatCauchySQTVecs <- function(x1,x2, theta) {
    prod(sapply(1:length(x1), function(i) {
      CorrMatCauchySQTPts(x1[i], x2[i], theta[(1+3*i-3):(3*i)])
    }))
  }
  CorrMatCauchySQTFull <- function(X, theta) {
    n <- nrow(X)
    outer(1:n, 1:n, Vectorize(function(i,j) {CorrMatCauchySQTVecs(X[i,],X[j,],theta)}))
  }
  CorrMatCauchySQTFull2 <- function(X, U, theta) {
    outer(1:nrow(X), 1:nrow(U), Vectorize(function(i,j) {CorrMatCauchySQTVecs(X[i,],U[j,],theta)}))
  }
  
  SG = CGGPcreate(d=d,31, corr="CauchySQT") # Need size to be small to avoid computationally singular in solve
  Y = testf(SG$design) #the design is $design, simple enough, right?
  
  n <- 50
  xp <- matrix(runif(d*n),n,d)
  
  # Get error if not fit yet
  expect_error(CGGPpred(SG, xp))
  
  # Then fit
  SG <- CGGPfit(CGGP=SG, Y=Y)
  # Now it should work
  expect_error(SGpred <- CGGPpred(xp=xp, CGGP=SG), NA)
  # Get error if wrong order of args
  expect_error(CGGPpred(xp, SG))
  
  my <- mean(Y)
  dy <- Y - my
  Sig <- CorrMatCauchySQTFull(SG$design, theta=SG$thetaMAP)
  s <- CorrMatCauchySQTFull2(xp, SG$design, theta = SG$thetaMAP)
  expred <- my + s %*% solve(Sig, dy)
  
  # Check mean predictions
  # plot(expred, SGpred$mean); abline(a=0,b=1, col=2)
  expect_equal(SGpred$mean, expred)
  
  # Test var predictions
  # Calculating s2 like this doesn't work since we use MAP
  # s2 <- c(t(Y) %*%solve(Sig, Y) / length(Y))
  # Just use the MAP value
  s2 <- SG$sigma2MAP
  exvar <- s2 * (1 - colSums(t(s) * solve(Sig, t(s))))
  if (F) {
    print(1/SGpred$var* exvar)
    plot(exvar, SGpred$var); abline(a=0,b=1, col=2)
  }
  expect_equal(c(SGpred$var), exvar)
  
  rm(my, dy, Sig, s, expred, SGpred)
  
  # -----------------------------
  # And check grid with supp data
  # -----------------------------
  ns <- 10
  Xs <- matrix(runif(ns*d), ns, d)
  Ys <- testf(Xs)
  SGs <- CGGPfit(SG, SG$Y, Xs=Xs, Ys=Ys, HandlingSuppData="Correct")
  rm(SG)
  SGpred <- CGGPpred(xp=xp, CGGP=SGs)

  Xall <- rbind(SGs$design, Xs)
  Yall <- c(SGs$Y, Ys)
  my <- mean(Yall)
  dy <- Yall - my
  Sig <- CorrMatCauchySQTFull(Xall, theta=SGs$thetaMAP)
  s <- CorrMatCauchySQTFull2(xp, Xall, theta = SGs$thetaMAP)
  expred <- my + s %*% solve(Sig, dy)

  # Check mean predictions
  if (F) {
    plot(expred, SGpred$mean); abline(a=0,b=1, col=2)
    summary((c(SGpred$mean)- expred) / expred)
  }
  expect_equal(SGpred$mean, expred, 1e-2)

  # Test var predictions
  # Calculating s2 like this doesn't work since we use MAP
  # s2 <- c(t(Y) %*%solve(Sig, Y) / length(Y))
  # Just use the MAP value
  s2 <- SGs$sigma2MAP
  exvar <- s2 * (1 - colSums(t(s) * solve(Sig, t(s))))
  if (F) {
    print(1/SGpred$var* exvar)
    plot(exvar, SGpred$var); abline(a=0,b=1, col=2)
    summary((c(SGpred$var)- exvar) / exvar)
  }
  expect_equal(c(SGpred$var), exvar)
  
  rm(SGs, Xall, Yall, my, dy, Sig, s, expred, SGpred, s2, exvar)
  
  
  # ----------------------------
  # Pred with only supp is exact
  # ----------------------------
  sg <- CGGPcreate(d, 0, Xs=Xs, Ys=Ys, corr="CauchySQT")
  SGpred <- predict(sg, xp)
  my <- mean(Ys)
  dy <- Ys - my
  Sig <- CorrMatCauchySQTFull(Xs, theta=sg$thetaMAP)
  s <- CorrMatCauchySQTFull2(xp, Xs, theta = sg$thetaMAP)
  expred <- my + s %*% solve(Sig, dy)
  
  # Check mean predictions
  if (F) {
    plot(expred, SGpred$mean); abline(a=0,b=1, col=2)
    summary((c(SGpred$mean)- expred) / expred)
  }
  expect_equal(SGpred$mean, expred)
  
  # Test var predictions
  # Calculating s2 like this doesn't work since we use MAP
  # s2 <- c(t(Y) %*%solve(Sig, Y) / length(Y))
  # Just use the MAP value
  s2 <- sg$sigma2MAP
  exvar <- s2 * (1 - colSums(t(s) * solve(Sig, t(s))))
  if (F) {
    print(1/SGpred$var* exvar)
    plot(exvar, SGpred$var); abline(a=0,b=1, col=2)
    summary((c(SGpred$var)- exvar) / exvar)
  }
  expect_equal(c(SGpred$var), exvar, tol=1e-5)
})

test_that("predMV works", {
  SG <- CGGPcreate(d=3, batchsize=100)
  f1 <- function(x){x[1]+x[2]^2}
  f2 <- function(x){x[1]^1.3+.4*sin(6*x[2])+10}
  y1 <- apply(SG$design, 1, f1)#+rnorm(1,0,.01)
  y2 <- apply(SG$design, 1, f2)#+rnorm(1,0,.01)
  y <- cbind(y1, y2)
  SG <- CGGPfit(SG, Y=y)
  yMVpred <- CGGPpred(SG$design, CGGP=SG)$mean
  expect_equal(yMVpred[,1], y1, 1e-4)
  expect_equal(yMVpred[,2], y2, 1e-4)
  
  # Make sure predict generic works
  yMVpredS3 <- predict(SG, SG$design)$mean
  expect_equal(yMVpred, yMVpredS3)
  rm(yMVpred, yMVpredS3)
  
  # Check outdims
  # Get error if not suitable
  expect_error(CGGPpred(SG$design, CGGP=SG, outdims = 2))
  SGsep <- CGGPfit(SG, SG$Y, separateoutputparameterdimensions = T)
  yMVpred_o2 <- CGGPpred(SG$design, CGGP=SGsep, outdims = 2)$mean
  # Second dim should match, first should not.
  expect_equal(yMVpred_o2[,2], y2, 1e-4)
  # expect_true(all(abs(yMVpred_o2[,1] - y1)> 1e-4)) # This was failing on Travis
  expect_true(mean(abs(yMVpred_o2[,1] - y1)> 1e-4) > 0.9,
              info = paste(round(c(yMVpred_o2[1,1], sort(y1)),5),
                           collapse = " ")) # Maybe a couple are close
  
  # Doesn't work since there's no way to update Y without updating parameters too.
  # xpred <- matrix(runif(100*3),100,3)
  # SG1 <- CGGPfit(SG, Y=y1)
  # SG2 <- CGGPfit(SG, Y=y2)
  # y1pred <- CGGPpred(xpred, SG=SG1)$mean
  # y2pred <- CGGPpred(xpred, SG=SG2)$mean
  # yMVpred <- CGGPpred(xpred, SG=SG)$mean
  # expect_equal(yMVpred[,1], c(y1pred), tol=1e-2)
  # expect_equal(yMVpred[,2], c(y2pred), tol=1e-2)
})

test_that("Supplemented works", {
  d <- 3
  SG <- CGGPcreate(d=d, batchsize=100)
  f1 <- function(x){x[1]+sin(2*pi*x[1]) + x[2]^2}
  y1 <- apply(SG$design, 1, f1)
  SG <- CGGPfit(SG, Y=y1)
  
  # Add supplemental data
  nsup <- 20
  xsup <- matrix(runif(d*nsup), nsup, d)
  ysup <- apply(xsup, 1, f1)
  # SG$supplemented <- TRUE
  # SG$Xs <- xsup
  # SG$Ys <- ysup
  
  # # Get error when not fit
  # expect_error(CGGPpred(xp=xsup, SG=SG))
  
  # Should work after fitting
  SG <- CGGPfit(SG, Y=y1, Xs=xsup, Ys=ysup)
  
  # Predictions should match values at supplemented points
  expect_equal(c(CGGPpred(xp=xsup, CGGP=SG)$me), ysup, tol=1e-4)
  
  # # Predict at points
  # n <- 50
  # xp <- matrix(runif(d*10),n,d)
  # SGpred <- CGGPpred(xp=xp, SG=SG)
})

test_that("supplemental with MV output works", {
  
  SG <- CGGPcreate(d=3, batchsize=100)
  f1 <- function(x){x[1]+x[2]^2}
  f2 <- function(x){x[1]^1.3+.4*sin(6*x[2])+10}
  y1 <- apply(SG$design, 1, f1)#+rnorm(1,0,.01)
  y2 <- apply(SG$design, 1, f2)#+rnorm(1,0,.01)
  y <- cbind(y1, y2)
  
  xsup <- matrix(runif(3*30), ncol=3)
  ysup1 <- apply(xsup, 1, f1)
  ysup2 <- apply(xsup, 1, f2)
  ysup <- cbind(ysup1, ysup2)
  
  SG <- CGGPfit(SG, Y=y, Xs=xsup, Ys=ysup)
  yMVpred <- CGGPpred(SG$design, CGGP=SG)$mean
  # Making tol as big as 1e-3 since 1e-4 gives errors on Travis
  expect_equal(yMVpred[,1], y1, 1e-3)
  expect_equal(yMVpred[,2], y2, 1e-3)
  ysuppred <- CGGPpred(xsup, CGGP=SG)$mean
  expect_equal(ysuppred[,1], ysup1, 1e-3)
  expect_equal(ysuppred[,2], ysup2, 1e-3)
  
  # Doesn't work when giving in theta
  expect_error(CGGPpred(SG, xsup, theta=SG$thetaPostSamples[,1]))
  
})

test_that("pred with theta works", {
  d <- 5
  sg <- CGGPcreate(d=d, batchsize=1500)
  expect_error(CGGPpred(sg$design, sg))
  f <- function(x){x[1]^1.3+.4*sin(6*x[2])+4*exp(x[3])}
  y <- apply(sg$design, 1, f)
  sg <- CGGPfit(sg, y)
  
  npred <- 10
  xpred <- matrix(runif(d*npred), npred)
  p1 <- CGGPpred(xpred, CGGP=sg)
  p2 <- CGGPpred(xpred, CGGP=sg, theta=sg$thetaMAP)
  expect_error(CGGPpred(xpred, CGGP=sg, theta=sg$thetaPostSamples[1:8,1]))
  p3 <- CGGPpred(xpred, CGGP=sg, theta=sg$thetaPostSamples[,1])
  expect_equal(p1$me, p2$me)
  expect_false(isTRUE(all.equal(p1$me, p3$me)))
  
  # Check full Bayesian. Not easy, just check if it's close to MAP prediction
  expect_error(CGGPpred(xp=xpred, sg, theta = sg$thetaPostSamples[1,]))
  pb <- CGGPpred(xp=xpred, sg)
  expect_is(pb, "list")
  expect_equal(p1$me, p3$mean, tol=1e-2)
  # expect_equal(c(p1$me /p3$mean), rep(1,10), tol=1e-2)
  # Variances can be near zero, so use relative since all.equal won't for small values
  expect_equal(c(p1$var /p3$var), rep(1,10), tol=.5) # Huge tolerance since I don't know how to check it
  
})
CollinErickson/CGGP documentation built on Feb. 6, 2024, 2:24 a.m.