# R/validateEmulator.R

#' @title Validate the Emulator

#' @description Uses diagnostics described in Bastos and O'Hagan (2009) to validate the emulator. 
#' @param emulator  Object of class \code{emulatorFit} generated by \code{\link{fitEmulator}} or \code{\link{fitEmulatorData}}
#' @param new.outputs A matrix of outputs used to validate the emulator predictions. This matrix must have one column per emulator output. 
#' @param emulator.predictions Output generated from the \code{\link{predict.emulatorFit}} function. 
#' @param which.output An integer giving the column number of the output to validate. Defaults to 1. 
#' @param plot Logical. If \code{TRUE} (the default), the validation plots are drawn. 
#' @details \code{validateEmulator} produces a grid of four diagnostic plots and returns summary statistics. The plots are described below: 
#' \tabular{ll}{ 
#' Top left plot \tab Plots the validation outputs against the posterior means generated by the emulator. The error bars are the 95 percent intervals of the emulator predictions. Ideally, all points lie close to the diagonal line with small error bars. \cr
#' Top right plot \tab Shows the Q-Q plot of the pivoted Cholesky residuals.   
#' If the points lie close to the 45 degree line, the normality assumption for the simulator output is reasonable. A gradient greater than 1 (less than 1) suggests that the predictive variability was underestimated (overestimated). 
#' Curvature in the plot indicates non-normality, and outliers at either end of the plot suggest local fitting problems or non-stationarity.    \cr
#' Bottom left plot \tab Plot of the pivoted Cholesky prediction errors against the pivoting index. 
#' The pivoting index orders the errors by decreasing conditional predictive variance, so no patterns are expected. Too many large errors indicate an underestimation of the variance, and vice versa; both cases can also suggest a non-stationary process. 
#' Large or very small errors at the beginning of the plot (i.e., on the left side) indicate poor estimation of the predictive variance or non-stationarity. 
#' However, large (or very small) errors at the end of the plot (i.e., on the right side) indicate overestimation (or underestimation) of the correlation length parameters, or that the chosen correlation structure is unsuitable. \cr
#' Bottom right plot \tab The red line on the plot marks the value of the calculated Mahalanobis distance, which takes correlation among outputs into account. Ideally it lies between the boundaries of the curve; extreme values (large or small) indicate a conflict between the emulator and the simulator.   \cr } 
#'
#' @return An object of class \code{validateEmulator}: a list containing at least the following components: 
#' \tabular{ll}{
#'  \code{coverage} \tab Proportion of validation outputs falling within the 95 percent intervals of the emulator predictions (only returned when predictive standard deviations are available) \cr 
#'  \code{RMSE}  \tab    The root mean square error of the predicted values   \cr
#'  \code{Norm.RMSE} \tab The \code{RMSE} normalised by the range of the validation outputs \cr}
#'
#' @references Bastos, L. S. and O'Hagan, A. (2009). Diagnostics for Gaussian process emulators. Technometrics, 51(4): 425-438.
#'
#' @author Originally written by Jeremy Oakley. Modified by Sajni Malde
#' @seealso \code{\link{predict.emulatorFit}} and \code{\link{fitEmulator}}
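#' @examples
#' \dontrun{
#' # Minimal sketch of a validation workflow (not run). The training objects
#' # 'sim.inputs'/'sim.outputs' and the held-out runs 'valid.inputs'/'valid.outputs'
#' # are hypothetical placeholders, and the argument order assumed for fitEmulator
#' # and predict is illustrative only.
#' fit <- fitEmulator(sim.inputs, sim.outputs)
#' pred <- predict(fit, valid.inputs)
#' diagnostics <- validateEmulator(fit, valid.outputs, pred, which.output = 1)
#' diagnostics$RMSE
#' diagnostics$coverage
#' }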
#'
#' @export
validateEmulator <- function(emulator, new.outputs, emulator.predictions, which.output = 1, plot = TRUE) {
    
    # ensure new and old fits are backward compatible
    emulator <- updateFit(emulator)
    
    # currently not working for the LMC method
    if (inherits(emulator, "emulatorFitLMC"))
        stop("validateEmulator is currently not available for the LMC method")
    
    # ensure the validation outputs are stored as a matrix
    new.outputs <- as.matrix(new.outputs)
    
    n.validation <- nrow(new.outputs)
    
    # check that new.outputs has the same number of columns as the outputs used in fitEmulator
    if (emulator$n.outputs != ncol(new.outputs)) 
        stop("new.outputs does not have the same number of columns as the outputs used in fitEmulator. Please check new.outputs against emulator.predictions$posterior.mean")
    
    # defining em.pred.sd and post.var where available
    em.pred.sd <- NULL
    post.var.for.defined.output <- NULL
    if (emulator$n.outputs == 1) {
        if (!is.null(emulator.predictions$posterior.variance)) {
            post.var.for.defined.output <- emulator.predictions$posterior.variance
            em.pred.sd <- sqrt(diag(emulator.predictions$posterior.variance))
        } else if (!is.null(emulator.predictions$standard.deviation))
            em.pred.sd <- emulator.predictions$standard.deviation
    } else if (emulator$n.outputs > 1) {
        # extracting the posterior variance matrix for the output selected by which.output
        emulator.predictions$posterior.mean <- emulator.predictions$posterior.mean[, which.output, drop = FALSE]
        new.outputs <- new.outputs[, which.output, drop = FALSE]
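        # the predictive covariance for a single output is its variance scale (sigmasq[i, i],
        # a scalar) times the across-points correlation matrix, so %x% here reduces to
        # scalar multiplication of correlation.matrix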
        post.var.for.defined.output <- emulator.predictions$sigmasq[which.output, which.output] %x% emulator.predictions$correlation.matrix  
        em.pred.sd <- as.vector(sqrt(diag(post.var.for.defined.output)))
    }
    
    # calculate RMSE and NormRMSE in all cases
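    # NormRMSE expresses the RMSE as a fraction of the range of the validation outputs,
    # so values can be compared across outputs measured on different scales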
    residuals <- new.outputs - emulator.predictions$posterior.mean
    rmse <- sqrt(mean((residuals) ^ 2))
    normrmse <- rmse/(max(new.outputs) - min(new.outputs))
    
    # saving RMSE and Norm.RMSE in the list to be returned
    val <- list(RMSE = rmse, Norm.RMSE = normrmse)
    
    # calculate coverage value if em.pred.sd is not null
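    # a validation point is counted as covered when it lies within the central 95% prediction
    # interval, i.e. within qnorm(0.975) standard deviations of the posterior mean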
    if (!is.null(em.pred.sd)) 
        val$coverage <- mean(abs(new.outputs - emulator.predictions$posterior.mean) < qnorm(0.975) * em.pred.sd, na.rm = TRUE)
    
    if (plot) {
        # setting plot window appropriately
        if (!is.null(post.var.for.defined.output)) {
            par.old <- par(mfrow = c(2, 2))
            on.exit(par(par.old))
        } 
        
        # plot of predictions against true values ... 
        if (!is.null(em.pred.sd))
            y.lim <- c(min(emulator.predictions$posterior.mean - 2 * em.pred.sd, na.rm = TRUE), max(emulator.predictions$posterior.mean + 2 * em.pred.sd, na.rm = TRUE))
        else  
            y.lim <- c(min(emulator.predictions$posterior.mean, na.rm = TRUE), max(emulator.predictions$posterior.mean, na.rm = TRUE))
        
        plot(new.outputs, emulator.predictions$posterior.mean, xlab = "simulator output", ylab = "emulator prediction", main = "Emulator means and 95% intervals", 
             ylim = y.lim)
        abline(0, 1)
        
        # ... adding error bars if em.pred.sd is not null 
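        # each error bar spans mean + qnorm(0.025) * sd to mean + qnorm(0.975) * sd (a central
        # 95% interval); the horizontal caps span 1.5% of the plotted x-range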
        if (!is.null(em.pred.sd)) {
            
            # re-draw the points as filled circles before adding the error bars
            points(new.outputs, emulator.predictions$posterior.mean, pch = 20)
            
            xstart <- new.outputs - (0.015 * diff(par()$usr[1:2])/2)
            xend <- new.outputs + (0.015 * diff(par()$usr[1:2])/2)
            yminus <- emulator.predictions$posterior.mean + qnorm(0.025) * em.pred.sd
            yplus <-  emulator.predictions$posterior.mean + qnorm(0.975) * em.pred.sd   
            
            # plotting vertical lines
            segments(new.outputs, yminus, new.outputs, yplus, col = "Black")
            
            # plotting horizontal lines
            segments(xstart, yminus, xend, yminus, col = "Black")
            segments(xstart, yplus, xend, yplus, col = "Black")
            
        } else {
            # if em.pred.sd is missing there is nothing else this function can do
            warning("either emulator.prediction$posterior.variance or emulator.predictions$standard.deviation must be provided for more information from this function.")
            return(val)
        }
        
        # if posterior variance is given then 3 additional diagnostic plots are shown
        if (!is.null(post.var.for.defined.output)) {
            
            # (pivoted) Cholesky Q-Q plot
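            # C is the upper-triangular pivoted Cholesky factor, so t(C) %*% C reproduces the
            # predictive covariance in pivot order; pre-multiplying the pivoted residuals by
            # t(C)^{-1} should give approximately independent standard normal values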
            GP.resid <- new.outputs - emulator.predictions$posterior.mean
            C <- chol(post.var.for.defined.output, pivot = TRUE)
            # solve t(C) %*% z = pivoted residuals; transpose = TRUE uses the correct triangle
            # so that the standardised residuals have (approximately) unit variance
            chol.resid <- backsolve(C, GP.resid[attr(C, "pivot")], transpose = TRUE)
            qqnorm(chol.resid)
            abline(0, 1)
            
            # plot of pivoted Cholesky errors
            plot(chol.resid, ylim = c(-1, 1) * max(abs(chol.resid), qnorm(0.99)), xlab = "Pivot index", ylab = "Pivoted Cholesky residual", 
                 main = "Pivoted Cholesky prediction errors")
            abline(h = c(-1, 0, 1) * qnorm(0.975), lty = c(3, 1, 3))
            
            # Mahalanobis distance
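            # the posterior covariance is the predictive t scale matrix inflated by
            # (n - q) / (n - q - 2), with n = n.train and q = n.regressors, so rescaling the
            # Mahalanobis distance by (n - q) / (n.validation * (n - q - 2)) gives a statistic
            # with an F(n.validation, n - q) reference distribution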
            MD.stat <- t(GP.resid) %*% solve(post.var.for.defined.output, GP.resid, tol = 1e-100) * 
                (emulator$n.train - emulator$n.regressors)/n.validation/(emulator$n.train - emulator$n.regressors - 2)
            x.end <- max(MD.stat, qf(0.999, n.validation, emulator$n.train - emulator$n.regressors)) * 1.1
            x.plot <- seq(0, x.end, length = 200)
            plot(x.plot, df(x.plot, n.validation, emulator$n.train - emulator$n.regressors), type = "l", xlab = "x", ylab = "Density", 
                 main = "Mahalanobis test")
            x.low <- seq(0, qf(0.025, n.validation, emulator$n.train - emulator$n.regressors), length = 100)
            x.upp <- seq(qf(0.975, n.validation, emulator$n.train - emulator$n.regressors), x.end, length = 100)
            polygon(c(x.low, rev(x.low)), c(df(x.low, n.validation, emulator$n.train - emulator$n.regressors), rep(0, 100)), col = "black")
            polygon(c(x.upp, rev(x.upp)), c(df(x.upp, n.validation, emulator$n.train - emulator$n.regressors), rep(0, 100)), col = "black")
            abline(v = MD.stat, col = "red")
            
        }
    }
    
    return(val)
    
} 