R/crossVal.R

#' @title Cross Validation
#' @description Leave-one-out cross validation
#' @param fit  Object of class \code{emulatorFit} generated by \code{\link{fitEmulator}} or \code{\link{fitEmulatorData}}
#' @param lmcompare  Logical. If \code{TRUE}, the cross validation predictions are also compared with those from a linear model fitted by ordinary least squares.
#' @param lmformula  The formula for the linear model used in the comparison, if \code{lmcompare} is \code{TRUE}.
#' @param plot  Logical. If \code{TRUE}, the cross validation predictions are plotted against the true outputs with 95\% intervals. When \code{lmcompare} is \code{TRUE}, the comparison plots are always produced.
#' @return A list with components \code{CV.mean} (the leave-one-out posterior means), \code{CV.var} (the leave-one-out posterior variances) and \code{mean.RMSE} (the root mean squared error of the cross validation predictions, where one training point is left out for each fit).
#' @note This function requires R package \pkg{Hmisc} to be installed.
#' @author Jeremy Oakley.
#' @export 
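#' @examples
#' \dontrun{
#' # Illustrative usage sketch only: 'fit' is assumed to be an emulatorFit
#' # object previously returned by fitEmulator() on some training data.
#' cv <- crossVal(fit)
#' cv$mean.RMSE
#'
#' # Also compare with an ordinary least squares fit of the emulator's mean formula
#' cv.lm <- crossVal(fit, lmcompare = TRUE)
#' }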
crossVal <- function(fit, lmcompare = FALSE, lmformula = fit$formula, plot = TRUE) {
    if (!requireNamespace("Hmisc", quietly = TRUE))
        stop("This function requires the Hmisc package to be installed.")
    
    # ensure new and old fits are backward compatible
    fit <- updateFit(fit)
    
    # Storage for the leave-one-out posterior means and variances
    cvmeans <- matrix(0, fit$n.train, fit$n.outputs)
    cvvars <- cvmeans
    
    if (is.null(fit$nugget))
        sigmasq.hat <- NULL
    else 
        sigmasq.hat <- fit$sigmasq.hat
    
    for (i in 1:fit$n.train) {
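        # Refit the emulator with training point i removed, re-using the
        # correlation parameter estimates (and, where fitted, the nugget and
        # variance) from the full fit, then predict the held-out output.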
        if (is.null(fit$nugget))
            nugget <- NULL
        else 
            nugget <- fit$nugget[-i]
        
        reduced.inputs <- as.matrix(fit$training.inputs[-i, , drop = FALSE])
        colnames(reduced.inputs) <- colnames(fit$training.inputs)
        
        cvfit <- fitEmulator(inputs = reduced.inputs, outputs = as.matrix(fit$training.outputs[-i, , drop = FALSE]), 
                             prior.mean = fit$formula, cor.function = fit$cor.function, phi.opt = fit$phi.hat, 
                             sigmasq.opt = sigmasq.hat, nugget = nugget)
        
        m <- matrix(fit$training.inputs[i, , drop = FALSE], nrow = 1)
        
        colnames(m) <- colnames(fit$training.inputs)
        cvprediction <- predict(cvfit, m, var.cov = TRUE)
        cvmeans[i, ] <- cvprediction$posterior.mean
        cvvars[i, ] <- diag(cvprediction$posterior.variance)
    }
    
    # Degrees of freedom for the Student-t prediction intervals used in the plots
    emulator.df <- fit$n.train - fit$n.regressors
    
    if (!lmcompare && plot) {
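        # Plot the leave-one-out means with 95% Student-t intervals against the true outputs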
        par(mfrow = c(1,1))
        Hmisc::errbar(fit$training.outputs, cvmeans, cvmeans + qt(0.975, emulator.df) * sqrt(cvvars),
                      cvmeans - qt(0.975, emulator.df) * sqrt(cvvars), pch = 20,
                      xlab = "true value", ylab = "emulator mean")
        abline(0, 1)
    }
    
    if (lmcompare) {
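        # Fit an OLS linear model to the same training data and draw four panels:
        # emulator cross validation means with intervals, linear model predictions
        # with intervals, emulator means against linear model fits, and emulator
        # standard deviations against linear model prediction standard errors.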
        lm1 <- lm(lmformula, data = data.frame(fit$training.outputs, fit$training.inputs))
        lmpredictions <- predict.lm(lm1, data.frame(fit$training.inputs), 
                                    interval = "prediction", se.fit = TRUE)
        par(mfrow = c(2,2))
        
        Hmisc::errbar(fit$training.outputs, cvmeans, cvmeans + qt(0.975, emulator.df) * sqrt(cvvars),
                      cvmeans - qt(0.975, emulator.df) * sqrt(cvvars), pch = 16,
                      xlab = "true value", ylab = "emulator mean")
        abline(0, 1)
        
        Hmisc::errbar(fit$training.outputs, lmpredictions$fit[, 1], lmpredictions$fit[, 2],
                      lmpredictions$fit[, 3], pch = 20, xlab = "true value", ylab = "linear model fit")
        abline(0, 1)
        
        plot(cvmeans, lmpredictions$fit[, 1], pch = 16, xlab = "emulator mean", 
             ylab = "linear model fit")
        abline(0,1)
        
        lm.standard.errors <- sqrt(lmpredictions$se.fit ^ 2 + lmpredictions$residual.scale ^ 2)
        
        allsds <- c(sqrt(cvvars), lm.standard.errors)
        
        plot(sqrt(cvvars), lm.standard.errors, pch = 16, xlab = "emulator s.d.",
             ylab = "linear model prediction s.e.", xlim = range(allsds), ylim = range(allsds))
        abline(0,1)
    }
    
    colnames(cvmeans) <- colnames(cvvars) <- colnames(fit$training.outputs)
    
    
    # Root mean squared error of the cross validation predictions over all outputs
    rmse <- sqrt(mean((cvmeans - fit$training.outputs) ^ 2))
    ret <- list(CV.mean = cvmeans, CV.var = cvvars, mean.RMSE = rmse)
    ret
} 