Nothing
#' Perform leave-one-out cross validation
#'
#' @description Perform leave-one-out cross validation with options for computationally
#' efficient approximations for big data.
#'
#' @param object A fitted model object from [splm()], [spautor()], [spglm()], or [spgautor()].
#' @param cv_predict A logical indicating whether the leave-one-out fitted values
#' should be returned. Defaults to \code{FALSE}. If \code{object} is from [spglm()] or [spgautor()],
#' the fitted values returned are on the link scale.
#' @param se.fit A logical indicating whether the leave-one-out
#' prediction standard errors should be returned. Defaults to \code{FALSE}.
#' If \code{object} is from [spglm()] or [spgautor()],
#' the standard errors correspond to the fitted values returned on the link scale.
#' @param local A list or logical. If a list, specific list elements described
#' in [predict.spmodel()] control the big data approximation behavior.
#' If a logical, \code{TRUE} chooses default list elements for the list version
#' of \code{local} as specified in [predict.spmodel()]. Defaults to \code{FALSE},
#' which performs exact computations.
#' @param ... Other arguments. Not used (needed for generic consistency).
#'
#' @details Each observation is held-out from the data set and the remaining data
#' are used to make a prediction for the held-out observation. This is compared
#' to the true value of the observation and several fit statistics are computed:
#' bias, mean-squared-prediction error (MSPE), root-mean-squared-prediction
#' error (RMSPE), and the squared correlation (cor2) between the observed data
#' and leave-one-out predictions (regarded as a prediction version of r-squared
#' appropriate for comparing across spatial and nonspatial models). Generally,
#' bias should be near zero for well-fitting models. The lower the MSPE and RMSPE,
#' the better the model fit (according to the leave-out-out criterion).
#' The higher the cor2, the better the model fit (according to the leave-out-out
#' criterion). cor2 is not returned when \code{object} was fit using
#' \code{spglm()} or \code{spgautor()}, as it is only applicable here for linear models.
#'
#' @return If \code{cv_predict = FALSE} and \code{se.fit = FALSE},
#' a fit statistics tibble (with bias, MSPE, RMSPE, and cor2; see Details).
#' If \code{cv_predict = TRUE} or \code{se.fit = TRUE},
#' a list with elements: \code{stats}, a fit statistics tibble
#' (with bias, MSPE, RMSPE, and cor2; see Details); \code{cv_predict}, a numeric vector
#' with leave-one-out predictions for each observation (if \code{cv_predict = TRUE});
#' and \code{se.fit}, a numeric vector with leave-one-out prediction standard
#' errors for each observation (if \code{se.fit = TRUE}).
#'
#' @order 1
#' @export
#'
#' @examples
#' spmod <- splm(z ~ water + tarp,
#' data = caribou,
#' spcov_type = "exponential", xcoord = x, ycoord = y
#' )
#' loocv(spmod)
#' loocv(spmod, cv_predict = TRUE, se.fit = TRUE)
loocv <- function(object, ...) {
UseMethod("loocv", object)
}
#' @rdname loocv
#' @method loocv splm
#' @order 2
#' @export
loocv.splm <- function(object, cv_predict = FALSE, se.fit = FALSE, local, ...) {
if (missing(local)) {
local <- NULL
}
# iid if relevant otherwise pass
if (inherits(coef(object, type = "spcov"), c("none", "ie")) && is.null(object$random)) {
return(loocv_iid(object, cv_predict, se.fit, local))
}
# local prediction list
# local stuff
if (is.null(local)) {
if (object$n > 5000) {
local <- TRUE
message("Because the sample size exceeds 5000, we are setting local = TRUE to perform computationally efficient approximations. To override this behavior and compute the exact solution, rerun loocv() with local = FALSE. Be aware that setting local = FALSE may result in exceedingly long computational times.")
} else {
local <- FALSE
}
}
local_list <- get_local_list_prediction(local)
if (local_list$method == "all") {
# spcov_params_val <- coef(object, type = "spcov")
# dist_matrix <- spdist(object$obdata, object$xcoord, object$ycoord)
# randcov_params_val <- coef(object, type = "randcov")
# if (is.null(object$random)) {
# randcov_names <- NULL
# randcov_Zs <- NULL
# } else {
# randcov_names <- get_randcov_names(object$random)
# randcov_Zs <- get_randcov_Zs(object$obdata, randcov_names)
# }
# partition_matrix_val <- partition_matrix(object$partition_factor, object$obdata)
# cov_matrix_val <- cov_matrix(
# spcov_params_val, dist_matrix, randcov_params_val,
# randcov_Zs, partition_matrix_val
# )
cov_matrix_val <- covmatrix(object)
# actually need inverse because of HW blocking
cov_matrixInv_val <- chol2inv(chol(forceSymmetric(cov_matrix_val)))
model_frame <- model.frame(object)
X <- model.matrix(object)
y <- model.response(model_frame)
yX <- cbind(y, X)
SigInv_yX <- cov_matrixInv_val %*% yX
# parallel stuff
if (local_list$parallel) {
cl <- parallel::makeCluster(local_list$ncores)
cv_predict_val_list <- parallel::parLapply(cl, seq_len(object$n), get_loocv,
Sig = cov_matrix_val,
SigInv = cov_matrixInv_val, Xmat = X, y = y, yX = yX,
SigInv_yX = SigInv_yX, se.fit = se.fit
)
cl <- parallel::stopCluster(cl)
} else {
cv_predict_val_list <- lapply(seq_len(object$n), get_loocv,
Sig = cov_matrix_val,
SigInv = cov_matrixInv_val, Xmat = X, y = y, yX = yX,
SigInv_yX = SigInv_yX, se.fit = se.fit
)
}
# cv_predict_val <- unlist(cv_predict_val_list)
cv_predict_val <- vapply(cv_predict_val_list, function(x) x$pred, numeric(1))
if (se.fit) {
cv_predict_se <- vapply(cv_predict_val_list, function(x) x$se.fit, numeric(1))
}
} else {
model_frame <- model.frame(object)
y <- model.response(model_frame)
extra_randcov_list <- get_extra_randcov_list(object, object$obdata, newdata = object$obdata)
extra_partition_list <- get_extra_partition_list(object, object$obdata, newdata = object$obdata)
if (local_list$parallel) {
# turn of parallel as it is used different in predict
local_list$parallel <- FALSE
cl <- parallel::makeCluster(local_list$ncores)
cv_predict_val_list <- parallel::parLapply(cl, seq_len(object$n), loocv_local, object, se.fit, local_list, extra_randcov_list = extra_randcov_list, extra_partition_list = extra_partition_list)
cl <- parallel::stopCluster(cl)
} else {
cv_predict_val_list <- lapply(seq_len(object$n), loocv_local, object, se.fit, local_list,
extra_randcov_list = extra_randcov_list, extra_partition_list = extra_partition_list)
}
if (se.fit) {
cv_predict_val <- vapply(cv_predict_val_list, function(x) x$fit, numeric(1))
cv_predict_se <- vapply(cv_predict_val_list, function(x) x$se.fit, numeric(1))
} else {
cv_predict_val <- unlist(cv_predict_val_list)
}
}
cv_predict_error <- y - cv_predict_val
bias <- mean(cv_predict_error)
MSPE <- mean((cv_predict_error)^2)
RMSPE <- sqrt(MSPE)
cor2 <- cor(cv_predict_val, y)^2
loocv_stats <- tibble(
bias = bias,
MSPE = MSPE,
RMSPE = RMSPE,
cor2 = cor2
)
if (!cv_predict && ! se.fit) {
return(loocv_stats)
} else {
loocv_out <- list()
loocv_out$stats <- loocv_stats
if (cv_predict) {
loocv_out$cv_predict <- cv_predict_val
}
if (se.fit) {
loocv_out$se.fit <- as.vector(cv_predict_se)
}
return(loocv_out)
}
# if (cv_predict) {
# if (se.fit) {
# cv_output <- list(mspe = mean((cv_error_val)^2), cv_predict = as.vector(cv_predict_val), se.fit = as.vector(cv_predict_se))
# } else {
# cv_output <- list(mspe = mean((cv_error_val)^2), cv_predict = as.vector(cv_predict_val))
# }
# } else {
# if (se.fit) {
# cv_output <- list(mspe = mean((cv_error_val)^2), se.fit = as.vector(cv_predict_se))
# } else {
# cv_output <- mean((cv_error_val)^2)
# }
# }
# cv_output
}
#' @rdname loocv
#' @method loocv spautor
#' @order 3
#' @export
loocv.spautor <- function(object, cv_predict = FALSE, se.fit = FALSE, local, ...) {
if (missing(local)) {
local <- NULL
}
local_list <- get_local_list_prediction(local)
# local not used but needed for S3
# spcov_params_val <- coef(object, type = "spcov")
# dist_matrix <- object$W
# randcov_params_val <- coef(object, type = "randcov")
# if (is.null(object$random)) {
# randcov_names <- NULL
# randcov_Zs <- NULL
# } else {
# randcov_names <- get_randcov_names(object$random)
# randcov_Zs <- get_randcov_Zs(object$data, randcov_names)
# }
# partition_matrix_val <- partition_matrix(object$partition_factor, object$data)
# cov_matrix_val <- cov_matrix(
# spcov_params_val, dist_matrix, randcov_params_val,
# randcov_Zs, partition_matrix_val, object$M
# )
# cov_matrix_obs_val <- cov_matrix_val[object$observed_index, object$observed_index, drop = FALSE]
cov_matrix_obs_val <- covmatrix(object)
# actually need inverse because of HW blocking
cov_matrixInv_obs_val <- chol2inv(chol(forceSymmetric(cov_matrix_obs_val)))
model_frame <- model.frame(object)
X <- model.matrix(object)
y <- model.response(model_frame)
yX <- cbind(y, X)
SigInv_yX <- cov_matrixInv_obs_val %*% yX
# parallel stuff
if (local_list$parallel) {
cl <- parallel::makeCluster(local_list$ncores)
cv_predict_val_list <- parallel::parLapply(cl, seq_len(object$n), get_loocv,
Sig = cov_matrix_obs_val,
SigInv = cov_matrixInv_obs_val, Xmat = X, y = y, yX = yX,
SigInv_yX = SigInv_yX, se.fit = se.fit
)
cl <- parallel::stopCluster(cl)
} else {
cv_predict_val_list <- lapply(seq_len(object$n), get_loocv,
Sig = cov_matrix_obs_val,
SigInv = cov_matrixInv_obs_val, Xmat = X, y = y, yX = yX,
SigInv_yX = SigInv_yX, se.fit = se.fit
)
}
# cv_predict_val <- unlist(cv_predict_val_list)
cv_predict_val <- vapply(cv_predict_val_list, function(x) x$pred, numeric(1))
if (se.fit) {
cv_predict_se <- vapply(cv_predict_val_list, function(x) x$se.fit, numeric(1))
}
cv_predict_error <- y - cv_predict_val
bias <- mean(cv_predict_error)
MSPE <- mean((cv_predict_error)^2)
RMSPE <- sqrt(MSPE)
cor2 <- cor(cv_predict_val, y)^2
loocv_stats <- tibble(
bias = bias,
MSPE = MSPE,
RMSPE = RMSPE,
cor2 = cor2
)
if (!cv_predict && ! se.fit) {
return(loocv_stats)
} else {
loocv_out <- list()
loocv_out$stats <- loocv_stats
if (cv_predict) {
loocv_out$cv_predict <- cv_predict_val
}
if (se.fit) {
loocv_out$se.fit <- as.vector(cv_predict_se)
}
return(loocv_out)
}
#
#
#
# if (cv_predict) {
# if (se.fit) {
# cv_output <- list(mspe = mean((cv_predict_val - y)^2), cv_predict = as.vector(cv_predict_val), se.fit = as.vector(cv_predict_se))
# } else {
# cv_output <- list(mspe = mean((cv_predict_val - y)^2), cv_predict = as.vector(cv_predict_val))
# }
# } else {
# if (se.fit) {
# cv_output <- list(mspe = mean((cv_predict_val - y)^2), se.fit = as.vector(cv_predict_se))
# } else {
# cv_output <- mean((cv_predict_val - y)^2)
# }
# }
# cv_output
}
loocv_local <- function(row, object, se.fit, local_list,
extra_randcov_list = NULL, extra_partition_list = NULL) {
newdata <- object$obdata[row, , drop = FALSE]
object$obdata <- object$obdata[-row, , drop = FALSE]
# this is all so the randcov and partition steps are not repeated for each iteration
if (!is.null(extra_randcov_list)) {
extra_randcov_list$Z_index_obdata_list <- lapply(extra_randcov_list$Z_index_obdata_list,
function(x) {
x$reform_bar2_vals <- x$reform_bar2_vals[-row]
x
})
}
if (!is.null(extra_partition_list)) {
extra_partition_list$partition_index_obdata$reform_bar2_vals <- extra_partition_list$partition_index_obdata$reform_bar2_vals[-row]
}
predict(object, newdata = newdata, se.fit = se.fit, local = local_list,
extra_randcov_list = extra_randcov_list,
extra_partition_list = extra_partition_list
)
}
loocv_iid <- function(object, cv_predict, se.fit, local) {
# set to FALSE unless it is a list with parallel
if (is.null(local) || is.logical(local)) local <- FALSE
local_list <- get_local_list_prediction(local)
model_frame <- model.frame(object)
X <- model.matrix(object)
y <- model.response(model_frame)
cv_predict_error <- residuals(object) / (1 - hatvalues(object))
cv_predict_val <- y - cv_predict_error
# parallel stuff
if (se.fit) {
total_var <- coef(object, type = "spcov")[["ie"]]
if (local_list$parallel) {
cl <- parallel::makeCluster(local_list$ncores)
cv_predict_se_list <- parallel::parLapply(cl, seq_len(object$n), get_loocv_iid_se,
vcov(object), Xmat = X, total_var = total_var)
cl <- parallel::stopCluster(cl)
} else {
cv_predict_se_list <- lapply(seq_len(object$n), get_loocv_iid_se, vcov(object),
Xmat = X, total_var = total_var)
}
cv_predict_se <- vapply(cv_predict_se_list, function(x) x$se.fit, numeric(1))
}
bias <- mean(cv_predict_error)
MSPE <- mean((cv_predict_error)^2)
RMSPE <- sqrt(MSPE)
cor2 <- cor(cv_predict_val, y)^2
loocv_stats <- tibble(
bias = bias,
MSPE = MSPE,
RMSPE = RMSPE,
cor2 = cor2
)
if (!cv_predict && ! se.fit) {
return(loocv_stats)
} else {
loocv_out <- list()
loocv_out$stats <- loocv_stats
if (cv_predict) {
loocv_out$cv_predict <- cv_predict_val
}
if (se.fit) {
loocv_out$se.fit <- as.vector(cv_predict_se)
}
return(loocv_out)
}
# if (cv_predict) {
# if (se.fit) {
# cv_output <- list(mspe = mean((loocv_error)^2), cv_predict = as.vector(cv_predict_val), se.fit = as.vector(cv_predict_se))
# } else {
# cv_output <- list(mspe = mean((loocv_error)^2), cv_predict = as.vector(cv_predict_val))
# }
# } else {
# if (se.fit) {
# cv_output <- list(mspe = mean((loocv_error)^2), se.fit = as.vector(cv_predict_se))
# } else {
# cv_output <- mean((loocv_error)^2)
# }
# }
# cv_output
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.