Nothing
#' Utility Functions for the cv Package
#'
#' These functions are primarily useful for writing methods for the
#' \code{\link{cv}()} generic function. They are used internally in the package
#' and can also be used for extensions (see the vignette "Extending the cv package,
#' \code{vignette("cv-extend", package="cv")}).
#'
#' @param model a regression model object.
#' @param data data frame to which the model was fit (not usually necessary,
#' except for \code{cvSelect()}).
#' @param criterion cross-validation criterion ("cost" or lack-of-fit) function of form \code{f(y, yhat)}
#' where \code{y} is the observed values of the response and
#' \code{yhat} the predicted values; the default is \code{\link{mse}}
#' (the mean-squared error).
#' @param k perform k-fold cross-validation (default is \code{10}); \code{k}
#' may be a number or \code{"loo"} or \code{"n"} for n-fold (leave-one-out)
#' cross-validation; for \code{folds()}, \code{k} must be a number.
#' @param reps number of times to replicate k-fold CV (default is \code{1}).
#' @param confint if \code{TRUE} (the default if the number of cases is 400
#' or greater), compute a confidence interval for the bias-corrected CV
#' criterion, if the criterion is the average of casewise components.
#' @param level confidence level (default \code{0.95}).
#' @param seed for R's random number generator; optional, if not
#' supplied a random seed will be selected and saved; not needed
#' for n-fold cross-validation.
#' @param details if \code{TRUE} (the default if the number of
#' folds \code{k <= 10}), save detailed information about the value of the
#' CV criterion for the cases in each fold and the regression coefficients
#' with that fold deleted.
#' @param ncores number of cores to use for parallel computations
#' (default is \code{1}, i.e., computations aren't done in parallel).
#' @param method computational method to apply; use by some \code{\link{cv}()}
#' methods.
#' @param type used by some \code{\link{cv}()} methods, such as the default method,
#' where \code{type} is passed to the \code{type} argument of \code{predict()};
#' the default is \code{type="response"}, which is appropriate, e.g., for a \code{"glm"} model
#' and may be recognized or ignored by \code{predict()} methods for other model classes.
#' @param start used by some \code{\link{cv}()} methods;
#' if \code{TRUE} (the default is \code{FALSE}), the \code{start} argument,
#' set to the vector of regression coefficients for the model fit to the full data, is passed
#' to \code{update()}, possibly making the CV updates faster, e.g. for a GLM.
#' @param n number of cases, for constructed folds.
#' @param folds an object of class \code{"folds"}.
#' @param i a fold number for an object of class \code{"folds"}.
#' @param ... to match generic; passed to \code{predict()} for the default method,
#' and to \code{fPara()} (for parallel computations) in \code{cvCompute()}.
#' @param f function to be called by \code{cvCompute()} for each fold.
#' @param fPara function to be called by \code{cvCompute()} for each fold
#' using parallel computation.
#' @param locals a named list of objects that are required in the local environment
#' of \code{cvCompute()} for \code{f()} or \code{fPara()}.
#' @param criterion.name a character string giving the name of the CV criterion function
#' in the returned \code{"cv"} object).
#' @param procedure a model-selection procedure function (see Details).
#' @param package the name of the package in which mixed-modeling function (or functions) employed resides;
#' used to get the namespace of the package.
#' @param clusterVariables a character vector of names of the variables
#' defining clusters for a mixed model with nested or crossed random effects;
#' if missing, cross-validation is performed for individual cases rather than
#' for clusters
#' @param predict.clusters.args a list of arguments to be used to predict
#' the whole data set from a mixed model when performing CV on clusters;
#' the first two elements should be
#' \code{model} and \code{newdata}; see the "Extending the cv package" vignette
#' (\code{vignette("cv-extend", package="cv")}).
#' @param predict.cases.args a list of arguments to be used to predict
#' the whole data set from a mixed model when performing CV on cases;
#' the first two elements should be
#' \code{model} and \code{newdata}; see the "Extending the cv package" vignette
#' (\code{vignette("cv-extend", package="cv")}).
#' @param fixed.effects a function to be used to compute fixed-effect
#' coefficients for cluster-based CV when \code{details = TRUE}.
#' @param save.coef save the coefficients from the selected models? Deprecated
#' in favor of the \code{details} argument; if specified, \code{details} is set
#' is set to the value of \code{save.coef}.
#' @param y.expression normally the response variable is found from the
#' \code{model} argument; but if, for a particular selection procedure, the
#' \code{model} argument is absent, or if the response can't be inferred from the
#' model, the response can be specified by an expression, such as \code{expression(log(income))},
#' to be evaluated within the data set provided by the \code{data} argument.
#' @param save.model save the model that's selected using the \emph{full} data set.
#' @param x a \code{"cv"}, \code{"cvList"}, or \code{"folds"} object to be printed
#' @param model.function a regression function, typically for a new \code{cv()} method,
#' residing in a package that's not a declared dependency of the \pkg{cv} package,
#' e.g., \code{nnet::multinom}.
#' @param model.function.name the quoted name of the regression function, e.g.,
#' \code{"multinom"}.
#' @returns
#' The utility functions return various kinds of objects:
#' \itemize{
#' \item \code{cvCompute()} returns an object of class \code{"cv"}, with the CV criterion
#' (\code{"CV crit"}), the bias-adjusted CV criterion (\code{"adj CV crit"}),
#' the criterion for the model applied to the full data (\code{"full crit"}),
#' the confidence interval and level for the bias-adjusted CV criterion (\code{"confint"}),
#' the number of folds (\code{"k"}), and the seed for R's random-number
#' generator (\code{"seed"}). If \code{details=TRUE}, then the returned object
#' will also include a \code{"details"} component, which is a list of two
#' elements: \code{"criterion"}, containing the CV criterion computed for the
#' cases in each fold; and \code{"coefficients"}, regression coefficients computed
#' for the model with each fold deleted. Some \code{cv()} methods calling \code{cvCompute()}
#' may return a subset of these components and may add additional information.
#' If \code{reps} > \code{1}, then an object of class \code{"cvList"} is returned,
#' which is literally a list of \code{"cv"} objects.
#'
#' \item \code{cvMixed()} also returns an object of class \code{"cv"} or
#' \code{"cvList"}.
#'
#' \item \code{cvSelect} returns an object of class
#' \code{"cvSelect"} inheriting from \code{"cv"}, or an object of
#' class \code{"cvSelectList"} inheriting from \code{"cvList"}.
#'
#' \item \code{folds()} returns an object of class folds, for which
#' there are \code{fold()} and \code{print()} methods.
#'
#' \item \code{GetResponse()} returns the (numeric) response variable
#' from the model.
#'
#' The supplied \code{default} method returns the \code{model$y} component
#' of the model object, or, if \code{model} is an S4 object, the result
#' returned by the \code{\link[insight]{get_response}()} function in
#' the \pkg{insight} package. If this result is \code{NULL}, the result of
#' \code{model.response(model.frame(model))} is returned, checking in any case whether
#' the result is a numeric vector.
#'
#' There are also \code{"lme"}, \code{"merMod"}
#' and \code{"glmmTMB"} methods that convert factor
#' responses to numeric 0/1 responses, as would be appropriate
#' for a generalized linear mixed model with a binary response.
#'}
#' @examples
#' fit <- lm(mpg ~ gear, mtcars)
#' GetResponse(fit)
#'
#' set.seed(123)
#' (ffs <- folds(n=22, k=5))
#' fold(ffs, 2)
#'
#' @seealso \code{\link{cv}}, \code{\link{cv.merMod}},
#' \code{\link{cv.function}}.
#'
#' @describeIn cvCompute used internally by \code{cv()} methods (not for direct use);
#' exported to support new \code{cv()} methods.
#' @export
cvCompute <- function(model,
data = insight::get_data(model),
criterion = mse,
criterion.name,
k = 10L,
reps = 1L,
seed,
details = k <= 10L,
confint,
level = 0.95,
method = NULL,
ncores = 1L,
type = "response",
start = FALSE,
f,
fPara = f,
locals = list(),
model.function = NULL,
model.function.name = NULL,
...) {
# put function and variable args in the local environment
env <- environment()
environment(f) <- env
environment(fPara) <- env
localsNames <- names(locals)
for (i in seq_along(locals)) {
assign(localsNames[i], locals[[i]])
}
se.cv <- NA
if (missing(criterion.name) ||
is.null(criterion.name))
criterion.name <- deparse(substitute(criterion))
y <- GetResponse(model)
b <- coef(model)
n <- nrow(data)
if (is.character(k)) {
if (k == "n" || k == "loo") {
k <- n
}
}
if (!is.numeric(k) ||
length(k) > 1L || k > n || k < 2L || k != round(k)) {
stop('k must be an integer between 2 and n or "n" or "loo"')
}
if (k != n) {
if (missing(seed) || is.null(seed))
seed <- sample(1e6, 1L)
set.seed(seed)
message("R RNG seed set to ", seed)
} else {
if (reps > 1L)
stop("reps should not be > 1 for n-fold CV")
if (!missing(seed) &&
!is.null(seed))
message("Note: seed ignored for n-fold CV")
seed <- NULL
}
folds <- folds(n, k)
yhat <- if (is.factor(y)) {
factor(rep(NA, n), levels = levels(y))
} else if (is.character(y)) {
character(n)
} else {
numeric(n)
}
if (details) {
crit.i <- numeric(k)
coef.i <- vector(k, mode = "list")
names(crit.i) <- names(coef.i) <- paste("fold", 1L:k, sep = ".")
} else {
crit.i <- NULL
coef.i <- NULL
}
if (ncores > 1L) {
dots <- list(...)
cl <- makeCluster(ncores)
registerDoParallel(cl)
result <- foreach(i = 1L:k) %dopar% {
fPara(i,
model.function = model.function,
model.function.name = model.function.name,
...)
}
stopCluster(cl)
for (i in 1L:k) {
yhat[fold(folds, i)] <- result[[i]]$fit.i
if (details) {
crit.i[i] <- criterion(y[fold(folds, i)],
yhat[fold(folds, i)])
coef.i[[i]] <- result[[i]]$coef.i
}
}
} else {
result <- vector(k, mode = "list")
for (i in 1L:k) {
result[[i]] <- f(i)
yhat[fold(folds, i)] <- result[[i]]$fit.i
if (details) {
crit.i[i] <- criterion(y[fold(folds, i)],
yhat[fold(folds, i)])
coef.i[[i]] <- result[[i]]$coef.i
}
}
}
cv <- criterion(y, yhat)
cv.full <- criterion(y, predict(model, type = type, ...))
loss <- getLossFn(cv) # casewise loss function
if (!is.null(loss)) {
adj.cv <- cv + cv.full -
weighted.mean(sapply(result, function(x)
x$crit.all.i), folds$folds)
se.cv <- sd(loss(y, yhat)) / sqrt(n)
halfwidth <- qnorm(1 - (1 - level) / 2) * se.cv
ci <-
if (confint)
c(
lower = adj.cv - halfwidth,
upper = adj.cv + halfwidth,
level = round(level * 100)
)
else
NULL
} else {
adj.cv <- NULL
ci <- NULL
}
result <- list(
"CV crit" = cv,
"adj CV crit" = adj.cv,
"full crit" = cv.full,
"confint" = ci,
"SE adj CV crit" = se.cv,
"k" = if (k == n)
"n"
else
k,
"seed" = seed,
"method" = method,
"criterion" = criterion.name,
"coefficients" = if (details) coef(model) else NULL,
"details" = list(criterion = crit.i,
coefficients = coef.i)
)
if (missing(method) || is.null(method))
result$method <- NULL
class(result) <- "cv"
if (reps == 1L) {
return(result)
} else {
res <- cvCompute(
model = model,
data = data,
criterion = criterion,
criterion.name = criterion.name,
k = k,
reps = reps - 1L,
details = details,
confint = confint,
level = level,
method = method,
ncores = ncores,
type = type,
start = start,
f = f,
fPara = fPara,
locals = locals,
model.function = model.function,
model.function.name = model.function.name,
...
)
if (reps > 2L) {
res[[length(res) + 1L]] <- result
} else {
res <- list(res, result)
}
for (i in 1L:(length(res) - 1L)) {
res[[i]]["criterion"] <- res[[length(res)]]["criterion"]
}
class(res) <- "cvList"
return(res)
}
}
#' @describeIn cvCompute used internally by \code{cv()} methods
#' for mixed-effect models (not for direct use);
#' exported to support new \code{cv()} methods.
#' @export
cvMixed <- function(model,
package,
data = insight::get_data(model),
criterion = mse,
criterion.name,
k,
reps = 1L,
confint,
level = 0.95,
seed,
details,
ncores = 1L,
clusterVariables,
predict.clusters.args = list(object = model, newdata =
data),
predict.cases.args = list(object = model, newdata =
data),
fixed.effects,
...) {
pkg.env <- getNamespace(package)
se.cv <- NA
if (missing(criterion.name) ||
is.null(criterion.name))
criterion.name <- deparse(substitute(criterion))
f.clusters <-
function(i,
predict.clusters.args,
predict.cases.args,
...) {
indices.i <- fold(folds, i)
index <-
selectClusters(clusters[-indices.i, , drop = FALSE], data = data)
update.args <- list(...)
update.args$object <- model
update.args$data <- data[index,]
predict.clusters.args$object <-
do.call(update, update.args, envir = pkg.env)
fit.all.i <- do.call(predict, predict.clusters.args)
fit.i <- fit.all.i[index <- !index]
list(
fit.i = fit.i,
crit.all.i = criterion(y, fit.all.i),
indices.i = index,
coef.i = fixed.effects(predict.clusters.args$object)
)
}
f.cases <-
function(i,
predict.clusters.args,
predict.cases.args,
...) {
indices.i <- fold(folds, i)
update.args <- list(...)
update.args$object <- model
update.args$data <- data[-indices.i,]
predict.cases.args$object <-
do.call(update, update.args, envir = pkg.env)
fit.all.i <- do.call(predict, predict.cases.args)
fit.i <- fit.all.i[indices.i]
list(
fit.i = fit.i,
crit.all.i = criterion(y, fit.all.i),
indices.i = indices.i,
coef.i = fixed.effects(predict.cases.args$object)
)
}
y <- GetResponse(model)
if (missing(clusterVariables))
clusterVariables <- NULL
if (is.null(clusterVariables)) {
n <- nrow(data)
if (missing(k) || is.null(k))
k <- 10L
if (is.character(k)) {
if (k == "n" || k == "loo") {
k <- n
}
}
f <- f.cases
} else {
clusters <- defineClusters(clusterVariables, data)
n <- nrow(clusters)
if (missing(k) || is.null(k))
k <- nrow(clusters)
f <- f.clusters
}
if (!is.numeric(k) ||
length(k) > 1L || k > n || k < 2L || k != round(k)) {
stop("k must be an integer between 2 and number of",
if (is.null(clusterVariables))
"cases"
else
"clusters")
}
if (k != n) {
if (missing(seed) || is.null(seed))
seed <- sample(1e6, 1L)
set.seed(seed)
message("R RNG seed set to ", seed)
} else {
if (reps > 1L)
stop("reps should not be > 1 for n-fold CV")
if (!missing(seed) &&
!is.null(seed))
message("Note: seed ignored for n-fold CV")
seed <- NULL
}
if (missing(details) || is.null(details))
details <- k <= 10L
if (details) {
crit.i <- numeric(k)
coef.i <- vector(k, mode = "list")
names(crit.i) <- names(coef.i) <- paste("fold", 1L:k, sep = ".")
} else {
crit.i <- NULL
coef.i <- NULL
}
folds <- folds(n, k)
yhat <- if (is.factor(y)) {
factor(rep(NA, n), levels = levels(y))
} else if (is.character(y)) {
character(n)
} else {
numeric(n)
}
if (ncores > 1L) {
cl <- makeCluster(ncores)
registerDoParallel(cl)
result <- foreach(i = 1L:k) %dopar% {
f(i, predict.clusters.args, predict.cases.args, ...)
}
stopCluster(cl)
for (i in 1L:k) {
indices.i <- result[[i]]$indices.i
# yhat[result[[i]]$indices.i] <- result[[i]]$fit.i
yhat[indices.i] <- result[[i]]$fit.i
if (details) {
# crit.i[i] <- criterion(y[fold(folds, i)],
# yhat[fold(folds, i)])
crit.i[i] <- criterion(y[indices.i],
yhat[indices.i])
coef.i[[i]] <- result[[i]]$coef.i
}
}
} else {
result <- vector(k, mode = "list")
for (i in 1L:k) {
result[[i]] <- f(i, predict.clusters.args, predict.cases.args, ...)
indices.i <- result[[i]]$indices.i
# yhat[result[[i]]$indices.i] <- result[[i]]$fit.i
yhat[indices.i] <- result[[i]]$fit.i
if (details) {
# crit.i[i] <- criterion(y[fold(folds, i)],
# yhat[fold(folds, i)])
crit.i[i] <- criterion(y[indices.i],
yhat[indices.i])
coef.i[[i]] <- result[[i]]$coef.i
}
}
}
cv <- criterion(y, yhat)
cv.full <- criterion(y,
do.call(predict,
if (is.null(clusterVariables)) {
predict.cases.args
} else {
predict.clusters.args
}))
if (missing(confint))
confint <- length(y) >= 400
loss <- getLossFn(cv) # casewise loss function
if (!is.null(loss)) {
adj.cv <- cv + cv.full -
weighted.mean(sapply(result, function(x)
x$crit.all.i), folds$folds)
se.cv <- sd(loss(y, yhat)) / sqrt(length(y))
halfwidth <- qnorm(1 - (1 - level) / 2) * se.cv
ci <-
if (confint)
c(
lower = adj.cv - halfwidth,
upper = adj.cv + halfwidth,
level = round(level * 100)
)
else
NULL
} else {
adj.cv <- NULL
ci <- NULL
}
result <- list(
"CV crit" = cv,
"adj CV crit" = adj.cv,
"full crit" = cv.full,
"confint" = ci,
"SE adj CV crit" = se.cv,
"k" = if (k == n)
"n"
else
k,
"seed" = seed,
"criterion" = criterion.name,
"coefficients" = if (details) fixed.effects(model) else NULL,
clusters = clusterVariables,
"n clusters" = if (!is.null(clusterVariables))
n
else
NULL,
"details" = list(criterion = crit.i,
coefficients = coef.i)
)
class(result) <- "cv"
if (reps == 1L) {
return(result)
} else {
res <- cv(
model = model,
data = data,
criterion = criterion,
k = k,
ncores = ncores,
reps = reps - 1L,
clusterVariables = clusterVariables,
...
)
if (reps > 2L) {
res[[length(res) + 1L]] <- result
} else {
res <- list(res, result)
}
for (i in 1L:(length(res) - 1L)) {
res[[i]]["criterion"] <- res[[length(res)]]["criterion"]
}
class(res) <- "cvList"
return(res)
}
}
#' @describeIn cvCompute used internally by \code{cv()} methods for
#' cross-validating a model-selection procedure; may also be called
#' directly for this purpose, but use via \code{cv()} is preferred.
#' \code{cvSelect()} is exported primarily to support new model-selection procedures.
#' @export
cvSelect <- function(procedure,
data,
criterion = mse,
criterion.name,
model,
y.expression,
k = 10L,
confint = n >= 400,
level = 0.95,
reps = 1L,
save.coef,
details = k <= 10L,
save.model = FALSE,
seed,
ncores = 1L,
...) {
if (missing(criterion.name) ||
is.null(criterion.name))
criterion.name <- deparse(substitute(criterion))
selectModelListP <- isTRUE(all.equal(procedure, selectModelList))
if (!missing(save.coef))
details <- save.coef
se.cv <- NA
n <- nrow(data)
y <- if (!missing(model)) {
GetResponse(model)
} else {
eval(y.expression, envir = data)
}
if (missing(model))
model <- NULL
k.save <- k
if (is.character(k)) {
if (k == "n" || k == "loo") {
k <- n
}
}
if (!is.numeric(k) ||
length(k) > 1L || k > n || k < 2L || k != round(k)) {
stop('k must be an integer between 2 and n or "n" or "loo"')
}
if (k != n) {
if (missing(seed) || is.null(seed))
seed <- sample(1e6, 1L)
set.seed(seed)
message("R RNG seed set to ", seed)
} else {
if (reps > 1L)
stop("reps should not be > 1 for n-fold CV")
if (k == n) {
if (reps > 1L)
stop("reps should not be > 1 for n-fold CV")
if (!missing(seed) &&
!is.null(seed) &&
!isFALSE(seed))
message("Note: seed ignored for n-fold CV")
seed <- NULL
}
seed <- NULL
}
folds <- folds(n, k)
yhat <- if (is.factor(y)) {
factor(rep(NA, n), levels = levels(y))
} else if (is.character(y)) {
character(n)
} else {
numeric(n)
}
crit.all.i <- numeric(k)
if (details) {
crit.i <- numeric(k)
coef.i <- vector(k, mode = "list")
model.name.i <- character(k)
names(crit.i) <- names(coef.i) <- names(model.name.i) <-
paste("fold", 1L:k, sep = ".")
} else {
crit.i <- NULL
coef.i <- NULL
model.name.i <- NULL
}
if (ncores > 1L) {
cl <- makeCluster(ncores)
registerDoParallel(cl)
arglist <- c(
list(
data = data,
indices = 1L,
details = k <= 10L,
criterion = criterion,
model = model
),
list(...)
)
if (selectModelListP)
arglist <- c(arglist, list(k = k.save))
selection <- foreach(i = 1L:k) %dopar% {
# the following deals with a scoping issue that can
# occur with args passed via ...
arglist$indices <- fold(folds, i)
do.call(procedure, arglist)
}
stopCluster(cl)
for (i in 1L:k) {
yhat[fold(folds, i)] <- selection[[i]]$fit.i
crit.all.i[i] <- selection[[i]]$crit.all.i
if (details) {
crit.i[i] <- criterion(y[fold(folds, i)],
yhat[fold(folds, i)])
coef.i[[i]] <- selection[[i]]$coefficients
model.name.i[[i]] <-
if (!is.null(selection[[i]]$model.name))
selection[[i]]$model.name
else
""
}
}
} else {
for (i in 1L:k) {
indices.i <- fold(folds, i)
selection <- if (selectModelListP) {
procedure(
data,
indices.i,
details = k <= 10L,
criterion = criterion,
model = model,
k = k.save,
...
)
} else {
procedure(
data,
indices.i,
details = k <= 10L,
criterion = criterion,
model = model,
...
)
}
crit.all.i[i] <- selection$crit.all.i
yhat[indices.i] <- selection$fit.i
if (details) {
crit.i[i] <- criterion(y[fold(folds, i)],
yhat[fold(folds, i)])
coef.i[[i]] <- selection$coefficients
model.name.i[[i]] <-
if (!is.null(selection$model.name))
selection$model.name
else
""
}
}
}
cv <- criterion(y, yhat)
result.full <- if (selectModelListP) {
procedure(
data,
model = model,
criterion = criterion,
# seed = seed, # to use same folds if necessary
save.model = save.model,
k = k.save,
...
)
} else {
procedure(
data,
model = model,
criterion = criterion,
seed = seed,
# to use same folds if necessary
save.model = save.model,
...
)
}
if (is.list(result.full)) {
cv.full <- result.full$criterion
selected.model <- result.full$model
coef.full <- coef(result.full$model)
if (!is.null(result.full$model$additional.coefficients)){
coef.full <- c(coef.full,
result.full$model$additional.coefficients)
}
} else {
cv.full <- result.full
selected.model <- NULL
coef.full <- NULL
}
loss <- getLossFn(cv) # casewise loss function
if (!is.null(loss)) {
adj.cv <- cv + cv.full - weighted.mean(crit.all.i, folds$folds)
se.cv <- sd(loss(y, yhat)) / sqrt(n)
halfwidth <- qnorm(1 - (1 - level) / 2) * se.cv
ci <-
if (confint)
c(
lower = adj.cv - halfwidth,
upper = adj.cv + halfwidth,
level = round(level * 100)
)
else
NULL
} else {
adj.cv <- NULL
ci <- NULL
}
result <-
list(
"CV crit" = cv,
"adj CV crit" = adj.cv,
"full crit" = cv.full,
"criterion" = criterion.name,
"confint" = ci,
"SE adj CV crit" = se.cv,
"k" = if (k == n)
"n"
else
k,
"seed" = seed,
"coefficients" = if (details) coef.full else NULL,
"details" = list(
criterion = crit.i,
coefficients = coef.i,
model.name = model.name.i
),
"selected.model" = selected.model
)
class(result) <- c("cvSelect", "cv")
if (reps == 1L) {
return(result)
} else {
if (missing(y.expression))
y.expression <- NULL
res <- cvSelect(
procedure = procedure,
data = data,
criterion = criterion,
criterion.name = criterion.name,
model = model,
y.expression = y.expression,
k = k,
confint = confint,
level = level,
reps = reps - 1L,
save.coef = save.coef,
details = details,
save.model = save.model,
ncores = ncores,
...
)
if (reps > 2L) {
res[[length(res) + 1L]] <- result
} else {
res <- list(res, result)
}
class(res) <- c("cvSelectList", "cvList")
return(res)
}
}
#' @describeIn cvCompute used internally by \code{cv()} methods (not for direct use).
#' @export
folds <- function(n, k) {
nk <- n %/% k # number of cases in each fold
rem <- n %% k # remainder
folds <-
rep(nk, k) + c(rep(1L, rem), rep(0L, k - rem)) # allocate remainder
ends <- cumsum(folds) # end of each fold
starts <- c(1L, ends + 1L)[-(k + 1L)] # start of each fold
indices <- if (n > k)
sample(n, n)
else
1L:n # permute cases
result <- list(
n = n,
k = k,
folds = folds,
starts = starts,
ends = ends,
indices = indices
)
class(result) <- "folds"
result
}
#' @describeIn cvCompute to extract a fold from a \code{"folds"} object.
#' @export
fold <- function(folds, i, ...)
UseMethod("fold")
#' @describeIn cvCompute \code{fold()} method for \code{"folds"} objects.
#' @export
fold.folds <-
function(folds, i, ...)
folds$indices[folds$starts[i]:folds$ends[i]]
#' @describeIn cvCompute \code{print()} method for \code{"folds"} objects.
#' @export
print.folds <- function(x, ...) {
if (x$k == x$n) {
cat("LOO:", x$k, "folds for", x$n, "cases")
return(invisible(x))
}
cat(x$k, "folds of approximately", floor(x$n / x$k),
"cases each")
for (i in 1L:min(x$k, 10L)) {
cat("\n fold", paste0(i, ": "))
fld <- fold(x, i)
if (length(fld) <= 10L)
cat(fld)
else
cat(fld[1L:10L], "...")
}
if (x$k > 10L)
cat("\n ...")
cat("\n")
invisible(x)
}
#' @export
#' @describeIn cvCompute function to return the response variable
#' from a regression model.
GetResponse <- function(model, ...) {
UseMethod("GetResponse")
}
#' @describeIn cvCompute default method.
#' @export
GetResponse.default <- function(model, ...) {
y <- if (!isS4(model))
model$y
else
insight::get_response(model)
if (is.null(y))
y <- model.response(model.frame(model))
if (!is.vector(y))
stop("non-vector response")
if (!is.numeric(y))
stop("non-numeric response")
y
}
#' @describeIn cvCompute \code{"merMod"} method.
#' @export
GetResponse.merMod <- function(model, ...) {
y <- insight::get_response(model)
if (is.factor(y)) {
levels <- levels(y)
failure <- levels[1L]
if (length(levels) > 2L) {
message(
"Note: the response has more than 2 levels.\n",
" The first level ('",
failure,
"') denotes failure (0),\n",
" the others success (1)"
)
}
y <- as.numeric(y != failure)
}
if (!is.vector(y))
stop("non-vector response")
if (!is.numeric(y))
stop("non-numeric response")
y
}
#' @describeIn cvCompute \code{"lme"} method.
#' @export
GetResponse.lme <- function(model, ...)
insight::get_response(model)
#' @describeIn cvCompute \code{"glmmTMB"} method.
#' @export
GetResponse.glmmTMB <- function (model, ...){
y <- insight::get_response(model)
if (length(dim(y)) == 2 && dim(y)[2] == 1) y <- y[, 1]
if (!is.vector(y))
stop("non-vector response")
if (!is.numeric(y))
stop("non-numeric response")
y
}
#' @describeIn cvCompute \code{"modList"} method.
#' @export
GetResponse.modList <- function(model, ...)
GetResponse(model[[1L]])
# not exported
summarizeReps <- function(x) {
CVcrit <- mean(sapply(x, function(x)
x[["CV crit"]]))
CVcritSD <- sd(sapply(x, function(x)
x[["CV crit"]]))
CVcritRange <- range(sapply(x, function(x)
x[["CV crit"]]))
if (!is.null(x[[1L]][["adj CV crit"]])) {
adjCVcrit <- mean(sapply(x, function(x)
x[["adj CV crit"]]))
adjCVcritSD <- sd(sapply(x, function(x)
x[["adj CV crit"]]))
adjCVcritRange <-
range(sapply(x, function(x)
x[["adj CV crit"]]))
} else {
adjCVcrit <- adjCVcritSD <- adjCVcritRange <- NULL
}
list(
"CV crit" = CVcrit,
"adj CV crit" = adjCVcrit,
"CV crit range" = CVcritRange,
"SD CV crit" = CVcritSD,
"SD adj CV crit" = adjCVcritSD,
"adj CV crit range" = adjCVcritRange
)
}
getLossFn <- function(cv) {
fn.body <- attr(cv, "casewise loss")
if (is.null(fn.body))
return(NULL)
eval(parse(text = paste0(
"function(y, yhat) {\n",
paste(fn.body, collapse = "\n"),
"\n}"
)))
}
Merge <- function(...) {
Ds <- lapply(list(...), as.data.frame, optional=TRUE)
names <- unique(unlist(lapply(Ds, colnames)))
for (i in 1L:length(Ds)) {
missing <- setdiff(names, colnames(Ds[[i]]))
Ds[[i]][, missing] <- NA
}
do.call("rbind", Ds)
}
utils::globalVariables(c("b", "y", "dots"))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.