Nothing
# --------------------------------------
# Author: Andreas Alfons
# Erasmus Universiteit Rotterdam
# --------------------------------------
#' Resampling-based prediction error for a sequential regression model
#'
#' Estimate the prediction error of a previously fit sequential regression
#' model such as a robust least angle regression model or a sparse least
#' trimmed squares regression model.
#'
#' The prediction error can be estimated via (repeated) \eqn{K}-fold
#' cross-validation, (repeated) random splitting (also known as random
#' subsampling or Monte Carlo cross-validation), or the bootstrap. In
#' each iteration, the optimal model is thereby selected from the training
#' data and used to make predictions for the test data.
#'
#' @method perry seqModel
#' @aliases perry.rlars
#'
#' @param object the model fit for which to estimate the prediction error.
#' @param splits an object of class \code{"cvFolds"} (as returned by
#' \code{\link[perry]{cvFolds}}) or a control object of class
#' \code{"foldControl"} (see \code{\link[perry]{foldControl}}) defining the
#' folds of the data for (repeated) \eqn{K}-fold cross-validation, an object of
#' class \code{"randomSplits"} (as returned by
#' \code{\link[perry]{randomSplits}}) or a control object of class
#' \code{"splitControl"} (see \code{\link[perry]{splitControl}}) defining
#' random data splits, or an object of class \code{"bootSamples"} (as returned
#' by \code{\link[perry]{bootSamples}}) or a control object of class
#' \code{"bootControl"} (see \code{\link[perry]{bootControl}}) defining
#' bootstrap samples.
#' @param fit a character string specifying for which fit to estimate the
#' prediction error. Possible values are \code{"reweighted"} (the default) for
#' the prediction error of the reweighted fit, \code{"raw"} for the prediction
#' error of the raw fit, or \code{"both"} for the prediction error of both
#' fits.
#' @param cost a cost function measuring prediction loss. It should expect
#' vectors to be passed as its first two arguments, the first corresponding to
#' the observed values of the response and the second to the predicted values,
#' and must return a non-negative scalar value. The default is to use the root
#' mean squared prediction error for non-robust models and the root trimmed
#' mean squared prediction error for robust models (see
#' \code{\link[perry]{cost}}).
#' @param ncores a positive integer giving the number of processor cores to be
#' used for parallel computing (the default is 1 for no parallelization). If
#' this is set to \code{NA}, all available processor cores are used.
#' @param cl a \pkg{parallel} cluster for parallel computing as generated by
#' \code{\link[parallel]{makeCluster}}. If supplied, this is preferred over
#' \code{ncores}.
#' @param seed optional initial seed for the random number generator (see
#' \code{\link{.Random.seed}}). Note that also in case of parallel computing,
#' resampling is performed on the manager process rather than the worker
#' processes. On the parallel worker processes, random number streams are
#' used and the seed is set via \code{\link{clusterSetRNGStream}}.
#' @param \dots additional arguments to be passed to the prediction loss
#' function \code{cost}.
#'
#' @return An object of class \code{"perry"} with the following components:
#' \describe{
#' \item{\code{pe}}{a numeric vector containing the estimated prediction
#' errors for the requested model fits. In case of more than one replication,
#' this gives the average value over all replications.}
#' \item{\code{se}}{a numeric vector containing the estimated standard errors
#' of the prediction loss for the requested model fits.}
#' \item{\code{reps}}{a numeric matrix in which each column contains the
#' estimated prediction errors from all replications for the requested model
#' fits. This is only returned in case of more than one replication.}
#' \item{\code{splits}}{an object giving the data splits used to estimate the
#' prediction error.}
#' \item{\code{y}}{the response.}
#' \item{\code{yHat}}{a list containing the predicted values from all
#' replications.}
#' \item{\code{call}}{the matched function call.}
#' }
#'
#' @author Andreas Alfons
#'
#' @seealso \code{\link{rlars}}, \code{\link{sparseLTS}},
#' \code{\link[=predict.seqModel]{predict}}, \code{\link[perry]{perry}},
#' \code{\link[perry]{cost}}
#'
#' @example inst/doc/examples/example-perry.R
#'
#' @keywords utilities
#'
#' @export
#' @import perry
perry.seqModel <- function(object, splits = foldControl(), cost, ncores = 1,
cl = NULL, seed = NULL, ...) {
## initializations
matchedCall <- match.call()
matchedCall[[1]] <- as.name("perry")
# retrieve data from model fit
if(is.null(x <- object$x) || is.null(y <- object$y)) {
if(is.null(x)) x <- try(model.matrix(object$terms), silent=TRUE)
if(is.null(y)) y <- try(model.response(object$terms), silent=TRUE)
if(inherits(x, "try-error") || inherits(y, "try-error")) {
stop("model data not available")
}
}
# predictor matrix is stored with column for intercept
x <- removeIntercept(x)
# get default cost function depending on whether model fit is robust
if(missing(cost)) cost <- if(object$robust) rtmspe else rmspe
## construct call to fit models in prediction error estimation
call <- object$call
# if the model was fitted with formula method, 'formula' and 'data'
# arguments are removed from call and 'x' and 'y' are used instead
call$formula <- NULL
call$data <- NULL
## call function perryFit() to perform prediction error estimation
out <- perryFit(call, x=x, y=y, splits=splits, cost=cost, costArgs=list(...),
envir=parent.frame(), ncores=ncores, cl=cl, seed=seed)
out$call <- matchedCall
out
}
#' @rdname perry.seqModel
#' @method perry sparseLTS
#' @export
perry.sparseLTS <- function(object, splits = foldControl(),
fit = c("reweighted", "raw", "both"),
cost = rtmspe, ncores = 1, cl = NULL,
seed = NULL, ...) {
## initializations
matchedCall <- match.call()
matchedCall[[1]] <- as.name("perry")
fit <- match.arg(fit)
if(is.null(x <- object$x) || is.null(y <- object$y)) {
if(is.null(x)) x <- try(model.matrix(object$terms), silent=TRUE)
if(is.null(y)) y <- try(model.response(object$terms), silent=TRUE)
if(inherits(x, "try-error") || inherits(y, "try-error")) {
stop("model data not available")
}
}
# predictor matrix is stored with column for intercept (if any)
x <- removeIntercept(x)
## prepare prediction error estimation
# extract function call for fitting the model
call <- object$call
# if the model was fitted with formula method, 'formula' and 'data'
# arguments are removed from call and 'x' and 'y' are used instead
call$formula <- NULL
call$data <- NULL
call$intercept <- object$intercept
## call function perryFit() to perform prediction error estimation
out <- perryFit(call, x=x, y=y, splits=splits, predictArgs=list(fit=fit),
cost=cost, costArgs=list(...), envir=parent.frame(),
ncores=ncores, cl=cl, seed=seed)
if(fit == "both") peNames(out) <- c("reweighted", "raw")
out$call <- matchedCall
out
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.