#' Cross Validate Robust Minimax Concave Penalized Regression
#'
#' @param formula a model formula
#' @param data a training data set
#' @param cv.method one of "boot632" (the default), "boot", "cv", "repeatedcv", or "LOOCV".
#' @param nfolds the number of bootstrap or cross-validation folds to use. defaults to 5.
#' @param folds a vector of pre-set cross-validation or bootstrap folds from caret::createResample or
#' caret::createFolds.
#' @param nrep the number of repetitions for cv.method = "repeatedcv". defaults to 4.
#' @param tunlen the number of values for the unknown hyperparameter to test. defaults to 25.
#' @param crit the criterion by which to evaluate the model performance. must be one of "TauScale2" (the default),
#' "RobustMAE", or "RobustMSE". "TauScale2" gives the squared tau estimate of the scale of the residuals.
#' "RobustMAE" and "RobustMSE" are the tau estimates of mean absolute and squared errors respectively.
#'
#' @return
#' a train object
#' @export
#'
cv_rmcp = function(formula, data, cv.method = c("boot632", "boot", "cv", "repeatedcv", "LOOCV"), nfolds = 5, nrep = 4, folds = NULL, tunlen = 25, crit = c("TauScale2","RobustMAE","RobustMSE")){
cv.method <- match.arg(cv.method)
crit <- match.arg(crit)
if (!is.null(folds)) {
nfolds = NULL
}
RMCP <- list(type = "Regression",
library = "cvreg",
loop = NULL)
RMCP$alpha <- alpha
RMCP$parameters <- data.frame(parameter = "lambda",
class = "numeric",
label = "lambda")
RMCPGrid <- function(x, y, len = NULL, search = "grid") {
## use grid search:
if(search == "grid"){
search = "grid"
} else {
search = "grid"
}
max.lambda <- function(y, x){
n = length(y)
max(abs(y %*% x)) / n
}
lambda = seq(max(0.005, max.lambda(y, x)/10), max.lambda(y, x) * 2.5, len = tunlen)
grid <- expand.grid(lambda = lambda)
out <- grid
return(out)
}
RMCP$grid <- RMCPGrid
RMCPFit <- function(x, y, param, ...) {
dat <- as.data.frame(x)
dat$.outcome <- y
cvreg::robpr(.outcome ~ ., data = dat, lambda = param$lambda, penalty = "MCP")
}
RMCP$fit <- RMCPFit
RMCP$prob <- RMCPFit
RMCPPred <- function(modelFit, newdata, preProc = NULL, submodels = NULL){
newdata <- as.matrix(cbind(rep(1, nrow(newdata)), newdata))
as.matrix(newdata %*% modelFit$coefficients)
}
RMCP$predict <- RMCPPred
postRobResamp = function(pred, obs) {
isNA <- is.na(pred)
pred <- pred[!isNA]
obs <- obs[!isNA]
if (!is.factor(obs) && is.numeric(obs)) {
if (length(obs) + length(pred) == 0) {
out <- rep(NA, 2)
}
else {
lta <- function(obs, pred){
h <- ceiling(0.75 * length(pred))
res<-abs(obs - pred)
res<-sort(res)
val<-sum(res[1:h])
val
}
tau.se <- tauLocScale((obs - pred)^2, mu = T)
tau.ae <- tauLocScale(abs(obs - pred), mu = T)
rmse <- tau.se[1]
rmae <- tau.ae[1]
scale.ae <- tau.ae[2]^2
out <- c(rmse, rmae, scale.ae)
}
names(out) <- c("RobustMSE", "RobustMAE", "TauScale2")
}
else {
if (length(obs) + length(pred) == 0) {
out <- rep(NA, 2)
}
else {
pred <- factor(pred, levels = levels(obs))
requireNamespaceQuietStop("e1071")
out <- unlist(e1071::classAgreement(table(obs, pred)))[c("diag", "kappa")]
}
names(out) <- c("Accuracy", "Kappa")
}
if (any(is.nan(out)))
out[is.nan(out)] <- NA
out
}
robustSummary = function (data, lev = NULL, model = NULL)
{
if (is.character(data$obs))
data$obs <- factor(data$obs, levels = lev)
postRobResamp(data[, "pred"], data[, "obs"])
}
if (cv.method == "repeatedcv") {
fitControl <- trainControl(method = cv.method,
number = nfolds,
repeats = nrep,
index = folds,
savePredictions = "all",
summaryFunction = robustSummary,
search = "grid")
} else {
fitControl <- trainControl(method = cv.method,
number = nfolds,
index = folds,
savePredictions = "all",
summaryFunction = robustSummary,
search = "grid")
}
fitted.models <- train(formula, data,
method = RMCP,
metric = crit,
tuneLength = tunlen,
maximize = FALSE,
preProcess = c("center", "scale"),
trControl = fitControl)
return(fitted.models)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.