cvreg: Cross Validation and Robust Estimation Utilities

Documented in cv_APC

#' Cross Validate Power Parameter of Adaptive Powered Correlation Prior Regression Model
#'
#' @param formula a model formula
#' @param data a training data set
#' @param cv.method preferably one of "boot632" (the default), "cv", or "repeatedcv".
#' @param nfolds the number of bootstrap or cross-validation folds to use. defaults to 5.
#' @param nrep the number of repetitions for cv.method = "repeatedcv". defaults to 4.
#' @param tunlen the number of values for the unknown hyperparameter to test. defaults to 6.
#' @param crit the criterion by which to evaluate the model performance. must be one of "MAE" (the default)
#' or "MSE".
#' @return
#' a train object
#' @export
#'
cv_APC = function(formula,data,cv.method="boot632",nfolds=5,nrep=4,tunlen=10,crit = c("MAE","MSE")){

  crit <- match.arg(crit)
  APC <- list(type = "Regression",library = "cvreg",loop = NULL)
  prm <- data.frame(parameter="lambda",class="numeric",label="lambda")
  apcLambda = function(formula, data){
    pdcheck = function(formula, data, lambda){
      X = model.matrix(formula, data)[,-1]
      cormat = cov2cor(fBasics::makePositiveDefinite(cor(X)))
      L = eigen(cormat)$vectors
      D = eigen(cormat)$values
      Dpower = matrix(0, length(D), length(D))
      for (i in 1:length(D)){Dpower[i,i] <- D[i]^lambda}
      fBasics::isPositiveDefinite(L %*% Dpower %*% t(L))
    }
    pd = as.numeric(sapply(seq(-9,9,by=0.25), function(l) pdcheck(formula, data, l)))
    l = seq(-9,9,by=0.25)[which(pd == 1)]
    c(lower.limit = min(l), upper.limit = max(l))
  }

  limits = apcLambda(formula, data)
  APC$parameters <- prm
  APC$lower <- limits[1]
  APC$upper <- limits[2]

  APCGrid <- function(x, y, lower = APC$lower, upper = APC$upper, len = NULL, search = "grid") {
    lambdas <- seq(lower, upper, length.out = len)
    ## use grid search:
    if(search == "grid"){search = "grid"} else {search = "grid"}
    grid <- data.frame(lambda = lambdas)
    out <- grid
    return(out)
  }

  APCFit <- function(x, y, param, ...) {
    dat <- as.data.frame(x)
    dat$.outcome <- y
    model.out <- apclm(.outcome ~ ., data = dat, lambda = param$lambda)
    model.out
  }

  APC$grid <- APCGrid
  APC$fit <- APCFit
  APC$prob <- APCFit

  APCPred <- function(modelFit, newdata, preProc = NULL, submodels = NULL){
    betas <- modelFit$coefficients
    newx = as.matrix(cbind(y = rep(1, nrow(newdata)), newdata))
    as.vector(newx%*%betas)
  }

  APC$predict <- APCPred

  postRobResamp = function(pred, obs) {
    isNA <- is.na(pred)
    pred <- pred[!isNA]
    obs <- obs[!isNA]
    if (!is.factor(obs) && is.numeric(obs)) {
      if (length(obs) + length(pred) == 0) {
        out <- rep(NA, 3)
      }
      else {
        mse <- mean((pred - obs)^2)
        mae <- mean(abs(pred - obs))
        out <- c(mse, mae)
      }
      names(out) <- c("MSE", "MAE")
    }
    else {
      if (length(obs) + length(pred) == 0) {
        out <- rep(NA, 2)
      }
      else {
        pred <- factor(pred, levels = levels(obs))
        requireNamespaceQuietStop("e1071")
        out <- unlist(e1071::classAgreement(table(obs, pred)))[c("diag","kappa")]
      }
      names(out) <- c("Accuracy", "Kappa")
    }
    if (any(is.nan(out)))
      out[is.nan(out)] <- NA
    out
  }

  basicSummary = function (data, lev = NULL, model = NULL){
    if (is.character(data$obs))
      data$obs <- factor(data$obs, levels = lev)
    postRobResamp(data[, "pred"], data[, "obs"])
  }


  if (cv.method == "repeatedcv") {
    fitControl <- trainControl(method = cv.method,
                               number = nfolds,
                               repeats = nrep,
                               savePredictions = "all",
                               summaryFunction = basicSummary,
                               search = "grid")
  } else {

    fitControl <- trainControl(method = cv.method,
                               number = nfolds,
                               savePredictions = "all",
                               summaryFunction = basicSummary,
                               search = "grid")
  }


  fitted.models <- train(formula, data,
                         method = APC,
                         metric = crit,
                         tuneLength = tunlen,
                         maximize = FALSE,
                         preProcess = c("center", "scale"),
                         trControl = fitControl)

  return(fitted.models)

}