R/cv_genet.R

Defines functions cv_genet

Documented in cv_genet

#' Cross validate generalized elastic net tuning parameters
#'
#' @param formula a model formula
#' @param data a training data set
#' @param family a glm family
#' @param cv.method preferably one of "adaptive_boot" or "adaptive_cv"
#' @param nfolds the number of bootstrap or cross-validation folds to use. defaults to 15.
#' @param nrep the number of repetitions for cv.method = "repeatedcv". defaults to 4.
#' @param folds a vector of pre-set cross-validation or bootstrap folds from caret::createResample or
#' caret::createFolds.
#' @param tunlen the number of values of lambda to test. defaults to 10.
#' @param crit the criterion by which to evaluate the model performance. must be one of "MAE" (the default)
#' or "MSE".
#' @param max.c the largest value of lambda to try.
#'
#' @return
#' a train object
#' @export
#'
cv_genet = function(formula, data, family = gaussian(), cv.method = c("adaptive_boot", "adaptive_cv"), nfolds = 15, nrep = 4, tunlen = 10, folds = NULL, max.lambda = 3,crit = c("MSE","MAE","Accuracy","kappa")){

  cv.method<-match.arg(cv.method)
  crit <- match.arg(crit)

  if (!is.null(folds)) {
    nfolds = NULL
  }

  ELASTICNET <- list(type = "Regression",
                     library = "cvreg",
                     loop = NULL)

  if (family$family=="binomial"){ELASTICNET$type<-"Classification"}

  ELASTICNET$parameters <- data.frame(parameter = c("kappa", "alpha", "lambda"),
                                      class = rep("numeric", 3),
                                      label = c("kappa", "alpha", "lambda"))

  ELASTICNET$family <- family

  elasticnetGrid <- function(x, y, len = NULL, search = "grid") {
    alpha <- seq(0.05, 0.95, length.out = 6)
    kappa <- c(0.5, 1, 1.4, 2, 2.75, 3.5)
    lambda <- exp(seq(log(0.001464844), log(max.lambda), len = len))
    ## use grid search:
    if(search == "grid"){search = "grid"} else {search = "grid"}
    grid <- expand.grid(kappa = kappa, alpha = alpha, lambda = lambda)
    out <- grid
    return(out)
  }

  ELASTICNET$grid <- elasticnetGrid

  elasticnetFit <- function(x, y, param, family = ELASTICNET$family, ...) {
    dat <- as.data.frame(x)
    dat$.outcome <- y
    out <- cvreg::genet(
      .outcome ~ .,
      data = dat,
      kappa = param$kappa,
      alpha = param$alpha,
      lambda = param$lambda,
      family = family,
      standardize = FALSE
    )
    out$family <- family
    return(out)
  }

  ELASTICNET$fit <- elasticnetFit
  ELASTICNET$prob <- elasticnetFit

  make.predfun <- function(invlink){
   function(modelFit, newdata, preProc = NULL, submodels = NULL){
    newdata <- cbind(.outcome = rep(1, nrow(newdata)), newdata)
    eta <- drop(newdata%*%modelFit$coefficients)
    invlink(eta)
   }
  }

  elasticnetPred <- make.predfun(family$linkinv)
  ELASTICNET$predict <- elasticnetPred

  make.postresamp <- function(family){
    dev.res <- family$dev.res

    function(pred, obs) {
      resfun <- dev.res
      isNA <- is.na(pred)
      pred <- pred[!isNA]
      obs <- obs[!isNA]
      if (length(obs) + length(pred) == 0) {
          out <- rep(NA, 2)
        }
        else {
          devres <- resfun(obs,pred,rep(1,length(obs)))
          mse <- mean(devres^2)
          mae <- mean(abs(devres))
          out <- c(mse, mae)
        }
        names(out) <- c("MSE", "MAE")
      if (any(is.nan(out)))
        out[is.nan(out)] <- NA
      out
    }

  }

  postRobResamp <- make.postresamp(family)

  glmnetSummary = function (data, lev = NULL, model = NULL){
    #if (is.character(data$obs))
     # data$obs <- factor(data$obs, levels = lev)
    postRobResamp(data[, "pred"], data[, "obs"])
  }

    fitControl <- trainControl(method = cv.method,
                               number = nfolds,
                               index = folds,
                               adaptive = list(min = 4, alpha = 0.05,
                                               method = "gls", complete = TRUE),
                               allowParallel = TRUE,
                               savePredictions = "all",
                               summaryFunction = glmnetSummary,
                               search = "grid")

  fitted.models <- train(formula, data,
                         method = ELASTICNET,
                         metric = crit,
                         tuneLength = tunlen,
                         maximize = FALSE,
                         preProcess = c("center", "scale"),
                         trControl = fitControl)

    return(fitted.models)

}
abnormally-distributed/cvreg documentation built on May 3, 2020, 3:45 p.m.