R/glmnetVIF.R

Defines functions glmnetVIF.glm glmnetVIF.lm glmnetVIF

Documented in glmnetVIF

#' @title fit a GLM/LM with lasso or elasticnet regularization when VIF is also a metric of interest
#' @param object An lm or glm object.
#' @param cv.lambda c("min","1se") the tuning factor for lambda in glmnet cross validation. Default value is "min". See glmnet
#' documentation for more information.
#' @param VIFThreshold maximum VIF threshold. Default value is 5.
#' @param verbose If true, prints the steps of the model selection. Default value is FALSE.
#' @param ... See glmnet package's documentation for additional parameters.
#' @examples
#' data("donor.kidney")
#' #select distinct donors
#' donor.kidney.distinct<-donor.kidney[!duplicated(donor.kidney[c("id")]),]
#'
#' fitLM<-with(donor.kidney, lm(log(creatinine) ~ log(KDRI) + glomerulosclerosis +race+
#'                   anti_HCV + on_pump + DCD + laterality +
#'                   init_pump_resistance + terminal_pump_resistance +
#'                   init_pump_flow+ diabetes + smoking +
#'                   blood_type + HBsAg + MI + clinical_infection +
#'                   anti_HBs + Tattoos + cancer +
#'                   CMV + anti_HBc + HTLV))
#' fitLM<-glmnetVIF(object=fitLM)
#'
#' fitGLM<-glm(discard ~ log(KDRI) + log(creatinine)+ glomerulosclerosis +race+
#'                   anti_HCV + on_pump + DCD + laterality +
#'                   init_pump_resistance + terminal_pump_resistance +
#'                   init_pump_flow+ diabetes + smoking +
#'                   blood_type + HBsAg + MI + clinical_infection +
#'                   anti_HBs + Tattoos + cancer +
#'                   CMV + anti_HBc + HTLV,family =  binomial(link="logit"),data=donor.kidney)
#' summary(fitGLM)
#'
#' fitGLM<-glmnetVIF(object=fitGLM,VIFThreshold = 5, verbose = TRUE)
#' summary.glm(fitGLM)
#' @export
glmnetVIF<-function(object, VIFThreshold=5.0, cv.lambda="min", verbose=FALSE, ...){
  VIFglmnet<-function (x, ...){
    v <- cov(x)
    terms <- colnames(x)
    n.terms <- length(terms)
    if (n.terms < 2)
      stop("model contains fewer than 2 terms")
    R <- cov2cor(v)
    detR <- det(R)
    result <- matrix(0, n.terms, 3)
    rownames(result) <- terms
    colnames(result) <- c("GVIF", "Df", "GVIF^(1/(2*Df))")
    for (term in terms) {
      subs <- which(terms == term)
      result[term, 1] <- det(as.matrix(R[subs, subs])) * det(as.matrix(R[-subs,
                                                                         -subs]))/detR
      result[term, 2] <- length(subs)
    }
    if (all(result[, 2] == 1)){
      result <- result[, 1]}
    else {result[, 3] <- result[, 1]^(1/(2 * result[, 2]))}
    result
  }
  UseMethod("glmnetVIF",object)
}

#' @export
glmnetVIF.lm<-function(object, VIFThreshold=5.0, cv.lambda="min", verbose=FALSE, ...){
  response<-with(attributes(terms(object)), as.character(variables[response+1]))
  vars_all<-attr(object$terms, "term.labels")
  df<-object$model
  if(is.null(df)){
    stop("data argument should be given.")
  }
  y<-object$model[,response]
  x<-model.matrix(df)[,-1]
  exit=F
  while (!exit) {
    fit.glmnet<-glmnet::cv.glmnet(x,y=y,...)
    if(cv.lambda=="min"){
      coefs<-coef(fit.glmnet, s = fit.glmnet$lambda.min)
    }else{
      coefs<-coef(fit.glmnet, s = fit.glmnet$lambda.1se)
    }

    vars<-row.names(coefs)[as.numeric(coefs)!=0][-1]
    vif<-VIFglmnet(x[,vars])
    if (verbose==TRUE){
      df<-data.frame(VAR=names(vif), VIF=vif)
      print(df[order(-df$VIF),],quote=F,row.names = F, right = F)
      cat("\n")
    }
    if (max(vif)>VIFThreshold){
      vars<-vif[vif!=max(vif)]
      x<-x[,names(vars)]
    }else{
      exit=T
    }
  }
  return(lm.fit(x,y=y))
}
#' @export
glmnetVIF.glm<-function(object, VIFThreshold=5.0, cv.lambda="min", verbose=FALSE, ...){
  VIFglmnet<-function (x, ...){
    v <- cov(x)
    terms <- colnames(x)
    n.terms <- length(terms)
    if (n.terms < 2)
      stop("model contains fewer than 2 terms")
    R <- cov2cor(v)
    detR <- det(R)
    result <- matrix(0, n.terms, 3)
    rownames(result) <- terms
    colnames(result) <- c("GVIF", "Df", "GVIF^(1/(2*Df))")
    for (term in terms) {
      subs <- which(terms == term)
      result[term, 1] <- det(as.matrix(R[subs, subs])) * det(as.matrix(R[-subs,
                                                                         -subs]))/detR
      result[term, 2] <- length(subs)
    }
    if (all(result[, 2] == 1)){
      result <- result[, 1]}
    else {result[, 3] <- result[, 1]^(1/(2 * result[, 2]))}
    result
  }
  response<-with(attributes(terms(object)), as.character(variables[response+1]))
  vars_all<-attr(object$terms, "term.labels")
  family<-object$family
  df<-object$data
  if(is.null(df)){
    stop("data argument should be specified in the model.")
  }
  y<-object$y
  x<-with(df,model.matrix(as.formula(paste0(response,"~",paste(vars_all,collapse = '+')))))[,-1]
  exit=F
  while (!exit) {
    fit.glmnet<-glmnet::cv.glmnet(x=x,y=y,family=family[[1]], ...)
    if(cv.lambda=="min"){
      coefs<-coef(fit.glmnet, s = fit.glmnet$lambda.min)
    }else{
      coefs<-coef(fit.glmnet, s = fit.glmnet$lambda.1se)
    }
    vars<-row.names(coefs)[as.numeric(coefs)!=0][-1]
    vif<-VIFglmnet(x[,vars])
    if (verbose==TRUE){
      df<-data.frame(VAR=names(vif), VIF=vif)
      print(df[order(-df$VIF),],quote=F,row.names = F, right = F)
      cat("\n")
    }
    if (max(vif)>VIFThreshold){
      vars<-vif[vif!=max(vif)]
      x<-x[,names(vars)]
    }else{
      exit=T
    }
  }
  return(glm.fit(x,y=y,family = family))
}
mbarah/stepVIF documentation built on Feb. 19, 2021, 8:06 p.m.