#' @title fit a GLM/LM with lasso or elasticnet regularization when VIF is also a metric of interest
#' @param object An lm or glm object.
#' @param cv.lambda c("min","1se") the tuning factor for lambda in glmnet cross validation. Default value is "min". See glmnet
#' documentation for more information.
#' @param VIFThreshold maximum VIF threshold. Default value is 5.
#' @param verbose If true, prints the steps of the model selection. Default value is FALSE.
#' @param ... See glmnet package's documentation for additional parameters.
#' @examples
#' data("donor.kidney")
#' #select distinct donors
#' donor.kidney.distinct<-donor.kidney[!duplicated(donor.kidney[c("id")]),]
#'
#' fitLM<-with(donor.kidney, lm(log(creatinine) ~ log(KDRI) + glomerulosclerosis +race+
#' anti_HCV + on_pump + DCD + laterality +
#' init_pump_resistance + terminal_pump_resistance +
#' init_pump_flow+ diabetes + smoking +
#' blood_type + HBsAg + MI + clinical_infection +
#' anti_HBs + Tattoos + cancer +
#' CMV + anti_HBc + HTLV))
#' fitLM<-glmnetVIF(object=fitLM)
#'
#' fitGLM<-glm(discard ~ log(KDRI) + log(creatinine)+ glomerulosclerosis +race+
#' anti_HCV + on_pump + DCD + laterality +
#' init_pump_resistance + terminal_pump_resistance +
#' init_pump_flow+ diabetes + smoking +
#' blood_type + HBsAg + MI + clinical_infection +
#' anti_HBs + Tattoos + cancer +
#' CMV + anti_HBc + HTLV,family = binomial(link="logit"),data=donor.kidney)
#' summary(fitGLM)
#'
#' fitGLM<-glmnetVIF(object=fitGLM,VIFThreshold = 5, verbose = TRUE)
#' summary.glm(fitGLM)
#' @export
glmnetVIF<-function(object, VIFThreshold=5.0, cv.lambda="min", verbose=FALSE, ...){
VIFglmnet<-function (x, ...){
v <- cov(x)
terms <- colnames(x)
n.terms <- length(terms)
if (n.terms < 2)
stop("model contains fewer than 2 terms")
R <- cov2cor(v)
detR <- det(R)
result <- matrix(0, n.terms, 3)
rownames(result) <- terms
colnames(result) <- c("GVIF", "Df", "GVIF^(1/(2*Df))")
for (term in terms) {
subs <- which(terms == term)
result[term, 1] <- det(as.matrix(R[subs, subs])) * det(as.matrix(R[-subs,
-subs]))/detR
result[term, 2] <- length(subs)
}
if (all(result[, 2] == 1)){
result <- result[, 1]}
else {result[, 3] <- result[, 1]^(1/(2 * result[, 2]))}
result
}
UseMethod("glmnetVIF",object)
}
#' @export
glmnetVIF.lm<-function(object, VIFThreshold=5.0, cv.lambda="min", verbose=FALSE, ...){
response<-with(attributes(terms(object)), as.character(variables[response+1]))
vars_all<-attr(object$terms, "term.labels")
df<-object$model
if(is.null(df)){
stop("data argument should be given.")
}
y<-object$model[,response]
x<-model.matrix(df)[,-1]
exit=F
while (!exit) {
fit.glmnet<-glmnet::cv.glmnet(x,y=y,...)
if(cv.lambda=="min"){
coefs<-coef(fit.glmnet, s = fit.glmnet$lambda.min)
}else{
coefs<-coef(fit.glmnet, s = fit.glmnet$lambda.1se)
}
vars<-row.names(coefs)[as.numeric(coefs)!=0][-1]
vif<-VIFglmnet(x[,vars])
if (verbose==TRUE){
df<-data.frame(VAR=names(vif), VIF=vif)
print(df[order(-df$VIF),],quote=F,row.names = F, right = F)
cat("\n")
}
if (max(vif)>VIFThreshold){
vars<-vif[vif!=max(vif)]
x<-x[,names(vars)]
}else{
exit=T
}
}
return(lm.fit(x,y=y))
}
#' @export
glmnetVIF.glm<-function(object, VIFThreshold=5.0, cv.lambda="min", verbose=FALSE, ...){
VIFglmnet<-function (x, ...){
v <- cov(x)
terms <- colnames(x)
n.terms <- length(terms)
if (n.terms < 2)
stop("model contains fewer than 2 terms")
R <- cov2cor(v)
detR <- det(R)
result <- matrix(0, n.terms, 3)
rownames(result) <- terms
colnames(result) <- c("GVIF", "Df", "GVIF^(1/(2*Df))")
for (term in terms) {
subs <- which(terms == term)
result[term, 1] <- det(as.matrix(R[subs, subs])) * det(as.matrix(R[-subs,
-subs]))/detR
result[term, 2] <- length(subs)
}
if (all(result[, 2] == 1)){
result <- result[, 1]}
else {result[, 3] <- result[, 1]^(1/(2 * result[, 2]))}
result
}
response<-with(attributes(terms(object)), as.character(variables[response+1]))
vars_all<-attr(object$terms, "term.labels")
family<-object$family
df<-object$data
if(is.null(df)){
stop("data argument should be specified in the model.")
}
y<-object$y
x<-with(df,model.matrix(as.formula(paste0(response,"~",paste(vars_all,collapse = '+')))))[,-1]
exit=F
while (!exit) {
fit.glmnet<-glmnet::cv.glmnet(x=x,y=y,family=family[[1]], ...)
if(cv.lambda=="min"){
coefs<-coef(fit.glmnet, s = fit.glmnet$lambda.min)
}else{
coefs<-coef(fit.glmnet, s = fit.glmnet$lambda.1se)
}
vars<-row.names(coefs)[as.numeric(coefs)!=0][-1]
vif<-VIFglmnet(x[,vars])
if (verbose==TRUE){
df<-data.frame(VAR=names(vif), VIF=vif)
print(df[order(-df$VIF),],quote=F,row.names = F, right = F)
cat("\n")
}
if (max(vif)>VIFThreshold){
vars<-vif[vif!=max(vif)]
x<-x[,names(vars)]
}else{
exit=T
}
}
return(glm.fit(x,y=y,family = family))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.