R/stepVIF.R

Defines functions stepVIF.gee stepVIF.glm stepVIF.lm stepVIF

Documented in stepVIF

  #' @title Stepwise model selection using AIC/QIC for lm, glm/geeglm objects.
  #' @param object A lm, glm, or geeglm object. If the object is defined, the scope argument should be a formula.
  #' @param scope Defines the range of models explored in the stepwise search. See details for how to calibrate the scope.
  #' @param direction The direction (mode) of the stepwise model. Only "forward"" direction is supported.
  #' @param VIFThreshold maximum VIF threshold. Default value is 5.
  #' @param parallel If TRUE, run the function in parallel mode.
  #' @param cl.type Cluster type ("FORK" or "PSOCK") in parallel mode. Default value is "PSOCK." See parallel package's documentation for more information.
  #' @param n.cores Number of cores in parallel mode. Default value is the total number of cores (including logical cores) minus 1.
  #' @param verbose If true, prints the steps of the model selection. Default value is FLASE.
  #' @param AUC If true, computes the model's AUC along with AIC/QIC (binary reponse only).
  #' @param ... See lm/glm/gee packages' documentations for additional parameters.
  #' @examples
  #' data("donor.kidney")
  #'
  #' #select distinct donors
  #' donor.kidney.distinct<-donor.kidney[!duplicated(donor.kidney[c("id")]),]
  #' fitLM<-lm(log(creatinine) ~ 1,data=donor.kidney.distinct)
  #' summary(fitLM)
  #'
  #' frm<-as.formula(log(creatinine) ~ log(KDRI) + glomerulosclerosis +race+
  #'                   anti_HCV + on_pump + DCD + laterality +
  #'                   init_pump_resistance + terminal_pump_resistance +
  #'                   init_pump_flow+ diabetes + smoking +
  #'                   blood_type + HBsAg + MI + clinical_infection +
  #'                   anti_HBs + Tattoos + cancer +
  #'                   CMV + anti_HBc + HTLV)
  #'
  #' fitLM<-stepVIF(object=fitLM,scope=frm)
  #'
  #' fitGLM<-with(donor.kidney.distinct, glm(discard ~ 1, family =  binomial(link="logit")))
  #' frm<-as.formula(discard ~ log(KDRI) + log(creatinine)+ glomerulosclerosis +race+
  #'                   anti_HCV + on_pump + DCD + laterality +
  #'                   init_pump_resistance + terminal_pump_resistance +
  #'                   init_pump_flow+ diabetes + smoking +
  #'                   blood_type + HBsAg + MI + clinical_infection +
  #'                   anti_HBs + Tattoos + cancer +
  #'                   CMV + anti_HBc + HTLV)
  #'
  #' fitGLM<-stepVIF(object=fitGLM,scope=frm, AUC= FALSE)
  #' summary(fitGLM)
  #'
  #' library(geepack)
  #' fit.gee<-geepack::geeglm(discard ~ 1, family =  binomial(link="logit"), data=donor.kidney,
  #' corstr="independence", id=id)
  #' fitGEE<-stepVIF(object=fit.gee,scope=frm, AUC = TRUE, verbose = FALSE)
  #' summary(attr(fitGEE,"model")
  #' @export
  stepVIF<-function(object, scope,  direction="forward", VIFThreshold=5, parallel=FALSE, cl.type=NULL, n.cores=NULL, verbose=TRUE, AUC=FALSE, ...){
   if(is.null(object) & !is.null(scope)){
      if (!is.list(scope)){
        stop("When object is null, scope should be a list (paired) of lm, glm, or gee objects")
      }
      fit0<-scope[[1]]
      if(!inherits(fit0,c("lm","glm","gee")) | !inherits(fit0,c("lm","glm","gee"))){
        stop("When input argument 'object' is null, scope  should be a list of class lm, glm, or geeglm object")
      }
      if(!all.equal(fit0$data,fit1$data)){
        stop("Lower and upper models is scope should have identical datasets.")
      }
      if(length(attr(fit0$terms, "term.labels")>0)){
        if (attr(fit0$terms, "term.labels") %in% attr(fit1$terms, "term.labels")){
          stop("All varaibles in fit0 are not in fit1")
        }
      }
    }else if(!is.null(object)){
      fit0<-object
      if(inherits(scope,"formula")){
        fit1<-all.vars(scope)
      }else{
        stop("when input argument 'object' is not null, input argument 'scope' must be a formula.")
      }
    }
    UseMethod("stepVIF",fit0)
    S3method("stepVIF", fit0)
  }
  #' @export
  stepVIF.lm<-function(object, scope, direction="forward", VIFThreshold=5, parallel=FALSE, cl.type=NULL, n.cores=NULL, verbose=FALSE, AUC=FALSE, ...){
    out<-tryCatch({
      if(is.null(object) & !is.null(scope)){
        fit0<-scope[[1]]
        fit1<-scope[[2]]
        if(!all.equal(fit0$data,fit1$data)){
          stop("Lower and upper models is scope should have identical datasets.")
        }
        response<-with(attributes(terms(fit1)), as.character(variables[response+1]))
        vars_all<-attr(fit1$terms, "term.labels")
        vars_init<-attr(fit0$terms, "term.labels")

      }else if(!is.null(object)){
        fit0<-object
        if(inherits(scope,"formula")){
          response<-with(attributes(terms(scope)), as.character(variables[response+1]))
          vars_init<-attr(fit0$terms, "term.labels")
          vars_all<-attr(terms(frm2), "term.labels") [!(attr(terms(frm2), "term.labels") %in% vars_init)]
        }
      }
      df<-eval(fit0$call$data)
      if (is.null(df)){
        stop("data argument should be given.")
      }
      steps<-data.frame(Var=character(), AIC=numeric(), VIF.max=numeric())

      if(parallel==TRUE){
        if(is.null(n.cores)){
          n.cores<-parallel::detectCores(all.tests = FALSE, logical = TRUE)-1
        }else{
          if(n.cores>parallel::detectCores(all.tests = FALSE, logical = TRUE)){
            warning(paste0("n.cores (", n.cores,") is set greater than the machines's available cores (", parallel::detectCores(all.tests = FALSE, logical = TRUE) , "): Using default n.cores(", parallel::detectCores(all.tests = FALSE, logical = TRUE)-1 , ") value."))
            n.cores<-parallel::detectCores(all.tests = FALSE, logical = TRUE)-1
          }
        }
        if (is.null(cl.type)){
          cl.type="PSOCK"
        }
        cl <- parallel::makeCluster(n.cores,type=cl.type)
        if(cl.type=="PSOCK"){
          parallel::clusterExport(cl,
                                  varlist = c(
                                    "df", "response",
                                    "vars_all",
                                    "vars_init"
                                  ),
                                  envir = environment())
        }
        aic<-Reduce("rbind", parallel::parLapply(cl,vars_all, function(x){
          vars_init<-c(vars_init,x)
          frm<-as.formula(paste0(response, "~", paste(vars_init,collapse = "+")))
          fit<-lm(frm, data=df, ...)
          aic<-AIC(fit)

          if(length(vars_init)>1){
            vif.max<-max(car::vif(fit))
            return(data.frame(Var=x, AIC=aic,VIF.max=vif.max))
          }else{
            return(data.frame(Var=x, AIC=aic, VIF.max=0))
          }
        }))
      }else{
        aic<-Reduce(rbind,lapply(vars_all, function(x){
          vars_init<-c(vars_init,x)
          frm<-as.formula(paste0(response, "~", paste(vars_init,collapse = "+")))
          fit<-lm(frm, data=df, ...)
          aic<-AIC(fit)
          if(length(vars_init)>1){
            vif.max<-max(car::vif(fit))
            return(data.frame(Var=x, AIC=aic,VIF.max=vif.max))
          }else{
            return(data.frame(Var=x, AIC=aic, VIF.max=0))
          }
        }))
      }
      aic<-aic[order(aic$AIC),]
      if(verbose){
        print(aic, row.names = F, right = F, quote = F)
        utils::flush.console()
      }
      v<-aic[which.min(aic$AIC),]
      steps<-rbind(steps,v)
      # frm<-as.formula(paste0(response,"~", v$Var))
      # fit<-lm(frm, data=df, ...)
      # mtc<-AIC(fit)
      # chqTest<-anova(fit,test = "Chisq")
      mtc_level<-v$AIC
      vars_init<-c(vars_init,as.character(v$Var))
      vars_all<-vars_all[vars_all !=vars_init]
      vars_final<-vars_init
      vv<-v
      vv$Var<-"<none>"
      vv$VIF.max<-0
      if(verbose){
        cat("\n")
        cat(paste0(response, "~", paste(vars_final, collapse="+")))
        cat("\n\n")
        utils::flush.console()
      }
      for(j in 1:length(vars_all)){
        if(parallel==TRUE){
          aic<-Reduce(rbind,
                      tryCatch(parallel::parLapply(cl,vars_all[!(vars_all %in% vars_final)], function(x){
                      frm<-as.formula(paste(response,"~", paste(append(vars_final,x), collapse="+")))
                      fit<-lm(frm, data=df, ...)
                      aic<-AIC(fit)
                      vif.max<-max(car::vif(fit))
                      rm(fit)
                      return(data.frame(Var=x,AIC=aic,VIF.max=vif.max))
                    })))
        }else{
          aic<-Reduce(rbind,
                      tryCatch(lapply(vars_all[!(vars_all %in% vars_final)], function(x){
                      frm<-as.formula(paste(response,"~", paste(append(vars_final,x), collapse="+")))
                      fit<-lm(frm, data=df, ...)
                      aic<-AIC(fit)
                      vif.max<-max(car::vif(fit))
                      rm(fit)
                      return(data.frame(Var=x,AIC=aic,VIF.max=vif.max))
                    })))
        }
        aic<-rbind(aic, vv)
        print(aic[order(aic$AIC),], row.names=F, right = F, quote=F)
        utils::flush.console()
        v<-aic[aic$AIC==min(aic$AIC[aic$VIF.max<=VIFThreshold],na.rm = T),]
        steps<-rbind(steps,v)
        if (v$AIC < mtc_level){
          vars_final<-c(vars_final,as.character(v$Var))
          if(verbose){
            cat("\n")
            cat(paste0(response, "~", paste(vars_final, collapse="+")))
            cat("\n\n")
            utils::flush.console()
          }
          vars_all<-vars_all[!(vars_all %in% v$Var)]
          vv<-v
          vv$Var<-"<none>"
        # frm<-as.formula(paste0(response,"~", v$Var))
        # fit<-lm(frm, data=df, ...)
        # mtc<-AIC(fit)
        # chqTest<-anova(fit,test = "Chisq")
          mtc_level<-v$AIC
        }else{
          frm<-as.formula(paste(response,"~", paste(vars_final, collapse="+")))
          model<-lm(frm, data=df, ...)
          r<-list()
          r<-structure(r, model=model, stepStats=steps)
          class(r)<-"stepVIF"
          return(r)
        }
      }
    },finally = {
      if(parallel==TRUE){
        parallel::stopCluster(cl)
      }
    })
    return(out)
  }
  #' @export
  stepVIF.glm<-function(object, scope, direction="forward", VIFThreshold=5, parallel=FALSE, cl.type=NULL, n.cores=NULL, verbose=FALSE, AUC=FALSE, ...){
    out<-tryCatch({
      if(is.null(object) & !is.null(scope)){
        fit0<-scope[[1]]
        fit1<-scope[[2]]
        if(!all.equal(fit0$data,fit1$data)){
          stop("Lower and upper models in scope should have identical datasets.")
        }
        response<-with(attributes(terms(fit1)), as.character(variables[response+1]))
        vars_all<-attr(fit1$terms, "term.labels")
        vars_init<-attr(fit0$terms, "term.labels")
      }else if(!is.null(object)){
        fit0<-object
        if(inherits(scope,"formula")){
          response<-with(attributes(terms(scope)), as.character(variables[response+1]))
          vars_all<-attr(terms(frm2), "term.labels") [!(attr(terms(frm2), "term.labels") %in% vars_init)]
          vars_init<-attr(fit0$terms, "term.labels")
        }
      }
      df<-fit0$data
      family<-fit0$family
      y<-fit0$y

      if(AUC==TRUE){
        steps<-data.frame(Var=character(), AIC=numeric(), VIF.max=numeric(), AUC=numeric())
      }else{
        steps<-data.frame(Var=character(), AIC=numeric(), VIF.max=numeric())
      }

      if(parallel==TRUE){
        if(is.null(n.cores)){
          n.cores<-parallel::detectCores(all.tests = FALSE, logical = TRUE)-1
        }else{
          if(n.cores>parallel::detectCores(all.tests = FALSE, logical = TRUE)){
            warning(paste0("n.cores (", n.cores,") is greater than the system's cores (", parallel::detectCores(all.tests = FALSE, logical = TRUE) , "): Using defautl n.cores value."))
            n.cores<-parallel::detectCores(all.tests = FALSE, logical = TRUE)-1
          }
        }
        if (is.null(cl.type)){
          cl.type="PSOCK"
        }
        cl <- parallel::makeCluster(n.cores,type=cl.type)
        if(cl.type=="PSOCK"){
          parallel::clusterExport(cl,
                                  varlist = c(
                                    "df", "family", "y",
                                    "response", "vars_all",
                                    "vars_init"
                                  ),
                                  envir = environment())
        }
        aic<-Reduce("rbind", parallel::parLapply(cl,vars_all, function(x){
          vars_init<-c(vars_init,x)
          frm<-as.formula(paste0(response, "~", paste(vars_init,collapse = "+")))
          fit<-glm(frm, data=df,family = family, ...)
          aic<-fit$aic
          if (AUC){
            model.prob <- predict(fit, newdata =df , type = "response")
            model.pred <- ROCR::prediction(model.prob, y)
            auc<-round(ROCR::performance(model.pred, measure = "auc")@y.values[[1]],digits = 3)
            if(length(vars_init)>1){
              vif.max<-max(car::vif(fit))
              return(data.frame(Var=x, AIC=aic, VIF.max=vif.max, AUC=auc))
            }else{
              return(data.frame(Var=x, AIC=aic, VIF.max=0, AUC=auc))
            }
          }else{
            if(length(vars_init)>1){
              vif.max<-max(car::vif(fit))
              return(data.frame(Var=x, AIC=aic,VIF.max=vif.max))
            }else{
              return(data.frame(Var=x, AIC=aic, VIF.max=0))
            }
          }
        }))
      }else{
        aic<-Reduce(rbind,lapply(vars_all, function(x){
          vars_init<-c(vars_init,x)
          frm<-as.formula(paste0(response, "~", paste(vars_init,collapse = "+")))
          fit<-glm(frm, data=df,family = family, ...)
          aic<-fit$aic
          if (AUC){
            model.prob <- predict(fit, newdata =df , type = "response")
            model.pred <- ROCR::prediction(model.prob, y)
            auc<-round(ROCR::performance(model.pred, measure = "auc")@y.values[[1]],digits = 3)
            if(length(vars_init)>1){
              vif.max<-max(car::vif(fit))
              return(data.frame(Var=x, AIC=aic, VIF.max=vif.max, AUC=auc))
            }else{
              return(data.frame(Var=x, AIC=aic, VIF.max=0, AUC=auc))
            }
          }else{
            if(length(vars_init)>1){
              vif.max<-max(car::vif(fit))
              return(data.frame(Var=x, AIC=aic,VIF.max=vif.max))
            }else{
              return(data.frame(Var=x, AIC=aic, VIF.max=0))
            }
          }
        }))
      }
      aic<-aic[order(aic$AIC),]
      if(verbose){
        print(aic, row.names = F, right = F, quote = F)
        utils::flush.console()
      }
      v<-aic[which.min(aic$AIC),]
      steps<-rbind(steps,v)
      # frm<-as.formula(paste0(response,"~", v$Var))
      # fit<-glm(frm, data=df,family = family, ...)
      # mtc<-fit$aic
      # chqTest<-anova(fit,test = "Chisq")
      mtc_level<-v$AIC
      vars_init<-c(vars_init,as.character(v$Var))
      vars_all<-vars_all[vars_all !=vars_init]
      vars_final<-vars_init
      vv<-v
      vv$Var<-"<none>"
      vv$VIF.max<-0
      if(verbose){
        cat("\n")
        cat(paste0(response, "~", paste(vars_final, collapse="+")))
        cat("\n\n")
        utils::flush.console()
      }
      for(j in 1:length(vars_all)){
        if(parallel==TRUE){
          aic<-Reduce(rbind,
                      tryCatch(parallel::parLapply(cl,vars_all[!(vars_all %in% vars_final)], function(x){
                      frm<-as.formula(paste(response,"~", paste(append(vars_final,x), collapse="+")))
                      fit<-glm(frm, data=df,family = family, ...)
                      aic<-fit$aic
                      vif.max<-max(car::vif(fit))
                      if(AUC){
                        model.prob <- predict(fit, newdata =df , type = "response")
                        model.pred <- ROCR::prediction(model.prob, y)
                        auc<-round(ROCR::performance(model.pred, measure = "auc")@y.values[[1]],digits = 3)
                        rm(fit)
                        return(data.frame(Var=x,AIC=aic,VIF.max=vif.max, AUC=auc))
                      }else{
                        rm(fit)
                        return(data.frame(Var=x,AIC=aic,VIF.max=vif.max))
                      }
                    })))
        }else{
          aic<-Reduce(rbind,
                      tryCatch(lapply(vars_all[!(vars_all %in% vars_final)], function(x){
                      frm<-as.formula(paste(response,"~", paste(append(vars_final,x), collapse="+")))
                      fit<-glm(frm, data=df,family = family, ...)
                      aic<-fit$aic
                      vif.max<-max(car::vif(fit))
                      if(AUC){
                        model.prob <- predict(fit, newdata =df , type = "response")
                        model.pred <- ROCR::prediction(model.prob, y)
                        auc<-round(ROCR::performance(model.pred, measure = "auc")@y.values[[1]],digits = 3)
                        rm(fit)
                        return(data.frame(Var=x,AIC=aic,VIF.max=vif.max, AUC=auc))
                      }else{
                        rm(fit)
                        return(data.frame(Var=x,AIC=aic,VIF.max=vif.max))
                      }
                    })))
        }
        aic<-rbind(aic, vv)
        print(aic[order(aic$AIC),], row.names=F, right = F, quote=F)
        utils::flush.console()
        v<-aic[aic$AIC==min(aic$AIC[aic$VIF.max<=VIFThreshold],na.rm = T),]
        steps<-rbind(steps,v)
        if (v$AIC < mtc_level){
          vars_final<-c(vars_final,as.character(v$Var))
          if(verbose){
            cat("\n")
            cat(paste0(response, "~", paste(vars_final, collapse="+")))
            cat("\n\n")
            utils::flush.console()
          }
          vars_all<-vars_all[!(vars_all %in% v$Var)]
          vv<-v
          vv$Var<-"<none>"
        # frm<-as.formula(paste0(response,"~", v$Var))
        # fit<-glm(frm, data=df,family = family, ...)
        # mtc<-fit$aic
        # chqTest<-anova(fit,test = "Chisq")
          mtc_level<-v$AIC
        }else{
          frm<-as.formula(paste(response,"~", paste(vars_final, collapse="+")))
          model<-glm(frm, data=df,family = family, ...)
          r<-list()
          r<-structure(r, model=model, stepStats=steps)
          class(r)<-"stepVIF"
          return(r)
        }
      }
    },finally = {
      if(parallel==TRUE){
        parallel::stopCluster(cl)
      }
    })
    return(out)
  }
  #' @export
  stepVIF.gee<-function(object, scope, direction="forward", VIFThreshold=5, parallel=FALSE, cl.type=NULL, n.cores=NULL, verbose=FALSE, AUC=FALSE, ...){
    out<-tryCatch({
      if(is.null(object) & !is.null(scope)){
        fit0<-scope[[1]]
        fit1<-scope[[2]]
        if(!all.equal(fit0$data,fit1$data)){
          stop("Lower and upper models is scope should have identical datasets.")
        }
        response<-with(attributes(terms(fit1)), as.character(variables[response+1]))
        vars_all<-attr(fit1$terms, "term.labels")
        vars_init<-attr(fit0$terms, "term.labels")
      }else if(!is.null(object)){
        fit0<-object
        if(inherits(scope,"formula")){
          response<-with(attributes(terms(scope)), as.character(variables[response+1]))
          vars_all<-attr(terms(frm2), "term.labels") [!(attr(terms(frm2), "term.labels") %in% vars_init)]
          vars_init<-attr(fit0$terms, "term.labels")
        }
      }
      df<-fit0$data
      family<-fit0$family
      y<-fit0$y
      corstrx<-fit0$corstr
      idx<-fit0$id

      if(AUC==TRUE){
        steps<-data.frame(Var=character(), QIC=numeric(), VIF.max=numeric(), AUC=numeric())
      }else{
        steps<-data.frame(Var=character(), QIC=numeric(), VIF.max=numeric())
      }
      if(parallel==TRUE){
        if(is.null(n.cores)){
          n.cores<-parallel::detectCores(all.tests = FALSE, logical = TRUE)-1
        }else{
          if(n.cores>parallel::detectCores(all.tests = FALSE, logical = TRUE)){
            warning(paste0("n.cores (", n.cores,") is greater than the system's cores (", parallel::detectCores(all.tests = FALSE, logical = TRUE) , "): Using defautl n.cores value."))
            n.cores<-parallel::detectCores(all.tests = FALSE, logical = TRUE)-1
          }
        }
        if (is.null(cl.type)){
          cl.type="PSOCK"
        }
        cl <- parallel::makeCluster(n.cores,type=cl.type)
        if(cl.type=="PSOCK"){
          parallel::clusterExport(cl,
                                  varlist = c(
                                    "df", "family", "y", "corstrx",
                                    "idx", "response", "vars_all",
                                    "vars_init"
                                  ),
                                  envir = environment())
        }
        qic<-Reduce("rbind", parallel::parLapply(cl,vars_all, function(x){
          vars_init<-c(vars_init,x)
          frm<-as.formula(paste0(response, "~", paste(vars_init,collapse = "+")))
          fit<-geepack::geeglm(frm, data=df,family = family, corstr=corstrx, id=idx)
          qic<-geepack::QIC(fit)[[1]]
          if (AUC){
            model.prob <- predict(fit, newdata =df , type = "response")
            model.pred <- ROCR::prediction(model.prob, y)
            auc<-round(ROCR::performance(model.pred, measure = "auc")@y.values[[1]],digits = 3)
            if(length(vars_init)>1){
              vif.max<-max(car::vif(fit))
              return(data.frame(Var=x, QIC=qic, VIF.max=vif.max, AUC=auc))
            }else{
              return(data.frame(Var=x, QIC=qic, VIF.max=0, AUC=auc))
            }
          }else{
            if(length(vars_init)>1){
              vif.max<-max(car::vif(fit))
              return(data.frame(Var=x, QIC=qic,VIF.max=vif.max))
            }else{
              return(data.frame(Var=x, QIC=qic, VIF.max=0))
            }
          }
        }))
      }else{
        qic<-Reduce(rbind,lapply(vars_all, function(x){
          vars_init<-c(vars_init,x)
          frm<-as.formula(paste0(response, "~", paste(vars_init,collapse = "+")))
          fit<-geepack::geeglm(frm, data=df,family = family, corstr=corstrx, id=idx)
          qic<-geepack::QIC(fit)[[1]]
          if (AUC){
            model.prob <- predict(fit, newdata =df , type = "response")
            model.pred <- ROCR::prediction(model.prob, y)
            auc<-round(ROCR::performance(model.pred, measure = "auc")@y.values[[1]],digits = 3)
            if(length(vars_init)>1){
              vif.max<-max(car::vif(fit))
              rm(fit)
              return(data.frame(Var=x,QIC=qic,VIF.max=vif.max, AUC=auc))
            }else{
              rm(fit)
              return(data.frame(Var=x,QIC=qic, VIF.max=0, AUC=auc))
            }
          }else{
            if(length(vars_init)>1){
              vif.max<-max(car::vif(fit))
              rm(fit)
              return(data.frame(Var=x, QIC=qic, VIF.max=vif.max))
            }else{
              rm(fit)
              return(data.frame(Var=x, QIC=qic, VIF.max=0))
            }
          }
        }))
      }
      qic<-qic[order(qic$QIC),]
      if(verbose){
        print(qic, row.names = F, right = F, quote = F)
        utils::flush.console()
      }
      v<-qic[which.min(qic$QIC),]
      steps<-rbind(steps,v)
      # frm<-as.formula(paste0(response,"~", v$Var))
      # fit<-geepack::geeglm(frm, data=df,family = family, corstr=corstrx, id=idx, ...)
      # qic<-geepack::QIC(fit)[[1]]
      # waldTest<-anova(fit,test = "Wald")
      mtc_level<-v$QIC
      vars_init<-c(vars_init,as.character(v$Var))
      vars_all<-vars_all[vars_all !=vars_init]
      vars_final<-vars_init
      vv<-v
      vv$Var<-"<none>"
      vv$VIF.max<-0
      if(verbose){
        cat("\n")
        cat(paste0(response, "~", paste(vars_final, collapse="+")))
        cat("\n\n")
        utils::flush.console()
      }
      for(j in 1:length(vars_all)){
        if(parallel==TRUE){
          qic<-Reduce(rbind,
                      tryCatch(parallel::parLapply(cl,vars_all[!(vars_all %in% vars_final)], function(x){
                      frm<-as.formula(paste(response,"~", paste(append(vars_final,x), collapse="+")))
                      fit<-geepack::geeglm(frm, data=df,family = family, corstr=corstrx, id=idx, ...)
                      qic<-geepack::QIC(fit)[[1]]
                      vif.max<-max(car::vif(fit))
                      if(AUC){
                        model.prob <- predict(fit, newdata =df , type = "response")
                        model.pred <- ROCR::prediction(model.prob, y)
                        auc<-round(ROCR::performance(model.pred, measure = "auc")@y.values[[1]],digits = 3)
                        rm(fit)
                        return(data.frame(Var=x,QIC=qic,VIF.max=vif.max, AUC=auc))
                      }else{
                        rm(fit)
                        return(data.frame(Var=x,QIC=qic,VIF.max=vif.max))
                      }
                    })))
        }else{
          qic<-Reduce(rbind,
                      tryCatch(lapply(vars_all[!(vars_all %in% vars_final)], function(x){
                      frm<-as.formula(paste(response,"~", paste(append(vars_final,x), collapse="+")))
                      fit<-geepack::geeglm(frm, data=df,family = family, corstr=corstrx, id=idx, ...)
                      qic<-geepack::QIC(fit)[[1]]
                      vif.max<-max(car::vif(fit))
                      if(AUC){
                        model.prob <- predict(fit, newdata =df , type = "response")
                        model.pred <- ROCR::prediction(model.prob, y)
                        auc<-round(ROCR::performance(model.pred, measure = "auc")@y.values[[1]],digits = 3)
                        rm(fit)
                        return(data.frame(Var=x,QIC=qic,VIF.max=vif.max, AUC=auc))
                      }else{
                        rm(fit)
                        return(data.frame(Var=x,QIC=qic,VIF.max=vif.max))
                      }
                    })))
        }
        qic<-rbind(qic, vv)
        print(qic[order(qic$QIC),], row.names=F, right = F, quote=F)
        utils::flush.console()
        v<-qic[qic$QIC==min(qic$QIC[qic$VIF.max<=VIFThreshold],na.rm = T),]
        steps<-rbind(steps,v)
        if (v$QIC < mtc_level){
          vars_final<-c(vars_final,as.character(v$Var))
          if(verbose){
            cat("\n")
            cat(paste0(response, "~", paste(vars_final, collapse="+")))
            cat("\n\n")
            utils::flush.console()
          }
          vars_all<-vars_all[!(vars_all %in% v$Var)]
          vv<-v
          vv$Var<-"<none>"
          # frm<-as.formula(paste(response,"~", paste(vars_final, collapse="+")))
          # fit<-geepack::geeglm(frm, data=df,family = family, corstr=corstrx, id=idx, ...)
          # qic<-geepack::QIC(fit)[[1]]
          # waldTest<-anova(fit,test = "Wald")
          mtc_level<-v$QIC
        }else{
          frm<-as.formula(paste(response,"~", paste(vars_final, collapse="+")))
          model<-geepack::geeglm(frm, data=df,family = family, corstr=corstrx, id=idx, ...)
          r<-list()
          r<-structure(r, model=model, stepStats=steps)
          class(r)<-"stepVIF"
          return(r)
        }
      }
    },finally = {
      if(parallel==TRUE){
        parallel::stopCluster(cl)
      }
    })
    return(out)
  }
mbarah/stepVIF documentation built on Feb. 19, 2021, 8:06 p.m.