R/summaryBranchGLMVS.R
In BranchGLM: Efficient Branch and Bound Variable Selection for GLMs using 'RcppArmadillo'

Documented in fit fit.summary.BranchGLMVS plot.summary.BranchGLMVS print.summary.BranchGLMVS summary.BranchGLMVS

#' Summary Method for BranchGLMVS
#' @param object a \code{BranchGLMVS} object.
#' @param ... further arguments passed to other methods.
#' @return An object of class \code{summary.BranchGLMVS} which is a list with the 
#' following components
#' \item{\code{results}}{ a data frame which has the metric values for the best models along 
#' with the variables included in each model}
#' \item{\code{initmodel}}{ the initial \code{BranchGLM} object that was supplied to the 
#' \code{VariableSelection} function}
#' \item{\code{formulas}}{ a list containing the formulas of the best models}
#' \item{\code{metric}}{ the metric used to perform variable selection}
#' @examples
#' 
#' Data <- iris
#' Fit <- BranchGLM(Sepal.Length ~ ., data = Data, family = "gaussian", link = "identity")
#' 
#' # Doing branch and bound selection 
#' VS <- VariableSelection(Fit, type = "branch and bound", metric = "BIC", 
#' bestmodels = 10, showprogress = FALSE)
#' VS
#' 
#' ## Getting summary of the process
#' Summ <- summary(VS)
#' Summ
#' 
#' ## Plotting the BIC of the best models
#' plot(Summ, type = "b")
#' 
#' ## Plotting the variables in the best models
#' plot(Summ, ptype = "variables")
#' 
#' ## Getting the best model according to BIC
#' FinalModel <- fit(Summ, which = 1)
#' FinalModel
#'  
#' @export

summary.BranchGLMVS <- function(object, ...){
  
  # Getting whether each variables is included in each model
  BestModels <- t(object$bestmodels)
  BestModels[BestModels == -1] <- "kept"
  BestModels[BestModels == 0] <- "no"
  BestModels[BestModels == 1] <- "yes"
    
  # Creating data frame with results
  df <- data.frame(BestModels, object$bestmetrics)
  colnames(df) <- c(object$names, object$metric)
  
  # Creating formulas for each model
  Models <- object$bestmodels
  if(!is.matrix(Models)){
    # if Models is a vector, then change it to a matrix
    Models <- matrix(Models, ncol = 1)
  }
  
  # Generating formulas for each of the best models
  formulas <- apply(Models, 2, FUN = function(x){
    tempnames <- object$names[x != 0]
    tempnames <- tempnames[which(tempnames != "(Intercept)")]
    if(length(tempnames) > 0){
      MyFormula <- as.formula(paste0(object$initmodel$yname, " ~ ", paste0(tempnames, collapse = "+")))
      if(!("(Intercept)" %in% object$names[x != 0])){
        MyFormula <- deparse1(MyFormula) |>
          paste0(" - 1") |>
          as.formula()
      }
    }else{
      # We can do this since we only include non-null models in bestmodels
      MyFormula <- formula(paste0(object$initmodel$yname, " ~ 1")) 
    }
    MyFormula
  }
  )
  MyList <- list("results" = df, 
                 "initmodel" = object$initmodel, 
                 "formulas" = formulas, 
                 "metric" = object$metric)
  
  return(structure(MyList, class = "summary.BranchGLMVS"))
}

#' Fits GLMs for summary.BranchGLMVS and BranchGLMVS objects
#' @name fit
#' @param object a \code{summary.BranchGLMVS} or \code{BranchGLMVS} object.
#' @param which a positive integer indicating which model to fit, 
#' the default is to fit the first model .
#' @param keepData Whether or not to store a copy of data and design matrix, the default 
#' is TRUE. If this is FALSE, then the results from this cannot be used inside of \code{VariableSelection}.
#' @param keepY Whether or not to store a copy of y, the default is TRUE. If 
#' this is FALSE, then the binomial GLM helper functions may not work and this 
#' cannot be used inside of \code{VariableSelection}.
#' @param useNA Whether or not to use observations that had missing values in the 
#' full model, but not for this specific model. The default is FALSE.
#' @param ... further arguments passed to other methods.
#' @details The information needed to fit the GLM is taken from the original information 
#' supplied to the \code{VariableSelection} function.
#' 
#' The fitted models do not have standard errors or p-values since these are 
#' biased due to the selection process.
#' @examples
#' Data <- iris
#' Fit <- BranchGLM(Sepal.Length ~ ., data = Data, family = "gaussian", link = "identity")
#' 
#' # Doing branch and bound selection 
#' VS <- VariableSelection(Fit, type = "branch and bound", metric = "BIC", 
#' bestmodels = 10, showprogress = FALSE)
#' 
#' ## Getting summary of the process
#' Summ <- summary(VS)
#' 
#' ## Getting the best model according to BIC
#' FinalModel <- fit(Summ, which = 1)
#' FinalModel
#' 
#' ## Getting the 8th best model according to BIC
#' EighthModel <- fit(Summ, which = 8)
#' EighthModel
#' 
#' @return An object of class \link{BranchGLM}. 
#' @export
#' 
fit <- function(object, ...) {
  UseMethod("fit")
}

#' @rdname fit 
#' @export
fit.summary.BranchGLMVS <- function(object, which = 1, keepData = TRUE, keepY = TRUE, 
                                    useNA = FALSE, ...){
  if(!is.numeric(which) || which < 0 || which > length(object$formulas) || 
     which != as.integer(which)){
    stop("which must be a positive integer denoting the rank of the model to fit")
  }
  FinalModel <- BranchGLM(object$formulas[[which]], data = object$initmodel$mf, 
                          family = object$initmodel$family, link = object$initmodel$link, 
                          offset = object$initmodel$offset,
                          method = object$initmodel$method, 
                          tol = object$initmodel$tol, maxit = object$initmodel$maxit, 
                          keepData = keepData, keepY = keepY)
  
  # Removing standard errors and p-values along with vcov
  FinalModel$coefficients[, 2:4] <- NA
  FinalModel$vcov <- NA
  FinalModel$numobs <- object$initmodel$numobs
  FinalModel$missing <- object$initmodel$missing
  return(FinalModel)
}

#' Print Method for summary.BranchGLMVS
#' @param x a \code{summary.BranchGLMVS} object.
#' @param digits number of digits to display for information in the table.
#' @param ... further arguments passed to other methods.
#' @return The supplied \code{summary.BranchGLMVS} object.
#' @export

print.summary.BranchGLMVS <- function(x, digits = 4, ...){
  temp <- x$results
  temp[, ncol(temp)] <- round(temp[ncol(temp)], digits = digits)
  print(temp)
  return(invisible(x))
}

#' Plot Method for summary.BranchGLMVS and BranchGLMVS objects
#' @param x a \code{summary.BranchGLMVS} or \code{BranchGLMVS} object.
#' @param ptype the type of plot to produce, look at details for more explanation.
#' @param marnames value used to determine how large to make margin of axis with variable 
#' names, this is only for the "variables" plot. If variable names are cut-off, 
#' consider increasing this from the default value of 7.
#' @param addLines logical value to indicate whether or not to add black lines to 
#' separate the models for the "variables" plot. This is typically useful for smaller 
#' amounts of models, but can be annoying if there are many models.
#' @param type what type of plot to draw for the "metrics" plot, see more details at \link{plot.default}. 
#' @param horiz whether models should be displayed horizontally or vertically in the "variables" plot.
#' @param cex.names how big to make variable names in the "variables" plot.
#' @param cex.lab how big to make axis labels.
#' @param cex.axis how big to make axis annotation.
#' @param cex.legend how big to make legend labels.
#' @param ... arguments passed to the generic plot and image methods.
#' @details The different values for ptype are as follows
#' \itemize{
#'  \item "metrics" for a plot that displays the metric values ordered by rank
#'  \item "variables" for a plot that displays which variables are in each of the top models
#'  \item "both" for both plots
#' }
#' @examples
#' Data <- iris
#' Fit <- BranchGLM(Sepal.Length ~ ., data = Data, family = "gaussian", link = "identity")
#' 
#' # Doing branch and bound selection 
#' VS <- VariableSelection(Fit, type = "branch and bound", metric = "BIC", bestmodels = 10, 
#' showprogress = FALSE)
#' VS
#' 
#' ## Getting summary of the process
#' Summ <- summary(VS)
#' Summ
#' 
#' ## Plotting the BIC of the best models
#' plot(Summ, type = "b")
#' 
#' @return This only produces plots, nothing is returned.
#' @export

plot.summary.BranchGLMVS <- function(x, ptype = "both", marnames = 7, addLines = TRUE, 
                                     type = "b", horiz = FALSE,
                                     cex.names = 1, cex.lab = 1, 
                                     cex.axis = 1, cex.legend = 1,
                                     ...){
  if(!ptype %in% c("metrics", "both", "variables")){
    stop("supplied ptype is not supported")
  }
  if(ptype %in% c("metrics", "both")){
    plot(1:nrow(x$results), x$results[, ncol(x$results)], 
         xlab = "Rank", ylab = x$metric, 
         main = paste0("Best Models Ranked by ", x$metric),
         type = type, cex.lab = cex.lab, cex.axis = cex.axis,
         ...)
  }
  if(ptype %in% c("variables", "both") && !horiz){
    # This is inspired by the plot.regsubsets function
    n <- length(x$formulas)
    Names <- colnames(x$results)[-(ncol(x$results))]
    z <- x$results[, -(ncol(x$results))]
    z[z == "kept"] <- 2
    z[z == "no"] <- 1
    z[z == "yes"] <- 0
    z <- apply(z, 2, as.numeric)
    if(!is.matrix(z)){
      z <- matrix(z, ncol = length(z))
    }
    y <- 1:ncol(z)
    x1 <- 1:nrow(z)
    
    # Creating image  
    oldmar <- par("mar")
    on.exit(par(mar = oldmar))
    par(mar = c(5, marnames, 3, 6) + 0.1)
    
    if(all(z != 2)){
      # Do this if there were no variable kept
      image(x1, y, z, ylab = "", 
            xaxt = "n", yaxt = "n", xlab = "", 
            main = paste0("Best Models Ranked by ", x$metric), 
            col = c("deepskyblue", "indianred"), ...)
      legend(grconvertX(1, from = "npc"), grconvertY(1, from = "npc"), 
             legend = c("Included", "Excluded"), 
             fill = c("deepskyblue", "indianred"), 
             xpd = TRUE, cex = cex.legend)
    }else{
      # Do this if there were any kept variables
      image(x1, y, z, ylab = "", 
            xaxt = "n", yaxt = "n", xlab = "", 
            main = paste0("Best Models Ranked by ", x$metric), 
            col = c("deepskyblue", "indianred", "forestgreen"), ...)
      legend(grconvertX(1, from = "npc"), grconvertY(1, from = "npc"), 
             legend = c("Included", "Excluded", "Kept"), 
             fill = c("deepskyblue", "indianred", "forestgreen"), 
             xpd = TRUE, cex = cex.legend)
    }
    
    # Adding lines
    if(addLines){
      abline(h = y + 0.5, v = x1 + 0.5)
    }else{
      abline(h = y + 0.5)
    }
    
    # Adding axis labels
    axis(1, at = x1, labels = x1, line = 1, las = 1, cex.axis = cex.axis)
    axis(2, at = y, labels = Names, line = 1, las = 2, cex.axis = cex.names)
    
    # Adding y-axis title, this is used to avoid overlapping of axis title and labels
    mtext(paste0("Rank According to ", x$metric), side = 1, line = 4, cex = cex.lab)
    
  }else if(ptype %in% c("variables", "both") && horiz){
    # This is inspired by the plot.regsubsets function
    n <- length(x$formulas)
    Names <- colnames(x$results)[-(ncol(x$results))]
    z <- x$results[, -(ncol(x$results))]
    z[z == "kept"] <- 2
    z[z == "no"] <- 1
    z[z == "yes"] <- 0
    z <- apply(z, 2, as.numeric)
    if(is.matrix(z)){
      z <- t(z)
    }else{
      z <- matrix(z, nrow = length(z))
    }
    y <- 1:ncol(z)
    x1 <- 1:nrow(z)
    
    # Creating image  
    oldmar <- par("mar")
    on.exit(par(mar = oldmar))
    par(mar = c(marnames, 5, 3, 6) + 0.1)
    
    if(all(z != 2)){
      # Do this if there were no variable kept
      image(x1, y, z, ylab = "", 
            xaxt = "n", yaxt = "n", xlab = "", 
            main = paste0("Best Models Ranked by ", x$metric), 
            col = c("deepskyblue", "indianred"), ...)
      legend(grconvertX(1, from = "npc"), grconvertY(1, from = "npc"), 
             legend = c("Included", "Excluded"), 
             fill = c("deepskyblue", "indianred"), 
             xpd = TRUE, cex = cex.legend)
    }else{
      # Do this if there were any kept variables
      image(x1, y, z, ylab = "", 
            xaxt = "n", yaxt = "n", xlab = "", 
            main = paste0("Best Models Ranked by ", x$metric), 
            col = c("deepskyblue", "indianred", "forestgreen"), ...)
      legend(grconvertX(1, from = "npc"), grconvertY(1, from = "npc"), 
             legend = c("Included", "Excluded", "Kept"), 
             fill = c("deepskyblue", "indianred", "forestgreen"),
             xpd = TRUE, cex = cex.legend)
    }
    
    # Adding lines
    if(addLines){
      abline(v = x1 + 0.5, h = y + 0.5)
    }else{
      abline(v = x1 + 0.5)
    }
    
    # Adding axis labels
    axis(1, at = x1, labels = Names, line = 1, las = 2, cex.axis = cex.names)
    axis(2, at = y, labels = y, line = 1, las = 2, cex.axis = cex.axis)
    
    # Adding y-axis title, this is used to avoid overlapping of axis title and labels
    mtext(paste0("Rank According to ", x$metric), side = 2, line = 4, cex = cex.lab)
    
  }
}

Any scripts or data that you put into this service are public.

BranchGLM documentation built on Aug. 31, 2023, 5:17 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

BranchGLM
Efficient Branch and Bound Variable Selection for GLMs using 'RcppArmadillo'

R/summaryBranchGLMVS.R
In BranchGLM: Efficient Branch and Bound Variable Selection for GLMs using 'RcppArmadillo'

Defines functions plot.summary.BranchGLMVS print.summary.BranchGLMVS fit.summary.BranchGLMVS fit summary.BranchGLMVS

Documented in fit fit.summary.BranchGLMVS plot.summary.BranchGLMVS print.summary.BranchGLMVS summary.BranchGLMVS

Try the BranchGLM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

BranchGLM Efficient Branch and Bound Variable Selection for GLMs using 'RcppArmadillo'

R/summaryBranchGLMVS.R In BranchGLM: Efficient Branch and Bound Variable Selection for GLMs using 'RcppArmadillo'

Defines functions plot.summary.BranchGLMVS print.summary.BranchGLMVS fit.summary.BranchGLMVS fit summary.BranchGLMVS

Documented in fit fit.summary.BranchGLMVS plot.summary.BranchGLMVS print.summary.BranchGLMVS summary.BranchGLMVS

Try the BranchGLM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

BranchGLM
Efficient Branch and Bound Variable Selection for GLMs using 'RcppArmadillo'

R/summaryBranchGLMVS.R
In BranchGLM: Efficient Branch and Bound Variable Selection for GLMs using 'RcppArmadillo'