R/surr_rsq_rank.R

Defines functions print.surr_rsq_rank surr_rsq_rank

Documented in print.surr_rsq_rank surr_rsq_rank

#' The contribution of each variable in the final model
#'
#' @description This function calculates reduction of the surrogate R-squared goodness-of-fit of each variable to measure their relative explanatory power. This function creates a table containing the reductions of surrogate R-squared by removing each one of variables in the model.
#' @param object A object of class `"surr_rsq"` that is generated by the function `"surr_rsq"`.
#' It contains the following components: `surr_rsq`, `reduced_model`, `full_model`, and `data`.
#' @param avg.num The number of replication for the averaging of surrogate R-square.
#' @param var.set A list that contains a few sets. Each component of these sets represents the variables
#' that you want to examine for the contribution of goodness of fit. Then, for one component of this list,
#' a model will fit by removing the specified variables.
#' @param ... Additional optional arguments.
#'
#' @importFrom PAsso surrogate
#' @importFrom MASS polr
#' @importFrom scales percent
#'
#' @return The default return is a list that contains the contribution of Surrogate R-squared for each
#' variable in the `full_model`. If the `var.set` is specified, the return is a list of the
#' contribution of the groups of variables in the `var.set`.
#' @examples
#' data("WhiteWine")
#'
#' sele_formula <- as.formula(quality ~ fixed.acidity + volatile.acidity +
#'                           residual.sugar +  + free.sulfur.dioxide +
#'                           pH + sulphates + alcohol)
#'
#' sele_mod <- polr(formula = sele_formula,
#'               data = WhiteWine,
#'               method = "probit")
#'
#' sur1 <- surr_rsq(model = sele_mod,
#'               full_model = sele_mod,
#'               avg.num = 100)
#' \donttest{
#' rank_tab_sur1 <- surr_rsq_rank(object  = sur1,
#'                                avg.num = 30)
#' print(rank_tab_sur1)
#' }
#'
#' @export
#'
surr_rsq_rank <-
  function(object,
           avg.num = 30,
           var.set = NA,
           ...){
    # Collect the full model from the "surr_rsq" object
    # reduced_model <- object$reduced_model
    full_model <- object$full_model

    final_model_formula <- formula(full_model$terms)

    if (is.na(var.set)[1]) {
      # final_variables <- names(object$coefficients)
      show_variables <- final_variables <-
        attr(full_model$terms, "term.labels")
    } else {
      final_variables <- lapply(X = var.set, FUN = function(x) paste(x, collapse = "-"))
      show_variables <- lapply(X = var.set, FUN = function(x) paste(x, collapse = "+"))
    }

    a <- length(final_variables)

    # Calculate the surrogate R-squared of the full model
    surr_rsq_full <- surr_rsq(model = full_model,
                              full_model = full_model,
                              avg.num = avg.num)[[1]]

    # Initialize the results
    result <- c()
    surr_rsq_temp <- surr_rsq_redu <- c()

    for(i in 1:a){
      # update(final_model_formula,~.-final_variables[i])

      tryCatch({
        # Here has bug, some update cannot be execute so will trigger a bug
        # "full_model 'model_temp_for' not found"
        model_temp_for <- update(full_model, paste("~ . -", final_variables[i]))
        surr_rsq_temp[i] <- surr_rsq(model = model_temp_for,
                                     full_model = full_model,
                                     avg.num = avg.num)[[1]]
        surr_rsq_redu[i] <- surr_rsq_full - surr_rsq_temp[i]
        result[i] <- percent(surr_rsq_redu[i]/surr_rsq_full,0.01)
      }, error=function(e){
        message("Error in fittiing a reduced model without ", final_variables[i], ":\n")
        surr_rsq_temp[i] <<- surr_rsq_redu[i] <<- result[i] <<- NA
      })

    }

    result_table <-
      cbind.data.frame(var_set = unlist(show_variables),
                       surr_rsq_temp,
                       surr_rsq_redu,
                       result)
    names(result_table) <- c("Removed Variable","SurrogateRsq", "Reduction", "Contribution")
    result_table <- result_table[order(result_table$SurrogateRsq, decreasing = FALSE),]
    Ranking <- c(1:a)
    result_table$Ranking <- Ranking
    rownames(result_table) <- NULL

    # Add attribution
    attr(result_table, "total_rsq") <- surr_rsq_full

    # Add class to the result_table
    class(result_table) <- c("surr_rsq_rank", class(result_table))

    return(result_table)
  }


#' @title Print surrogate R-squared ranking table
#' @param x A surr_rsq_rank object for printing out results.
#'
#' @param digits A default number to specify decimal digit values.
#' @param ... Additional optional arguments.
#'
#' @name print
#' @method print surr_rsq_rank
#'
#' @return Print surrogate R-squared ranking table from a surr_rsq_rank object
#'
#' @importFrom stats formula
#'
#' @export
#' @keywords internal
print.surr_rsq_rank <- function(x, digits = max(2, getOption("digits")-2), ...) {
  total_rsq <- attr(x, "total_rsq")

  x_temp <- as.data.frame(x)
  x_temp$SurrogateRsq <-
    format(round(x_temp$SurrogateRsq,
                 digits=max(2, (digits))),
           digits = max(2, (digits)))
  x_temp$Reduction <-
    format(round(x_temp$Reduction,
                 digits=max(2, (digits))),
           digits = max(2, (digits)))

  print(x_temp, row.names = FALSE)

  cat("------------------------------------------------------------------------ \n")
  cat("The total surrogate R-squared of the full model is: \n")
  print(total_rsq, digits)
}
XiaoruiZhu/R2Cate documentation built on March 25, 2024, 2:44 a.m.