#' The contribution of each variable in the final model
#'
#' @description This function calculates reduction of the surrogate R-squared goodness-of-fit of each variable to measure their relative explanatory power. This function creates a table containing the reductions of surrogate R-squared by removing each one of variables in the model.
#' @param object A object of class `"surr_rsq"` that is generated by the function `"surr_rsq"`.
#' It contains the following components: `surr_rsq`, `reduced_model`, `full_model`, and `data`.
#' @param avg.num The number of replication for the averaging of surrogate R-square.
#' @param var.set A list that contains a few sets. Each component of these sets represents the variables
#' that you want to examine for the contribution of goodness of fit. Then, for one component of this list,
#' a model will fit by removing the specified variables.
#' @param ... Additional optional arguments.
#'
#' @importFrom PAsso surrogate
#' @importFrom MASS polr
#' @importFrom scales percent
#'
#' @return The default return is a list that contains the contribution of Surrogate R-squared for each
#' variable in the `full_model`. If the `var.set` is specified, the return is a list of the
#' contribution of the groups of variables in the `var.set`.
#' @examples
#' data("WhiteWine")
#'
#' sele_formula <- as.formula(quality ~ fixed.acidity + volatile.acidity +
#' residual.sugar + + free.sulfur.dioxide +
#' pH + sulphates + alcohol)
#'
#' sele_mod <- polr(formula = sele_formula,
#' data = WhiteWine,
#' method = "probit")
#'
#' sur1 <- surr_rsq(model = sele_mod,
#' full_model = sele_mod,
#' avg.num = 100)
#' \donttest{
#' rank_tab_sur1 <- surr_rsq_rank(object = sur1,
#' avg.num = 30)
#' print(rank_tab_sur1)
#' }
#'
#' @export
#'
surr_rsq_rank <-
function(object,
avg.num = 30,
var.set = NA,
...){
# Collect the full model from the "surr_rsq" object
# reduced_model <- object$reduced_model
full_model <- object$full_model
final_model_formula <- formula(full_model$terms)
if (is.na(var.set)[1]) {
# final_variables <- names(object$coefficients)
show_variables <- final_variables <-
attr(full_model$terms, "term.labels")
} else {
final_variables <- lapply(X = var.set, FUN = function(x) paste(x, collapse = "-"))
show_variables <- lapply(X = var.set, FUN = function(x) paste(x, collapse = "+"))
}
a <- length(final_variables)
# Calculate the surrogate R-squared of the full model
surr_rsq_full <- surr_rsq(model = full_model,
full_model = full_model,
avg.num = avg.num)[[1]]
# Initialize the results
result <- c()
surr_rsq_temp <- surr_rsq_redu <- c()
for(i in 1:a){
# update(final_model_formula,~.-final_variables[i])
tryCatch({
# Here has bug, some update cannot be execute so will trigger a bug
# "full_model 'model_temp_for' not found"
model_temp_for <- update(full_model, paste("~ . -", final_variables[i]))
surr_rsq_temp[i] <- surr_rsq(model = model_temp_for,
full_model = full_model,
avg.num = avg.num)[[1]]
surr_rsq_redu[i] <- surr_rsq_full - surr_rsq_temp[i]
result[i] <- percent(surr_rsq_redu[i]/surr_rsq_full,0.01)
}, error=function(e){
message("Error in fittiing a reduced model without ", final_variables[i], ":\n")
surr_rsq_temp[i] <<- surr_rsq_redu[i] <<- result[i] <<- NA
})
}
result_table <-
cbind.data.frame(var_set = unlist(show_variables),
surr_rsq_temp,
surr_rsq_redu,
result)
names(result_table) <- c("Removed Variable","SurrogateRsq", "Reduction", "Contribution")
result_table <- result_table[order(result_table$SurrogateRsq, decreasing = FALSE),]
Ranking <- c(1:a)
result_table$Ranking <- Ranking
rownames(result_table) <- NULL
# Add attribution
attr(result_table, "total_rsq") <- surr_rsq_full
# Add class to the result_table
class(result_table) <- c("surr_rsq_rank", class(result_table))
return(result_table)
}
#' @title Print surrogate R-squared ranking table
#' @param x A surr_rsq_rank object for printing out results.
#'
#' @param digits A default number to specify decimal digit values.
#' @param ... Additional optional arguments.
#'
#' @name print
#' @method print surr_rsq_rank
#'
#' @return Print surrogate R-squared ranking table from a surr_rsq_rank object
#'
#' @importFrom stats formula
#'
#' @export
#' @keywords internal
print.surr_rsq_rank <- function(x, digits = max(2, getOption("digits")-2), ...) {
total_rsq <- attr(x, "total_rsq")
x_temp <- as.data.frame(x)
x_temp$SurrogateRsq <-
format(round(x_temp$SurrogateRsq,
digits=max(2, (digits))),
digits = max(2, (digits)))
x_temp$Reduction <-
format(round(x_temp$Reduction,
digits=max(2, (digits))),
digits = max(2, (digits)))
print(x_temp, row.names = FALSE)
cat("------------------------------------------------------------------------ \n")
cat("The total surrogate R-squared of the full model is: \n")
print(total_rsq, digits)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.