R/surr_rsq_ci.R

Defines functions print.surr_rsq_ci surr_rsq_ci

Documented in print.surr_rsq_ci surr_rsq_ci

#' A function to calculate the interval estimate of the surrogate R-squared measure
#'
#' @description This function generates the interval measure of surrogate R-squared by bootstrap.
#' @param surr_rsq A object of class `"surr_rsq"` that is generated by the function `"surr_rsq"`.
#' It contains the following components: `surr_rsq`, `reduced_model`, `full_model`, and `data`.
#' @param alpha The significance level alpha. The confidence level is 1-alpha.
#' @param B The number of bootstrap replications.
#' @param ... Additional optional arguments.
#'
#' @return An list that contains the CI_lower, CI_upper.
#'
#' @importFrom progress progress_bar
#' @importFrom stats update lm nobs quantile
#' @importFrom scales percent
#'
#' @examples
#' data("RedWine")
#'
#' full_formula <- as.formula(quality ~ fixed.acidity + volatile.acidity + citric.acid
#' + residual.sugar + chlorides + free.sulfur.dioxide +
#' total.sulfur.dioxide + density + pH + sulphates + alcohol)
#'
#' fullmodel <- polr(formula = full_formula,data=RedWine, method  = "probit")
#'
#' select_model <- update(fullmodel, formula. = ". ~ . - fixed.acidity -
#' citric.acid - residual.sugar - density")
#'
#' surr_rsq_select <- surr_rsq(select_model, fullmodel, data = RedWine, avg.num = 30)
#'
#' # surr_rsq_ci(surr_rsq_select, alpha = 0.05, B = 1000) # Not run, it takes time.
#'
#' @export
#'
surr_rsq_ci <-
  function(surr_rsq,
           alpha = 0.05,
           B     = 1000,
           ...){
    # Save B+1 surrogate rsq, the first one is calculated from full data.
    B <- B + 1

    # Add progress bar --------------------------------------------------------
    pb <- progress_bar$new(
      format = "Replication = :letter [:bar] :percent :elapsed | eta: :eta",
      total = B,    # 300
      width = 80)
    progress_repNo <- c(1:B)  # token reported in progress bar


    # Estract components from surr_rsq object
    res_s <- surr_rsq[[1]]
    reduced_model <- surr_rsq[[2]]
    full_model <- surr_rsq[[3]]

    # Check if datasets from two model objects are the same!
    data <- checkDataSame(model = reduced_model, full_model = full_model)

    n <- nrow(data)
    # resultTable <- array(NA, dim = c(dim(data),1,B))
    # resultTable[,,1,1] <- res_s
    resultTable <- rep(NA, B)
    resultTable[1] <- res_s[[1]]

    for (j in 2:B) {
      BS_data <- data[sample(1:n, n, replace = T), ]
      BS_full_model <- update(full_model, data = BS_data)
      try(
        resultTable[j] <- surr_rsq(model = reduced_model,
                                   full_model = BS_full_model)[[1]]
      )
      while( is.na(resultTable[j])) {
        BS_data <- data[sample(1:n, n, replace = T), ]
        BS_full_model <- update(full_model, data = BS_data)
        try(
          resultTable[j] <- surr_rsq(model = reduced_model,
                                     full_model = BS_full_model)[[1]]
        )
      }

      # ProgressBar
      pb$tick(tokens = list(letter = progress_repNo[j]))
    }

    CI_lower <- quantile(x = resultTable[-1], probs = c(alpha/2))
    CI_lower <- round(CI_lower, 3)

    CI_upper <- quantile(x = resultTable[-1], probs = c(1 - alpha/2))
    CI_upper <- round(CI_upper, 3)

    rsq_ci <- data.frame(Lower = c(percent(alpha/2, 0.01), CI_lower),
                         Upper = c(percent(1 - alpha/2, 0.01), CI_upper),
                         row.names = c("Percentile", "Confidence Interval"))

    # Thanks to @indenkun for providing this revise.
    return_list <- list("surr_rsq" = res_s,
                        "surr_rsq_ci" = rsq_ci,
                        "surr_rsq_BS" = resultTable[-1],
                        "reduced_model" = reduced_model,
                        "full_model" = full_model,
                        "data" = data)

    # Add class to the result_table
    class(return_list) <- "surr_rsq_ci"

    return(return_list)
  }

#' @title Print surrogate R-squared confidence interval measure
#' @param x A surr_rsq_ci object for printing out results.
#'
#' @param digits A default number to specify decimal digit values.
#' @param ... Additional optional arguments.
#'
#' @name print
#' @method print surr_rsq_ci
#'
#' @return Print surrogate R-squared confidence interval measure
#'
#' @importFrom stats formula
#'
#' @export
#' @keywords internal
print.surr_rsq_ci <- function(x, digits = max(2, getOption("digits")-2), ...) {
  cat("------------------------------------------------------------------------ \n")
  cat("The surrogate R-squared of the model \n------------------------------------------------------------------------ \n",
      paste(format(formula(x$reduced_model$terms)), "\n"),
      "------------------------------------------------------------------------ \n",
      "the interval estimate of the surrogate R-squared is: \n", sep = "")

  print.data.frame(x$surr_rsq_ci, digits = digits)
}

Try the SurrogateRsq package in your browser

Any scripts or data that you put into this service are public.

SurrogateRsq documentation built on April 24, 2023, 9:10 a.m.