btoolbox: random toolbox

Documented in predict_ci

#' Calculate Confidence Intervals for Predictions from Regression Models
#' 
#' @param m model object (must be of class \code{lm}, \code{glm}, \code{MASS::polr}, or \code{nnet::multinom})
#' @param newdata dataset to make predictions
#' @param Sigma variance-covariance matrix of estimator; if NULL, \code{vcov(m)} is used
#' @param level confidence level; defaults to .95
#' @param n_sim number of simulation draws when confidence intervals are calculated via simulation; defaults to 1000
#' @details This function calculates confidence intervals for predictions based on fitted GLMs. The function works similarly to other \code{predict} methods. When the fitted model is of class \code{lm} or \code{glm}, endpoints of the confidence interval of the linear predictor are inverted to obtain the confidence intervals on the outcome scale. For \code{MASS::polr} and \code{nnet::multinom} objects, the function simulates \code{n_sim} draws from a \eqn{N(x'b, S)} distribution, where \eqn{x} is the covariate profile at which the predictions are made, \eqn{b} is the estimated regression coefficient vector, and \eqn{S} is the estimated variance-covariance matrix of \eqn{b}. Based on these simulations, the confidence interval for the prediction are calculated and then inverted, using the inverse-link function, into the outcome scale.
#' @examples
#' # binary logistic regression example
#' dat_bin = data.frame(y = c(rep(0, 5), rep(1, 5)), 
#'                      x = c(1:7, 3:5))
#' fit_bin = glm(y ~ x, 
#'               data = dat_bin, 
#'               family = binomial("logit"))
#' 
#' predict_ci(fit_bin, newdata = data.frame(x = c(3, 5)))
#' 
#' # ordered logistic regression example
#' dat_polr = data.frame(
#'     y = c(rep(0, 3), rep(1, 3), rep(2, 4)), 
#'     x = c(1:6, 2:5))
#' fit_polr = MASS::polr(factor(y) ~ x, data = dat_polr)
#' 
#' predict_ci(fit_polr, 
#'            newdata = data.frame(x = c(3, 5)),
#'            level = .90,
#'            n_sim = 3000)
#' @export
predict_ci = function(
    m = NULL, 
    newdata = NULL, 
    Sigma = NULL, 
    level = .95, 
    n_sim = 1000)
{

    if (is.null(newdata))
        stop("need newdata argument")
        
    m_class = class(m)[1]
    
    if (m_class %in% c("glm", "lm", "multinom", "polr") == F)
        stop("model has to be of class glm, lm, multinom, or polr")
    
    if (m_class %in% c("glm", "lm")) {
        
        ci = .inv_ci_pred(m, newdata, Sigma, level, m_class)
        return(ci)
        
    } 
        
    # create model matrix
    X = create_mm(m, newdata)
    
    message(
        paste0("Calculating ", 100 * level, 
               "% confidence intervals with ",
               n_sim, " simulation draws ... ")
    )
    
    if (m_class == "polr") {
        
        if (!requireNamespace("MASS")) 
            stop("please install MASS")
        
        # get vcov
        if (is.null(Sigma)) Sigma = vcov(m)

        X = X[, -1L, drop = FALSE]
        mu = c(coef(m), m$zeta)
        ci = .sim_ci_ologit(mu, Sigma, X, level, n_sim)
        
    } 

    if (m_class == "multinom") {
    
        if (!requireNamespace("nnet"))
            stop("please install nnet")
        
        # get vcov
        if (is.null(Sigma)) Sigma = vcov(m)
        mu = coef(m)
        ci = .sim_ci_mlogit(mu, Sigma, X, level, n_sim)
        
    }
    
    dimnames(ci) = list(
            rownames(newdata),
            c("lower", "upper"),
            paste0("response_", seq.int(dim(ci)[3L]))
        )
    
    return(ci)

}