Nothing
#' Compare Regression Models with Likelihood Ratio Test, AIC, and BIC
#'
#' This function compares a given regression model to a base model using the
#' Likelihood Ratio (LR) test, Akaike Information Criterion (AIC), and Bayesian
#' Information Criterion (BIC).
#'
#' @name regCompTest
#' @param model A fitted regression model object.
#' @param data An options data frame containing the variables in the model. If
#' not supplied, the original data used to estimate the model will be used.
#' @param basemodel A character string specifying the family of base model to
#' compare against (options include the family from \code{\link{countreg}} or
#' "Poisson"). Default is "Poisson".
#' @param variables Logical. If \code{TRUE}, the base model will include the
#' same variables as the provided model. If \code{FALSE}, the base model will
#' be an intercept-only model. Default is \code{FALSE}.
#' @param print Logical. If \code{TRUE}, a table of the results will be shown.
#' If \code{FALSE}, the table of results will not be printed to the console.
#' @param ... Additional arguments to be passed to the base model fitting
#' function - options are any argument from the \code{\link{countreg}}
#' function.
#' @returns A list containing the following components:
#' \item{LL}{Log-likelihood of the provided model.}
#' \item{LLbase}{Log-likelihood of the base model.}
#' \item{LR}{Likelihood Ratio statistic.}
#' \item{LRdof}{Degrees of freedom for the Likelihood Ratio test.}
#' \item{AIC}{Akaike Information Criterion for the provided model.}
#' \item{AICbase}{Akaike Information Criterion for the base model.}
#' \item{BIC}{Bayesian Information Criterion for the provided model.}
#' \item{BICbase}{Bayesian Information Criterion for the base model.}
#' \item{LR_pvalue}{P-value for the Likelihood Ratio test.}
#' \item{PseudoR2}{McFadden's Pseudo R^2.}
#' \item{statistics}{A tibble format summary of the results.}
#' \item{gtTable}{A \link[gt]{gt} table object summarizing the results.}
#' \item{latexTable}{Latex code for a table summarizing the results.}
#' \item{htmlTable}{HTML table summarizing the results.}
#'
#' @include metrics.R
#' @import tibble knitr
#' @importFrom dplyr %>%
#' @importFrom gt gt tab_header fmt_number
#'
#' @details The function performs the following steps:
#' \enumerate{
#' \item Fits the base model, either a Poisson regression or another specified
#' model.
#' \item Computes the log-likelihoods of both the provided model and the base
#' model.
#' \item Calculates the AIC and BIC for both models.
#' \item Conducts a Likelihood Ratio test to compare the models (if the provided
#' model has more parameters than the base model).
#' \item Computes McFadden's Pseudo R^2.
#' }
#'
#' The Likelihood-Ratio test is computed as \deqn{LR = -2 (LL_{base \
#' model}-LL_{model})}. The test is chi-squared with degrees of freedom
#' \deqn{dof=N_{model \ params}-N_{base \ mode \ params}}.
#' The AIC is calculated as \deqn{AIC = -2 \cdot LL + 2 \cdot nparam}, and the
#' BIC is calculated as \deqn{BIC = -2 \cdot LL + nparam \cdot \log(n)}.
#' @examples
#'
#' # Comparing the NBP model with the NB2 model
#' data("washington_roads")
#' washington_roads$AADTover10k <- ifelse(washington_roads$AADT>10000,1,0)
#'
#' nbp.base <- countreg(Total_crashes ~ lnaadt + lnlength + speed50 +
#' ShouldWidth04 + AADTover10k,
#' data=washington_roads, family = 'NBP', method = 'NM',
#' max.iters=3000)
#' regCompTest(nbp.base, washington_roads, basemodel="NB2", print=TRUE)
#'
#' @export
regCompTest <- function(
model, data=NULL, basemodel = "Poisson",
variables = FALSE, print=FALSE, ...){
if(is.null(data)){ # Use object data if no new data are provided
data <- model$data
}
model <- model$model
formula <- model$formula
mod_df <- stats::model.frame(formula, data)
y <- as.numeric(stats::model.response(mod_df))
LL <- model$maximum # The log-likelihood of the original model
if(variables){
if(basemodel=="Poisson"){
base_mod <- glm(formula, data, family = poisson(link = "log"))
}
else{
base_mod <- countreg(formula, data, family=basemodel)
base_mod <- base_mod$model
}
}
else{
if(basemodel=="Poisson"){
base_mod <- glm(y ~ 1, data, family = poisson(link = "log"))
}
else{
base_mod <- countreg(y ~ 1, data, family=basemodel)
base_mod <- base_mod$model
}
}
# Get Log-Likelihood for the Base Model
if (basemodel=="Poisson"){
LLbase <- sum(dpois(base_mod$y, base_mod$fitted.values, log=TRUE))
}
else{
LLbase <- base_mod$maximum
}
# get number of coefficients in the base model
if (basemodel=="Poisson"){
n.coef.base <- length(base_mod$coefficients)
}
else{
n.coef.base <- length(base_mod$estimate)
}
# number of coefficients in the model provided
n.coef <- length(model$estimate)
test<-c()
test$LL <- LL
test$LLbase <- LLbase
# Compute Likelihood Ratio Test if the provided model has more parameters than
# the comparison model
if (n.coef>n.coef.base){
test$LR <- -2 * (LLbase - LL) # LR Statistic
test$LRdof <- n.coef - n.coef.base # LR Degrees of Freedom
if (test$LR > 0) {
# LR p-Value
test$LR_pvalue <- pchisq(test$LR, test$LRdof, lower.tail = FALSE)
} else {
test$LR_pvalue <- 1
}
}
else{
test$LR <- NULL
test$LRdof <- NULL
test$LR_pvalue <- NULL
}
# Calculate AIC and BIC for the main model and the base model
test$AIC <- myAIC(LL, n.coef)
test$AICbase <- myAIC(LLbase, n.coef.base)
test$BIC <- myBIC(LL, n.coef, length(y))
test$BICbase <- myBIC(LLbase, n.coef.base, length(y))
# Compute McFadden's Pseudo R^2, based on a Poisson intercept-only model
test$PseudoR2 <- 1 - LL / LLbase
# Generate a table of the results and values
statistics <- tibble::tibble(
Statistic = c("AIC", "BIC", "LR Test Statistic", "LR degrees of freedom",
"LR p-value", "McFadden's Pseudo R^2"),
Model = c(round(test$AIC, 4),
round(test$BIC, 4),
round(test$LR, 4),
test$LRdof,
round(test$LR_pvalue, 4),
round(test$PseudoR2, 4)),
BaseModel = c(round(test$AICbase, 4),
round(test$BICbase, 4),
NA,
NA,
NA,
NA)
)
test$statistics <- statistics
# gt Table
gtTable <- gt::gt(statistics) %>%
tab_header(
title = "Model Comparison Statistics"
) %>%
fmt_number(
columns = c(rlang::sym("Model"), rlang::sym("BaseModel")),
decimals = 4
)
test$gtTable <- gtTable
if (print){
print(gtTable)
}
# LaTeX table
test$latexTable <- knitr::kable(
statistics,
format = "latex",
booktabs = TRUE,
caption = "Model Comparison Statistics")
# HTML table
test$htmlTable <- knitr::kable(
statistics,
format = "html",
table.attr = "class='table table-striped'",
caption = "Model Comparison Statistics")
return(test)
}
# Utility functions for AIC and BIC
myAIC <- function(LL, nparam) {
return(-2 * LL + 2 * nparam)
}
myBIC <- function(LL, nparam, n) {
return(-2 * LL + nparam * log(n))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.