anova.gkwfit: Compare Fitted gkwfit Models using Likelihood Ratio Tests
In gkwreg: Generalized Kumaraswamy Regression Models for Bounded Data

anova.gkwfit

R Documentation

Compare Fitted gkwfit Models using Likelihood Ratio Tests

Description

Computes Likelihood Ratio Tests (LRT) to compare two or more nested models fitted using gkwfit. It produces a table summarizing the models and the test statistics.

Usage

## S3 method for class 'gkwfit'
anova(object, ...)

Arguments

`object`	An object of class `"gkwfit"`, representing the first fitted model.
`...`	One or more additional objects of class `"gkwfit"`, representing subsequent fitted models, assumed to be nested within each other or the first model.

Details

This function performs pairwise likelihood ratio tests between consecutively ordered models (ordered by their degrees of freedom). It assumes the models are nested and are fitted to the same dataset. A warning is issued if the number of observations differs between models.

The Likelihood Ratio statistic is calculated as LR = 2 \times (\log L_{complex} - \log L_{simple}). This statistic is compared to a Chi-squared distribution with degrees of freedom equal to the difference in the number of parameters between the two models (\Delta df = df_{complex} - df_{simple}).

The output table includes the number of parameters (N.Par), AIC, BIC, log-likelihood (LogLik), the test description (Test), the LR statistic (⁠LR stat.⁠), and the p-value (⁠Pr(>Chi)⁠). Models are ordered by increasing complexity (number of parameters).

Warnings are issued if models do not appear correctly nested based on degrees of freedom or if the log-likelihood decreases for a more complex model, as the LRT results may not be meaningful in such cases.

The function relies on a working logLik.gkwfit method to extract necessary information (log-likelihood, df, nobs).

Value

An object of class c("anova.gkwfit", "anova", "data.frame"). This data frame contains rows for each model and columns summarizing the fit and the pairwise likelihood ratio tests. It includes:

`N.Par`	Number of estimated parameters (degrees of freedom).
`AIC`	Akaike Information Criterion.
`BIC`	Bayesian Information Criterion.
`LogLik`	Log-likelihood value.
`Test`	Description of the pairwise comparison (e.g., "1 vs 2").
`LR stat.`	Likelihood Ratio test statistic.
`Pr(>Chi)`	P-value from the Chi-squared test.

The table is printed using a method that mimics print.anova.

Author(s)

Lopes, J. E.

Examples

## Not run: 
# Load required packages
library(ggplot2)
library(patchwork)
library(betareg)
# Generate data from GKw distribution
set.seed(2203)
n <- 1000
y <- rgkw(n, alpha = 2, beta = 3, gamma = 1.5, delta = 0.2, lambda = 1.2)

# Fit models from GKw family respecting their parameter structures
# Full GKw model: 5 parameters (alpha, beta, gamma, delta, lambda)
fit_gkw <- gkwfit(data = y, family = "gkw", plot = FALSE)

# BKw model: 4 parameters (alpha, beta, gamma, delta)
fit_bkw <- gkwfit(data = y, family = "bkw", plot = FALSE)

# KKw model: 4 parameters (alpha, beta, delta, lambda)
fit_kkw <- gkwfit(data = y, family = "kkw", plot = FALSE)

# EKw model: 3 parameters (alpha, beta, lambda)
fit_ekw <- gkwfit(data = y, family = "ekw", plot = FALSE)

# Mc model: 3 parameters (gamma, delta, lambda)
fit_mc <- gkwfit(data = y, family = "mc", plot = FALSE)

# Kw model: 2 parameters (alpha, beta)
fit_kw <- gkwfit(data = y, family = "kw", plot = FALSE)

# Beta model: 2 parameters (gamma, delta)
fit_beta <- gkwfit(data = y, family = "beta", plot = FALSE)

# Test 1: BKw vs GKw (testing lambda)
# H0: lambda=1 (BKw) vs H1: lambda!=1 (GKw)
cat("=== Testing BKw vs GKw (adding lambda parameter) ===\n")
test_bkw_gkw <- anova(fit_bkw, fit_gkw)
print(test_bkw_gkw)

# Test 2: KKw vs GKw (testing gamma)
# H0: gamma=1 (KKw) vs H1: gamma!=1 (GKw)
cat("\n=== Testing KKw vs GKw (adding gamma parameter) ===\n")
test_kkw_gkw <- anova(fit_kkw, fit_gkw)
print(test_kkw_gkw)

# Test 3: Kw vs EKw (testing lambda)
# H0: lambda=1 (Kw) vs H1: lambda!=1 (EKw)
cat("\n=== Testing Kw vs EKw (adding lambda parameter) ===\n")
test_kw_ekw <- anova(fit_kw, fit_ekw)
print(test_kw_ekw)

# Test 4: Beta vs Mc (testing lambda)
# H0: lambda=1 (Beta) vs H1: lambda!=1 (Mc)
cat("\n=== Testing Beta vs Mc (adding lambda parameter) ===\n")
test_beta_mc <- anova(fit_beta, fit_mc)
print(test_beta_mc)

# Visualize model comparison
# Create dataframe summarizing all models
models_df <- data.frame(
  Model = c("GKw", "BKw", "KKw", "EKw", "Mc", "Kw", "Beta"),
  Parameters = c(
    paste("alpha,beta,gamma,delta,lambda"),
    paste("alpha,beta,gamma,delta"),
    paste("alpha,beta,delta,lambda"),
    paste("alpha,beta,lambda"),
    paste("gamma,delta,lambda"),
    paste("alpha,beta"),
    paste("gamma,delta")
  ),
  Param_count = c(5, 4, 4, 3, 3, 2, 2),
  LogLik = c(
    as.numeric(logLik(fit_gkw)),
    as.numeric(logLik(fit_bkw)),
    as.numeric(logLik(fit_kkw)),
    as.numeric(logLik(fit_ekw)),
    as.numeric(logLik(fit_mc)),
    as.numeric(logLik(fit_kw)),
    as.numeric(logLik(fit_beta))
  ),
  AIC = c(
    fit_gkw$AIC,
    fit_bkw$AIC,
    fit_kkw$AIC,
    fit_ekw$AIC,
    fit_mc$AIC,
    fit_kw$AIC,
    fit_beta$AIC
  ),
  BIC = c(
    fit_gkw$BIC,
    fit_bkw$BIC,
    fit_kkw$BIC,
    fit_ekw$BIC,
    fit_mc$BIC,
    fit_kw$BIC,
    fit_beta$BIC
  )
)

# Sort by AIC
models_df <- models_df[order(models_df$AIC), ]
print(models_df)

# Create comprehensive visualization
# Plot showing model hierarchy and information criteria
p1 <- ggplot(models_df, aes(x = Param_count, y = LogLik, label = Model)) +
  geom_point(size = 3) +
  geom_text(vjust = -0.8) +
  labs(
    title = "Log-likelihood vs Model Complexity",
    x = "Number of Parameters",
    y = "Log-likelihood"
  ) +
  theme_minimal()

# Create information criteria comparison
models_df_long <- tidyr::pivot_longer(
  models_df,
  cols = c("AIC", "BIC"),
  names_to = "Criterion",
  values_to = "Value"
)

p2 <- ggplot(models_df_long, aes(x = reorder(Model, -Value), y = Value, fill = Criterion)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Information Criteria Comparison",
    x = "Model",
    y = "Value (lower is better)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Print plots
print(p1 + p2)

# ============================================================================
# Manual LR tests to demonstrate underlying calculations
# ============================================================================
# Function to perform manual likelihood ratio test
manual_lr_test <- function(model_restricted, model_full, alpha = 0.05) {
  # Extract log-likelihoods
  ll_restricted <- as.numeric(logLik(model_restricted))
  ll_full <- as.numeric(logLik(model_full))

  # Calculate test statistic
  lr_stat <- -2 * (ll_restricted - ll_full)

  # Calculate degrees of freedom (parameter difference)
  df <- length(coef(model_full)) - length(coef(model_restricted))

  # Calculate p-value
  p_value <- pchisq(lr_stat, df = df, lower.tail = FALSE)

  # Return results
  list(
    lr_statistic = lr_stat,
    df = df,
    p_value = p_value,
    significant = p_value < alpha,
    critical_value = qchisq(1 - alpha, df = df)
  )
}

# Example: Manual LR test for BKw vs GKw (testing lambda parameter)
cat("\n=== Manual LR test: BKw vs GKw ===\n")
lr_bkw_gkw <- manual_lr_test(fit_bkw, fit_gkw)
cat("LR statistic:", lr_bkw_gkw$lr_statistic, "\n")
cat("Degrees of freedom:", lr_bkw_gkw$df, "\n")
cat("P-value:", lr_bkw_gkw$p_value, "\n")
cat("Critical value (alpha=0.05):", lr_bkw_gkw$critical_value, "\n")
cat("Decision:", ifelse(lr_bkw_gkw$significant,
  "Reject H0: Lambda is significantly different from 1",
  "Fail to reject H0: Lambda is not significantly different from 1"
), "\n")

# Example: Manual LR test for Kw vs EKw (testing lambda parameter)
cat("\n=== Manual LR test: Kw vs EKw ===\n")
lr_kw_ekw <- manual_lr_test(fit_kw, fit_ekw)
cat("LR statistic:", lr_kw_ekw$lr_statistic, "\n")
cat("Degrees of freedom:", lr_kw_ekw$df, "\n")
cat("P-value:", lr_kw_ekw$p_value, "\n")
cat("Critical value (alpha=0.05):", lr_kw_ekw$critical_value, "\n")
cat("Decision:", ifelse(lr_kw_ekw$significant,
  "Reject H0: Lambda is significantly different from 1",
  "Fail to reject H0: Lambda is not significantly different from 1"
), "\n")

# ============================================================================
# Real data application with correct model nesting
# ============================================================================
if (requireNamespace("betareg", quietly = TRUE)) {
  data("ReadingSkills", package = "betareg")
  y <- ReadingSkills$accuracy

  # Fit models
  rs_gkw <- gkwfit(data = y, family = "gkw", plot = FALSE)
  rs_bkw <- gkwfit(data = y, family = "bkw", plot = FALSE)
  rs_kkw <- gkwfit(data = y, family = "kkw", plot = FALSE)
  rs_kw <- gkwfit(data = y, family = "kw", plot = FALSE)
  rs_beta <- gkwfit(data = y, family = "beta", plot = FALSE)

  # Test nested models
  cat("\n=== Real data: Testing BKw vs GKw (adding lambda) ===\n")
  rs_test_bkw_gkw <- anova(rs_bkw, rs_gkw)
  print(rs_test_bkw_gkw)

  cat("\n=== Real data: Testing Kw vs KKw (adding delta and lambda) ===\n")
  rs_test_kw_kkw <- anova(rs_kw, rs_kkw)
  print(rs_test_kw_kkw)

  # Compare non-nested models with information criteria
  cat("\n=== Real data: Comparing non-nested Beta vs Kw ===\n")
  rs_compare_beta_kw <- anova(rs_beta, rs_kw)
  print(rs_compare_beta_kw)

  # Summarize all models
  cat("\n=== Real data: Model comparison summary ===\n")
  models_rs <- c("GKw", "BKw", "KKw", "Kw", "Beta")
  aic_values <- c(rs_gkw$AIC, rs_bkw$AIC, rs_kkw$AIC, rs_kw$AIC, rs_beta$AIC)
  bic_values <- c(rs_gkw$BIC, rs_bkw$BIC, rs_kkw$BIC, rs_kw$BIC, rs_beta$BIC)
  loglik_values <- c(
    as.numeric(logLik(rs_gkw)),
    as.numeric(logLik(rs_bkw)),
    as.numeric(logLik(rs_kkw)),
    as.numeric(logLik(rs_kw)),
    as.numeric(logLik(rs_beta))
  )

  df_rs <- data.frame(
    Model = models_rs,
    LogLik = loglik_values,
    AIC = aic_values,
    BIC = bic_values
  )
  df_rs <- df_rs[order(df_rs$AIC), ]
  print(df_rs)

  # Determine the best model for the data
  best_model <- df_rs$Model[which.min(df_rs$AIC)]
  cat("\nBest model based on AIC:", best_model, "\n")
}

## End(Not run)

gkwreg documentation built on April 16, 2025, 1:10 a.m.