anova.gkwfit | R Documentation |
Computes Likelihood Ratio Tests (LRT) to compare two or more nested models
fitted using gkwfit
. It produces a table summarizing the models
and the test statistics.
## S3 method for class 'gkwfit'
anova(object, ...)
object |
An object of class |
... |
One or more additional objects of class |
This function performs pairwise likelihood ratio tests between consecutively ordered models (ordered by their degrees of freedom). It assumes the models are nested and are fitted to the same dataset. A warning is issued if the number of observations differs between models.
The Likelihood Ratio statistic is calculated as LR = 2 \times (\log L_{complex} - \log L_{simple})
.
This statistic is compared to a Chi-squared distribution with degrees of freedom
equal to the difference in the number of parameters between the two models
(\Delta df = df_{complex} - df_{simple}
).
The output table includes the number of parameters (N.Par
), AIC, BIC, log-likelihood (LogLik
),
the test description (Test
), the LR statistic (LR stat.
), and the p-value (Pr(>Chi)
).
Models are ordered by increasing complexity (number of parameters).
Warnings are issued if models do not appear correctly nested based on degrees of freedom or if the log-likelihood decreases for a more complex model, as the LRT results may not be meaningful in such cases.
The function relies on a working logLik.gkwfit
method to extract
necessary information (log-likelihood, df, nobs).
An object of class c("anova.gkwfit", "anova", "data.frame")
.
This data frame contains rows for each model and columns summarizing the fit
and the pairwise likelihood ratio tests. It includes:
N.Par |
Number of estimated parameters (degrees of freedom). |
AIC |
Akaike Information Criterion. |
BIC |
Bayesian Information Criterion. |
LogLik |
Log-likelihood value. |
Test |
Description of the pairwise comparison (e.g., "1 vs 2"). |
LR stat. |
Likelihood Ratio test statistic. |
Pr(>Chi) |
P-value from the Chi-squared test. |
The table is printed using a method that mimics print.anova
.
Lopes, J. E.
gkwfit
, logLik.gkwfit
, AIC.gkwfit
, BIC.gkwfit
, anova
## Not run:
# Load required packages
library(ggplot2)
library(patchwork)
library(betareg)
# Generate data from GKw distribution
set.seed(2203)
n <- 1000
y <- rgkw(n, alpha = 2, beta = 3, gamma = 1.5, delta = 0.2, lambda = 1.2)
# Fit models from GKw family respecting their parameter structures
# Full GKw model: 5 parameters (alpha, beta, gamma, delta, lambda)
fit_gkw <- gkwfit(data = y, family = "gkw", plot = FALSE)
# BKw model: 4 parameters (alpha, beta, gamma, delta)
fit_bkw <- gkwfit(data = y, family = "bkw", plot = FALSE)
# KKw model: 4 parameters (alpha, beta, delta, lambda)
fit_kkw <- gkwfit(data = y, family = "kkw", plot = FALSE)
# EKw model: 3 parameters (alpha, beta, lambda)
fit_ekw <- gkwfit(data = y, family = "ekw", plot = FALSE)
# Mc model: 3 parameters (gamma, delta, lambda)
fit_mc <- gkwfit(data = y, family = "mc", plot = FALSE)
# Kw model: 2 parameters (alpha, beta)
fit_kw <- gkwfit(data = y, family = "kw", plot = FALSE)
# Beta model: 2 parameters (gamma, delta)
fit_beta <- gkwfit(data = y, family = "beta", plot = FALSE)
# Test 1: BKw vs GKw (testing lambda)
# H0: lambda=1 (BKw) vs H1: lambda!=1 (GKw)
cat("=== Testing BKw vs GKw (adding lambda parameter) ===\n")
test_bkw_gkw <- anova(fit_bkw, fit_gkw)
print(test_bkw_gkw)
# Test 2: KKw vs GKw (testing gamma)
# H0: gamma=1 (KKw) vs H1: gamma!=1 (GKw)
cat("\n=== Testing KKw vs GKw (adding gamma parameter) ===\n")
test_kkw_gkw <- anova(fit_kkw, fit_gkw)
print(test_kkw_gkw)
# Test 3: Kw vs EKw (testing lambda)
# H0: lambda=1 (Kw) vs H1: lambda!=1 (EKw)
cat("\n=== Testing Kw vs EKw (adding lambda parameter) ===\n")
test_kw_ekw <- anova(fit_kw, fit_ekw)
print(test_kw_ekw)
# Test 4: Beta vs Mc (testing lambda)
# H0: lambda=1 (Beta) vs H1: lambda!=1 (Mc)
cat("\n=== Testing Beta vs Mc (adding lambda parameter) ===\n")
test_beta_mc <- anova(fit_beta, fit_mc)
print(test_beta_mc)
# Visualize model comparison
# Create dataframe summarizing all models
models_df <- data.frame(
Model = c("GKw", "BKw", "KKw", "EKw", "Mc", "Kw", "Beta"),
Parameters = c(
paste("alpha,beta,gamma,delta,lambda"),
paste("alpha,beta,gamma,delta"),
paste("alpha,beta,delta,lambda"),
paste("alpha,beta,lambda"),
paste("gamma,delta,lambda"),
paste("alpha,beta"),
paste("gamma,delta")
),
Param_count = c(5, 4, 4, 3, 3, 2, 2),
LogLik = c(
as.numeric(logLik(fit_gkw)),
as.numeric(logLik(fit_bkw)),
as.numeric(logLik(fit_kkw)),
as.numeric(logLik(fit_ekw)),
as.numeric(logLik(fit_mc)),
as.numeric(logLik(fit_kw)),
as.numeric(logLik(fit_beta))
),
AIC = c(
fit_gkw$AIC,
fit_bkw$AIC,
fit_kkw$AIC,
fit_ekw$AIC,
fit_mc$AIC,
fit_kw$AIC,
fit_beta$AIC
),
BIC = c(
fit_gkw$BIC,
fit_bkw$BIC,
fit_kkw$BIC,
fit_ekw$BIC,
fit_mc$BIC,
fit_kw$BIC,
fit_beta$BIC
)
)
# Sort by AIC
models_df <- models_df[order(models_df$AIC), ]
print(models_df)
# Create comprehensive visualization
# Plot showing model hierarchy and information criteria
p1 <- ggplot(models_df, aes(x = Param_count, y = LogLik, label = Model)) +
geom_point(size = 3) +
geom_text(vjust = -0.8) +
labs(
title = "Log-likelihood vs Model Complexity",
x = "Number of Parameters",
y = "Log-likelihood"
) +
theme_minimal()
# Create information criteria comparison
models_df_long <- tidyr::pivot_longer(
models_df,
cols = c("AIC", "BIC"),
names_to = "Criterion",
values_to = "Value"
)
p2 <- ggplot(models_df_long, aes(x = reorder(Model, -Value), y = Value, fill = Criterion)) +
geom_bar(stat = "identity", position = "dodge") +
labs(
title = "Information Criteria Comparison",
x = "Model",
y = "Value (lower is better)"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Print plots
print(p1 + p2)
# ============================================================================
# Manual LR tests to demonstrate underlying calculations
# ============================================================================
# Function to perform manual likelihood ratio test
manual_lr_test <- function(model_restricted, model_full, alpha = 0.05) {
# Extract log-likelihoods
ll_restricted <- as.numeric(logLik(model_restricted))
ll_full <- as.numeric(logLik(model_full))
# Calculate test statistic
lr_stat <- -2 * (ll_restricted - ll_full)
# Calculate degrees of freedom (parameter difference)
df <- length(coef(model_full)) - length(coef(model_restricted))
# Calculate p-value
p_value <- pchisq(lr_stat, df = df, lower.tail = FALSE)
# Return results
list(
lr_statistic = lr_stat,
df = df,
p_value = p_value,
significant = p_value < alpha,
critical_value = qchisq(1 - alpha, df = df)
)
}
# Example: Manual LR test for BKw vs GKw (testing lambda parameter)
cat("\n=== Manual LR test: BKw vs GKw ===\n")
lr_bkw_gkw <- manual_lr_test(fit_bkw, fit_gkw)
cat("LR statistic:", lr_bkw_gkw$lr_statistic, "\n")
cat("Degrees of freedom:", lr_bkw_gkw$df, "\n")
cat("P-value:", lr_bkw_gkw$p_value, "\n")
cat("Critical value (alpha=0.05):", lr_bkw_gkw$critical_value, "\n")
cat("Decision:", ifelse(lr_bkw_gkw$significant,
"Reject H0: Lambda is significantly different from 1",
"Fail to reject H0: Lambda is not significantly different from 1"
), "\n")
# Example: Manual LR test for Kw vs EKw (testing lambda parameter)
cat("\n=== Manual LR test: Kw vs EKw ===\n")
lr_kw_ekw <- manual_lr_test(fit_kw, fit_ekw)
cat("LR statistic:", lr_kw_ekw$lr_statistic, "\n")
cat("Degrees of freedom:", lr_kw_ekw$df, "\n")
cat("P-value:", lr_kw_ekw$p_value, "\n")
cat("Critical value (alpha=0.05):", lr_kw_ekw$critical_value, "\n")
cat("Decision:", ifelse(lr_kw_ekw$significant,
"Reject H0: Lambda is significantly different from 1",
"Fail to reject H0: Lambda is not significantly different from 1"
), "\n")
# ============================================================================
# Real data application with correct model nesting
# ============================================================================
if (requireNamespace("betareg", quietly = TRUE)) {
data("ReadingSkills", package = "betareg")
y <- ReadingSkills$accuracy
# Fit models
rs_gkw <- gkwfit(data = y, family = "gkw", plot = FALSE)
rs_bkw <- gkwfit(data = y, family = "bkw", plot = FALSE)
rs_kkw <- gkwfit(data = y, family = "kkw", plot = FALSE)
rs_kw <- gkwfit(data = y, family = "kw", plot = FALSE)
rs_beta <- gkwfit(data = y, family = "beta", plot = FALSE)
# Test nested models
cat("\n=== Real data: Testing BKw vs GKw (adding lambda) ===\n")
rs_test_bkw_gkw <- anova(rs_bkw, rs_gkw)
print(rs_test_bkw_gkw)
cat("\n=== Real data: Testing Kw vs KKw (adding delta and lambda) ===\n")
rs_test_kw_kkw <- anova(rs_kw, rs_kkw)
print(rs_test_kw_kkw)
# Compare non-nested models with information criteria
cat("\n=== Real data: Comparing non-nested Beta vs Kw ===\n")
rs_compare_beta_kw <- anova(rs_beta, rs_kw)
print(rs_compare_beta_kw)
# Summarize all models
cat("\n=== Real data: Model comparison summary ===\n")
models_rs <- c("GKw", "BKw", "KKw", "Kw", "Beta")
aic_values <- c(rs_gkw$AIC, rs_bkw$AIC, rs_kkw$AIC, rs_kw$AIC, rs_beta$AIC)
bic_values <- c(rs_gkw$BIC, rs_bkw$BIC, rs_kkw$BIC, rs_kw$BIC, rs_beta$BIC)
loglik_values <- c(
as.numeric(logLik(rs_gkw)),
as.numeric(logLik(rs_bkw)),
as.numeric(logLik(rs_kkw)),
as.numeric(logLik(rs_kw)),
as.numeric(logLik(rs_beta))
)
df_rs <- data.frame(
Model = models_rs,
LogLik = loglik_values,
AIC = aic_values,
BIC = bic_values
)
df_rs <- df_rs[order(df_rs$AIC), ]
print(df_rs)
# Determine the best model for the data
best_model <- df_rs$Model[which.min(df_rs$AIC)]
cat("\nBest model based on AIC:", best_model, "\n")
}
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.