Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
echo = TRUE,
message = FALSE,
warning = FALSE,
cache = TRUE,
cache.lazy = FALSE,
fig.align = "center",
comment = "",
fig.width = 10,
fig.height = 5,
dpi = 150
)
set.seed(2024)
## ----packages-----------------------------------------------------------------
library(gkwreg)
library(betareg)
library(ggplot2)
# Standardized color palette for all comparisons
MODEL_COLORS <- c(
"Beta (betareg)" = "#D32F2F", # Red
"Beta (gkwreg)" = "#1976D2", # Blue
"Kumaraswamy" = "#388E3C", # Green
"Exp. Kumaraswamy" = "#7B1FA2" # Purple
)
MODEL_NAMES <- c(
betareg = "Beta (betareg)",
gkw_beta = "Beta (gkwreg)",
gkw_kw = "Kumaraswamy",
gkw_ekw = "Exp. Kumaraswamy"
)
## ----helper_functions, include=FALSE------------------------------------------
# ==============================================================================
# SIMULATION INFRASTRUCTURE
# ==============================================================================
# Store fitted model objects
run_single_sim_with_fits <- function(n, dgp_fun, params, formula, test_prop = 0.2) {
data_full <- tryCatch(
{
dgp_fun(n, params)
},
error = function(e) NULL
)
if (is.null(data_full)) {
return(list(
betareg = list(success = FALSE),
gkw_kw = list(success = FALSE)
))
}
n_test <- floor(n * test_prop)
test_idx <- sample(seq_len(n), n_test)
train_data <- data_full[-test_idx, ]
test_data <- data_full[test_idx, ]
results <- list()
# Beta regression
results$betareg <- tryCatch(
{
t0 <- proc.time()[3]
fit <- betareg(formula, data = train_data)
time_elapsed <- proc.time()[3] - t0
pred <- predict(fit, newdata = test_data, type = "response")
if (any(is.na(pred)) || any(is.infinite(pred))) {
return(list(success = FALSE))
}
rmse <- sqrt(mean((test_data$y - pred)^2))
list(
fit = fit,
coef = coef(fit),
se = sqrt(diag(vcov(fit))),
loglik = as.numeric(logLik(fit)),
aic = AIC(fit),
bic = BIC(fit),
rmse = rmse,
time = time_elapsed,
converged = isTRUE(fit$converged),
success = TRUE
)
},
error = function(e) {
list(success = FALSE)
}
)
# Kumaraswamy regression
results$gkw_kw <- tryCatch(
{
t0 <- proc.time()[3]
fit <- gkwreg(formula, data = train_data, family = "kw")
time_elapsed <- proc.time()[3] - t0
pred <- predict(fit, newdata = test_data, type = "response")
if (any(is.na(pred)) || any(is.infinite(pred))) {
return(list(success = FALSE))
}
rmse <- sqrt(mean((test_data$y - pred)^2))
list(
fit = fit,
coef = coef(fit),
se = fit$se,
loglik = fit$loglik,
aic = fit$aic,
bic = fit$bic,
rmse = rmse,
time = time_elapsed,
converged = isTRUE(fit$convergence),
success = TRUE
)
},
error = function(e) {
list(success = FALSE)
}
)
results
}
run_single_sim <- function(n, dgp_fun, params, formula, test_prop = 0.2) {
data_full <- tryCatch(
{
dgp_fun(n, params)
},
error = function(e) NULL
)
if (is.null(data_full)) {
return(list(
betareg = list(success = FALSE),
gkw_beta = list(success = FALSE),
gkw_kw = list(success = FALSE),
gkw_ekw = list(success = FALSE)
))
}
n_test <- floor(n * test_prop)
test_idx <- sample(seq_len(n), n_test)
train_data <- data_full[-test_idx, ]
test_data <- data_full[test_idx, ]
results <- list()
safe_fit <- function(fit_fun, name) {
tryCatch(
{
t0 <- proc.time()[3]
fit <- fit_fun()
time_elapsed <- proc.time()[3] - t0
if (name == "betareg") {
loglik <- as.numeric(logLik(fit))
aic <- AIC(fit)
bic <- BIC(fit)
converged <- isTRUE(fit$converged)
} else {
loglik <- fit$loglik
aic <- fit$aic
bic <- fit$bic
converged <- isTRUE(fit$convergence)
}
pred <- predict(fit, newdata = test_data, type = "response")
if (any(is.na(pred)) || any(is.infinite(pred))) {
return(list(success = FALSE))
}
rmse <- sqrt(mean((test_data$y - pred)^2))
if (!is.finite(loglik)) loglik <- NA_real_
if (!is.finite(aic)) aic <- NA_real_
if (!is.finite(bic)) bic <- NA_real_
if (!is.finite(rmse)) rmse <- NA_real_
if (!is.finite(time_elapsed)) time_elapsed <- NA_real_
list(
loglik = as.numeric(loglik),
aic = as.numeric(aic),
bic = as.numeric(bic),
rmse = as.numeric(rmse),
time = as.numeric(time_elapsed),
converged = converged,
success = TRUE
)
},
error = function(e) {
list(success = FALSE)
}
)
}
results$betareg <- safe_fit(
function() betareg(formula, data = train_data),
"betareg"
)
results$gkw_beta <- safe_fit(
function() gkwreg(formula, data = train_data, family = "beta"),
"gkwreg"
)
results$gkw_kw <- safe_fit(
function() gkwreg(formula, data = train_data, family = "kw"),
"gkwreg"
)
results$gkw_ekw <- safe_fit(
function() gkwreg(formula, data = train_data, family = "ekw"),
"gkwreg"
)
results
}
run_full_simulation <- function(n_sim, n, dgp_fun, params, formula, verbose = FALSE) {
all_results <- vector("list", n_sim)
for (i in seq_len(n_sim)) {
all_results[[i]] <- run_single_sim(n, dgp_fun, params, formula)
}
models <- c("betareg", "gkw_beta", "gkw_kw", "gkw_ekw")
summary_stats <- list()
for (model in models) {
success_idx <- sapply(all_results, function(x) {
!is.null(x[[model]]) && isTRUE(x[[model]]$success)
})
n_success <- sum(success_idx)
if (n_success == 0) {
summary_stats[[model]] <- list(
n_success = 0,
conv_rate = 0,
loglik_mean = NA_real_,
aic_mean = NA_real_,
bic_mean = NA_real_,
rmse_mean = NA_real_,
time_mean = NA_real_
)
next
}
successful <- all_results[success_idx]
extract_metric <- function(metric_name) {
vals <- sapply(successful, function(x) {
val <- x[[model]][[metric_name]]
if (is.null(val) || !is.numeric(val)) {
return(NA_real_)
}
as.numeric(val)
})
if (all(is.na(vals))) {
return(NA_real_)
}
vals
}
converged_vals <- sapply(successful, function(x) {
val <- x[[model]]$converged
if (is.null(val)) {
return(FALSE)
}
if (is.logical(val)) {
return(val)
}
if (is.numeric(val)) {
return(val > 0)
}
FALSE
})
summary_stats[[model]] <- list(
n_success = n_success,
conv_rate = mean(converged_vals, na.rm = TRUE),
loglik_mean = mean(extract_metric("loglik"), na.rm = TRUE),
aic_mean = mean(extract_metric("aic"), na.rm = TRUE),
bic_mean = mean(extract_metric("bic"), na.rm = TRUE),
rmse_mean = mean(extract_metric("rmse"), na.rm = TRUE),
time_mean = mean(extract_metric("time"), na.rm = TRUE)
)
}
list(
raw = all_results,
summary = summary_stats,
n = n,
n_sim = n_sim,
formula = formula
)
}
make_comparison_table <- function(sim_results) {
models <- names(sim_results$summary)
safe_format <- function(x, digits = 2) {
if (is.null(x) || is.na(x)) {
return(NA_real_)
}
round(as.numeric(x), digits)
}
df_list <- lapply(models, function(model) {
s <- sim_results$summary[[model]]
if (is.null(s) || s$n_success == 0) {
return(data.frame(
Model = MODEL_NAMES[model],
N_Success = 0,
Conv_Rate = NA_real_,
AIC = NA_real_,
RMSE = NA_real_,
Time = NA_real_,
stringsAsFactors = FALSE
))
}
data.frame(
Model = MODEL_NAMES[model],
N_Success = s$n_success,
Conv_Rate = safe_format(s$conv_rate * 100, 1),
AIC = safe_format(s$aic_mean, 2),
RMSE = safe_format(s$rmse_mean, 4),
Time = safe_format(s$time_mean, 4),
stringsAsFactors = FALSE
)
})
df <- do.call(rbind, df_list)
rownames(df) <- NULL
df
}
## ----scenario1_dgp------------------------------------------------------------
dgp_beta <- function(n, params) {
x1 <- rnorm(n, 0, 1)
x2 <- runif(n, -1, 1)
eta_mu <- params$beta_mu[1] + params$beta_mu[2] * x1 + params$beta_mu[3] * x2
eta_phi <- params$beta_phi[1] + params$beta_phi[2] * x1
mu <- plogis(eta_mu)
phi <- exp(eta_phi)
y <- rbeta(n, mu * phi, (1 - mu) * phi)
y <- pmax(pmin(y, 0.999), 0.001)
data.frame(y = y, x1 = x1, x2 = x2, mu = mu, phi = phi)
}
## ----scenario1_visualization, fig.height=6, fig.cap="Distributional Characteristics - Scenario 1 (Well-Specified Beta)"----
set.seed(123)
vis_data_s1 <- dgp_beta(1000, list(
beta_mu = c(0.5, -0.8, 0.6),
beta_phi = c(1.5, 0.4)
))
par(mfrow = c(2, 2), mar = c(4, 4, 2, 2))
# Response distribution
hist(vis_data_s1$y,
breaks = 40, col = "#388E3C", border = "white",
main = "Response Distribution", xlab = "Y", prob = TRUE,
cex.main = 1.2, cex.lab = 1.1
)
lines(density(vis_data_s1$y), col = "#D32F2F", lwd = 2)
legend("topright", "Kernel density", col = "#D32F2F", lwd = 2, bty = "n")
# Effect of x1
plot(vis_data_s1$x1, vis_data_s1$y,
pch = 16, col = rgb(0, 0, 0, 0.3),
main = "Covariate Effect: x1", xlab = "x1", ylab = "Y",
cex.main = 1.2, cex.lab = 1.1
)
lines(lowess(vis_data_s1$x1, vis_data_s1$y), col = "#D32F2F", lwd = 3)
abline(h = 0.5, lty = 2, col = "gray50")
# Effect of x2
plot(vis_data_s1$x2, vis_data_s1$y,
pch = 16, col = rgb(0, 0, 0, 0.3),
main = "Covariate Effect: x2", xlab = "x2", ylab = "Y",
cex.main = 1.2, cex.lab = 1.1
)
lines(lowess(vis_data_s1$x2, vis_data_s1$y), col = "#388E3C", lwd = 3)
abline(h = 0.5, lty = 2, col = "gray50")
# Conditional variance
plot(abs(vis_data_s1$x1), (vis_data_s1$y - vis_data_s1$mu)^2,
pch = 16, col = rgb(0, 0, 0, 0.2),
main = "Heteroscedasticity Structure",
xlab = "|x1|", ylab = "Squared Residuals",
cex.main = 1.2, cex.lab = 1.1
)
lines(lowess(abs(vis_data_s1$x1), (vis_data_s1$y - vis_data_s1$mu)^2),
col = "#1976D2", lwd = 3
)
# mtext("Figure 1: Distributional Characteristics - Scenario 1 (Well-Specified Beta)",
# side = 3, line = -2, outer = TRUE, font = 2, cex = 1.1)
## ----scenario1_sim, cache=TRUE------------------------------------------------
results_s1 <- run_full_simulation(
n_sim = 200,
n = 300,
dgp_fun = dgp_beta,
params = list(
beta_mu = c(0.5, -0.8, 0.6),
beta_phi = c(1.5, 0.4)
),
formula = y ~ x1 + x2 | x1
)
comp_s1 <- make_comparison_table(results_s1)
## ----scenario1_coef_comparison, cache=TRUE------------------------------------
# Fit models to one dataset for coefficient comparison
set.seed(456)
data_coef_s1 <- dgp_beta(300, list(
beta_mu = c(0.5, -0.8, 0.6),
beta_phi = c(1.5, 0.4)
))
fit_beta_s1 <- betareg(y ~ x1 + x2 | x1, data = data_coef_s1)
fit_kw_s1 <- gkwreg(y ~ x1 + x2 | x1, data = data_coef_s1, family = "kw")
# Extract coefficients
coef_beta_s1 <- coef(fit_beta_s1)
se_beta_s1 <- sqrt(diag(vcov(fit_beta_s1)))
coef_kw_s1 <- coef(fit_kw_s1)
se_kw_s1 <- fit_kw_s1$se
# Build comparison table
coef_names <- names(coef_beta_s1)
true_values <- c(0.5, -0.8, 0.6, 1.5, 0.4)
coef_comparison_s1 <- data.frame(
Parameter = coef_names,
True_Value = true_values,
Beta_Est = round(coef_beta_s1, 4),
Beta_SE = round(se_beta_s1, 4),
Beta_Z = round(coef_beta_s1 / se_beta_s1, 2),
Kw_Est = round(coef_kw_s1, 4),
Kw_SE = round(se_kw_s1, 4),
Kw_Z = round(coef_kw_s1 / se_kw_s1, 2)
)
# Calculate differences
coef_comparison_s1$Bias_Beta <- round(coef_comparison_s1$Beta_Est - true_values, 4)
coef_comparison_s1$Bias_Kw <- round(coef_comparison_s1$Kw_Est - true_values, 4)
coef_comparison_s1$SE_Ratio <- round(coef_comparison_s1$Kw_SE / coef_comparison_s1$Beta_SE, 3)
knitr::kable(
row.names = FALSE,
coef_comparison_s1,
caption = "Table 1: Parameter Estimates - Scenario 1 (Well-Specified Beta, n=300)",
col.names = c(
"Parameter", "True", "Est.", "SE", "z", "Est.", "SE", "z",
"Bias", "Bias", "SE Ratio"
),
align = c("l", rep("r", 10))
)
## ----scenario2_dgp------------------------------------------------------------
dgp_heavy_tails <- function(n, params) {
x1 <- rnorm(n, 0, 1)
x2 <- rbinom(n, 1, 0.5)
eta_alpha <- params$beta_alpha[1] + params$beta_alpha[2] * x1
eta_beta <- params$beta_beta[1] + params$beta_beta[2] * x2
eta_lambda <- params$beta_lambda[1]
alpha <- exp(eta_alpha)
beta <- exp(eta_beta)
lambda <- exp(eta_lambda)
u <- runif(n)
y <- (1 - (1 - u^(1 / beta))^(1 / alpha))^(1 / lambda)
y <- pmax(pmin(y, 0.9999), 0.0001)
data.frame(y = y, x1 = x1, x2 = factor(x2))
}
## ----scenario2_visualization, fig.height=6, fig.cap="Distributional Characteristics - Scenario 2 (Heavy Tails)"----
set.seed(789)
vis_data_s2 <- dgp_heavy_tails(1500, list(
beta_alpha = c(0.8, -0.5),
beta_beta = c(0.3, 0.4),
beta_lambda = c(0.6)
))
par(mfrow = c(2, 2), mar = c(4, 4, 3, 2))
# Response distribution
hist(vis_data_s2$y,
breaks = 60, col = "#7B1FA2", border = "white",
main = "Heavy-Tailed Distribution", xlab = "Y", prob = TRUE,
cex.main = 1.2, cex.lab = 1.1
)
lines(density(vis_data_s2$y), col = "#D32F2F", lwd = 2)
# Add Beta approximation for comparison
beta_approx <- rbeta(1500, 2, 2)
lines(density(beta_approx), col = "#1976D2", lwd = 2, lty = 2)
legend("topright", c("True density", "Beta approximation"),
col = c("#D32F2F", "#1976D2"), lwd = 2, lty = c(1, 2), bty = "n", cex = 0.9
)
# QQ plot against Beta
qqplot(rbeta(1500, 2, 2), vis_data_s2$y,
main = "Q-Q Plot: EKw vs Beta(2,2)",
xlab = "Beta(2,2) Quantiles", ylab = "Observed Quantiles",
pch = 16, col = rgb(0, 0, 0, 0.3), cex.main = 1.2, cex.lab = 1.1
)
abline(0, 1, col = "#D32F2F", lwd = 2)
text(0.2, 0.8, "Heavier tails\n(departures)", col = "#D32F2F", cex = 0.9)
# Effect by group
boxplot(y ~ x2,
data = vis_data_s2, col = c("#388E3C", "#7B1FA2"),
main = "Distribution by Group", xlab = "x2", ylab = "Y",
names = c("Group 0", "Group 1"), cex.main = 1.2, cex.lab = 1.1
)
# Tail behavior
plot(sort(vis_data_s2$y)[1:50],
ylab = "Y", xlab = "Order (Lower Tail)",
main = "Lower Tail Concentration", pch = 16, col = "#7B1FA2",
cex.main = 1.2, cex.lab = 1.1
)
beta_tail <- sort(rbeta(1500, 2, 2))[1:50]
points(beta_tail, pch = 1, col = "#1976D2")
legend("topleft", c("EKw", "Beta(2,2)"),
col = c("#7B1FA2", "#1976D2"),
pch = c(16, 1), bty = "n"
)
# mtext("Figure 2: Distributional Characteristics - Scenario 2 (Heavy Tails)",
# side = 3, line = -2, outer = TRUE, font = 2, cex = 1.1)
## ----scenario2_sim, cache=TRUE------------------------------------------------
results_s2 <- run_full_simulation(
n_sim = 200,
n = 300,
dgp_fun = dgp_heavy_tails,
params = list(
beta_alpha = c(0.8, -0.5),
beta_beta = c(0.3, 0.4),
beta_lambda = c(0.6)
),
formula = y ~ x1 | x2
)
comp_s2 <- make_comparison_table(results_s2)
## ----scenario2_coef_comparison, cache=TRUE------------------------------------
# Fit models to one dataset
set.seed(101112)
data_coef_s2 <- dgp_heavy_tails(300, list(
beta_alpha = c(0.8, -0.5),
beta_beta = c(0.3, 0.4),
beta_lambda = c(0.6)
))
fit_beta_s2 <- betareg(y ~ x1 | x2, data = data_coef_s2)
fit_kw_s2 <- gkwreg(y ~ x1 | x2, data = data_coef_s2, family = "kw")
coef_beta_s2 <- coef(fit_beta_s2)
se_beta_s2 <- sqrt(diag(vcov(fit_beta_s2)))
coef_kw_s2 <- coef(fit_kw_s2)
se_kw_s2 <- fit_kw_s2$se
# Note: True parameters are on different scale (α, β vs μ, φ)
# Focus on relative comparisons
coef_comparison_s2 <- data.frame(
Parameter = names(coef_beta_s2),
Beta_Est = round(coef_beta_s2, 4),
Beta_SE = round(se_beta_s2, 4),
Beta_Z = round(coef_beta_s2 / se_beta_s2, 2),
Kw_Est = round(coef_kw_s2, 4),
Kw_SE = round(se_kw_s2, 4),
Kw_Z = round(coef_kw_s2 / se_kw_s2, 2),
SE_Ratio = round(se_kw_s2 / se_beta_s2, 3)
)
knitr::kable(
row.names = FALSE,
coef_comparison_s2,
caption = "Table 2: Parameter Estimates - Scenario 2 (Heavy Tails, n=300)",
# col.names = c("Parameter", "Est.", "SE", "z", "Est.", "SE", "z", "SE Ratio"),
align = c("l", rep("r", 7))
)
## ----scenario3_dgp------------------------------------------------------------
dgp_extreme <- function(n, params) {
x1 <- rnorm(n, 0, 1)
group <- sample(c("J", "U"), n, replace = TRUE)
alpha <- ifelse(
group == "J",
exp(params$alpha_J[1] + params$alpha_J[2] * x1),
exp(params$alpha_U[1] + params$alpha_U[2] * x1)
)
beta <- ifelse(group == "J", exp(params$beta_J), exp(params$beta_U))
u <- runif(n)
y <- (1 - (1 - u)^(1 / beta))^(1 / alpha)
y <- pmax(pmin(y, 0.9999), 0.0001)
data.frame(y = y, x1 = x1, group = factor(group))
}
## ----scenario3_visualization, fig.height=7, fig.cap="Extreme Distributional Shapes - Scenario 3 (Boundary Concentration)"----
set.seed(131415)
vis_data_s3 <- dgp_extreme(2000, list(
alpha_J = c(-1.5, 0.2),
beta_J = 2.0,
alpha_U = c(-1.8, 0.1),
beta_U = -0.8
))
par(mfrow = c(2, 2), mar = c(4, 4, 3, 2))
# J-shaped distribution
hist(vis_data_s3$y[vis_data_s3$group == "J"],
breaks = 50,
col = "#FF6F00", border = "white",
main = "J-Shaped Distribution", xlab = "Y", prob = TRUE,
xlim = c(0, 1), cex.main = 1.2, cex.lab = 1.1
)
lines(density(vis_data_s3$y[vis_data_s3$group == "J"]),
col = "#D32F2F", lwd = 2
)
text(0.5, 15, sprintf(
"80%% of mass\n< %.2f",
quantile(vis_data_s3$y[vis_data_s3$group == "J"], 0.8)
),
col = "#D32F2F", cex = 0.9
)
# U-shaped distribution
hist(vis_data_s3$y[vis_data_s3$group == "U"],
breaks = 50,
col = "#0277BD", border = "white",
main = "U-Shaped Distribution", xlab = "Y", prob = TRUE,
xlim = c(0, 1), cex.main = 1.2, cex.lab = 1.1
)
lines(density(vis_data_s3$y[vis_data_s3$group == "U"]),
col = "#D32F2F", lwd = 2
)
text(0.5, 3, "Bimodal:\nconcentration\nat boundaries",
col = "#D32F2F", cex = 0.9
)
# Combined distribution
plot(density(vis_data_s3$y),
main = "Mixture Distribution",
xlab = "Y", ylab = "Density", lwd = 2, col = "#7B1FA2",
cex.main = 1.2, cex.lab = 1.1
)
polygon(density(vis_data_s3$y), col = rgb(0.48, 0.12, 0.63, 0.3), border = NA)
# Empirical CDF showing boundary concentration
plot(ecdf(vis_data_s3$y),
main = "Empirical CDF",
xlab = "Y", ylab = "F(Y)", lwd = 2, col = "#388E3C",
cex.main = 1.2, cex.lab = 1.1
)
abline(v = c(0.1, 0.9), lty = 2, col = "gray40")
text(0.05, 0.5, sprintf(
"%.0f%%\n< 0.1",
100 * mean(vis_data_s3$y < 0.1)
),
col = "#D32F2F", cex = 0.9
)
text(0.95, 0.5, sprintf(
"%.0f%%\n> 0.9",
100 * mean(vis_data_s3$y > 0.9)
),
col = "#D32F2F", cex = 0.9
)
# mtext("Figure 3: Extreme Distributional Shapes - Scenario 3 (Boundary Concentration)",
# side = 3, line = -2, outer = TRUE, font = 2, cex = 1.1)
## ----scenario3_sim, cache=TRUE------------------------------------------------
results_s3 <- run_full_simulation(
n_sim = 200,
n = 400,
dgp_fun = dgp_extreme,
params = list(
alpha_J = c(-1.5, 0.2),
beta_J = 2.0,
alpha_U = c(-1.8, 0.1),
beta_U = -0.8
),
formula = y ~ x1 * group | group
)
comp_s3 <- make_comparison_table(results_s3)
## ----scenario3_coef_comparison, cache=TRUE------------------------------------
# For this scenario, we'll fit to a single J-shaped dataset where Beta converges
set.seed(161718)
# Generate J-shaped only for better Beta convergence probability
data_coef_s3 <- dgp_extreme(400, list(
alpha_J = c(-1.2, 0.15), # Slightly less extreme for illustration
beta_J = 1.8,
alpha_U = c(-1.5, 0.1),
beta_U = -0.5
))
# Try fitting - Beta may fail
fit_beta_s3 <- tryCatch(
{
betareg(y ~ x1 * group | group, data = data_coef_s3)
},
error = function(e) NULL
)
fit_kw_s3 <- gkwreg(y ~ x1 * group | group, data = data_coef_s3, family = "kw")
if (!is.null(fit_beta_s3) && fit_beta_s3$converged) {
coef_beta_s3 <- coef(fit_beta_s3)
se_beta_s3 <- sqrt(diag(vcov(fit_beta_s3)))
coef_kw_s3 <- coef(fit_kw_s3)
se_kw_s3 <- fit_kw_s3$se
coef_comparison_s3 <- data.frame(
Parameter = names(coef_beta_s3),
Beta_Est = round(coef_beta_s3, 4),
Beta_SE = round(se_beta_s3, 4),
Beta_Z = round(coef_beta_s3 / se_beta_s3, 2),
Kw_Est = round(coef_kw_s3, 4),
Kw_SE = round(se_kw_s3, 4),
Kw_Z = round(coef_kw_s3 / se_kw_s3, 2),
SE_Ratio = round(se_kw_s3 / se_beta_s3, 3)
)
knitr::kable(
coef_comparison_s3,
caption = "Table 3: Parameter Estimates - Scenario 3 (Extreme Shapes, n=400, Beta Converged)",
col.names = c("Parameter", "Est.", "SE", "z", "Est.", "SE", "z", "SE Ratio"),
align = c("l", rep("r", 7))
)
} else {
cat("**Table 3: Parameter Estimates - Scenario 3**\n\n")
cat("Beta regression failed to converge (as expected in 94.5% of cases).\n")
cat("Only Kumaraswamy estimates are available:\n\n")
coef_kw_s3 <- coef(fit_kw_s3)
se_kw_s3 <- fit_kw_s3$se
knitr::kable(
digits = 3,
data.frame(
row.names = FALSE,
Parameter = names(coef_kw_s3),
Estimate = round(coef_kw_s3, 4),
SE = round(se_kw_s3, 4),
z_stat = round(coef_kw_s3 / se_kw_s3, 2),
p_value = round(2 * pnorm(-abs(coef_kw_s3 / se_kw_s3)), 4)
),
caption = "Kumaraswamy Parameter Estimates (Beta Failed)"
)
}
## ----results_table, echo=FALSE------------------------------------------------
all_scenarios <- rbind(
data.frame(Scenario = "S1: Well-Specified Beta", comp_s1),
data.frame(Scenario = "S2: Heavy Tails", comp_s2),
data.frame(Scenario = "S3: Extreme Shapes", comp_s3)
)
knitr::kable(
row.names = FALSE,
all_scenarios,
caption = "Table 4: Comprehensive Model Comparison Across Three Simulation Scenarios",
digits = c(0, 0, 0, 1, 2, 3, 3),
align = c("l", "l", "r", "r", "r", "r", "r")
)
## ----comprehensive_comparison, echo=FALSE, fig.height=6, fig.cap="Comparative Performance Across Scenarios"----
par(mfrow = c(1, 3), mar = c(10, 4, 3, 1))
# AIC by scenario for Beta and Kumaraswamy only
scenarios <- c("S1", "S2", "S3")
beta_aic <- c(
comp_s1$AIC[comp_s1$Model == "Beta (betareg)"],
comp_s2$AIC[comp_s2$Model == "Beta (betareg)"],
comp_s3$AIC[comp_s3$Model == "Beta (betareg)"]
)
kw_aic <- c(
comp_s1$AIC[comp_s1$Model == "Kumaraswamy"],
comp_s2$AIC[comp_s2$Model == "Kumaraswamy"],
comp_s3$AIC[comp_s3$Model == "Kumaraswamy"]
)
# For S3, Beta AIC is huge, so use log scale or relative
aic_matrix <- rbind(beta_aic, kw_aic)
colnames(aic_matrix) <- scenarios
barplot(aic_matrix,
beside = TRUE, col = c("#D32F2F", "#388E3C"),
names.arg = scenarios, las = 1,
main = "Model Fit (AIC)", ylab = "AIC",
cex.names = 1.2, cex.main = 1.2
)
legend("topleft", c("Beta", "Kumaraswamy"),
fill = c("#D32F2F", "#388E3C"),
bty = "n", cex = 1.1
)
text(2, max(beta_aic) * 0.9, "Lower\nis better", cex = 0.9, col = "gray30")
# Convergence rates
beta_conv <- c(
comp_s1$Conv_Rate[comp_s1$Model == "Beta (betareg)"],
comp_s2$Conv_Rate[comp_s2$Model == "Beta (betareg)"],
comp_s3$Conv_Rate[comp_s3$Model == "Beta (betareg)"]
)
kw_conv <- c(
comp_s1$Conv_Rate[comp_s1$Model == "Kumaraswamy"],
comp_s2$Conv_Rate[comp_s2$Model == "Kumaraswamy"],
comp_s3$Conv_Rate[comp_s3$Model == "Kumaraswamy"]
)
conv_matrix <- rbind(beta_conv, kw_conv)
barplot(conv_matrix,
beside = TRUE, col = c("#D32F2F", "#388E3C"),
names.arg = scenarios, las = 1, ylim = c(0, 110),
main = "Convergence Reliability", ylab = "Success Rate (%)",
cex.names = 1.2, cex.main = 1.2
)
abline(h = 95, lty = 2, lwd = 2, col = "darkred")
text(1.5, 98, "95% threshold", col = "darkred", cex = 0.9)
# Computational speedup
beta_time <- c(
comp_s1$Time[comp_s1$Model == "Beta (betareg)"],
comp_s2$Time[comp_s2$Model == "Beta (betareg)"],
comp_s3$Time[comp_s3$Model == "Beta (betareg)"]
)
kw_time <- c(
comp_s1$Time[comp_s1$Model == "Kumaraswamy"],
comp_s2$Time[comp_s2$Model == "Kumaraswamy"],
comp_s3$Time[comp_s3$Model == "Kumaraswamy"]
)
speedup <- beta_time / kw_time
barplot(speedup,
names.arg = scenarios, col = "#388E3C",
las = 1, main = "Computational Speedup",
ylab = "Speedup Factor (Kw vs Beta)",
cex.names = 1.2, cex.main = 1.2
)
abline(h = 1, lty = 2, col = "gray40")
text(1:3, speedup + 0.5, sprintf("%.1fx", speedup), cex = 1.1, font = 2)
# mtext("Figure 4: Comparative Performance Across Scenarios",
# side = 3, line = -2, outer = TRUE, font = 2, cex = 1.2)
## ----efficiency_analysis, echo=FALSE------------------------------------------
timing <- aggregate(
Time ~ Model,
data = all_scenarios[!is.na(all_scenarios$Time), ],
FUN = mean
)
timing <- timing[order(timing$Time), ]
timing$Speedup <- timing$Time[timing$Model == "Beta (betareg)"] / timing$Time
knitr::kable(
timing,
caption = "Table 5: Average Computational Time and Speedup Relative to Beta Regression",
digits = 3,
col.names = c("Model", "Mean Time (sec)", "Speedup Factor"),
row.names = FALSE
)
## ----session_info, echo=FALSE-------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.