R/gkwfit.R
In gkwreg: Generalized Kumaraswamy Regression Models for Bounded Data

Documented in .determine_start_values .family_to_code .fit_tmb .get_default_fixed .get_default_start .get_family_param_info .validate_data .validate_parameters

# Modularized implementation of the GKw family fitting functions
# This file contains the main gkwfit function and all its helper functions

#' Convert family string to numeric code for TMB
#'
#' @param family Character string specifying the family.
#' @return Integer code for TMB. 0="gkw", 1="bkw", 2="kkw", 3="ekw", 4="mc", 5="kw", 6="beta"
#' @keywords internal
.family_to_code <- function(family) {
  codes <- c(gkw = 0, bkw = 1, kkw = 2, ekw = 3, mc = 4, kw = 5, beta = 6)
  return(codes[family])
}

#' Get family parameter information
#'
#' @param family Character string specifying the family.
#' @return List with parameter names and count.
#' @keywords internal
.get_family_param_info <- function(family) {
  family_params <- list(
    gkw = list(names = c("alpha", "beta", "gamma", "delta", "lambda"), n = 5),
    bkw = list(names = c("alpha", "beta", "gamma", "delta"), n = 4),
    kkw = list(names = c("alpha", "beta", "delta", "lambda"), n = 4),
    ekw = list(names = c("alpha", "beta", "lambda"), n = 3),
    mc = list(names = c("gamma", "delta", "lambda"), n = 3),
    kw = list(names = c("alpha", "beta"), n = 2),
    beta = list(names = c("gamma", "delta"), n = 2)
  )

  return(family_params[[family]])
}

#' Get default fixed parameters for a family
#'
#' @param family Character string specifying the family.
#' @return Named list of fixed parameters.
#' @keywords internal
.get_default_fixed <- function(family) {
  family_fixed <- list()

  if (family != "gkw") {
    # Set fixed parameters based on family
    if (family == "bkw") {
      family_fixed$lambda <- 1 # BKw: lambda = 1 fixed
    } else if (family == "kkw") {
      family_fixed$gamma <- 1 # KKw: gamma = 1 fixed
    } else if (family == "ekw") {
      family_fixed$gamma <- 1 # EKw: gamma = 1, delta = 0 fixed
      family_fixed$delta <- 0
    } else if (family == "mc") {
      family_fixed$alpha <- 1 # Mc: alpha = 1, beta = 1 fixed
      family_fixed$beta <- 1
    } else if (family == "kw") {
      family_fixed$gamma <- 1 # Kw: gamma = 1, delta = 0, lambda = 1 fixed
      family_fixed$delta <- 0
      family_fixed$lambda <- 1
    } else if (family == "beta") {
      family_fixed$alpha <- 1 # Beta: alpha = 1, beta = 1, lambda = 1 fixed
      family_fixed$beta <- 1
      family_fixed$lambda <- 1
    }
  }

  return(family_fixed)
}

#' Get default start values for a family
#'
#' @param family Character string specifying the family.
#' @return Named list of starting values.
#' @keywords internal
.get_default_start <- function(family) {
  if (family == "gkw") {
    start <- list(alpha = 2, beta = 2, gamma = 1, delta = 0.5, lambda = 2)
  } else if (family == "bkw") {
    start <- list(alpha = 2, beta = 2, gamma = 1, delta = 0.5) # lambda = 1 fixed
  } else if (family == "kkw") {
    start <- list(alpha = 2, beta = 2, delta = 0.5, lambda = 2) # gamma = 1 fixed
  } else if (family == "ekw") {
    start <- list(alpha = 2, beta = 2, lambda = 2) # gamma = 1, delta = 0 fixed
  } else if (family == "mc") {
    start <- list(gamma = 1, delta = 0.5, lambda = 2) # alpha = 1, beta = 1 fixed
  } else if (family == "kw") {
    start <- list(alpha = 2, beta = 2) # gamma = 1, delta = 0, lambda = 1 fixed
  } else if (family == "beta") {
    start <- list(gamma = 1, delta = 0.5) # alpha = 1, beta = 1, lambda = 1 fixed
  }

  return(start)
}

#' Validate data for GKw family distributions
#'
#' @param data Numeric vector with values in the (0, 1) interval.
#' @param n_params Number of parameters to estimate.
#' @return Validated and possibly adjusted data vector.
#' @keywords internal
.validate_data <- function(data, n_params) {
  # Data validation
  if (!is.numeric(data)) {
    stop("Data must be numeric")
  }

  if (any(is.na(data))) {
    warning("Missing values removed from data")
    data <- data[!is.na(data)]
  }

  if (length(data) < n_params) {
    stop(paste0("At least ", n_params, " observations are required to fit a ", n_params, "-parameter model"))
  }

  # Check data bounds with a small tolerance for numerical precision
  epsilon <- .Machine$double.eps^0.5
  if (any(data <= epsilon | data >= (1 - epsilon))) {
    # Try to automatically adjust boundary values
    orig_length <- length(data)
    boundary_vals <- data <= epsilon | data >= (1 - epsilon)

    if (sum(boundary_vals) > 0.1 * orig_length) {
      # If more than 10% of data is at boundaries, warn and stop
      stop(
        "Too many data values (", sum(boundary_vals),
        ") are at or beyond the boundaries of (0, 1). Please preprocess your data."
      )
    } else {
      # Adjust boundary values with a warning
      data[data <= epsilon] <- epsilon + epsilon * 10
      data[data >= (1 - epsilon)] <- (1 - epsilon) - epsilon * 10
      warning("Adjusted ", sum(boundary_vals), " data values that were at or beyond boundaries to be within (0, 1)")
    }
  }

  return(data)
}

#' Validate parameters for GKw family distributions
#'
#' @param start List with initial parameter values.
#' @param fixed List of parameters to be held fixed (not estimated).
#' @param param_names Character vector of parameter names needed for the model.
#' @return List containing validated start and fixed parameter lists.
#' @keywords internal
.validate_parameters <- function(start, fixed, param_names) {
  # Check that all required parameters for the selected family are provided
  if (!is.null(start)) {
    missing_params <- setdiff(param_names, names(start))

    if (length(missing_params) > 0) {
      stop("Missing parameters in 'start': ", paste(missing_params, collapse = ", "))
    }

    # Check for valid parameter values
    invalid_params <- names(start)[start <= 0]
    if (length(invalid_params) > 0) {
      warning(
        "Initial values for parameters must be positive. Adjusting: ",
        paste(invalid_params, collapse = ", ")
      )
      for (param in invalid_params) {
        start[[param]] <- 1.0 # Set to a reasonable default
      }
    }
  }

  # Apply fixed parameters if any
  if (!is.null(fixed)) {
    # Validate fixed parameters
    fixed_invalid <- names(fixed)[fixed <= 0 & names(fixed) != "delta"]
    if (length(fixed_invalid) > 0) {
      stop("Fixed parameters must be positive: ", paste(fixed_invalid, collapse = ", "))
    }

    # For delta, ensure it's at least 0
    if ("delta" %in% names(fixed) && fixed$delta < 0) {
      stop("Fixed parameter 'delta' must be non-negative")
    }

    for (param in names(fixed)) {
      if (param %in% names(start)) {
        start[[param]] <- fixed[[param]]
      } else if (!param %in% c("alpha", "beta", "gamma", "delta", "lambda")) {
        warning("Ignoring unknown fixed parameter: ", param)
      }
    }
  }

  return(list(start = start, fixed = fixed))
}

#' Determine initial parameter values
#'
#' @param data Numeric vector with values in the (0, 1) interval.
#' @param start Optional list with initial parameter values.
#' @param fixed Optional list of parameters to be held fixed.
#' @param family Character string specifying the distribution family.
#' @param use_moments Logical; if TRUE, uses method of moments for initial values.
#' @param silent Logical; if TRUE, suppresses messages.
#' @return List with initial parameter values.
#' @keywords internal
.determine_start_values <- function(data, start, fixed, family, use_moments, silent) {
  param_info <- .get_family_param_info(family)
  param_names <- param_info$names

  # Get default fixed parameters for the family
  family_fixed <- .get_default_fixed(family)

  # Merge user-provided fixed parameters with family-specific fixed parameters
  if (!is.null(fixed)) {
    fixed <- c(fixed, family_fixed[!names(family_fixed) %in% names(fixed)])
  } else {
    fixed <- family_fixed
  }

  # Generate initial values using method of moments if requested
  if (use_moments) {
    if (!silent) {
      message("Computing starting values using method of moments...")
    }

    # Try to use gkwgetstartvalues with a fallback
    moment_start <- tryCatch(
      {
        gkwgetstartvalues(data, n_starts = 10)
      },
      error = function(e) {
        warning(
          "Error in method of moments estimation: ", e$message,
          ". Using default starting values."
        )
        c(alpha = 2, beta = 2, gamma = 1, delta = 0.5, lambda = 2)
      },
      warning = function(w) {
        warning("Warning in method of moments estimation: ", w$message)
        # Continue execution but still return the result
        NULL
      }
    )

    if (is.null(start) && !is.null(moment_start)) {
      start <- as.list(moment_start)
    } else if (!silent) {
      message("Using provided start values instead of method of moments estimates.")
    }
  }

  # If start is still NULL, use default starting values
  if (is.null(start)) {
    # Get default starting values for the family
    start <- .get_default_start(family)

    if (!silent) {
      message("Using default starting values for ", family, " family parameters")
    }
  }

  # Validate parameters
  valid <- .validate_parameters(start, fixed, param_names)
  start <- valid$start
  fixed <- valid$fixed

  return(list(start = start, fixed = fixed))
}


#' Fit GKw family distributions using TMB
#'
#' @param data Numeric vector with values in the (0, 1) interval.
#' @param family Character string specifying the distribution family.
#' @param start List with initial parameter values.
#' @param fixed List of parameters to be held fixed.
#' @param method Optimization method: "nlminb" or "optim".
#' @param hessian Logical; if TRUE, computes standard errors and covariance matrix.
#' @param conf.level Confidence level for intervals.
#' @param optimizer.control List of control parameters for the optimizer.
#' @param silent Logical; if TRUE, suppresses messages.
#'
#' @importFrom stats nlminb optim pnorm
#' @importFrom utils modifyList
#'
#' @return List containing fit results.
#' @keywords internal
.fit_tmb <- function(data, family, start, fixed, method, hessian, conf.level, optimizer.control, silent) {
  if (!requireNamespace("TMB", quietly = TRUE)) {
    stop("Package 'TMB' is required but not installed. Please install it with install.packages('TMB')")
  }

  # dll_name <- "gkwmletmb"

  # Check and compile TMB code
  tryCatch(
    {
      .check_and_compile_TMB_code("gkwmletmb", verbose = !silent)
    },
    error = function(e) {
      stop("Failed to compile TMB code: ", e$message)
    }
  )

  if (!silent) {
    message("Checking TMB model compilation...")
  }

  # Get the numeric code for family
  family_code <- .family_to_code(family)

  # Get the parameter names for this family
  param_names <- .get_family_param_info(family)$names

  # Create full parameter list for TMB
  full_start_tmb <- list(
    log_alpha = log(1), # Default values, will be overwritten if needed
    log_beta = log(1),
    log_gamma = log(1),
    log_delta = log(0.0001), # Small value for delta to avoid log(0)
    log_lambda = log(1)
  )

  # Update with user-provided start values
  for (param in names(start)) {
    full_start_tmb[[paste0("log_", param)]] <- log(start[[param]])
  }

  # Apply fixed parameters for TMB
  map <- list()

  # Add family-specific fixed parameters to the map
  all_fixed_params <- c(names(fixed), setdiff(
    c("alpha", "beta", "gamma", "delta", "lambda"),
    c(param_names, names(fixed))
  ))

  for (param in all_fixed_params) {
    param_name <- paste0("log_", param)
    if (param %in% names(fixed)) {
      # User or family provided fixed value
      full_start_tmb[[param_name]] <- log(fixed[[param]])
    } else if (param == "alpha" || param == "beta") {
      # Default alpha, beta = 1 when not specified
      full_start_tmb[[param_name]] <- log(1)
    } else if (param == "gamma") {
      # Default gamma = 1 when not specified
      full_start_tmb[[param_name]] <- log(1)
    } else if (param == "delta") {
      # Default delta = 0 when not specified (use small value to avoid log(0))
      full_start_tmb[[param_name]] <- log(0.0001)
    } else if (param == "lambda") {
      # Default lambda = 1 when not specified
      full_start_tmb[[param_name]] <- log(1)
    }

    # Add to map to keep parameter fixed
    map[[param_name]] <- factor(NA)
  }

  # Prepare data for TMB
  tmb_data <- list(
    x = data,
    family = family_code
  )

  # Create TMB object
  obj <- tryCatch(
    {
      TMB::MakeADFun(
        data = tmb_data,
        parameters = full_start_tmb,
        map = if (length(map) > 0) map else NULL,
        DLL = "gkwmletmb",
        silent = silent
      )
    },
    error = function(e) {
      stop("Error creating TMB model: ", e$message)
    }
  )

  # Set up optimizer controls based on the chosen method
  if (method == "nlminb") {
    control_defaults <- list(eval.max = 500, iter.max = 300, trace = ifelse(silent, 0, 1))
  } else if (method == "optim") {
    control_defaults <- list(maxit = 500, trace = ifelse(silent, 0, 1))
  } else {
    stop("Unknown optimizer method: ", method)
  }

  # Merge user controls with defaults, with user controls taking precedence
  control <- modifyList(control_defaults, optimizer.control)

  # Run optimization
  if (method == "nlminb") {
    opt <- tryCatch(
      {
        nlminb(
          start = obj$par,
          objective = obj$fn,
          gradient = obj$gr,
          control = control
        )
      },
      error = function(e) {
        stop("Optimization with nlminb failed: ", e$message)
      }
    )

    opt$convergence <- opt$convergence == 0
    opt$message <- opt$message
  } else {
    opt <- tryCatch(
      {
        optim(
          par = obj$par,
          fn = obj$fn,
          gr = obj$gr,
          method = method,
          control = control
        )
      },
      error = function(e) {
        stop("Optimization with optim failed: ", e$message)
      }
    )

    opt$objective <- opt$value

    opt$convergence <- opt$convergence == 0
    opt$message <- if (opt$convergence) "Successful convergence" else "Optimization failed to converge"
  }

  if (!opt$convergence) {
    warning("Model did not converge: ", opt$message)
  }

  # Get parameter estimates for all parameters
  all_params <- exp(opt$par)
  names(all_params) <- sub("log_", "", names(all_params))

  # Filter parameters to include only those relevant for the family
  filtered_coefficients <- all_params[paste0("log_", param_names) %in% names(opt$par)]
  names(filtered_coefficients) <- param_names[param_names %in% sub("log_", "", names(opt$par))]

  # If some parameters are missing (fixed), add them from the fixed list
  for (param in param_names) {
    if (!param %in% names(filtered_coefficients)) {
      if (param %in% names(fixed)) {
        filtered_coefficients[param] <- fixed[[param]]
      } else {
        # Use default values for missing parameters
        if (param == "alpha" || param == "beta" || param == "gamma" || param == "lambda") {
          filtered_coefficients[param] <- 1.0
        } else if (param == "delta") {
          filtered_coefficients[param] <- 0.0
        }
      }
    }
  }

  # Ensure the order matches param_names
  filtered_coefficients <- filtered_coefficients[param_names]

  # Calculate standard errors and Hessian if requested
  filtered_std_errors <- rep(NA, length(param_names))
  names(filtered_std_errors) <- param_names

  cov_matrix <- NULL
  coef_summary <- NULL
  conf_int <- NULL

  if (hessian) {
    sd_report <- tryCatch(
      {
        TMB::sdreport(obj)
      },
      error = function(e) {
        warning("Error calculating standard errors: ", e$message)
        NULL
      }
    )

    if (!is.null(sd_report) && !is.character(sd_report)) {
      # Extract parameter estimates, SEs, and covariance matrix
      cov_matrix <- sd_report$cov.fixed
      std_errors_log <- sqrt(diag(cov_matrix))

      # Get the SEs only for non-fixed parameters
      for (i in seq_along(param_names)) {
        param <- param_names[i]
        log_param <- paste0("log_", param)

        if (log_param %in% names(std_errors_log)) {
          # Use the Delta method to transform SEs to original scale
          filtered_std_errors[i] <- std_errors_log[log_param] * filtered_coefficients[i]
        }
      }

      # Create coefficient summary
      coef_summary <- data.frame(
        Estimate = filtered_coefficients,
        `Std. Error` = filtered_std_errors,
        `z value` = filtered_coefficients / filtered_std_errors,
        `Pr(>|z|)` = 2 * pnorm(abs(filtered_coefficients / filtered_std_errors), lower.tail = FALSE),
        row.names = param_names,
        check.names = FALSE
      )

      # Calculate confidence intervals
      z_value <- stats::qnorm(1 - (1 - conf.level) / 2)
      conf_int_params <- character()
      conf_int_estimates <- numeric()
      conf_int_se <- numeric()
      conf_int_lower <- numeric()
      conf_int_upper <- numeric()

      for (i in seq_along(param_names)) {
        if (!is.na(filtered_std_errors[i])) {
          param <- param_names[i]
          conf_int_params <- c(conf_int_params, param)
          conf_int_estimates <- c(conf_int_estimates, filtered_coefficients[i])
          conf_int_se <- c(conf_int_se, filtered_std_errors[i])
          lower <- max(filtered_coefficients[i] - z_value * filtered_std_errors[i], .Machine$double.eps)
          upper <- filtered_coefficients[i] + z_value * filtered_std_errors[i]
          conf_int_lower <- c(conf_int_lower, lower)
          conf_int_upper <- c(conf_int_upper, upper)
        }
      }

      if (length(conf_int_params) > 0) {
        conf_int <- data.frame(
          parameter = conf_int_params,
          estimate = conf_int_estimates,
          std.error = conf_int_se,
          lower = conf_int_lower,
          upper = conf_int_upper,
          row.names = NULL, check.names = FALSE
        )
      }
    } else {
      warning("Hessian calculation failed, standard errors not available")

      # Create coefficient summary without SEs
      coef_summary <- data.frame(
        Estimate = filtered_coefficients,
        `Std. Error` = filtered_std_errors,
        `z value` = rep(NA, length(param_names)),
        `Pr(>|z|)` = rep(NA, length(param_names)),
        row.names = param_names,
        check.names = FALSE
      )
    }
  } else {
    # If hessian = FALSE, only report parameter estimates
    coef_summary <- data.frame(
      Estimate = filtered_coefficients,
      `Std. Error` = filtered_std_errors,
      `z value` = rep(NA, length(param_names)),
      `Pr(>|z|)` = rep(NA, length(param_names)),
      row.names = param_names,
      check.names = FALSE
    )
  }

  # Calculate log likelihood, AIC, BIC
  loglik <- -opt$objective
  n <- length(data)
  k <- sum(!is.na(opt$par)) # Count non-fixed parameters
  aic <- -2 * loglik + 2 * k
  bic <- -2 * loglik + log(n) * k
  aicc <- aic + (2 * k * (k + 1)) / (n - k - 1)

  # Create comprehensive result object for TMB
  result <- list(
    coefficients = filtered_coefficients, # Only the parameters for this family
    std.errors = filtered_std_errors,
    coef_summary = coef_summary,
    vcov = cov_matrix,
    loglik = loglik,
    AIC = aic,
    BIC = bic,
    AICc = aicc,
    data = data,
    nobs = n,
    df = k,
    convergence = opt$convergence,
    message = opt$message,
    method = "tmb",
    optimizer_method = method,
    conf.int = conf_int,
    conf.level = conf.level,
    optimizer = opt,
    obj = obj,
    fixed = fixed
  )

  return(result)
}



#' Calculate profile likelihoods
#'
#' @param result Fit result from TMB or Newton-Raphson.
#' @param data Numeric vector with values in the (0, 1) interval.
#' @param family Character string specifying the distribution family.
#' @param fixed List of parameters to be held fixed.
#' @param fit Estimation method: "tmb" or "nr".
#' @param method Optimization method (for TMB): "nlminb" or "optim".
#' @param npoints Number of points in profile.
#' @param silent Logical; if TRUE, suppresses messages.
#' @return List of profile likelihoods.
#' @keywords internal
.calculate_profiles <- function(result, data, family, fixed, fit, method, npoints, silent) {
  prof_list <- list()

  # Filter out fixed parameters
  param_info <- .get_family_param_info(family)
  params_to_profile <- setdiff(param_info$names, names(fixed))

  # Configure progress reporting
  total_profiles <- length(params_to_profile)
  if (!silent && total_profiles > 0) {
    message("Computing profile likelihoods for ", total_profiles, " parameters...")
  }

  # Process each parameter
  for (param_idx in seq_along(params_to_profile)) {
    param <- params_to_profile[param_idx]

    if (!silent) {
      message("  Computing profile for ", param, " (", param_idx, "/", total_profiles, ")...")
    }

    # Get current parameter estimate
    est_value <- result$coefficients[param]

    # Determine appropriate range for profiling
    if (!is.null(result$std.errors) && !is.na(result$std.errors[param])) {
      # Use standard error to determine range if available
      se <- result$std.errors[param]
      # Calculate range ensuring positive values and reasonable breadth
      min_value <- max(est_value - 3 * se, .Machine$double.eps * 10)
      max_value <- est_value + 3 * se

      profile_range <- seq(min_value, max_value, length.out = npoints)
    } else {
      # If no standard error, use a proportional range
      min_value <- max(est_value * 0.2, .Machine$double.eps * 10)
      max_value <- est_value * 2.0

      profile_range <- seq(min_value, max_value, length.out = npoints)
    }

    # Calculate log-likelihood at each point in the profile
    prof_ll <- numeric(length(profile_range))

    for (i in seq_along(profile_range)) {
      if (fit == "tmb") {
        # For TMB, modify log parameters
        tmb_par <- result$optimizer$par
        log_param <- paste0("log_", param)
        tmb_par[log_param] <- log(profile_range[i])

        # Evaluate negative log-likelihood
        prof_ll[i] <- -result$obj$fn(tmb_par)
      } else {
        # For Newton-Raphson, directly modify the parameter vector
        mod_params <- result$coefficients
        mod_params[param] <- profile_range[i]

        # Transform to the family-specific parameter vector
        param_names <- .get_family_param_info(family)$names
        family_mod_params <- mod_params[param_names]

        # Use the appropriate log-likelihood function based on family
        ll_func <- switch(family,
          "gkw" = llgkw,
          "bkw" = llbkw,
          "kkw" = llkkw,
          "ekw" = llekw,
          "mc" = llmc,
          "kw" = llkw,
          "beta" = llbeta
        )

        # Use ll_func directly (already negated for consistency)
        prof_ll[i] <- ll_func(family_mod_params, data)
      }
    }

    # Create profile data frame
    prof_list[[param]] <- data.frame(
      parameter = param,
      value = profile_range,
      loglik = prof_ll
    )
  }

  return(prof_list)
}

#' Fit submodels for comparison
#'
#' @param data Numeric vector with values in the (0, 1) interval.
#' @param result Main fit result.
#' @param fit Estimation method: "tmb" or "nr".
#' @param method Optimization method (for TMB): "nlminb" or "optim".
#' @param hessian Logical; if TRUE, computes standard errors and covariance matrix.
#' @param optimizer.control List of control parameters for the optimizer.
#' @param silent Logical; if TRUE, suppresses messages.
#'
#' @importFrom stats pchisq
#'
#' @return List containing submodel fits and LRT results.
#' @keywords internal
.fit_submodels <- function(data, result, fit, method, hessian, optimizer.control, silent) {
  # Only fit submodels if current model is GKw (full model)
  if (result$family != "gkw") {
    if (!silent) {
      message("Submodel fitting is only available when family = 'gkw'. Skipping.")
    }
    return(NULL)
  }

  if (!silent) {
    message("Fitting submodels for comparison...")
  }

  submodel_results <- list()
  submodel_families <- c("bkw", "kkw", "ekw", "mc", "kw", "beta")

  for (submodel in submodel_families) {
    if (!silent) {
      message("  Fitting ", submodel, " submodel...")
    }

    tryCatch(
      {
        # Use current parameter estimates as starting values for submodel
        submodel_start <- as.list(result$coefficients)

        # Subset to include only parameters needed for this family
        submodel_param_names <- .get_family_param_info(submodel)$names
        submodel_start <- submodel_start[submodel_param_names]

        submodel_result <- gkwfit(
          data = data,
          family = submodel,
          start = submodel_start,
          fit = fit,
          method = method,
          use_moments = FALSE,
          hessian = hessian,
          profile = FALSE,
          plot = FALSE,
          silent = TRUE,
          optimizer.control = optimizer.control
        )

        submodel_results[[submodel]] <- submodel_result
      },
      error = function(e) {
        warning("Failed to fit ", submodel, " submodel: ", e$message)
      }
    )
  }

  # Perform likelihood ratio tests
  lrt_results <- list()

  for (submodel in names(submodel_results)) {
    lrt_result <- data.frame(
      Model = paste(submodel, "vs GKw"),
      df = result$df - submodel_results[[submodel]]$df,
      loglik_full = result$loglik,
      loglik_reduced = submodel_results[[submodel]]$loglik,
      LR_statistic = 2 * (result$loglik - submodel_results[[submodel]]$loglik),
      p_value = 1 - pchisq(
        2 * (result$loglik - submodel_results[[submodel]]$loglik),
        result$df - submodel_results[[submodel]]$df
      )
    )
    lrt_results[[submodel]] <- lrt_result
  }

  return(list(submodels = submodel_results, lrt = lrt_results))
}



#' Generate diagnostic plots for distribution models
#'
#' This internal function creates a set of diagnostic plots for evaluating
#' the fit of various distribution families to bounded data in the (0, 1) interval.
#' It generates histograms with fitted density overlays, P-P plots, Q-Q plots,
#' and profile likelihood plots when available.
#'
#' @param result A list containing model fit results from TMB or Newton-Raphson,
#'        must include a 'coefficients' element with named parameters.
#' @param data Numeric vector with values in the (0, 1) interval.
#' @param family Character string specifying the distribution family.
#'        Supported values: "gkw", "bkw", "kkw", "ekw", "mc", "kw", "beta".
#' @param silent Logical; if TRUE, suppresses messages. Default is FALSE.
#'
#' @return A list of ggplot2 objects:
#'   \item{histogram}{Histogram with fitted density overlay}
#'   \item{pp_plot}{Probability-Probability plot}
#'   \item{qq_plot}{Quantile-Quantile plot}
#'   \item{profile_*}{Profile likelihood plots for each parameter (if available)}
#'
#' @importFrom stats ecdf ppoints qchisq density
#'
#' @keywords internal
.generate_plots <- function(result, data, family, silent = FALSE) {
  plots <- list()

  # Check if ggplot2 is available
  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    warning("Package 'ggplot2' is required for plotting but not installed. Plotting will be disabled.")
    return(NULL)
  }

  if (!silent) {
    message("Generating diagnostic plots...")
  }

  # Calculate density values on a grid
  x_grid <- seq(0.001, 0.999, length.out = 200)

  # Use the appropriate density function based on family
  density_func <- switch(family,
    "gkw" = function(x) {
      dgkw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["gamma"], result$coefficients["delta"],
        result$coefficients["lambda"]
      )
    },
    "bkw" = function(x) {
      dbkw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["gamma"], result$coefficients["delta"]
      )
    },
    "kkw" = function(x) {
      dkkw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["delta"], result$coefficients["lambda"]
      )
    },
    "ekw" = function(x) {
      dekw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["lambda"]
      )
    },
    "mc" = function(x) {
      dmc(
        x, result$coefficients["gamma"], result$coefficients["delta"],
        result$coefficients["lambda"]
      )
    },
    "kw" = function(x) dkw(x, result$coefficients["alpha"], result$coefficients["beta"]),
    "beta" = function(x) dbeta_(x, result$coefficients["gamma"], result$coefficients["delta"])
  )

  density_values <- tryCatch(
    {
      sapply(x_grid, density_func)
    },
    error = function(e) {
      warning("Error calculating density values for plot: ", e$message)
      rep(NA, length(x_grid))
    }
  )

  # Create data frame for plotting
  plot_data <- data.frame(x = x_grid, density = density_values)

  # Create histogram with density curve
  p1 <- ggplot2::ggplot() +
    ggplot2::geom_histogram(ggplot2::aes(x = data, y = ggplot2::after_stat(density)),
      bins = min(30, ceiling(sqrt(length(data)))),
      fill = "lightblue", color = "black", alpha = 0.7
    ) +
    ggplot2::geom_line(
      data = plot_data, ggplot2::aes(x = x, y = density),
      color = "red", linewidth = 1
    ) +
    ggplot2::labs(
      title = paste("Fitted", toupper(family), "Distribution"),
      x = "Data", y = "Density"
    ) +
    ggplot2::theme_minimal()

  plots$histogram <- p1

  # P-P plot (Probability-Probability plot)
  ecdf_vals <- ecdf(data)(sort(data))

  # Use the appropriate CDF function based on family
  cdf_func <- switch(family,
    "gkw" = function(x) {
      pgkw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["gamma"], result$coefficients["delta"],
        result$coefficients["lambda"]
      )
    },
    "bkw" = function(x) {
      pbkw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["gamma"], result$coefficients["delta"]
      )
    },
    "kkw" = function(x) {
      pkkw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["delta"], result$coefficients["lambda"]
      )
    },
    "ekw" = function(x) {
      pekw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["lambda"]
      )
    },
    "mc" = function(x) {
      pmc(
        x, result$coefficients["gamma"], result$coefficients["delta"],
        result$coefficients["lambda"]
      )
    },
    "kw" = function(x) pkw(x, result$coefficients["alpha"], result$coefficients["beta"]),
    "beta" = function(x) pbeta_(x, result$coefficients["gamma"], result$coefficients["delta"])
  )

  # Calculate theoretical CDF values
  theor_cdf <- tryCatch(
    {
      sapply(sort(data), cdf_func)
    },
    error = function(e) {
      warning("Error calculating theoretical CDF for P-P plot: ", e$message)
      rep(NA, length(data))
    }
  )

  # Create P-P plot data frame
  pp_data <- data.frame(Empirical = ecdf_vals, Theoretical = theor_cdf)

  # Create P-P plot
  p2 <- ggplot2::ggplot(pp_data, ggplot2::aes(x = Theoretical, y = Empirical)) +
    ggplot2::geom_point() +
    ggplot2::geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
    ggplot2::labs(
      title = "P-P Plot",
      x = "Theoretical Probability", y = "Empirical Probability"
    ) +
    ggplot2::theme_minimal()

  plots$pp_plot <- p2

  # Q-Q plot (Quantile-Quantile plot)
  # Use the appropriate quantile function based on family
  quant_func <- switch(family,
    "gkw" = function(p) {
      qgkw(
        p, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["gamma"], result$coefficients["delta"],
        result$coefficients["lambda"]
      )
    },
    "bkw" = function(p) {
      qbkw(
        p, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["gamma"], result$coefficients["delta"]
      )
    },
    "kkw" = function(p) {
      qkkw(
        p, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["delta"], result$coefficients["lambda"]
      )
    },
    "ekw" = function(p) {
      qekw(
        p, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["lambda"]
      )
    },
    "mc" = function(p) {
      qmc(
        p, result$coefficients["gamma"], result$coefficients["delta"],
        result$coefficients["lambda"]
      )
    },
    "kw" = function(p) qkw(p, result$coefficients["alpha"], result$coefficients["beta"]),
    "beta" = function(p) qbeta_(p, result$coefficients["gamma"], result$coefficients["delta"])
  )

  # Calculate theoretical quantiles
  theor_quant <- tryCatch(
    {
      sapply(ppoints(length(data)), quant_func)
    },
    error = function(e) {
      warning("Error calculating theoretical quantiles for Q-Q plot: ", e$message)
      rep(NA, length(data))
    }
  )

  # Create Q-Q plot data frame
  qq_data <- data.frame(Theoretical = theor_quant, Empirical = sort(data))

  # Create Q-Q plot
  p3 <- ggplot2::ggplot(qq_data, ggplot2::aes(x = Theoretical, y = Empirical)) +
    ggplot2::geom_point() +
    ggplot2::geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
    ggplot2::labs(
      title = "Q-Q Plot",
      x = "Theoretical Quantiles", y = "Empirical Quantiles"
    ) +
    ggplot2::theme_minimal()

  plots$qq_plot <- p3

  # Add profile likelihood plots if available
  if (!is.null(result$profile) && length(result$profile) > 0) {
    for (param in names(result$profile)) {
      prof_data <- result$profile[[param]]

      # Calculate reference line at max - qchisq(0.95, 1)/2 for 95% confidence
      ref_level <- max(prof_data$loglik, na.rm = TRUE) - qchisq(0.95, 1) / 2

      # Create profile likelihood plot
      p <- ggplot2::ggplot(prof_data, ggplot2::aes(x = value, y = loglik)) +
        ggplot2::geom_line(size = 1) +
        ggplot2::geom_vline(
          xintercept = result$coefficients[param],
          linetype = "dashed", color = "red"
        ) +
        ggplot2::geom_hline(
          yintercept = ref_level,
          linetype = "dotted", color = "blue"
        ) +
        ggplot2::labs(
          title = paste("Profile Likelihood for", param),
          x = param, y = "Log-likelihood"
        ) +
        ggplot2::theme_minimal()

      plots[[paste0("profile_", param)]] <- p
    }
  }
  return(plots)
}


#' Calculate goodness-of-fit statistics
#'
#' @param result Fit result from TMB or Newton-Raphson.
#' @param data Numeric vector with values in the (0, 1) interval.
#' @param family Character string specifying the distribution family.
#' @param silent Logical; if TRUE, suppresses messages.
#' @return List of goodness-of-fit statistics.
#' @importFrom stats ks.test var integrate
#' @keywords internal
.calculate_gof <- function(result, data, family, silent) {
  if (!silent) {
    message("Calculating goodness-of-fit statistics...")
  }

  gof <- list()

  # Use the appropriate CDF function based on family for Kolmogorov-Smirnov test
  cdf_func <- switch(family,
    "gkw" = function(x) {
      pgkw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["gamma"], result$coefficients["delta"],
        result$coefficients["lambda"]
      )
    },
    "bkw" = function(x) {
      pbkw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["gamma"], result$coefficients["delta"]
      )
    },
    "kkw" = function(x) {
      pkkw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["delta"], result$coefficients["lambda"]
      )
    },
    "ekw" = function(x) {
      pekw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["lambda"]
      )
    },
    "mc" = function(x) {
      pmc(
        x, result$coefficients["gamma"], result$coefficients["delta"],
        result$coefficients["lambda"]
      )
    },
    "kw" = function(x) pkw(x, result$coefficients["alpha"], result$coefficients["beta"]),
    "beta" = function(x) pbeta_(x, result$coefficients["gamma"], result$coefficients["delta"])
  )

  # Kolmogorov-Smirnov test
  ks_test <- suppressWarnings(
    ks.test(data, cdf_func)
  )
  gof$ks <- ks_test

  # Use the appropriate density function based on family
  density_func <- switch(family,
    "gkw" = function(x) {
      dgkw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["gamma"], result$coefficients["delta"],
        result$coefficients["lambda"]
      )
    },
    "bkw" = function(x) {
      dbkw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["gamma"], result$coefficients["delta"]
      )
    },
    "kkw" = function(x) {
      dkkw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["delta"], result$coefficients["lambda"]
      )
    },
    "ekw" = function(x) {
      dekw(
        x, result$coefficients["alpha"], result$coefficients["beta"],
        result$coefficients["lambda"]
      )
    },
    "mc" = function(x) {
      dmc(
        x, result$coefficients["gamma"], result$coefficients["delta"],
        result$coefficients["lambda"]
      )
    },
    "kw" = function(x) dkw(x, result$coefficients["alpha"], result$coefficients["beta"]),
    "beta" = function(x) dbeta_(x, result$coefficients["gamma"], result$coefficients["delta"])
  )

  # Calculate diagnostic statistics
  diagnostics <- list(
    mean_obs = mean(data),
    var_obs = var(data),
    mean_fitted = tryCatch(
      {
        integrate(function(x) x * density_func(x), 0, 1)$value
      },
      error = function(e) NA
    ),
    var_fitted = tryCatch(
      {
        m1 <- integrate(function(x) x * density_func(x), 0, 1)$value
        m2 <- integrate(function(x) x^2 * density_func(x), 0, 1)$value
        m2 - m1^2
      },
      error = function(e) NA
    )
  )

  return(list(gof = gof, diagnostics = diagnostics))
}

#' Get default fixed parameters for each GKw family
#'
#' @param family Character string, the family name
#' @return Named list of parameters that are fixed by default for this family
#' @keywords internal
.get_family_fixed_defaults <- function(family) {
  fixed_params <- switch(family,
    "gkw" = list(), # All parameters free
    "bkw" = list(lambda = 1.0),
    "kkw" = list(gamma = 1.0),
    "ekw" = list(gamma = 1.0, delta = 0.0),
    "mc" = list(alpha = 1.0, beta = 1.0),
    "kw" = list(gamma = 1.0, delta = 0.0, lambda = 1.0),
    "beta" = list(alpha = 1.0, beta = 1.0, lambda = 1.0),
    list() # Default case: no fixed parameters
  )

  return(fixed_params)
}


#' Fit submodels for the GKw family for model comparison
#'
#' This internal function fits nested submodels for a given GKw family model,
#' to facilitate likelihood ratio testing and model comparison.
#'
#' @param data Original data used for the main model.
#' @param result Result from the main model fit.
#' @param method Optimization method used.
#' @param hessian Logical; if TRUE, compute standard errors for submodels.
#' @param optimizer.control Control parameters for optimization.
#' @param silent Logical; if TRUE, suppress messages.
#'
#' @return A list containing submodel fits and likelihood ratio test results.
#' @keywords internal
.fit_submodels_tmb <- function(data, result, method, hessian, optimizer.control, silent) {
  # Define the chain of nested models for each family
  model_chain <- list(
    gkw = c("gkw", "bkw", "kkw", "ekw", "kw", "beta"),
    bkw = c("bkw", "ekw", "kw"),
    kkw = c("kkw", "ekw", "kw"),
    ekw = c("ekw", "kw"),
    mc = c("mc", "beta"),
    kw = c("kw"),
    beta = c("beta")
  )

  family <- result$family
  subchain <- model_chain[[family]]

  # Remove the current model from the chain (we already have it)
  subchain <- subchain[subchain != family]

  if (length(subchain) == 0) {
    if (!silent) message("No submodels to fit for family ", family)
    return(NULL)
  }

  # Prepare results containers
  submodels <- list()
  lrt_results <- data.frame(
    model1 = character(),
    model2 = character(),
    statistic = numeric(),
    df = numeric(),
    p.value = numeric(),
    stringsAsFactors = FALSE
  )

  # Fixed parameters for the main model
  original_fixed <- result$fixed

  # Fit each submodel
  for (subfam in subchain) {
    if (!silent) message("Fitting submodel: ", subfam)

    # Get default fixed parameters for this subfamily
    default_fixed <- .get_family_fixed_defaults(subfam)

    # Combine with user's original fixed parameters (already included in result$fixed)
    submodel_fixed <- original_fixed
    for (param in names(default_fixed)) {
      if (!param %in% names(submodel_fixed)) {
        submodel_fixed[[param]] <- default_fixed[[param]]
      }
    }

    # Use existing parameter estimates as starting values when possible
    start_from_parent <- result$coefficients
    start_for_sub <- list()
    for (param in names(start_from_parent)) {
      if (!param %in% names(submodel_fixed)) {
        start_for_sub[[param]] <- start_from_parent[[param]]
      }
    }

    # Fit the submodel
    sub_result <- tryCatch(
      {
        gkwfit(
          data = data,
          family = subfam,
          start = start_for_sub,
          fixed = submodel_fixed,
          method = method,
          hessian = hessian,
          profile = FALSE,
          plot = FALSE,
          optimizer.control = optimizer.control,
          silent = silent
        )
      },
      error = function(e) {
        warning("Failed to fit submodel ", subfam, ": ", e$message)
        NULL
      }
    )

    if (!is.null(sub_result)) {
      submodels[[subfam]] <- sub_result

      # Calculate LRT between parent and this submodel
      lr_stat <- 2 * (result$loglik - sub_result$loglik)
      df_diff <- result$df - sub_result$df
      p_val <- stats::pchisq(lr_stat, df = df_diff, lower.tail = FALSE)

      lrt_results <- rbind(lrt_results, data.frame(
        model1 = result$family,
        model2 = subfam,
        statistic = lr_stat,
        df = df_diff,
        p.value = p_val,
        stringsAsFactors = FALSE
      ))
    }
  }

  # Combine results
  out <- list(
    submodels = submodels,
    lrt = lrt_results
  )

  return(out)
}



#' @title Fit Generalized Kumaraswamy Distribution via Maximum Likelihood Estimation using TMB
#'
#' @description
#' Fits any distribution from the Generalized Kumaraswamy (GKw) family to data using maximum
#' likelihood estimation through Template Model Builder (TMB). The function supports several
#' optimization methods including R's nlminb and various optim algorithms.
#'
#' @param data A numeric vector with values strictly between 0 and 1. Values at the boundaries
#'   (0 or 1) may cause issues; consider slight adjustments if necessary (see Details).
#' @param family A character string specifying the distribution family. One of: \code{"gkw"} (default),
#'   \code{"bkw"}, \code{"kkw"}, \code{"ekw"}, \code{"mc"}, \code{"kw"}, or \code{"beta"}. See Details for parameter specifications.
#' @param start Optional list with initial parameter values (using natural parameter names like `alpha`, `beta`, etc.).
#'   If \code{NULL}, reasonable starting values will be determined, potentially using the method of moments if \code{use_moments = TRUE}.
#' @param fixed Optional list of parameters to be held fixed at specific values during estimation (e.g., \code{list(lambda = 1)}).
#' @param method Optimization method to use. One of: \code{"nlminb"} (default), \code{"Nelder-Mead"}, \code{"BFGS"},
#'   \code{"CG"}, \code{"L-BFGS-B"} or \code{"SANN"}. If \code{"nlminb"} is selected, R's \code{\link[stats]{nlminb}} function is used;
#'   otherwise, R's \code{\link[stats]{optim}} function is used with the specified method.
#' @param use_moments Logical; if \code{TRUE} and \code{start = NULL}, attempts to use method of moments estimates
#'   (via `gkwgetstartvalues`) as initial values. Default: \code{FALSE}.
#' @param hessian Logical; if \code{TRUE}, attempts to compute the Hessian matrix at the MLE to estimate
#'   standard errors and the variance-covariance matrix using TMB's `sdreport`. Default: \code{TRUE}.
#' @param profile Logical; if \code{TRUE}, computes likelihood profiles for parameters using TMB's profiling capabilities. Default: \code{FALSE}.
#' @param npoints Integer; number of points to use in profile likelihood calculations (minimum 5). Only relevant if \code{profile = TRUE}. Default: 20.
#' @param plot Logical; if \code{TRUE}, generates diagnostic plots (histogram with fitted density, QQ-plot) using `ggplot2` and `patchwork`. Default: \code{TRUE}.
#' @param conf.level Numeric, the confidence level for confidence intervals calculated from standard errors (requires \code{hessian = TRUE}). Default: 0.95.
#' @param optimizer.control List of control parameters passed to the chosen optimizer.
#'   The valid parameters depend on the `method` chosen. See Details.
#' @param submodels Logical; if \code{TRUE}, fits relevant nested submodels for comparison via likelihood ratio tests. Default: \code{FALSE}.
#' @param silent Logical; if \code{TRUE}, suppresses messages during fitting. Default: \code{FALSE}.
#' @param ... Additional arguments (currently unused).
#'
#' @return An object of class \code{"gkwfit"} (inheriting from \code{"list"}) containing the fitted model results. Key components include:
#' \item{coefficients}{Named vector of estimated parameters (on their natural scale).}
#' \item{std.errors}{Named vector of estimated standard errors (if \code{hessian = TRUE}).}
#' \item{coef_summary}{Data frame summarizing estimates, SEs, z-values, and p-values.}
#' \item{vcov}{Variance-covariance matrix of the estimates (if \code{hessian = TRUE}).}
#' \item{loglik}{Log-likelihood value at the maximum.}
#' \item{AIC}{Akaike Information Criterion.}
#' \item{BIC}{Bayesian Information Criterion.}
#' \item{AICc}{Corrected Akaike Information Criterion.}
#' \item{data}{The input data vector used for fitting.}
#' \item{nobs}{Number of observations used.}
#' \item{df}{Number of estimated parameters.}
#' \item{convergence}{Logical indicating successful convergence.}
#' \item{message}{Convergence message from the optimizer.}
#' \item{family}{The specified distribution family.}
#' \item{method}{The specific optimization method used.}
#' \item{conf.int}{Data frame with confidence intervals (if \code{hessian = TRUE}).}
#' \item{conf.level}{The confidence level used.}
#' \item{optimizer}{The raw output object from the optimizer function.}
#' \item{obj}{The TMB object used for fitting (if available).}
#' \item{fixed}{The list of fixed parameters used.}
#' \item{profile}{A list containing likelihood profile results (if \code{profile = TRUE}).}
#' \item{submodels}{A list of fitted submodels (if \code{submodels = TRUE}).}
#' \item{lrt}{A list of likelihood ratio test results comparing nested models (if \code{submodels = TRUE}).}
#' \item{gof}{Goodness-of-fit statistics (e.g., AD, CvM, KS).}
#' \item{diagnostics}{Diagnostic information related to GOF tests.}
#' \item{plots}{A list or `patchwork` object containing ggplot objects for diagnostics (if \code{plot = TRUE}).}
#' \item{call}{The matched function call.}
#'
#' @details
#' The \code{gkwfit} function provides a unified interface for fitting the seven distributions in the Generalized Kumaraswamy family:
#' \itemize{
#'   \item \strong{GKw}: 5 parameters (\eqn{\alpha, \beta, \gamma, \delta, \lambda}) - All positive.
#'   \item \strong{BKw}: 4 parameters (\eqn{\alpha, \beta, \gamma, \delta}), \eqn{\lambda = 1} fixed - All positive.
#'   \item \strong{KKw}: 4 parameters (\eqn{\alpha, \beta, \delta, \lambda}), \eqn{\gamma = 1} fixed - All positive.
#'   \item \strong{EKw}: 3 parameters (\eqn{\alpha, \beta, \lambda}), \eqn{\gamma = 1, \delta = 0} fixed - All positive.
#'   \item \strong{Mc} (McDonald / Beta Power): 3 parameters (\eqn{\gamma, \delta, \lambda}), \eqn{\alpha = 1, \beta = 1} fixed - All positive.
#'   \item \strong{Kw} (Kumaraswamy): 2 parameters (\eqn{\alpha, \beta}), \eqn{\gamma = 1, \delta = 0, \lambda = 1} fixed - All positive.
#'   \item \strong{Beta}: 2 parameters (\eqn{\gamma, \delta}), \eqn{\alpha = 1, \beta = 1, \lambda = 1} fixed - All positive. (\eqn{\gamma, \delta} correspond to standard Beta shape1, shape2).
#' }
#'
#' This function uses Template Model Builder (TMB) for parameter estimation, which provides accurate and efficient automatic differentiation.
#'
#' **Optimizer Method (`method` argument):**
#' \itemize{
#'   \item \code{"nlminb"}: Uses R's built-in `stats::nlminb` optimizer. Good for problems with box constraints. Default option.
#'   \item \code{"Nelder-Mead"}: Uses R's `stats::optim` with the Nelder-Mead simplex algorithm, which doesn't require derivatives.
#'   \item \code{"BFGS"}: Uses R's `stats::optim` with the BFGS quasi-Newton method for unconstrained optimization.
#'   \item \code{"CG"}: Uses R's `stats::optim` with conjugate gradients method for unconstrained optimization.
#'   \item \code{"L-BFGS-B"}: Uses R's `stats::optim` with the limited-memory BFGS method with box constraints.
#'   \item \code{"SANN"}: Uses R's stats::optim with simulated annealing, a global optimization method useful for problems with multiple local minima.
#' }
#'
#' **Optimizer Control (`optimizer.control`):**
#' Pass a list with parameters specific to the chosen optimizer:
#' \itemize{
#'   \item For \code{method = "nlminb"}: Controls are passed to `stats::nlminb`. See `?nlminb` for options like `eval.max`, `iter.max`, `trace`, `rel.tol`, etc.
#'   \item For other methods: Controls are passed to `stats::optim`. See `?optim` for options like `maxit`, `trace`, `factr`, `pgtol`, etc.
#' }
#' If `optimizer.control` is empty, reasonable defaults are used for each method.
#'
#' **Data Preprocessing:** The function includes basic validation to ensure data is numeric and within (0, 1). It attempts to adjust values exactly at 0 or 1 by a small epsilon (`.Machine$double.eps^0.5`) with a warning, but stops if more than 10% of data needs adjustment. It's generally recommended to handle boundary data appropriately *before* calling `gkwfit`.
#'
#' @examples
#' \dontrun{
#' # Load required packages
#' library(ggplot2)
#' library(patchwork)
#' library(betareg)
#'
#' # ============================================================================
#' # EXAMPLE 1: Basic Usage with Simulated Data for Different Distributions
#' # ============================================================================
#' # Set seed for reproducibility
#' set.seed(2203)
#' n <- 1000
#'
#' # Generate random samples from various distributions in the GKw family
#' y_gkw <- rgkw(n, alpha = 2, beta = 3, gamma = 1.5, delta = 0.5, lambda = 1.2)
#' y_bkw <- rbkw(n, alpha = 2, beta = 3, gamma = 1.5, delta = 0.5) # BKw has lambda fixed at 1
#' y_kw <- rkw(n, alpha = 2, beta = 3) # Standard Kumaraswamy (gamma=1, delta=0, lambda=1)
#' y_beta <- rbeta_(n, gamma = 2, delta = 3) # Beta with shape1=2, shape2=3
#'
#' # Calculate densities for the first 5 observations
#' head(dgkw(y_gkw[1:5], alpha = 2, beta = 3, gamma = 1.5, delta = 0.5, lambda = 1.2))
#'
#' # Compare with beta density (note different parameterization)
#' head(dbeta_(y_beta[1:5], gamma = 2, delta = 3))
#'
#' # Compute log-likelihood using parameter vector format
#' # Parameter order: alpha, beta, gamma, delta, lambda
#' par_gkw <- c(2, 3, 1.5, 0.5, 1.2)
#' ll_gkw <- llgkw(par_gkw, y_gkw)
#'
#' par_kw <- c(2, 3) # Kumaraswamy parameters
#' ll_kw <- llkw(par_kw, y_kw)
#'
#' cat("Log-likelihood GKw:", ll_gkw, "\nLog-likelihood Kw:", ll_kw, "\n")
#'
#' # ============================================================================
#' # EXAMPLE 2: Visualization and Distribution Comparisons
#' # ============================================================================
#' # Generate data for plotting
#' x_vals <- seq(0.001, 0.999, length.out = 500)
#'
#' # Calculate densities for different distributions
#' dens_gkw <- dgkw(x_vals, alpha = 2, beta = 3, gamma = 1.5, delta = 0.5, lambda = 1.2)
#' dens_bkw <- dbkw(x_vals, alpha = 2, beta = 3, gamma = 1.5, delta = 0.5)
#' dens_kw <- dkw(x_vals, alpha = 2, beta = 3)
#' dens_beta <- dbeta_(x_vals, gamma = 2, delta = 3)
#'
#' # Create and display plot
#' df <- data.frame(
#'   x = rep(x_vals, 4),
#'   density = c(dens_gkw, dens_bkw, dens_kw, dens_beta),
#'   Distribution = rep(c("GKw", "BKw", "Kw", "Beta"), each = length(x_vals))
#' )
#'
#' p <- ggplot(df, aes(x = x, y = density, color = Distribution)) +
#'   geom_line(linewidth = 1) +
#'   theme_minimal() +
#'   labs(
#'     title = "Density Comparison of GKw Distribution Family",
#'     x = "x", y = "Density"
#'   )
#'
#' print(p)
#'
#' # Examine quantile functions
#' # Calculate 0.25, 0.5, and 0.75 quantiles for each distribution
#' probs <- c(0.25, 0.5, 0.75)
#' qgkw_values <- qgkw(probs, alpha = 2, beta = 3, gamma = 1.5, delta = 0.5, lambda = 1.2)
#' qkw_values <- qkw(probs, alpha = 2, beta = 3)
#' qbeta_values <- qbeta_(probs, gamma = 2, delta = 3)
#'
#' # Display the quantiles
#' quantile_df <- data.frame(
#'   Probability = probs,
#'   GKw = qgkw_values,
#'   Kw = qkw_values,
#'   Beta = qbeta_values
#' )
#' print(quantile_df)
#'
#' # ============================================================================
#' # EXAMPLE 3: Model Fitting with Simulated Data
#' # ============================================================================
#' # Simulate data from GKw
#' set.seed(2203)
#' true_params <- list(alpha = 2.5, beta = 1.8, gamma = 1.3, delta = 0.4, lambda = 1.5)
#' y <- rgkw(n,
#'   alpha = true_params$alpha,
#'   beta = true_params$beta,
#'   gamma = true_params$gamma,
#'   delta = true_params$delta,
#'   lambda = true_params$lambda
#' )
#'
#' # Fit full GKw model
#' fit_gkw <- gkwfit(data = y, family = "gkw")
#' summary(fit_gkw)
#'
#' # Fit restricted models for comparison
#' fit_bkw <- gkwfit(data = y, family = "bkw")
#' fit_kkw <- gkwfit(data = y, family = "kkw")
#' fit_kw <- gkwfit(data = y, family = "kw")
#'
#' # Compare models using AIC
#' models <- c("GKw", "BKw", "KKw", "Kw")
#' AIC_values <- c(fit_gkw$AIC, fit_bkw$AIC, fit_kkw$AIC, fit_kw$AIC)
#' model_comparison <- data.frame(Model = models, AIC = AIC_values)
#' model_comparison <- model_comparison[order(model_comparison$AIC), ]
#' print(model_comparison)
#'
#' # ============================================================================
#' # EXAMPLE 4: Fixed Parameter Estimation and Likelihood Ratio Tests
#' # ============================================================================
#' # Simulate data where delta = 0
#' set.seed(2203)
#' true_params <- list(alpha = 1.8, beta = 2.2, gamma = 0.9, delta = 0, lambda = 1.2)
#' y <- rgkw(n,
#'   alpha = true_params$alpha,
#'   beta = true_params$beta,
#'   gamma = true_params$gamma,
#'   delta = true_params$delta,
#'   lambda = true_params$lambda
#' )
#'
#' # Fit model with delta fixed at 0
#' fit_fixed <- gkwfit(data = y, family = "gkw", method = "L-BFGS-B", fixed = list(delta = 0.5))
#' summary(fit_fixed)
#'
#' # Fit model without fixing delta (should estimate close to 0)
#' fit_free <- gkwfit(data = y, family = "gkw")
#' summary(fit_free)
#'
#' # Compare models via likelihood ratio test
#' # H0: delta = 0 vs H1: delta != 0
#' LR_stat <- -2 * (fit_fixed$loglik - fit_free$loglik)
#' p_value <- 1 - pchisq(LR_stat, df = 1)
#' cat("LR test statistic:", LR_stat, "\np-value:", p_value, "\n")
#'
#' # ============================================================================
#' # EXAMPLE 5: Profile Likelihood Analysis
#' # ============================================================================
#' set.seed(2203)
#' y <- rgkw(n, alpha = 2, beta = 3, gamma = 1.5, delta = 0, lambda = 1.2)
#'
#' # Fit with profile likelihood
#' fit_profile <- gkwfit(
#'   data = y,
#'   family = "gkw",
#'   profile = TRUE,
#'   npoints = 15
#' )
#'
#' # Examine profile objects
#' str(fit_profile$profile)
#' fit_profile$plots
#'
#' # ============================================================================
#' # EXAMPLE 6: Real Data Application with Beta Regression Datasets
#' # ============================================================================
#' # Example 1: Reading Skills data
#' data("ReadingSkills", package = "betareg")
#' y <- ReadingSkills$accuracy
#'
#' # Summary statistics of the response variable
#' summary(y)
#'
#' # Fit different distributions to this data
#' fit_rs_beta <- gkwfit(data = y, family = "beta")
#' fit_rs_kw <- gkwfit(data = y, family = "kw")
#' fit_rs_gkw <- gkwfit(data = y, family = "gkw")
#'
#' # Model comparison
#' rs_models <- c("Beta", "Kumaraswamy", "GKw")
#' rs_AICs <- c(fit_rs_beta$AIC, fit_rs_kw$AIC, fit_rs_gkw$AIC)
#' rs_BICs <- c(fit_rs_beta$BIC, fit_rs_kw$BIC, fit_rs_gkw$BIC)
#' rs_comparison <- data.frame(
#'   Model = rs_models,
#'   AIC = rs_AICs,
#'   BIC = rs_BICs,
#'   Parameters = c(
#'     length(coef(fit_rs_beta)),
#'     length(coef(fit_rs_kw)),
#'     length(coef(fit_rs_gkw))
#'   )
#' )
#' print(rs_comparison[order(rs_comparison$AIC), ])
#'
#' # Example 2: Gasoline Yield data
#' data("GasolineYield", package = "betareg")
#' y <- GasolineYield$yield
#'
#' # Check range and make adjustments if needed (data must be in (0,1))
#' if (min(y) <= 0 || max(y) >= 1) {
#'   # Apply common transformation for proportions that include 0 or 1
#'   n_obs <- length(y)
#'   y <- (y * (n_obs - 1) + 0.5) / n_obs
#' }
#'
#' # Fit best model based on previous comparison
#' best_family <- rs_comparison$Model[1]
#' fit_gas <- gkwfit(data = y, family = tolower(best_family))
#' summary(fit_gas)
#'
#' # Plot fitted density over histogram of data
#' # Get parameters from fitted model
#' params <- coef(fit_gas)
#'
#' # Generate x values and calculate density
#' x_seq <- seq(min(y), max(y), length.out = 100)
#' fitted_density <- switch(tolower(best_family),
#'   "beta" = dbeta_(x_seq, gamma = params["gamma"], delta = params["delta"]),
#'   "kumaraswamy" = dkw(x_seq, alpha = params["alpha"], beta = params["beta"]),
#'   "gkw" = dgkw(x_seq,
#'     alpha = params["alpha"], beta = params["beta"],
#'     gamma = params["gamma"], delta = params["delta"],
#'     lambda = params["lambda"]
#'   )
#' )
#'
#' # Create data frame for plotting
#' df_plot <- data.frame(x = x_seq, density = fitted_density)
#'
#' # Create plot
#' p <- ggplot() +
#'   geom_histogram(
#'     data = data.frame(y = y), aes(x = y, y = after_stat(density)),
#'     bins = 30, fill = "lightblue", color = "black", alpha = 0.7
#'   ) +
#'   geom_line(data = df_plot, aes(x = x, y = density), color = "red", size = 1) +
#'   labs(
#'     title = paste("Fitted", best_family, "Distribution to Gasoline Yield Data"),
#'     x = "Yield",
#'     y = "Density"
#'   ) +
#'   theme_minimal()
#'
#' print(p)
#'
#' # ============================================================================
#' # EXAMPLE 7: Beta Distribution Variants and Special Functions
#' # ============================================================================
#' # Generate samples from beta distribution
#' set.seed(2203)
#' y_beta <- rbeta_(n, gamma = 2, delta = 3)
#'
#' # Calculate density and log-likelihood
#' beta_dens <- dbeta_(y_beta[1:5], gamma = 2, delta = 3)
#'
#' # Using parameter vector format for log-likelihood (gamma, delta)
#' par_beta <- c(2, 3)
#' beta_ll <- llbeta(par_beta, y_beta)
#' cat("Beta density (first 5):", beta_dens, "\n")
#' cat("Beta log-likelihood:", beta_ll, "\n")
#'
#' # Gradient of log-likelihood with respect to parameters
#' beta_grad <- grbeta(par_beta, y_beta)
#' cat("Gradient of Beta log-likelihood:\n")
#' print(beta_grad)
#'
#' # Hessian of log-likelihood with respect to parameters
#' beta_hess <- hsbeta(par_beta, y_beta)
#' cat("Hessian of Beta log-likelihood:\n")
#' print(beta_hess)
#'
#' # ============================================================================
#' # EXAMPLE 8: Gradient and Hessian Functions for GKw Distribution
#' # ============================================================================
#' # Set seed and generate data
#' set.seed(2203)
#'
#' # Define parameters
#' alpha <- 2
#' beta <- 1.5
#' gamma <- 1.2
#' delta <- 0.3
#' lambda <- 1.1
#' par_gkw <- c(alpha, beta, gamma, delta, lambda)
#'
#' # Generate random sample
#' y <- rgkw(n, alpha, beta, gamma, delta, lambda)
#'
#' # Calculate log-likelihood of the sample using parameter vector format
#' ll <- llgkw(par_gkw, y)
#' cat("GKw log-likelihood:", ll, "\n")
#'
#' # Calculate gradient of log-likelihood
#' gr <- grgkw(par_gkw, y)
#' cat("GKw log-likelihood gradient:\n")
#' print(gr)
#'
#' # Calculate Hessian matrix of log-likelihood
#' hs <- hsgkw(par_gkw, y)
#' cat("GKw log-likelihood Hessian:\n")
#' print(hs)
#'
#' # ============================================================================
#' # EXAMPLE 9: Optimization with Custom Gradient and Hessian
#' # ============================================================================
#' # Manual optimization demonstration
#' set.seed(2203)
#'
#' # Generate data from a known distribution
#' true_par <- c(alpha = 1.8, beta = 2.5, gamma = 1.3, delta = 0.2, lambda = 1.1)
#' y <- rgkw(n,
#'   alpha = true_par["alpha"],
#'   beta = true_par["beta"],
#'   gamma = true_par["gamma"],
#'   delta = true_par["delta"],
#'   lambda = true_par["lambda"]
#' )
#'
#' # Define the negative log-likelihood function (for minimization)
#' nll <- function(log_par) {
#'   # Transform from log-scale to natural scale (ensures positivity)
#'   par <- exp(log_par)
#'
#'   # Return negative log-likelihood
#'   -llgkw(par, y)
#' }
#'
#' # Define the gradient function using analytical gradient
#' gr_func <- function(log_par) {
#'   # Transform parameters
#'   par <- exp(log_par)
#'
#'   # Get the gradient with respect to the original parameters
#'   gradient <- grgkw(par, y)
#'
#'   # Apply chain rule for the log transformation
#'   gradient <- gradient * par
#'
#'   # Return negative gradient for minimization
#'   gradient
#' }
#'
#' # Starting values (on log scale to ensure positivity)
#' start_log_par <- log(c(1, 1, 1, 0.1, 1))
#'
#' # Optimize using L-BFGS-B method with analytic gradient
#' opt_result <- optim(
#'   par = start_log_par,
#'   fn = nll,
#'   gr = gr_func,
#'   method = "BFGS",
#'   control = list(trace = 1, maxit = 100)
#' )
#'
#' # Transform parameters back to original scale
#' estimated_par <- exp(opt_result$par)
#' names(estimated_par) <- c("alpha", "beta", "gamma", "delta", "lambda")
#'
#' # Compare with true parameters
#' params_comparison <- data.frame(
#'   True = true_par,
#'   Estimated = estimated_par,
#'   Absolute_Error = abs(true_par - estimated_par),
#'   Relative_Error = abs((true_par - estimated_par) / true_par)
#' )
#' print(params_comparison)
#'
#' # ============================================================================
#' # EXAMPLE 10: Third Betareg Dataset (ImpreciseTask) and McDonald Distribution
#' # ============================================================================
#' data("ImpreciseTask", package = "betareg")
#' y <- ImpreciseTask$location
#'
#' # Make sure data is within (0, 1)
#' if (min(y) <= 0 || max(y) >= 1) {
#'   # Apply common transformation for proportions
#'   n_obs <- length(y)
#'   y <- (y * (n_obs - 1) + 0.5) / n_obs
#' }
#'
#' # Fit models from the GKw family
#' fit_beta <- gkwfit(data = y, family = "beta")
#' fit_kw <- gkwfit(data = y, family = "kw")
#' fit_mc <- gkwfit(data = y, family = "mc") # McDonald distribution
#'
#' # Compare information criteria
#' ic_comparison <- data.frame(
#'   Model = c("Beta", "Kumaraswamy", "McDonald"),
#'   AIC = c(fit_beta$AIC, fit_kw$AIC, fit_mc$AIC),
#'   BIC = c(fit_beta$BIC, fit_kw$BIC, fit_mc$BIC),
#'   LogLik = c(fit_beta$loglik, fit_kw$loglik, fit_mc$loglik)
#' )
#' print(ic_comparison[order(ic_comparison$AIC), ])
#'
#' # Get best model
#' best_model <- ic_comparison$Model[which.min(ic_comparison$AIC)]
#' best_fit <- switch(tolower(best_model),
#'   "beta" = fit_beta,
#'   "kumaraswamy" = fit_kw,
#'   "mcdonald" = fit_mc
#' )
#'
#' # Goodness of fit tests
#' print(paste("Best model:", best_model))
#' print(best_fit$gof)
#'
#' # Generate values from fitted McDonald distribution (if it's the best model)
#' if (best_model == "McDonald") {
#'   # McDonald's parameters: gamma, delta, lambda (alpha=1, beta=1 fixed)
#'   gamma_mc <- coef(fit_mc)["gamma"]
#'   delta_mc <- coef(fit_mc)["delta"]
#'   lambda_mc <- coef(fit_mc)["lambda"]
#'
#'   # Generate new sample using fitted parameters
#'   set.seed(2203)
#'   y_mc_simulated <- rmc(n, gamma = gamma_mc, delta = delta_mc, lambda = lambda_mc)
#'
#'   # Compare histograms of original and simulated data
#'   df_orig <- data.frame(value = y, type = "Original")
#'   df_sim <- data.frame(value = y_mc_simulated, type = "Simulated")
#'   df_combined <- rbind(df_orig, df_sim)
#'
#'   # Create comparative histogram
#'   p <- ggplot(df_combined, aes(x = value, fill = type)) +
#'     geom_histogram(position = "identity", alpha = 0.5, bins = 30) +
#'     labs(
#'       title = "Comparison of Original Data and Simulated McDonald Distribution",
#'       x = "Value",
#'       y = "Count"
#'     ) +
#'     theme_minimal()
#'
#'   print(p)
#' }
#'
#' # ============================================================================
#' # EXAMPLE 11: Working with Alternative Distributions in the GKw Family
#' # ============================================================================
#' # Explore EKw - Exponential Kumaraswamy distribution (alpha, beta, lambda)
#' set.seed(2203)
#'
#' # Generate EKw sample
#' y_ekw <- rekw(n, alpha = 2.5, beta = 1.5, lambda = 2.0)
#'
#' # Calculate density and distribution function
#' ekw_density <- dekw(y_ekw[1:5], alpha = 2.5, beta = 1.5, lambda = 2.0)
#' ekw_cdf <- pekw(y_ekw[1:5], alpha = 2.5, beta = 1.5, lambda = 2.0)
#'
#' # Calculate log-likelihood
#' par_ekw <- c(2.5, 1.5, 2.0) # alpha, beta, lambda for EKw
#' ll_ekw <- llekw(par_ekw, y_ekw)
#'
#' cat("EKw density (first 5):", ekw_density, "\n")
#' cat("EKw CDF (first 5):", ekw_cdf, "\n")
#' cat("EKw log-likelihood:", ll_ekw, "\n")
#'
#' # Calculate gradient and Hessian
#' gr_ekw <- grekw(par_ekw, y_ekw)
#' hs_ekw <- hsekw(par_ekw, y_ekw)
#'
#' cat("EKw gradient:\n")
#' print(gr_ekw)
#'
#' cat("EKw Hessian (first 2x2):\n")
#' print(hs_ekw)
#'
#' # Fit EKw model to data
#' fit_ekw <- gkwfit(data = y_ekw, family = "ekw")
#' summary(fit_ekw)
#'
#' # Compare with true parameters
#' cat("True parameters: alpha=2.5, beta=1.5, lambda=2.0\n")
#' cat("Estimated parameters:\n")
#' print(coef(fit_ekw))
#'
#' # ============================================================================
#' # EXAMPLE 12: Comprehensive Parameter Recovery Simulation
#' # ============================================================================
#' # Function to simulate data and recover parameters
#' simulate_and_recover <- function(family, true_params, n = 1000) {
#'   set.seed(2203)
#'
#'   # Generate data based on family
#'   y <- switch(family,
#'     "gkw" = rgkw(n,
#'       alpha = true_params[1], beta = true_params[2],
#'       gamma = true_params[3], delta = true_params[4],
#'       lambda = true_params[5]
#'     ),
#'     "bkw" = rbkw(n,
#'       alpha = true_params[1], beta = true_params[2],
#'       gamma = true_params[3], delta = true_params[4]
#'     ),
#'     "kw" = rkw(n, alpha = true_params[1], beta = true_params[2]),
#'     "beta" = rbeta_(n, gamma = true_params[1], delta = true_params[2])
#'   )
#'
#'   # Fit model
#'   fit <- gkwfit(data = y, family = family, silent = TRUE)
#'
#'   # Return comparison
#'   list(
#'     family = family,
#'     true = true_params,
#'     estimated = coef(fit),
#'     loglik = fit$loglik,
#'     AIC = fit$AIC,
#'     converged = fit$convergence
#'   )
#' }
#'
#' # Define true parameters for each family
#' params_gkw <- c(alpha = 2.0, beta = 3.0, gamma = 1.5, delta = 0.5, lambda = 1.2)
#' params_bkw <- c(alpha = 2.5, beta = 1.8, gamma = 1.2, delta = 0.3)
#' params_kw <- c(alpha = 1.5, beta = 2.0)
#' params_beta <- c(gamma = 2.0, delta = 3.0)
#'
#' # Run simulations
#' result_gkw <- simulate_and_recover("gkw", params_gkw)
#' result_bkw <- simulate_and_recover("bkw", params_bkw)
#' result_kw <- simulate_and_recover("kw", params_kw)
#' result_beta <- simulate_and_recover("beta", params_beta)
#'
#' # Create summary table
#' create_comparison_df <- function(result) {
#'   param_names <- names(result$true)
#'   df <- data.frame(
#'     Parameter = param_names,
#'     True = result$true,
#'     Estimated = result$estimated[param_names],
#'     Abs_Error = abs(result$true - result$estimated[param_names]),
#'     Rel_Error = abs((result$true - result$estimated[param_names]) / result$true) * 100
#'   )
#'   return(df)
#' }
#'
#' # Print results
#' cat("===== GKw Parameter Recovery =====\n")
#' print(create_comparison_df(result_gkw))
#' cat("\n===== BKw Parameter Recovery =====\n")
#' print(create_comparison_df(result_bkw))
#' cat("\n===== Kw Parameter Recovery =====\n")
#' print(create_comparison_df(result_kw))
#' cat("\n===== Beta Parameter Recovery =====\n")
#' print(create_comparison_df(result_beta))
#' }
#'
#' @references
#' Kumaraswamy, P. (1980). A generalized probability density function for double-bounded
#' random processes. *Journal of Hydrology*, 46(1-2), 79-88. \doi{10.1016/0022-1694(80)90036-0}
#'
#' Cordeiro, G. M., & de Castro, M. (2011). A new family of generalized distributions.
#' *Journal of Statistical Computation and Simulation*, 81(7), 883-898. \doi{10.1080/00949650903530745}
#'
#' Kristensen, K., Nielsen, A., Berg, C. W., Skaug, H., & Bell, B. M. (2016). TMB: Automatic Differentiation and Laplace Approximation. *Journal of Statistical Software*, 70(5), 1–21. \doi{10.18637/jss.v070.i05}
#'
#' @seealso User-facing S3 methods: \code{\link{summary.gkwfit}}, \code{\link{print.gkwfit}}, \code{\link{plot.gkwfit}}, \code{\link{coef.gkwfit}}, \code{\link{vcov.gkwfit}}, \code{\link{logLik.gkwfit}}, \code{\link{confint.gkwfit}}. Density/distribution functions: \code{\link{dgkw}}, \code{\link{pgkw}}, \code{\link{qgkw}}, \code{\link{rgkw}}.
#' @keywords distribution models mle optimization
#' @author Lopes, J. E.
#' @export
gkwfit <- function(data,
                   family = "gkw",
                   start = NULL,
                   fixed = NULL,
                   method = "nlminb",
                   use_moments = FALSE,
                   hessian = TRUE,
                   profile = FALSE,
                   npoints = 20,
                   plot = TRUE,
                   conf.level = 0.95,
                   optimizer.control = list(),
                   submodels = FALSE,
                   silent = TRUE,
                   ...) {
  # --- Argument Matching and Validation ---
  call <- match.call()
  family <- match.arg(family, choices = c("gkw", "bkw", "kkw", "ekw", "mc", "kw", "beta"))

  # Validate optimization method
  method <- match.arg(method, choices = c("nlminb", "Nelder-Mead", "BFGS", "CG", "L-BFGS-B", "SANN"))

  # Determine if we're using nlminb or optim (with the specified method)
  use_nlminb <- method == "nlminb"

  # Package checks
  if (!requireNamespace("TMB", quietly = TRUE)) {
    stop("Package 'TMB' is required but not installed. Please install it with install.packages('TMB')")
  }

  if (profile && !requireNamespace("TMB", quietly = TRUE)) {
    warning("Package 'TMB' needed for profile likelihoods. Setting profile=FALSE.")
    profile <- FALSE
  }

  if (plot && (!requireNamespace("ggplot2", quietly = TRUE) || !requireNamespace("patchwork", quietly = TRUE))) {
    warning("Packages 'ggplot2' and 'patchwork' needed for plot=TRUE. Setting plot=FALSE.")
    plot <- FALSE
  }

  # Parameter validation
  if (!is.numeric(conf.level) || conf.level <= 0 || conf.level >= 1) {
    stop("conf.level must be a number between 0 and 1")
  }
  if (!is.numeric(npoints) || npoints < 5) {
    stop("npoints must be a positive integer >= 5")
  }
  npoints <- as.integer(npoints)

  # --- Data and Initial Parameter Setup ---
  param_info <- .get_family_param_info(family)
  n_params <- param_info$n
  param_names <- param_info$names

  data <- .validate_data(data, n_params) # Validate data early

  result <- list(call = call, family = family) # Initialize result list

  # Determine initial values and merge fixed parameters
  start_info <- .determine_start_values(data, start, fixed, family, use_moments, silent)
  start <- start_info$start # This is now a validated list for the specific family
  fixed <- start_info$fixed # This now includes user + family default fixed

  # --- Core Fitting Call ---
  if (!silent) message("Fitting model using TMB with method: ", method)

  # Check and compile TMB code
  tryCatch(
    {
      .check_and_compile_TMB_code("gkwmletmb", verbose = !silent)
    },
    error = function(e) {
      stop("Failed to compile TMB code: ", e$message)
    }
  )

  if (!silent) {
    message("Checking TMB model compilation...")
  }

  # Get the numeric code for family
  family_code <- .family_to_code(family)

  # Create full parameter list for TMB
  full_start_tmb <- list(
    log_alpha = log(1), # Default values, will be overwritten if needed
    log_beta = log(1),
    log_gamma = log(1),
    log_delta = log(0.0001), # Small value for delta to avoid log(0)
    log_lambda = log(1)
  )

  # Update with user-provided start values
  for (param in names(start)) {
    full_start_tmb[[paste0("log_", param)]] <- log(start[[param]])
  }

  # Apply fixed parameters for TMB
  map <- list()

  # Add family-specific fixed parameters to the map
  all_fixed_params <- c(names(fixed), setdiff(
    c("alpha", "beta", "gamma", "delta", "lambda"),
    c(param_names, names(fixed))
  ))

  for (param in all_fixed_params) {
    param_name <- paste0("log_", param)
    if (param %in% names(fixed)) {
      # User or family provided fixed value
      full_start_tmb[[param_name]] <- log(fixed[[param]])
    } else if (param == "alpha" || param == "beta") {
      # Default alpha, beta = 1 when not specified
      full_start_tmb[[param_name]] <- log(1)
    } else if (param == "gamma") {
      # Default gamma = 1 when not specified
      full_start_tmb[[param_name]] <- log(1)
    } else if (param == "delta") {
      # Default delta = 0 when not specified (use small value to avoid log(0))
      full_start_tmb[[param_name]] <- log(0.0001)
    } else if (param == "lambda") {
      # Default lambda = 1 when not specified
      full_start_tmb[[param_name]] <- log(1)
    }

    # Add to map to keep parameter fixed
    map[[param_name]] <- factor(NA)
  }

  # Prepare data for TMB
  tmb_data <- list(
    x = data,
    family = family_code
  )

  # Create TMB object
  obj <- tryCatch(
    {
      TMB::MakeADFun(
        data = tmb_data,
        parameters = full_start_tmb,
        map = if (length(map) > 0) map else NULL,
        DLL = "gkwmletmb",
        silent = silent
      )
    },
    error = function(e) {
      stop("Error creating TMB model: ", e$message)
    }
  )

  # Set up optimizer controls based on the chosen method
  if (use_nlminb) {
    control_defaults <- list(eval.max = 500, iter.max = 300, trace = ifelse(silent, 0, 1))
  } else { # optim methods
    control_defaults <- list(maxit = 500, trace = ifelse(silent, 0, 1))
  }

  # Merge user controls with defaults, with user controls taking precedence
  control <- utils::modifyList(control_defaults, optimizer.control)

  # Run optimization
  if (use_nlminb) {
    opt <- tryCatch(
      {
        stats::nlminb(
          start = obj$par,
          objective = obj$fn,
          gradient = obj$gr,
          control = control
        )
      },
      error = function(e) {
        stop("Optimization with nlminb failed: ", e$message)
      }
    )

    opt$convergence <- opt$convergence == 0
    opt$message <- opt$message
    opt$objective <- opt$objective
  } else {
    opt <- tryCatch(
      {
        stats::optim(
          par = obj$par,
          fn = obj$fn,
          gr = obj$gr,
          method = method,
          control = control
        )
      },
      error = function(e) {
        stop(paste("Optimization with optim method", method, "failed:", e$message))
      }
    )

    opt$objective <- opt$value
    opt$convergence <- opt$convergence == 0
    opt$message <- if (opt$convergence) "Successful convergence" else "Optimization failed to converge"
  }

  if (!opt$convergence) {
    warning("Model did not converge: ", opt$message)
  }

  # Get parameter estimates for all parameters
  all_params <- exp(opt$par)
  names(all_params) <- sub("log_", "", names(opt$par))

  # Filter parameters to include only those relevant for the family
  filtered_coefficients <- all_params[paste0("log_", param_names) %in% names(opt$par)]
  names(filtered_coefficients) <- param_names[param_names %in% sub("log_", "", names(opt$par))]

  # If some parameters are missing (fixed), add them from the fixed list
  for (param in param_names) {
    if (!param %in% names(filtered_coefficients)) {
      if (param %in% names(fixed)) {
        filtered_coefficients[param] <- fixed[[param]]
      } else {
        # Use default values for missing parameters
        if (param == "alpha" || param == "beta" || param == "gamma" || param == "lambda") {
          filtered_coefficients[param] <- 1.0
        } else if (param == "delta") {
          filtered_coefficients[param] <- 0.0
        }
      }
    }
  }

  # Ensure the order matches param_names
  filtered_coefficients <- filtered_coefficients[param_names]

  # Calculate standard errors and Hessian if requested
  filtered_std_errors <- rep(NA, length(param_names))
  names(filtered_std_errors) <- param_names

  cov_matrix <- NULL
  coef_summary <- NULL
  conf_int <- NULL

  if (hessian) {
    sd_report <- tryCatch(
      {
        TMB::sdreport(obj)
      },
      error = function(e) {
        warning("Error calculating standard errors: ", e$message)
        NULL
      }
    )

    if (!is.null(sd_report) && !is.character(sd_report)) {
      # Extract parameter estimates, SEs, and covariance matrix
      cov_matrix <- sd_report$cov.fixed
      std_errors_log <- sqrt(diag(cov_matrix))

      # Get the SEs only for non-fixed parameters
      for (i in seq_along(param_names)) {
        param <- param_names[i]
        log_param <- paste0("log_", param)

        if (log_param %in% names(std_errors_log)) {
          # Use the Delta method to transform SEs to original scale
          filtered_std_errors[i] <- std_errors_log[log_param] * filtered_coefficients[i]
        }
      }

      # Create coefficient summary
      coef_summary <- data.frame(
        Estimate = filtered_coefficients,
        `Std. Error` = filtered_std_errors,
        `z value` = filtered_coefficients / filtered_std_errors,
        `Pr(>|z|)` = 2 * stats::pnorm(abs(filtered_coefficients / filtered_std_errors), lower.tail = FALSE),
        row.names = param_names,
        check.names = FALSE
      )

      # Calculate confidence intervals
      z_value <- stats::qnorm(1 - (1 - conf.level) / 2)
      conf_int_params <- character()
      conf_int_estimates <- numeric()
      conf_int_se <- numeric()
      conf_int_lower <- numeric()
      conf_int_upper <- numeric()

      for (i in seq_along(param_names)) {
        if (!is.na(filtered_std_errors[i])) {
          param <- param_names[i]
          conf_int_params <- c(conf_int_params, param)
          conf_int_estimates <- c(conf_int_estimates, filtered_coefficients[i])
          conf_int_se <- c(conf_int_se, filtered_std_errors[i])

          # For delta, lower bound is 0, for others it's machine epsilon
          if (param == "delta") {
            lower <- max(filtered_coefficients[i] - z_value * filtered_std_errors[i], 0)
          } else {
            lower <- max(filtered_coefficients[i] - z_value * filtered_std_errors[i], .Machine$double.eps)
          }

          upper <- filtered_coefficients[i] + z_value * filtered_std_errors[i]
          conf_int_lower <- c(conf_int_lower, lower)
          conf_int_upper <- c(conf_int_upper, upper)
        }
      }

      if (length(conf_int_params) > 0) {
        conf_int <- data.frame(
          parameter = conf_int_params,
          estimate = conf_int_estimates,
          std.error = conf_int_se,
          lower = conf_int_lower,
          upper = conf_int_upper,
          row.names = NULL, check.names = FALSE
        )
      }
    } else {
      warning("Hessian calculation failed, standard errors not available")

      # Create coefficient summary without SEs
      coef_summary <- data.frame(
        Estimate = filtered_coefficients,
        `Std. Error` = filtered_std_errors,
        `z value` = rep(NA, length(param_names)),
        `Pr(>|z|)` = rep(NA, length(param_names)),
        row.names = param_names,
        check.names = FALSE
      )
    }
  } else {
    # If hessian = FALSE, only report parameter estimates
    coef_summary <- data.frame(
      Estimate = filtered_coefficients,
      `Std. Error` = filtered_std_errors,
      `z value` = rep(NA, length(param_names)),
      `Pr(>|z|)` = rep(NA, length(param_names)),
      row.names = param_names,
      check.names = FALSE
    )
  }

  # Calculate log likelihood, AIC, BIC
  loglik <- -opt$objective
  n <- length(data)
  k <- sum(!is.na(opt$par)) # Count non-fixed parameters
  aic <- -2 * loglik + 2 * k
  bic <- -2 * loglik + log(n) * k
  aicc <- aic + (2 * k * (k + 1)) / (n - k - 1)

  # Compile core results
  fit_result <- list(
    coefficients = filtered_coefficients,
    std.errors = filtered_std_errors,
    coef_summary = coef_summary,
    vcov = cov_matrix,
    loglik = loglik,
    AIC = aic,
    BIC = bic,
    AICc = aicc,
    data = data,
    nobs = n,
    df = k,
    convergence = opt$convergence,
    message = opt$message,
    method = method,
    conf.int = conf_int,
    conf.level = conf.level,
    optimizer = opt,
    obj = obj,
    fixed = fixed
  )

  # Merge fit results into the main result object
  # Preserve call and family from the initial list
  result <- c(result, fit_result[!names(fit_result) %in% names(result)])

  # --- Post-fitting Analysis ---

  # Calculate profile likelihoods if requested
  if (profile) {
    # Check if sdreport ran successfully
    if (!is.null(result$obj)) {
      prof_list <- .calculate_profiles(result, data, family, fixed, "tmb", method, npoints, silent)
      result$profile <- prof_list
    } else {
      warning("Cannot calculate profiles as TMB object was not available.")
    }
  }

  # Fit submodels if requested
  if (submodels) {
    if (!silent) message("Fitting submodels...")
    submodel_results <- .fit_submodels_tmb(data, result, method, hessian, optimizer.control, silent)
    if (!is.null(submodel_results)) {
      result$submodels <- submodel_results$submodels
      result$lrt <- submodel_results$lrt
    }
  }

  # Calculate goodness of fit tests
  if (!silent) message("Calculating goodness-of-fit statistics...")
  gof_results <- .calculate_gof(result, data, family, silent)
  result$gof <- gof_results$gof
  result$diagnostics <- gof_results$diagnostics # Store any extra diagnostics

  # Generate diagnostic plots if requested
  if (plot) {
    plots <- .generate_plots(result, data, family, silent)

    if (!is.null(plots) && requireNamespace("patchwork", quietly = TRUE)) {
      plots <- patchwork::wrap_plots(plots) +
        patchwork::plot_annotation(
          title = paste("Diagnostic Plots for Fitted", toupper(family), "Model")
        )
      result$plots <- plots
    }
  }

  # Ensure class is set correctly
  class(result) <- "gkwfit"

  # Return the final result
  if (!silent) message("Fitting complete.")
  return(result)
}





#' @title Print Method for gkwfit Objects
#'
#' @description
#' Prints a concise summary of a model fitted by the \code{\link{gkwfit}} function,
#' displaying the call, estimated coefficients, log-likelihood, AIC, BIC,
#' number of observations, and a convergence warning if applicable.
#'
#' @param x An object of class \code{"gkwfit"}, typically the result of a call to \code{\link{gkwfit}}.
#' @param digits Integer; the minimum number of significant digits to be printed in values.
#'   Defaults to \code{max(3, getOption("digits") - 3)}.
#' @param ... Additional arguments passed to underlying print methods (currently unused).
#'
#' @return Invisibly returns the original input object \code{x}. Called for its side effect of printing to the console.
#'
#' @seealso \code{\link{gkwfit}}, \code{\link{summary.gkwfit}}
#'
#' @examples
#' \donttest{
#' # Generate a small sample from Kumaraswamy distribution
#' set.seed(2203)
#' y <- rkw(50, alpha = 2.5, beta = 1.5)
#'
#' # Fit the model with minimal options for speed
#' fit <- gkwfit(data = y, family = "kw", plot = FALSE, silent = TRUE)
#'
#' # Print method displays concise model summary
#' print(fit)
#'
#' # Alternative: object prints automatically when returned
#' fit
#' }
#'
#' @keywords methods internal
#' @author Lopes, J. E.
#' @export
print.gkwfit <- function(x, digits = max(3, getOption("digits") - 3), ...) {
  cat("Generalized Kumaraswamy Distribution Fit\n\n")

  cat("Call:\n")
  print(x$call)

  cat("\nCoefficients:\n")
  print.default(format(x$coefficients, digits = digits),
    print.gap = 2,
    quote = FALSE
  )

  cat("\nLog-likelihood:", formatC(x$loglik, digits = digits, format = "f"))
  cat("\nAIC:", formatC(x$AIC, digits = digits, format = "f"))
  cat("\nBIC:", formatC(x$BIC, digits = digits, format = "f"))
  cat("\nNumber of observations:", x$nobs)

  if (!x$convergence) {
    cat("\n\nWarning: Model did not converge\n")
  }

  invisible(x)
}



# Ensure stats package is implicitly available or loaded for pnorm, cov2cor etc.

#' @title Summary Method for gkwfit Objects
#'
#' @description
#' Calculates and prepares a detailed summary of a model fitted by \code{\link{gkwfit}}.
#' This includes coefficients, standard errors, test statistics (z-values), p-values,
#' log-likelihood, information criteria (AIC, BIC, AICc), number of estimated
#' parameters, convergence status, and optimizer details.
#'
#' @param object An object of class \code{"gkwfit"}, typically the result of a call
#'   to \code{\link{gkwfit}}.
#' @param correlation Logical; if \code{TRUE}, the correlation matrix of the estimated
#'   parameters is computed from the \code{vcov} component and included in the
#'   summary. Defaults to \code{FALSE}.
#' @param ... Additional arguments (currently unused).
#'
#' @details
#' This function computes standard errors, z-values (\eqn{Estimate / SE}), and
#' p-values (two-tailed test based on the standard normal distribution) for the
#' estimated model parameters by utilizing the coefficient estimates (\code{coef})
#' and their variance-covariance matrix (\code{vcov}). This requires that the
#' variance-covariance matrix was successfully computed and is available in the
#' \code{object} (typically requires \code{hessian = TRUE} in the original
#' \code{\link{gkwfit}} call and successful Hessian inversion).
#'
#' If standard errors cannot be reliably calculated (e.g., \code{vcov} is missing,
#' invalid, or indicates non-positive variance), the corresponding columns in the
#' coefficient table will contain \code{NA} values, and the \code{se_available}
#' flag will be set to \code{FALSE}.
#'
#' The returned object is of class \code{"summary.gkwfit"}, and its printing is
#' handled by \code{\link{print.summary.gkwfit}}.
#'
#' @return An object of class \code{"summary.gkwfit"}, which is a list containing:
#' \item{call}{The original function call.}
#' \item{family}{The specified distribution family.}
#' \item{coefficients}{A matrix of estimates, standard errors, z-values, and p-values. Contains NAs if SEs could not be computed.}
#' \item{loglik}{The maximized log-likelihood value (numeric).}
#' \item{df}{The number of estimated parameters.}
#' \item{aic}{Akaike Information Criterion.}
#' \item{bic}{Bayesian Information Criterion.}
#' \item{aicc}{Corrected Akaike Information Criterion.}
#' \item{nobs}{Number of observations used in fitting (integer).}
#' \item{convergence}{The convergence code returned by the optimizer.}
#' \item{message}{The message returned by the optimizer.}
#' \item{se_available}{Logical indicating if standard errors could be computed.}
#' \item{correlation}{The correlation matrix of coefficients (if \code{correlation = TRUE} and calculable), otherwise \code{NULL}.}
#' \item{fixed}{A named list of parameters that were held fixed during estimation, or \code{NULL}.}
#' \item{fit_method}{The primary fitting method specified ('tmb' or 'nr').}
#' \item{optimizer_method}{The specific optimizer used (e.g., 'nlminb', 'optim', 'Newton-Raphson').}
#'
#' @seealso \code{\link{gkwfit}}, \code{\link{print.summary.gkwfit}}, \code{\link{coef.gkwfit}}, \code{\link{vcov.gkwfit}}, \code{\link{logLik.gkwfit}}, \code{\link{AIC.gkwfit}}
#'
#' @examples
#' \donttest{
#' # Generate data and fit model
#' set.seed(2203)
#' y <- rkw(50, alpha = 2, beta = 3)
#' fit <- gkwfit(data = y, family = "kw", plot = FALSE)
#'
#' # Display detailed summary with parameter estimates and standard errors
#' summary(fit)
#'
#' # Control digits in output
#' summary(fit, digits = 4)
#' }
#'
#' @keywords methods summary
#' @author Lopes, J. E. (with refinements)
#' @export
summary.gkwfit <- function(object, correlation = FALSE, ...) {
  if (!inherits(object, "gkwfit")) {
    stop("Input 'object' must be of class 'gkwfit'")
  }

  # --- Extract components (using accessors if defined, else direct) ---
  # Define helper to safely get value or NULL
  safe_get <- function(obj, name, accessor = NULL) {
    val <- NULL
    if (!is.null(accessor) && exists(accessor, mode = "function")) {
      # Use tryCatch in case accessor fails unexpectedly
      val <- tryCatch(get(accessor)(obj), error = function(e) {
        warning("Accessor function '", accessor, "' failed: ", e$message)
        NULL
      })
    }
    # Fallback or if no accessor specified
    if (is.null(val) && name %in% names(obj)) {
      val <- obj[[name]]
    }
    return(val)
  }

  coefs <- safe_get(object, "coefficients", "coef.gkwfit")
  vcov_matrix <- safe_get(object, "vcov", "vcov.gkwfit")
  logLik_val <- safe_get(object, "loglik", "logLik.gkwfit") # Note output name is loglik
  nobs_val <- safe_get(object, "nobs", "nobs.gkwfit")
  aic_val <- safe_get(object, "AIC", "AIC.gkwfit")
  bic_val <- safe_get(object, "BIC", "BIC.gkwfit")
  aicc_val <- safe_get(object, "AICc") # Assuming no standard AICc accessor
  df_val <- safe_get(object, "df") # Number of estimated parameters

  # Direct access for components less likely to have accessors
  conv_status <- object$convergence
  conv_message <- object$message
  fixed_params <- object$fixed
  family_val <- object$family
  call_val <- object$call
  fit_method_val <- object$method # From TMB fit
  optimizer_method_val <- object$optimizer_method # Specific optimizer

  # --- Prepare Coefficient Table ---
  se_available <- FALSE
  coef_table <- NULL

  # Ensure coefficients exist before proceeding
  if (is.null(coefs) || length(coefs) == 0) {
    warning("No coefficients found in the 'gkwfit' object.")
    # Create an empty matrix with standard columns for consistency downstream
    coef_table <- matrix(NA_real_,
      nrow = 0, ncol = 4,
      dimnames = list(NULL, c("Estimate", "Std. Error", "z value", "Pr(>|z|)"))
    )
  } else {
    # Ensure coefficients are named
    if (is.null(names(coefs))) {
      # Try getting names from vcov if possible
      if (!is.null(vcov_matrix) && is.matrix(vcov_matrix) && nrow(vcov_matrix) == length(coefs) && !is.null(rownames(vcov_matrix))) {
        names(coefs) <- rownames(vcov_matrix)
      } else {
        warning("Coefficient names are missing; using generic names.")
        names(coefs) <- paste0("param", seq_along(coefs))
      }
    }

    # se <- sqrt(diag(solve(hskkw(object$coefficients, data = object$data))))

    # std_err <- sqrt(diag(solve(vcov_matrix)))
    std_err <- object$std.errors
    z_vals <- coefs / std_err
    # Handle division by zero or non-finite results safely
    z_vals[!is.finite(z_vals)] <- NA
    p_vals <- 2 * stats::pnorm(abs(z_vals), lower.tail = FALSE)

    coef_table <- cbind(
      Estimate = coefs,
      `Std. Error` = std_err,
      `z value` = z_vals,
      `Pr(>|z|)` = p_vals
    )
    se_available <- TRUE
    rownames(coef_table) <- names(coefs) # Ensure row names are set
    # Row names should be correct due to checks above
  }

  # --- Calculate Correlation Matrix (optional) ---
  cor_matrix <- NULL
  if (correlation && se_available) {
    # Further check for near-zero variances before cov2cor
    if (all(diag(vcov_matrix) > sqrt(.Machine$double.eps))) {
      cor_matrix <- tryCatch(stats::cov2cor(vcov_matrix), error = function(e) {
        warning("Could not compute correlation matrix from 'vcov': ", e$message)
        NULL # Return NULL if cov2cor fails
      })
      # Ensure dimnames match coefficient names, should be correct if vcov check passed
      if (!is.null(cor_matrix)) dimnames(cor_matrix) <- list(names(coefs), names(coefs))
    } else {
      warning("Cannot compute correlations: near-zero or zero variance estimates found.")
    }
  }


  # --- Assemble Summary List ---
  summary_list <- list(
    call = call_val,
    family = family_val,
    coefficients = coef_table,
    loglik = if (is.numeric(logLik_val)) as.numeric(logLik_val) else NA_real_,
    df = if (is.numeric(df_val)) as.integer(df_val) else NA_integer_,
    aic = if (is.numeric(aic_val)) aic_val else NA_real_,
    bic = if (is.numeric(bic_val)) bic_val else NA_real_,
    aicc = if (is.numeric(aicc_val)) aicc_val else NA_real_,
    nobs = if (is.numeric(nobs_val)) as.integer(nobs_val) else NA_integer_,
    convergence = if (is.numeric(conv_status)) conv_status else NA_integer_,
    message = if (is.character(conv_message)) conv_message else NA_character_,
    se_available = se_available,
    correlation = cor_matrix,
    fixed = fixed_params, # Can be NULL if none were fixed
    fit_method = fit_method_val, # Method used in TMB call (nlminb/optim) or null if NR? Need gkwfit logic check. Let's assume it stores 'tmb' or 'nr' maybe? Check object$method vs object$optimizer_method
    optimizer_method = optimizer_method_val # Specific optimizer used
  )

  # Assign class
  class(summary_list) <- "summary.gkwfit"
  return(summary_list)
}


# Ensure stats package is implicitly available or loaded for printCoefmat

#' @title Print Method for summary.gkwfit Objects
#'
#' @description
#' Prints the formatted summary of a \code{gkwfit} model object, generated by
#' \code{summary.gkwfit}. It displays the call, family, coefficient table
#' (with estimates, standard errors, z-values, p-values, and significance stars*),
#' fit statistics (LogLik, AIC, BIC, AICc), number of parameters and observations,
#' fixed parameters (if any), fit method, convergence status including optimizer
#' message, and optionally the correlation matrix of coefficients.
#'
#' * Significance stars are shown next to p-values by default if standard errors
#'   were available.
#'
#' @param x An object of class \code{"summary.gkwfit"}, usually the result of
#'   \code{summary(gkwfit_object)}.
#' @param digits Integer; the minimum number of significant digits to display
#'   for numeric values. Defaults to \code{max(3L, getOption("digits") - 3L)}.
#' @param signif.stars Logical; if \code{TRUE}, p-values are additionally encoded
#'   visually using "significance stars". Defaults to
#'   \code{getOption("show.signif.stars", TRUE)}.
#' @param ... Additional arguments passed to \code{\link[stats]{printCoefmat}}.
#'
#' @return Invisibly returns the original input object \code{x}. Called primarily
#'   for its side effect of printing the summary to the console.
#'
#' @seealso \code{\link{summary.gkwfit}}, \code{\link{gkwfit}}, \code{\link[stats]{printCoefmat}}
#'
#' @keywords methods internal print
#' @author Lopes, J. E. (with refinements)
#' @export
#' @method print summary.gkwfit
print.summary.gkwfit <- function(x, digits = max(3L, getOption("digits") - 3L),
                                 signif.stars = getOption("show.signif.stars", TRUE),
                                 ...) {
  # --- Print Call ---
  cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", sep = "")

  # --- Print Family ---
  cat("Family:", ifelse(is.null(x$family), "Not specified", x$family), "\n")

  # --- Print Fixed Parameters ---
  if (!is.null(x$fixed) && length(x$fixed) > 0) {
    cat("Fixed Parameters:\n")
    fixed_vals <- sapply(x$fixed, function(val) format(val, digits = digits, nsmall = 0))
    fixed_str <- paste(names(x$fixed), "=", fixed_vals, collapse = ", ")
    cat(" ", fixed_str, "\n")
  }

  # --- Print Coefficients ---
  cat("\nCoefficients:\n")
  if (!is.null(x$coefficients) && nrow(x$coefficients) > 0) {
    # Determine if p-values exist (last column usually) and SEs were available
    has_pvalue <- x$se_available && (ncol(x$coefficients) == 4)

    stats::printCoefmat(x$coefficients,
      digits = digits,
      signif.stars = signif.stars, # Ensures stars are used based on option/arg
      na.print = "NA",
      has.Pvalue = has_pvalue, # Correctly indicate if P-values are present and valid
      cs.ind = 1:2, # Column indices for Estimate and Std. Error
      tst.ind = 3, # Column index for z value (test statistic)
      # zap.ind = integer(), # Optional: prevent zapping small numbers, default is usually fine
      ...
    )
    if (!x$se_available) {
      cat("\n(Standard Errors & Tests unavailable; requires valid Hessian/vcov)\n")
    }
  } else {
    cat(" (No coefficients estimated or available to display)\n")
  }


  # --- Print Fit Statistics ---
  cat("\n--- Fit Statistics ---\n")
  cat("Log-likelihood:", formatC(x$loglik, format = "f", digits = digits))
  cat("  Parameters (est.):", ifelse(is.na(x$df), "NA", x$df), "\n") # Added df
  cat(
    "AIC:", formatC(x$aic, format = "f", digits = digits),
    "  BIC:", formatC(x$bic, format = "f", digits = digits),
    "  AICc:", formatC(x$aicc, format = "f", digits = digits), "\n"
  ) # Added AICc
  cat("Number of observations:", ifelse(is.na(x$nobs), "NA", x$nobs), "\n")


  # --- Print Fit Method and Convergence ---
  cat("Fit Method:", ifelse(is.null(x$fit_method), "Unknown", x$fit_method))
  if (!is.null(x$optimizer_method) && !is.null(x$fit_method) &&
    x$optimizer_method != x$fit_method) {
    cat(" (Optimizer: ", x$optimizer_method, ")", sep = "")
  }
  cat("\n") # Newline before convergence

  # Convergence Status Text
  conv_code <- x$convergence
  if (is.null(conv_code) || is.na(conv_code)) {
    conv_status_text <- "Unknown"
  } else if (conv_code == 0) {
    conv_status_text <- "Successful convergence"
  } else {
    conv_status_text <- paste0("Potential issues (code ", conv_code, ")")
  }
  # cat("Convergence:", conv_status_text)

  # Optimizer Message
  conv_msg <- if (!is.null(x$message) && nzchar(trimws(x$message))) trimws(x$message) else "None reported"
  # Print message only if it's not trivial (like the standard NLMINB message for success) or if convergence failed
  # Basic heuristic: print if code != 0 or if message is not standard success message
  print_msg <- (!is.na(conv_code) && conv_code != 0) ||
    (conv_msg != "None reported" && !grepl("relative convergence|objective function values|both X and GR tolerances met", conv_msg, ignore.case = TRUE))

  if (print_msg) {
    cat("\nOptimizer Message:", conv_msg, "\n")
  } else {
    cat("\n") # Ensure newline even if message isn't printed
  }


  # --- Print Correlation Matrix (Optional) ---
  if (!is.null(x$correlation)) {
    cat("\nCorrelation of Coefficients:\n")
    cor_mat <- x$correlation
    # Use stats::print.default or similar; printCoefmat is not for cor matrices
    # Ensure symmetric is FALSE if you only want lower/upper triangle
    stats::printCoefmat(cor_mat,
      digits = digits, signif.stars = FALSE, # No stars for correlations
      na.print = "NA", P.values = FALSE, has.Pvalue = FALSE, ...
    )
  }

  cat("\n") # Final newline for clean separation
  invisible(x) # Return the summary object invisibly
}

#' @title Plot Diagnostics for a gkwfit Object
#'
#' @description
#' Creates a panel of diagnostic plots for assessing the fit of a model estimated
#' by \code{\link{gkwfit}}. It displays a histogram of the data overlaid with the
#' fitted density, a Probability-Probability (P-P) plot, a Quantile-Quantile (Q-Q)
#' plot, and profile likelihood plots for each parameter if they were computed
#' during the fit (i.e., if \code{profile = TRUE} was used in \code{\link{gkwfit}}).
#'
#' @details
#' This function utilizes \code{ggplot2} for creating the plots and \code{patchwork}
#' for arranging them into a single figure. All plots use \code{ggplot2::theme_minimal()}.
#'
#' If the plots were already generated during the original \code{\link{gkwfit}} call
#' (because \code{plot = TRUE}), they are retrieved from the fitted object.
#' Otherwise, this function will attempt to generate the plots on the fly,
#' which requires the \code{ggplot2} package and the necessary distribution
#' functions (like \code{dgkw}, \code{pgkw}, \code{qgkw}, etc.) for the specific
#' \code{family} to be available.
#'
#' The arrangement of plots is handled automatically by \code{patchwork::wrap_plots}.
#' No user interaction (like menu selection) is required.
#'
#' @param x An object of class \code{"gkwfit"}, typically the result of a call to \code{\link{gkwfit}}.
#' @param ... Additional arguments (currently ignored).
#'
#' @return Invisibly returns the original input object \code{x}. This function is called for its side effect of producing a plot.
#'
#' @seealso \code{\link{gkwfit}}, \code{\link{summary.gkwfit}}
#'
#' @examples
#' \donttest{
#' # Load required package
#' library(ggplot2)
#'
#' # Generate data and fit model
#' set.seed(2203)
#' y <- rbeta_(50, gamma = 2, delta = 3)
#' fit <- gkwfit(data = y, family = "beta", plot = FALSE)
#'
#' # Generate standard diagnostic plots
#' plot(fit)
#'
#' # Generate data and fit model with profile = TRUE
#' fit <- gkwfit(data = y, family = "gkw", profile = TRUE, npoints = 15)
#'
#' # Standard diagnostic plots
#' plot(fit)
#' }
#'
#' @keywords hplot methods
#' @author Lopes, J. E.
#' @export
plot.gkwfit <- function(x, ...) {
  if (!inherits(x, "gkwfit")) {
    stop("Input 'x' must be of class 'gkwfit'")
  }

  plots_list <- gkwgof(x, simulate_p_values = FALSE, plot = TRUE, print_summary = FALSE, ...)

  invisible(plots_list)
}


#' @title Extract Model Coefficients from a gkwfit Object
#'
#' @description
#' Extracts the estimated coefficients for the parameters of a model fitted by
#' \code{\link{gkwfit}}. This is an S3 method for the generic \code{\link[stats]{coef}} function.
#'
#' @param object An object of class \code{"gkwfit"}, typically the result of a call to \code{\link{gkwfit}}.
#' @param ... Additional arguments (currently ignored).
#'
#' @return A named numeric vector containing the estimated coefficients for the
#'   parameters of the specified GKw family distribution. The names correspond
#'   to the parameter names (e.g., \code{"alpha"}, \code{"beta"}, etc.).
#'
#' @seealso \code{\link{gkwfit}}, \code{\link[stats]{coef}}, \code{\link{vcov.gkwfit}}, \code{\link{logLik.gkwfit}}
#'
#' @examples
#' \donttest{
#' # Generate data and fit model
#' set.seed(2203)
#' y <- rgkw(50, alpha = 1.5, beta = 2.5, gamma = 1.2, delta = 0.3, lambda = 1.1)
#' fit <- gkwfit(data = y, family = "gkw", plot = FALSE)
#'
#' # Extract all coefficients
#' params <- coef(fit)
#' print(params)
#'
#' # Access specific parameters
#' alpha_est <- coef(fit)["alpha"]
#' lambda_est <- coef(fit)["lambda"]
#' cat("Estimated alpha:", alpha_est, "\n")
#' cat("Estimated lambda:", lambda_est, "\n")
#' }
#'
#' @keywords methods models
#' @author Lopes, J. E.
#' @export
coef.gkwfit <- function(object, ...) {
  # Basic check for existence
  if (is.null(object$coefficients)) {
    warning("Component 'coefficients' not found in the 'gkwfit' object.")
    return(NULL)
  }
  object$coefficients
}


#' @title Extract Variance-Covariance Matrix from a gkwfit Object
#'
#' @description
#' Extracts the variance-covariance matrix of the estimated parameters from a model
#' fitted by \code{\link{gkwfit}}. This matrix is typically derived from the
#' inverse of the observed Hessian matrix calculated during fitting (requires
#' \code{hessian = TRUE} in the \code{\link{gkwfit}} call). This is an S3 method
#' for the generic \code{\link[stats]{vcov}} function.
#'
#' @param object An object of class \code{"gkwfit"}, typically the result of a call to \code{\link{gkwfit}}.
#' @param ... Additional arguments (currently ignored).
#'
#' @return A numeric matrix representing the variance-covariance matrix of the
#'   estimated model parameters. Row and column names correspond to the parameter
#'   names (e.g., \code{"alpha"}, \code{"beta"}, etc.). Returns \code{NULL} or
#'   raises a warning/error if the matrix is not available (e.g., if \code{hessian=FALSE}
#'   was used or if the Hessian computation failed).
#'
#' @seealso \code{\link{gkwfit}}, \code{\link[stats]{vcov}}, \code{\link{coef.gkwfit}}, \code{\link{logLik.gkwfit}}
#'
#' @examples
#' \donttest{
#' # Generate data and fit model (ensure hessian = TRUE for vcov)
#' set.seed(2203)
#' y <- rbkw(50, alpha = 2, beta = 3, gamma = 1.5, delta = 0.5)
#' fit <- gkwfit(data = y, family = "bkw", plot = FALSE, hessian = TRUE)
#'
#' # Extract variance-covariance matrix
#' vcov_matrix <- vcov(fit)
#' print(vcov_matrix)
#'
#' # Extract standard errors from the diagonal
#' std_errors <- sqrt(diag(vcov_matrix))
#' print(std_errors)
#'
#' # Compare with standard errors from summary
#' summary_se <- summary(fit)$coefficients[, "Std. Error"]
#' all.equal(std_errors, summary_se)
#' }
#'
#' @keywords methods models
#' @author Lopes, J. E.
#' @export
vcov.gkwfit <- function(object, ...) {
  # Basic check for existence
  vcov_mat <- object$vcov
  if (is.null(vcov_mat)) {
    warning(
      "Component 'vcov' (variance-covariance matrix) not found in the 'gkwfit' object.",
      "\nWas the model fitted with hessian=TRUE?"
    )
    return(NULL)
  }
  # Optional: Further checks if vcov_mat is a valid matrix?
  if (!is.matrix(vcov_mat) || !is.numeric(vcov_mat)) {
    warning("'vcov' component is not a numeric matrix.")
    return(NULL)
  }
  # Ensure dimnames match coefficients if possible
  if (!is.null(object$coefficients)) {
    coef_names <- names(object$coefficients)
    if (length(coef_names) == nrow(vcov_mat) && length(coef_names) == ncol(vcov_mat)) {
      dimnames(vcov_mat) <- list(coef_names, coef_names)
    }
  }
  vcov_mat
}


#' @title Compute Confidence Intervals for gkwfit Parameters
#'
#' @description
#' Computes confidence intervals for one or more parameters in a model fitted by
#' \code{\link{gkwfit}}. It uses the Wald method based on the estimated coefficients
#' and their standard errors derived from the variance-covariance matrix.
#'
#' @param object An object of class \code{"gkwfit"}, typically the result of a call to \code{\link{gkwfit}}.
#'   The object must contain valid coefficient estimates and a corresponding
#'   variance-covariance matrix (usually requires fitting with \code{hessian = TRUE}).
#' @param parm A specification of which parameters are to be given confidence intervals,
#'   either a vector of numbers (indices) or a vector of names. If missing,
#'   confidence intervals are computed for all parameters that have a valid
#'   standard error available. Parameter indices refer to the order of parameters
#'   for which standard errors could be calculated.
#' @param level The confidence level required (default: 0.95).
#' @param ... Additional arguments (currently ignored).
#'
#' @details
#' This function calculates confidence intervals using the Wald method:
#' \eqn{Estimate \pm z \times SE}, where \eqn{z} is the appropriate quantile
#' from the standard normal distribution for the given confidence `level`.
#'
#' It relies on the results from \code{\link{coef.gkwfit}} and \code{\link{vcov.gkwfit}}
#' (or directly accesses \code{object$coefficients} and \code{object$vcov} if those
#' methods aren't defined). It checks for the validity of the variance-covariance
#' matrix before proceeding.
#'
#' Since all parameters of the GKw family distributions are constrained to be positive,
#' the lower bound of the confidence interval is truncated at a small positive value
#' (\code{.Machine$double.eps^0.5}) if the calculated lower bound is non-positive.
#'
#' If `parm` is specified, it selects the parameters for which to compute intervals.
#' Numeric indices in `parm` refer to the parameters that have calculable standard
#' errors, not necessarily all parameters in the model (if some were fixed or had
#' estimation issues).
#'
#' @return A matrix with columns giving lower and upper confidence limits for each
#'   parameter specified in `parm`. The columns are labeled with quantile percentages
#'   (e.g., \code{"2.5 %"} and \code{"97.5 %"} for \code{level = 0.95}). Row names
#'   are taken from the parameter names. Returns \code{NULL} or stops with an error
#'   if coefficients or a valid variance-covariance matrix cannot be extracted.
#'
#' @seealso \code{\link{gkwfit}}, \code{\link[stats]{confint}}, \code{\link{coef.gkwfit}}, \code{\link{vcov.gkwfit}}
#'
#' @examples
#' \donttest{
#' # Generate data and fit model
#' set.seed(2203)
#' y <- rkw(50, alpha = 2, beta = 3)
#' fit <- gkwfit(data = y, family = "kw", plot = FALSE, hessian = TRUE)
#'
#' # Calculate confidence intervals for all parameters
#' ci <- confint(fit)
#' print(ci)
#'
#' # 90% confidence interval
#' ci_90 <- confint(fit, level = 0.90)
#' print(ci_90)
#'
#' # Confidence interval for specific parameter
#' ci_alpha <- confint(fit, parm = "alpha")
#' print(ci_alpha)
#' }
#'
#' @keywords methods models
#' @author Lopes, J. E.
#' @export
confint.gkwfit <- function(object, parm, level = 0.95, ...) {
  if (!inherits(object, "gkwfit")) {
    stop("Input 'object' must be of class 'gkwfit'.")
  }
  if (!is.numeric(level) || length(level) != 1 || level <= 0 || level >= 1) {
    stop("'level' must be a single numeric value between 0 and 1.")
  }

  # Prefer methods if they exist and work
  cf <- tryCatch(stats::coef(object), error = function(e) object$coefficients)
  vc <- tryCatch(stats::vcov(object), error = function(e) object$vcov)

  if (is.null(cf)) {
    stop("Could not extract coefficients ('coefficients') from the object.")
  }
  if (is.null(vc)) {
    stop(
      "Could not extract variance-covariance matrix ('vcov') from the object.",
      "\nWas the model fitted with hessian=TRUE?"
    )
  }

  # --- Validate VCOV and Calculate SE ---
  coef_names <- names(cf)
  if (is.null(coef_names)) {
    stop("Coefficients must be named.")
  }
  if (!is.matrix(vc) || nrow(vc) != ncol(vc) || nrow(vc) != length(cf) ||
    is.null(rownames(vc)) || is.null(colnames(vc)) || # Check names exist
    !all(rownames(vc) == coef_names) || !all(colnames(vc) == coef_names)) {
    stop(
      "Variance-covariance matrix ('vcov') has unexpected dimensions or names",
      " relative to coefficients."
    )
  }
  variances <- diag(vc)
  names(variances) <- coef_names # Ensure names consistency
  if (anyNA(variances)) {
    warning(
      "NA values found in the diagonal of the vcov matrix.",
      " Parameters with NA variance will have NA intervals."
    )
  }
  if (any(variances[!is.na(variances)] < 0)) {
    warning(
      "Negative variances found in vcov matrix.",
      " Affected parameters will have NA intervals."
    )
    variances[variances < 0] <- NA # Set negative variances to NA
  }
  ses <- sqrt(variances) # Will produce NAs where variances were NA or negative

  # --- Determine Parameters for Intervals ---
  # Identify parameters with valid (non-NA) standard errors
  param_names_with_se <- coef_names[!is.na(ses)]

  if (length(param_names_with_se) == 0) {
    warning(
      "No parameters with valid standard errors available ",
      "for confidence interval calculation."
    )
    # Return an empty matrix with correct column names
    a <- (1 - level) / 2
    a <- c(a, 1 - a)
    pct <- paste(format(100 * a, trim = TRUE, scientific = FALSE, digits = 3), "%")
    return(matrix(NA_real_, nrow = 0, ncol = 2, dimnames = list(NULL, pct)))
  }

  # Select parameters based on 'parm' argument
  if (missing(parm)) {
    params_to_compute <- param_names_with_se # Default: all with valid SE
  } else {
    if (is.numeric(parm)) {
      # Check numeric indices against the list of parameters *with SEs*
      if (any(parm <= 0) || any(parm > length(param_names_with_se))) {
        stop(
          "Numeric 'parm' indices are out of bounds (1 to ",
          length(param_names_with_se), ") for parameters with available SEs."
        )
      }
      params_to_compute <- param_names_with_se[parm]
    } else if (is.character(parm)) {
      unknown_parm <- setdiff(parm, coef_names) # Check against all coef names
      if (length(unknown_parm) > 0) {
        stop(
          "Unknown parameter(s) requested in 'parm': ",
          paste(dQuote(unknown_parm), collapse = ", ")
        )
      }
      # Filter requested parameters to only those with valid SEs
      params_to_compute <- intersect(parm, param_names_with_se)
      if (length(params_to_compute) == 0) {
        # Check if the requested parameters existed but just lacked SEs
        requested_but_no_se <- intersect(parm, setdiff(coef_names, param_names_with_se))
        if (length(requested_but_no_se) > 0) {
          stop(
            "None of the requested parameters have available standard errors. ",
            "Problematic parameters might include: ",
            paste(dQuote(requested_but_no_se), collapse = ", ")
          )
        } else {
          # Should not happen given the unknown_parm check, but as fallback
          stop("No valid parameters selected for confidence intervals.")
        }
      }
    } else {
      stop("'parm' must be missing, a numeric vector, or a character vector.")
    }
  }

  # --- Calculate Confidence Intervals ---
  a <- (1 - level) / 2
  a <- c(a, 1 - a)
  pct <- paste(format(100 * a, trim = TRUE, scientific = FALSE, digits = 3), "%") # Format column names

  # Z-critical value for normal distribution
  z_crit <- stats::qnorm(a[2])

  # Subset coefficients and SEs for the selected parameters
  cf_sub <- cf[params_to_compute]
  ses_sub <- ses[params_to_compute] # NAs handled by calculation below

  # Calculate intervals matrix: Estimate +/- z * SE
  ci <- cf_sub + ses_sub %o% c(-z_crit, z_crit)

  # Ensure parameters are positive (all GKw parameters should be)
  # Using small positive value instead of exact zero
  # Apply only where the original estimate was positive, otherwise result is NA anyway
  ci <- ifelse(!is.na(ci) & ci < .Machine$double.eps^0.5, .Machine$double.eps^0.5, ci)

  # --- Format Output Matrix ---
  dimnames(ci) <- list(params_to_compute, pct) # Assign row and column names

  return(ci)
}


#' @title Extract Log-Likelihood from a gkwfit Object
#'
#' @description
#' Extracts the maximized log-likelihood value from a model fitted by \code{\link{gkwfit}}.
#' It returns an object of class \code{"logLik"}, which includes attributes for the
#' degrees of freedom (\code{"df"}) and the number of observations (\code{"nobs"}) used in the fit.
#'
#' @param object An object of class \code{"gkwfit"}, typically the result of a call to \code{\link{gkwfit}}.
#' @param ... Additional arguments (currently ignored).
#'
#' @details
#' This method provides compatibility with standard R functions that operate on
#' log-likelihood values, such as \code{\link[stats]{AIC}}, \code{\link[stats]{BIC}},
#' and likelihood ratio tests. It retrieves the log-likelihood stored during the
#' model fitting process (in \code{object$loglik}) and attaches the required
#' attributes (\code{object$df} for the number of estimated parameters and
#' \code{object$nobs} for the number of observations).
#'
#' @return An object of class \code{"logLik"}. This is the numeric log-likelihood value
#'   with the following attributes:
#'   \item{df}{The number of estimated parameters in the model (integer).}
#'   \item{nobs}{The number of observations used for fitting the model (integer).}
#'
#' @seealso \code{\link{gkwfit}}, \code{\link[stats]{AIC}}, \code{\link[stats]{BIC}}, \code{\link[stats]{logLik}}
#'
#' @examples
#' \donttest{
#' # Generate data and fit two models
#' set.seed(2203)
#' y <- rgkw(50, alpha = 2, beta = 3, gamma = 1, delta = 0, lambda = 1.5)
#'
#' fit1 <- gkwfit(data = y, family = "kkw", plot = FALSE) # KKw model
#' fit2 <- gkwfit(data = y, family = "ekw", plot = FALSE) # EKw model
#'
#' # Extract log-likelihood values
#' ll1 <- logLik(fit1)
#' ll2 <- logLik(fit2)
#'
#' print(ll1)
#' print(ll2)
#'
#' # Use for likelihood ratio test
#' lr_stat <- -2 * (as.numeric(ll1) - as.numeric(ll2))
#' df_diff <- attr(ll1, "df") - attr(ll2, "df")
#' p_value <- pchisq(lr_stat, df = abs(df_diff), lower.tail = FALSE)
#'
#' cat("LR statistic:", lr_stat, "\n")
#' cat("df:", df_diff, "\n")
#' cat("p-value:", p_value, "\n")
#' }
#'
#' @keywords methods models utility
#' @author Lopes, J. E.
#' @importFrom stats logLik
#' @method logLik gkwfit
#' @export
logLik.gkwfit <- function(object, ...) {
  # Ensure the input object is of the correct class
  if (!inherits(object, "gkwfit")) {
    stop("Input 'object' must be of class 'gkwfit'.")
  }

  # Check for the existence of required components
  required_comps <- c("loglik", "df", "nobs")
  missing_comps <- setdiff(required_comps, names(object))
  if (length(missing_comps) > 0) {
    stop(
      "The 'gkwfit' object provided to logLik.gkwfit is missing required component(s): ",
      paste(missing_comps, collapse = ", "), "."
    )
  }

  val <- object$loglik
  df_val <- object$df
  nobs_val <- object$nobs

  # Validate the types and values of the components
  if (!is.numeric(val) || length(val) != 1 || !is.finite(val)) {
    stop("Component 'loglik' must be a single finite numeric value in logLik.gkwfit.")
  }
  # df should be a non-negative integer count of estimated parameters
  if (!is.numeric(df_val) || length(df_val) != 1 || !is.finite(df_val) || df_val < 0 || (df_val %% 1 != 0)) {
    # Allow tolerance for floating point comparison, although df should be integer
    if (!isTRUE(all.equal(df_val, round(df_val))) || df_val < 0) {
      stop("Component 'df' (number of estimated parameters) must be a single non-negative integer in logLik.gkwfit.")
    }
    # Coerce potentially float-like integer (e.g. 3.0) to integer
    df_val <- as.integer(round(df_val))
  } else {
    df_val <- as.integer(df_val) # Ensure integer type
  }

  # nobs should be a positive integer count of observations
  if (!is.numeric(nobs_val) || length(nobs_val) != 1 || !is.finite(nobs_val) || nobs_val <= 0 || (nobs_val %% 1 != 0)) {
    if (!isTRUE(all.equal(nobs_val, round(nobs_val))) || nobs_val <= 0) {
      stop("Component 'nobs' (number of observations) must be a single positive integer in logLik.gkwfit.")
    }
    nobs_val <- as.integer(round(nobs_val))
  } else {
    nobs_val <- as.integer(nobs_val) # Ensure integer type
  }

  # Assign attributes and class
  attr(val, "df") <- df_val
  attr(val, "nobs") <- nobs_val
  class(val) <- "logLik"

  return(val)
}


#' @title Calculate AIC or BIC for gkwfit Objects
#'
#' @description
#' Computes the Akaike Information Criterion (AIC) or variants like the Bayesian
#' Information Criterion (BIC) for one or more fitted model objects of class \code{"gkwfit"}.
#'
#' @param object An object of class \code{"gkwfit"}, typically the result of a call to \code{\link{gkwfit}}.
#' @param ... Optionally, more fitted model objects of class \code{"gkwfit"}.
#' @param k Numeric scalar specifying the penalty per parameter. The default \code{k = 2}
#'   corresponds to the traditional AIC. Use \code{k = log(n)} (where n is the number
#'   of observations) for the BIC (Bayesian Information Criterion).
#'
#' @details
#' This function calculates an information criterion based on the formula
#' \eqn{-2 \times \log Likelihood + k \times df}, where \eqn{df} represents the
#' number of estimated parameters in the model (degrees of freedom).
#'
#' It relies on the \code{\link{logLik.gkwfit}} method to extract the log-likelihood
#' and the degrees of freedom for each model.
#'
#' When comparing multiple models fitted to the **same data**, the model with the
#' lower AIC or BIC value is generally preferred. The function returns a sorted
#' data frame to facilitate this comparison when multiple objects are provided.
#'
#' @return
#' \itemize{
#'   \item If only one \code{object} is provided: A single numeric value representing the calculated criterion (AIC or BIC).
#'   \item If multiple objects are provided: A \code{data.frame} with rows corresponding
#'     to the models and columns for the degrees of freedom (\code{df}) and the
#'     calculated criterion value (named \code{AIC}, regardless of the value of \code{k}).
#'     The data frame is sorted in ascending order based on the criterion values.
#'     Row names are derived from the deparsed calls of the fitted models.
#' }
#'
#' @seealso \code{\link{gkwfit}}, \code{\link[stats]{AIC}}, \code{\link{logLik.gkwfit}}, \code{\link{BIC.gkwfit}}
#'
#' @examples
#' \donttest{
#' set.seed(2203)
#' y <- rkw(1000, alpha = 2.5, beta = 1.5)
#'
#' # Fit different models to the same data
#' fit1_kw <- gkwfit(y, family = "kw", silent = TRUE)
#' fit2_bkw <- gkwfit(y, family = "bkw", silent = TRUE)
#' fit3_gkw <- gkwfit(y, family = "gkw", silent = TRUE)
#'
#' # Calculate AIC for a single model
#' aic1 <- AIC(fit1_kw)
#' print(aic1)
#'
#' # Compare AIC values for multiple models
#' aic_comparison <- c(AIC(fit1_kw), AIC(fit2_bkw), AIC(fit3_gkw))
#' print(aic_comparison)
#' }
#'
#' @keywords models methods
#' @author Lopes, J. E.
#' @importFrom stats AIC
#' @method AIC gkwfit
#' @export
AIC.gkwfit <- function(object, ..., k = 2) {
  # --- Input Validation ---
  if (!inherits(object, "gkwfit")) {
    stop("Input 'object' must be of class 'gkwfit'.")
  }
  objects <- list(object, ...)
  obj_classes <- sapply(objects, class)
  if (any(obj_classes != "gkwfit")) {
    warning("All objects passed to AIC should ideally be of class 'gkwfit'.")
  }
  if (!is.numeric(k) || length(k) != 1 || k < 0) {
    stop("'k' must be a single non-negative numeric value.")
  }


  # --- Use logLik method to get value and df attribute consistently ---
  lls <- lapply(objects, function(obj) {
    ll <- tryCatch(logLik(obj), error = function(e) NULL)
    if (is.null(ll) || !inherits(ll, "logLik")) {
      stop("Could not extract valid 'logLik' object for one of the models.")
    }
    if (is.null(attr(ll, "df")) || is.null(attr(ll, "nobs"))) {
      stop("The 'logLik' object is missing 'df' or 'nobs' attribute.")
    }
    ll
  })

  # --- Check if nobs are consistent for model comparison ---
  if (length(lls) > 1) {
    nobs_vals <- sapply(lls, attr, "nobs")
    if (length(unique(nobs_vals)) > 1) {
      warning(
        "Models were not all fitted to the same number of observations.",
        "\nAIC/BIC comparison might be problematic."
      )
    }
  }

  # --- Calculate Criterion ---
  vals <- sapply(lls, function(ll) -2 * as.numeric(ll) + k * attr(ll, "df"))
  dfs <- sapply(lls, function(ll) attr(ll, "df"))

  # --- Format Output ---
  if (length(objects) == 1) {
    return(vals)
  } else {
    # Try to get model names from call
    calls <- lapply(objects, function(obj) {
      # Attempt to retrieve the call safely
      obj_call <- tryCatch(obj$call, error = function(e) NULL)
      if (!is.call(obj_call)) obj_call <- NULL # Ensure it's a call or NULL
      obj_call
    })
    # Use deparse1 for concise name, provide default if call missing
    mnames <- sapply(calls, function(cc) {
      if (!is.null(cc)) deparse1(cc, collapse = " ") else paste0("Model", seq_along(calls))
    })
    # Handle potential duplicate names
    if (anyDuplicated(mnames)) mnames <- make.unique(mnames, sep = ".")

    result <- data.frame(
      df = dfs,
      AIC = vals # Column name remains "AIC" regardless of k, per stats::AIC convention
    )
    rownames(result) <- mnames

    # Sort by the criterion value
    result <- result[order(result$AIC), ]
    return(result)
  }
}


#' @title Calculate Bayesian Information Criterion (BIC) for gkwfit Objects
#'
#' @description
#' Computes the Bayesian Information Criterion (BIC), sometimes called the
#' Schwarz criterion (SIC), for one or more fitted model objects of class \code{"gkwfit"}.
#'
#' @param object An object of class \code{"gkwfit"}, typically the result of a call
#'   to \code{\link{gkwfit}}.
#' @param ... Optionally, more fitted model objects of class \code{"gkwfit"}.
#'
#' @details
#' This function calculates the BIC based on the formula
#' \eqn{-2 \times \log Likelihood + \log(n) \times df}, where \eqn{n} is the number
#' of observations and \eqn{df} represents the number of estimated parameters in the
#' model (degrees of freedom).
#'
#' It relies on the \code{\link{logLik.gkwfit}} method to extract the log-likelihood,
#' the degrees of freedom (\code{df}), and the number of observations (\code{nobs})
#' for each model. Ensure that \code{logLik.gkwfit} is defined and returns a valid
#' \code{"logLik"} object with appropriate attributes.
#'
#' When comparing multiple models fitted to the **same data**, the model with the
#' lower BIC value is generally preferred, as BIC tends to penalize model complexity
#' more heavily than AIC for larger sample sizes. The function returns a sorted
#' data frame to facilitate this comparison when multiple objects are provided. A
#' warning is issued if models were fitted to different numbers of observations.
#'
#' @return
#' \itemize{
#'   \item If only one \code{object} is provided: A single numeric value, the calculated BIC.
#'   \item If multiple objects are provided: A \code{data.frame} with rows corresponding
#'     to the models and columns for the degrees of freedom (\code{df}) and the
#'     calculated BIC value (named \code{BIC}). The data frame is sorted in
#'     ascending order based on the BIC values. Row names are generated from the
#'     deparsed calls or the names of the arguments passed to BIC.
#' }
#'
#' @seealso \code{\link{gkwfit}}, \code{\link[stats]{BIC}}, \code{\link{logLik.gkwfit}}, \code{\link{AIC.gkwfit}}
#'
#' @examples
#' \donttest{
#'
#' set.seed(2203)
#' y <- rkw(1000, alpha = 2.5, beta = 1.5)
#'
#' # Fit different models to the same data
#' fit1_kw <- gkwfit(y, family = "kw", silent = TRUE)
#' fit2_bkw <- gkwfit(y, family = "bkw", silent = TRUE)
#' fit3_gkw <- gkwfit(y, family = "gkw", silent = TRUE)
#'
#' # Calculate BIC for a single model
#' bic1 <- BIC(fit1_kw)
#' print(bic1)
#'
#' # Compare BIC values for multiple models
#' bic_comparison <- c(BIC(fit1_kw), BIC(fit2_bkw), BIC(fit3_gkw))
#' print(bic_comparison)
#' }
#'
#' @keywords models methods stats
#' @author Lopes, J. E. (with refinements)
#' @importFrom stats BIC logLik nobs
#' @method BIC gkwfit
#' @export
BIC.gkwfit <- function(object, ...) {
  # Combine the first object and any others passed via ...
  objects <- list(object, ...)
  # Store original argument names/expressions for potential use in rownames
  arg_names <- as.character(match.call(expand.dots = TRUE)[-1L])

  # --- Basic Input Validation ---
  obj_classes <- sapply(objects, function(o) class(o)[1]) # Get primary class
  if (!inherits(object, "gkwfit")) {
    stop("The first argument 'object' must be of class 'gkwfit'.")
  }
  if (any(!sapply(objects, inherits, "gkwfit"))) {
    # Find which ones don't inherit
    bad_obj_indices <- which(!sapply(objects, inherits, "gkwfit"))
    warning(
      "Argument(s) at position(s) ", paste(bad_obj_indices, collapse = ", "),
      " do not inherit from 'gkwfit'. Ensure all models are comparable."
    )
  }

  # --- Use logLik method to get value, df, and nobs attribute consistently ---
  lls <- vector("list", length(objects))
  for (i in seq_along(objects)) {
    current_obj <- objects[[i]]
    ll <- tryCatch(stats::logLik(current_obj), error = function(e) {
      # Provide context if logLik fails
      list(error = TRUE, message = paste("Failed to extract logLik for model #", i, ": ", e$message))
    })

    # Check for failure during logLik extraction
    if (is.list(ll) && isTRUE(ll$error)) {
      stop(ll$message)
    }

    # Validate the returned logLik object
    if (is.null(ll) || !inherits(ll, "logLik")) {
      stop(
        "Could not extract a valid 'logLik' object for model #", i,
        " (argument '", arg_names[i], "')."
      )
    }
    req_attrs <- c("df", "nobs")
    if (!all(req_attrs %in% names(attributes(ll)))) {
      stop(
        "The 'logLik' object is missing 'df' or 'nobs' attribute for model #", i,
        " (argument '", arg_names[i], "')."
      )
    }
    n_obs_val <- attr(ll, "nobs")
    if (is.null(n_obs_val) || !is.numeric(n_obs_val) || length(n_obs_val) != 1 || n_obs_val <= 0 || !is.finite(n_obs_val)) {
      stop(
        "Number of observations ('nobs' attribute of logLik) must be a single positive finite value ",
        "for BIC calculation for model #", i, " (argument '", arg_names[i], "')."
      )
    }
    # Store the validated logLik object
    lls[[i]] <- ll
  } # End loop through objects

  # --- Check if nobs are consistent for model comparison ---
  nobs_vals <- sapply(lls, attr, "nobs")
  if (length(unique(nobs_vals)) > 1) {
    warning(
      "Models were not all fitted to the same number of observations.",
      "\nBIC comparison across models with different 'nobs' may be problematic."
    )
  }

  # --- Calculate BIC ---
  # BIC = -2*logLik + log(nobs) * df
  vals <- sapply(lls, function(ll) -2 * as.numeric(ll) + log(attr(ll, "nobs")) * attr(ll, "df"))
  dfs <- sapply(lls, function(ll) attr(ll, "df"))

  # --- Format Output ---
  if (length(objects) == 1) {
    # Return single numeric value for single object
    return(vals)
  } else {
    # Create data frame for multiple objects
    # Try to get model names from call stored in the object, fall back to arg names
    mnames <- character(length(objects))
    for (i in seq_along(objects)) {
      obj_call <- tryCatch(objects[[i]]$call, error = function(e) NULL)
      if (!is.null(obj_call) && is.call(obj_call)) {
        # Use deparse1 for concise name from the object's call
        mnames[i] <- deparse1(obj_call, collapse = " ")
      } else {
        # Fallback to the name/expression used in the BIC() call
        mnames[i] <- arg_names[i]
      }
    }

    # Ensure unique names if needed (e.g., if default calls were used)
    if (anyDuplicated(mnames)) mnames <- make.unique(mnames, sep = ".")

    result <- data.frame(
      df = dfs,
      BIC = vals # Column name is BIC (standard)
    )
    rownames(result) <- mnames

    # Sort by the criterion value (ascending)
    result <- result[order(result$BIC), ]
    return(result)
  }
}


#' @title Compare Fitted gkwfit Models using Likelihood Ratio Tests
#'
#' @description
#' Computes Likelihood Ratio Tests (LRT) to compare two or more nested models
#' fitted using \code{\link{gkwfit}}. It produces a table summarizing the models
#' and the test statistics.
#'
#' @param object An object of class \code{"gkwfit"}, representing the first fitted model.
#' @param ... One or more additional objects of class \code{"gkwfit"}, representing
#'   subsequent fitted models, assumed to be nested within each other or the first model.
#'
#' @details
#' This function performs pairwise likelihood ratio tests between consecutively ordered
#' models (ordered by their degrees of freedom). It assumes the models are nested
#' and are fitted to the same dataset. A warning is issued if the number of
#' observations differs between models.
#'
#' The Likelihood Ratio statistic is calculated as \eqn{LR = 2 \times (\log L_{complex} - \log L_{simple})}.
#' This statistic is compared to a Chi-squared distribution with degrees of freedom
#' equal to the difference in the number of parameters between the two models
#' (\eqn{\Delta df = df_{complex} - df_{simple}}).
#'
#' The output table includes the number of parameters (`N.Par`), AIC, BIC, log-likelihood (`LogLik`),
#' the test description (`Test`), the LR statistic (`LR stat.`), and the p-value (`Pr(>Chi)`).
#' Models are ordered by increasing complexity (number of parameters).
#'
#' Warnings are issued if models do not appear correctly nested based on degrees of
#' freedom or if the log-likelihood decreases for a more complex model, as the LRT
#' results may not be meaningful in such cases.
#'
#' The function relies on a working \code{\link{logLik.gkwfit}} method to extract
#' necessary information (log-likelihood, df, nobs).
#'
#' @return An object of class \code{c("anova.gkwfit", "anova", "data.frame")}.
#'   This data frame contains rows for each model and columns summarizing the fit
#'   and the pairwise likelihood ratio tests. It includes:
#'   \item{N.Par}{Number of estimated parameters (degrees of freedom).}
#'   \item{AIC}{Akaike Information Criterion.}
#'   \item{BIC}{Bayesian Information Criterion.}
#'   \item{LogLik}{Log-likelihood value.}
#'   \item{Test}{Description of the pairwise comparison (e.g., "1 vs 2").}
#'   \item{LR stat.}{Likelihood Ratio test statistic.}
#'   \item{Pr(>Chi)}{P-value from the Chi-squared test.}
#'   The table is printed using a method that mimics \code{print.anova}.
#'
#' @seealso \code{\link{gkwfit}}, \code{\link{logLik.gkwfit}}, \code{\link{AIC.gkwfit}}, \code{\link{BIC.gkwfit}}, \code{\link[stats]{anova}}
#'
#' @examples
#' \dontrun{
#' # Load required packages
#' library(ggplot2)
#' library(patchwork)
#' library(betareg)
#' # Generate data from GKw distribution
#' set.seed(2203)
#' n <- 1000
#' y <- rgkw(n, alpha = 2, beta = 3, gamma = 1.5, delta = 0.2, lambda = 1.2)
#'
#' # Fit models from GKw family respecting their parameter structures
#' # Full GKw model: 5 parameters (alpha, beta, gamma, delta, lambda)
#' fit_gkw <- gkwfit(data = y, family = "gkw", plot = FALSE)
#'
#' # BKw model: 4 parameters (alpha, beta, gamma, delta)
#' fit_bkw <- gkwfit(data = y, family = "bkw", plot = FALSE)
#'
#' # KKw model: 4 parameters (alpha, beta, delta, lambda)
#' fit_kkw <- gkwfit(data = y, family = "kkw", plot = FALSE)
#'
#' # EKw model: 3 parameters (alpha, beta, lambda)
#' fit_ekw <- gkwfit(data = y, family = "ekw", plot = FALSE)
#'
#' # Mc model: 3 parameters (gamma, delta, lambda)
#' fit_mc <- gkwfit(data = y, family = "mc", plot = FALSE)
#'
#' # Kw model: 2 parameters (alpha, beta)
#' fit_kw <- gkwfit(data = y, family = "kw", plot = FALSE)
#'
#' # Beta model: 2 parameters (gamma, delta)
#' fit_beta <- gkwfit(data = y, family = "beta", plot = FALSE)
#'
#' # Test 1: BKw vs GKw (testing lambda)
#' # H0: lambda=1 (BKw) vs H1: lambda!=1 (GKw)
#' cat("=== Testing BKw vs GKw (adding lambda parameter) ===\n")
#' test_bkw_gkw <- anova(fit_bkw, fit_gkw)
#' print(test_bkw_gkw)
#'
#' # Test 2: KKw vs GKw (testing gamma)
#' # H0: gamma=1 (KKw) vs H1: gamma!=1 (GKw)
#' cat("\n=== Testing KKw vs GKw (adding gamma parameter) ===\n")
#' test_kkw_gkw <- anova(fit_kkw, fit_gkw)
#' print(test_kkw_gkw)
#'
#' # Test 3: Kw vs EKw (testing lambda)
#' # H0: lambda=1 (Kw) vs H1: lambda!=1 (EKw)
#' cat("\n=== Testing Kw vs EKw (adding lambda parameter) ===\n")
#' test_kw_ekw <- anova(fit_kw, fit_ekw)
#' print(test_kw_ekw)
#'
#' # Test 4: Beta vs Mc (testing lambda)
#' # H0: lambda=1 (Beta) vs H1: lambda!=1 (Mc)
#' cat("\n=== Testing Beta vs Mc (adding lambda parameter) ===\n")
#' test_beta_mc <- anova(fit_beta, fit_mc)
#' print(test_beta_mc)
#'
#' # Visualize model comparison
#' # Create dataframe summarizing all models
#' models_df <- data.frame(
#'   Model = c("GKw", "BKw", "KKw", "EKw", "Mc", "Kw", "Beta"),
#'   Parameters = c(
#'     paste("alpha,beta,gamma,delta,lambda"),
#'     paste("alpha,beta,gamma,delta"),
#'     paste("alpha,beta,delta,lambda"),
#'     paste("alpha,beta,lambda"),
#'     paste("gamma,delta,lambda"),
#'     paste("alpha,beta"),
#'     paste("gamma,delta")
#'   ),
#'   Param_count = c(5, 4, 4, 3, 3, 2, 2),
#'   LogLik = c(
#'     as.numeric(logLik(fit_gkw)),
#'     as.numeric(logLik(fit_bkw)),
#'     as.numeric(logLik(fit_kkw)),
#'     as.numeric(logLik(fit_ekw)),
#'     as.numeric(logLik(fit_mc)),
#'     as.numeric(logLik(fit_kw)),
#'     as.numeric(logLik(fit_beta))
#'   ),
#'   AIC = c(
#'     fit_gkw$AIC,
#'     fit_bkw$AIC,
#'     fit_kkw$AIC,
#'     fit_ekw$AIC,
#'     fit_mc$AIC,
#'     fit_kw$AIC,
#'     fit_beta$AIC
#'   ),
#'   BIC = c(
#'     fit_gkw$BIC,
#'     fit_bkw$BIC,
#'     fit_kkw$BIC,
#'     fit_ekw$BIC,
#'     fit_mc$BIC,
#'     fit_kw$BIC,
#'     fit_beta$BIC
#'   )
#' )
#'
#' # Sort by AIC
#' models_df <- models_df[order(models_df$AIC), ]
#' print(models_df)
#'
#' # Create comprehensive visualization
#' # Plot showing model hierarchy and information criteria
#' p1 <- ggplot(models_df, aes(x = Param_count, y = LogLik, label = Model)) +
#'   geom_point(size = 3) +
#'   geom_text(vjust = -0.8) +
#'   labs(
#'     title = "Log-likelihood vs Model Complexity",
#'     x = "Number of Parameters",
#'     y = "Log-likelihood"
#'   ) +
#'   theme_minimal()
#'
#' # Create information criteria comparison
#' models_df_long <- tidyr::pivot_longer(
#'   models_df,
#'   cols = c("AIC", "BIC"),
#'   names_to = "Criterion",
#'   values_to = "Value"
#' )
#'
#' p2 <- ggplot(models_df_long, aes(x = reorder(Model, -Value), y = Value, fill = Criterion)) +
#'   geom_bar(stat = "identity", position = "dodge") +
#'   labs(
#'     title = "Information Criteria Comparison",
#'     x = "Model",
#'     y = "Value (lower is better)"
#'   ) +
#'   theme_minimal() +
#'   theme(axis.text.x = element_text(angle = 45, hjust = 1))
#'
#' # Print plots
#' print(p1 + p2)
#'
#' # ============================================================================
#' # Manual LR tests to demonstrate underlying calculations
#' # ============================================================================
#' # Function to perform manual likelihood ratio test
#' manual_lr_test <- function(model_restricted, model_full, alpha = 0.05) {
#'   # Extract log-likelihoods
#'   ll_restricted <- as.numeric(logLik(model_restricted))
#'   ll_full <- as.numeric(logLik(model_full))
#'
#'   # Calculate test statistic
#'   lr_stat <- -2 * (ll_restricted - ll_full)
#'
#'   # Calculate degrees of freedom (parameter difference)
#'   df <- length(coef(model_full)) - length(coef(model_restricted))
#'
#'   # Calculate p-value
#'   p_value <- pchisq(lr_stat, df = df, lower.tail = FALSE)
#'
#'   # Return results
#'   list(
#'     lr_statistic = lr_stat,
#'     df = df,
#'     p_value = p_value,
#'     significant = p_value < alpha,
#'     critical_value = qchisq(1 - alpha, df = df)
#'   )
#' }
#'
#' # Example: Manual LR test for BKw vs GKw (testing lambda parameter)
#' cat("\n=== Manual LR test: BKw vs GKw ===\n")
#' lr_bkw_gkw <- manual_lr_test(fit_bkw, fit_gkw)
#' cat("LR statistic:", lr_bkw_gkw$lr_statistic, "\n")
#' cat("Degrees of freedom:", lr_bkw_gkw$df, "\n")
#' cat("P-value:", lr_bkw_gkw$p_value, "\n")
#' cat("Critical value (alpha=0.05):", lr_bkw_gkw$critical_value, "\n")
#' cat("Decision:", ifelse(lr_bkw_gkw$significant,
#'   "Reject H0: Lambda is significantly different from 1",
#'   "Fail to reject H0: Lambda is not significantly different from 1"
#' ), "\n")
#'
#' # Example: Manual LR test for Kw vs EKw (testing lambda parameter)
#' cat("\n=== Manual LR test: Kw vs EKw ===\n")
#' lr_kw_ekw <- manual_lr_test(fit_kw, fit_ekw)
#' cat("LR statistic:", lr_kw_ekw$lr_statistic, "\n")
#' cat("Degrees of freedom:", lr_kw_ekw$df, "\n")
#' cat("P-value:", lr_kw_ekw$p_value, "\n")
#' cat("Critical value (alpha=0.05):", lr_kw_ekw$critical_value, "\n")
#' cat("Decision:", ifelse(lr_kw_ekw$significant,
#'   "Reject H0: Lambda is significantly different from 1",
#'   "Fail to reject H0: Lambda is not significantly different from 1"
#' ), "\n")
#'
#' # ============================================================================
#' # Real data application with correct model nesting
#' # ============================================================================
#' if (requireNamespace("betareg", quietly = TRUE)) {
#'   data("ReadingSkills", package = "betareg")
#'   y <- ReadingSkills$accuracy
#'
#'   # Fit models
#'   rs_gkw <- gkwfit(data = y, family = "gkw", plot = FALSE)
#'   rs_bkw <- gkwfit(data = y, family = "bkw", plot = FALSE)
#'   rs_kkw <- gkwfit(data = y, family = "kkw", plot = FALSE)
#'   rs_kw <- gkwfit(data = y, family = "kw", plot = FALSE)
#'   rs_beta <- gkwfit(data = y, family = "beta", plot = FALSE)
#'
#'   # Test nested models
#'   cat("\n=== Real data: Testing BKw vs GKw (adding lambda) ===\n")
#'   rs_test_bkw_gkw <- anova(rs_bkw, rs_gkw)
#'   print(rs_test_bkw_gkw)
#'
#'   cat("\n=== Real data: Testing Kw vs KKw (adding delta and lambda) ===\n")
#'   rs_test_kw_kkw <- anova(rs_kw, rs_kkw)
#'   print(rs_test_kw_kkw)
#'
#'   # Compare non-nested models with information criteria
#'   cat("\n=== Real data: Comparing non-nested Beta vs Kw ===\n")
#'   rs_compare_beta_kw <- anova(rs_beta, rs_kw)
#'   print(rs_compare_beta_kw)
#'
#'   # Summarize all models
#'   cat("\n=== Real data: Model comparison summary ===\n")
#'   models_rs <- c("GKw", "BKw", "KKw", "Kw", "Beta")
#'   aic_values <- c(rs_gkw$AIC, rs_bkw$AIC, rs_kkw$AIC, rs_kw$AIC, rs_beta$AIC)
#'   bic_values <- c(rs_gkw$BIC, rs_bkw$BIC, rs_kkw$BIC, rs_kw$BIC, rs_beta$BIC)
#'   loglik_values <- c(
#'     as.numeric(logLik(rs_gkw)),
#'     as.numeric(logLik(rs_bkw)),
#'     as.numeric(logLik(rs_kkw)),
#'     as.numeric(logLik(rs_kw)),
#'     as.numeric(logLik(rs_beta))
#'   )
#'
#'   df_rs <- data.frame(
#'     Model = models_rs,
#'     LogLik = loglik_values,
#'     AIC = aic_values,
#'     BIC = bic_values
#'   )
#'   df_rs <- df_rs[order(df_rs$AIC), ]
#'   print(df_rs)
#'
#'   # Determine the best model for the data
#'   best_model <- df_rs$Model[which.min(df_rs$AIC)]
#'   cat("\nBest model based on AIC:", best_model, "\n")
#' }
#' }
#'
#' @keywords models methods regression htest
#' @author Lopes, J. E.
#' @export
anova.gkwfit <- function(object, ...) {
  # --- Gather objects and perform initial validation ---
  objects <- list(object, ...)
  is_gkwfit <- sapply(objects, inherits, "gkwfit")
  if (!all(is_gkwfit)) {
    stop("All objects provided must inherit from class 'gkwfit'.")
  }
  nmodels <- length(objects)
  if (nmodels < 2) {
    stop("Need at least two models to compare using anova().")
  }

  # Use substitute to capture original variable names if possible
  object_names_call <- match.call()
  object_names <- vapply(as.list(object_names_call[-1L])[seq_len(nmodels)], deparse1, "")

  lls <- vector("list", nmodels)
  for (i in seq_len(nmodels)) {
    lls[[i]] <- tryCatch(logLik(objects[[i]]), error = function(e) {
      stop("Could not extract valid 'logLik' object for model ", object_names[i], ": ", e$message)
    })
    if (!inherits(lls[[i]], "logLik") || is.null(attr(lls[[i]], "df")) || is.null(attr(lls[[i]], "nobs"))) {
      stop(
        "Invalid 'logLik' object returned for model ", object_names[i],
        " (missing class or 'df'/'nobs' attribute)."
      )
    }
    if (attr(lls[[i]], "nobs") <= 0) {
      stop("Number of observations ('nobs') must be positive for model ", object_names[i])
    }
  }

  dfs <- sapply(lls, attr, "df")
  nobs_vals <- sapply(lls, attr, "nobs")
  loglik_vals <- sapply(lls, as.numeric)

  if (length(unique(nobs_vals)) > 1) {
    warning(
      "Models were not all fitted to the same number of observations.\n",
      "Likelihood ratio tests assume comparison on the same dataset."
    )
  }
  n_obs <- nobs_vals[1] # Use first for AIC/BIC calculation, assumes consistency or accepts warning

  mnames <- object_names
  if (anyDuplicated(mnames)) mnames <- make.unique(mnames, sep = ".")

  ord <- order(dfs)
  objects <- objects[ord]
  lls <- lls[ord]
  dfs <- dfs[ord]
  loglik_vals <- loglik_vals[ord]
  mnames <- mnames[ord]

  aics <- -2 * loglik_vals + 2 * dfs
  bics <- -2 * loglik_vals + log(n_obs) * dfs

  # --- Perform pairwise LRTs ---
  lr_stat <- rep(NA_real_, nmodels)
  pr_chi <- rep(NA_real_, nmodels)
  delta_df <- rep(NA_integer_, nmodels)
  test_desc <- rep("", nmodels)

  for (i in 2:nmodels) {
    df_diff <- dfs[i] - dfs[i - 1]

    # Check if degrees of freedom increased (necessary for meaningful LRT)
    if (df_diff <= 0) {
      warning("Model ", mnames[i], " (", dfs[i], " df)",
        " does not have more parameters than model ", mnames[i - 1], " (", dfs[i - 1], " df).",
        "\nLRT requires models to be nested with increasing complexity.",
        call. = FALSE
      )
      next # Skip LRT calculation for this pair
    }

    # Check if log-likelihood increased (as expected for nested models)
    loglik_diff <- loglik_vals[i] - loglik_vals[i - 1]
    if (loglik_diff < -1e-6) { # Allow for small numerical errors
      warning("Log-likelihood decreased unexpectedly for the more complex model (",
        mnames[i], " vs ", mnames[i - 1], ").\nCheck model convergence or nesting.",
        call. = FALSE
      )
      lr <- NA_real_ # LRT statistic is not meaningful
    } else {
      lr <- max(0, 2 * loglik_diff) # Ensure non-negative LR statistic
    }

    delta_df[i] <- df_diff
    lr_stat[i] <- lr
    # Calculate p-value only if LR is not NA
    pr_chi[i] <- if (!is.na(lr)) stats::pchisq(lr, df_diff, lower.tail = FALSE) else NA_real_
    test_desc[i] <- paste(i - 1, "vs", i)
  }

  result_table <- data.frame(
    `N.Par` = dfs,
    AIC = aics,
    BIC = bics,
    LogLik = loglik_vals,
    Test = test_desc,
    `LR stat.` = lr_stat,
    `Pr(>Chi)` = pr_chi,
    row.names = mnames,
    check.names = FALSE # Prevent conversion of Pr(>Chi) etc.
  )

  heading <- c("Likelihood Ratio Test Comparison\n")
  attr(result_table, "heading") <- heading
  class(result_table) <- c("anova.gkwfit", "anova", "data.frame")

  return(result_table)
}

#' @title S3 method for class 'anova.gkwfit'
#' @param x An object of class \code{"anova.gkwfit"}.
#' @param digits Minimum number of significant digits to print.
#' @param signif.stars Logical; if TRUE, add significance stars.
#' @param ... Other args passed to
#' @return An object of class \code{"anova.gkwfit"} and prints a summary.
#' @export
print.anova.gkwfit <- function(x, digits = max(getOption("digits") - 2L, 3L),
                               signif.stars = getOption("show.signif.stars", TRUE), ...) {
  if (!inherits(x, "anova")) stop("x not of class anova") # Basic check

  # Print header if it exists
  if (!is.null(heading <- attr(x, "heading"))) {
    cat(heading, "\n")
  }

  # Use printCoefmat for standard ANOVA table formatting
  # Identify the P-value column index
  pval_col_idx <- which(colnames(x) == "Pr(>Chi)")
  # Identify the test statistic column index
  test_col_idx <- which(colnames(x) == "LR stat.")
  if (length(pval_col_idx) == 0) pval_col_idx <- NULL # Handle if column name changes
  if (length(test_col_idx) == 0) test_col_idx <- NULL

  # stats:::printCoefmat is not exported, use stats::printCoefmat directly
  # We need to tell it which column contains p-values
  stats::printCoefmat(x,
    digits = digits, signif.stars = signif.stars,
    has.Pvalue = !is.null(pval_col_idx), # Does it have P-values?
    P.values = !is.null(pval_col_idx), # Are they P-values? (for formatting)
    cs.ind = integer(), # No specific estimate/SE cols
    tst.ind = test_col_idx, # Which column has test stats?
    zap.ind = integer(),
    na.print = "", ...
  ) # Don't print NA for test on first line
  cat("\n")
  invisible(x)
}
Any scripts or data that you put into this service are public.
gkwreg documentation built on April 16, 2025, 1:10 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
gkwreg
Generalized Kumaraswamy Regression Models for Bounded Data

R/gkwfit.R
In gkwreg: Generalized Kumaraswamy Regression Models for Bounded Data

Defines functions .fit_tmb .determine_start_values .validate_parameters .validate_data .get_default_start .get_default_fixed .get_family_param_info .family_to_code

Documented in .determine_start_values .family_to_code .fit_tmb .get_default_fixed .get_default_start .get_family_param_info .validate_data .validate_parameters

Try the gkwreg package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

gkwreg Generalized Kumaraswamy Regression Models for Bounded Data

R/gkwfit.R In gkwreg: Generalized Kumaraswamy Regression Models for Bounded Data

Defines functions .fit_tmb .determine_start_values .validate_parameters .validate_data .get_default_start .get_default_fixed .get_family_param_info .family_to_code

Documented in .determine_start_values .family_to_code .fit_tmb .get_default_fixed .get_default_start .get_family_param_info .validate_data .validate_parameters

Try the gkwreg package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

gkwreg
Generalized Kumaraswamy Regression Models for Bounded Data

R/gkwfit.R
In gkwreg: Generalized Kumaraswamy Regression Models for Bounded Data