R/lrt_homogeneity_inad.R

Defines functions print.homogeneity_tests_inad print.test_homogeneity_inad run_homogeneity_tests_inad .count_params_homogeneity .fit_inad_heterogeneous test_homogeneity_inad

Documented in .count_params_homogeneity .fit_inad_heterogeneous print.homogeneity_tests_inad print.test_homogeneity_inad run_homogeneity_tests_inad test_homogeneity_inad

# File: R/test_homogeneity_inad.R
# Likelihood ratio tests for homogeneity across groups in INAD models
# Based on Section 3.7 of Li & Zimmerman (2026) Biostatistics

#' Likelihood ratio test for homogeneity across groups (INAD data)
#'
#' Tests hypotheses about parameter equality across treatment or grouping
#' factors in integer-valued antedependence models. Implements the homogeneity
#' testing framework from Section 3.7 of Li & Zimmerman (2026).
#'
#' @param y Integer matrix with n_subjects rows and n_time columns.
#' @param blocks Integer vector of length n_subjects specifying group membership.
#' @param order Antedependence order (0, 1, or 2).
#' @param thinning Thinning operator: "binom", "pois", or "nbinom".
#' @param innovation Innovation distribution: "pois", "bell", or "nbinom".
#' @param test Type of homogeneity test:
#'   \itemize{
#'     \item \code{"all"}: Tests M1 (pooled) vs M3 (fully heterogeneous)
#'     \item \code{"mean"}: Tests M1 (pooled) vs M2 (shared dependence, different means)
#'     \item \code{"dependence"}: Tests M2 (INADFE) vs M3 (fully heterogeneous)
#'   }
#' @param fit_pooled Optional pre-computed pooled fit (M1).
#' @param fit_inadfe Optional pre-computed INADFE fit (M2).
#' @param fit_hetero Optional pre-computed heterogeneous fit (M3).
#' @param ... Additional arguments passed to \code{fit_inad}.
#'
#' @return A list with class \code{"test_homogeneity_inad"} containing:
#' \describe{
#'   \item{method}{Inference method used (\code{"lrt"}).}
#'   \item{statistic}{Test statistic value}
#'   \item{lrt_stat}{Likelihood ratio test statistic}
#'   \item{df}{Degrees of freedom}
#'   \item{p_value}{P-value from chi-square distribution}
#'   \item{test}{Type of test performed}
#'   \item{fit_null}{Fitted model under H0}
#'   \item{fit_alt}{Fitted model under H1}
#'   \item{bic_null}{BIC under H0}
#'   \item{bic_alt}{BIC under H1}
#'   \item{bic_selected}{Which model BIC prefers}
#'   \item{table}{Summary data frame}
#' }
#'
#' @details
#' The function supports three nested model comparisons as described in the paper:
#'
#' \strong{M1 (Pooled)}: All parameters are common across groups. This corresponds
#' to fitting \code{fit_inad(y, blocks = NULL)}.
#'
#' \strong{M2 (INADFE)}: The thinning parameters \eqn{\alpha} are shared across
#' groups, but innovation means differ via block effects \eqn{\tau}. This is the
#' standard INADFE model fitted via \code{fit_inad(y, blocks = blocks)}.
#'
#' \strong{M3 (Fully Heterogeneous)}: Both \eqn{\alpha} and \eqn{\theta}
#' parameters can differ across groups. This is fitted by running separate
#' \code{fit_inad} calls for each group.
#'
#' The three test types correspond to:
#' \itemize{
#'   \item \code{"all"}: H0: M1 vs H1: M3 (complete homogeneity vs complete heterogeneity)
#'   \item \code{"mean"}: H0: M1 vs H1: M2 (test for group differences in means only)
#'   \item \code{"dependence"}: H0: M2 vs H1: M3 (test for group differences in dependence)
#' }
#'
#' Degrees of freedom are computed as the difference in free parameters between
#' the null and alternative models.
#'
#' @references
#' Li, C. and Zimmerman, D.L. (2026). Integer-valued antedependence models for
#' longitudinal count data. \emph{Biostatistics}. Section 3.7.
#'
#' @seealso \code{\link{fit_inad}}, \code{\link{test_order_inad}},
#'   \code{\link{test_stationarity_inad}}
#'
#' @examples
#' \donttest{
#' data("bolus_inad")
#' y <- bolus_inad$y
#' blocks <- bolus_inad$blocks
#'
#' # Test for any group differences (M1 vs M3)
#' test_all <- test_homogeneity_inad(y, blocks, order = 1,
#'                                   thinning = "nbinom", innovation = "bell",
#'                                   test = "all")
#' print(test_all)
#'
#' # Test only for mean differences (M1 vs M2)
#' test_mean <- test_homogeneity_inad(y, blocks, order = 1,
#'                                    thinning = "nbinom", innovation = "bell",
#'                                    test = "mean")
#' print(test_mean)
#'
#' # Test for dependence differences given different means (M2 vs M3)
#' test_dep <- test_homogeneity_inad(y, blocks, order = 1,
#'                                   thinning = "nbinom", innovation = "bell",
#'                                   test = "dependence")
#' print(test_dep)
#' }
#'
#' @importFrom stats pchisq
#' @export
test_homogeneity_inad <- function(y, blocks, order = 1,
                                  thinning = "binom", innovation = "pois",
                                  test = c("all", "mean", "dependence"),
                                  fit_pooled = NULL, fit_inadfe = NULL,
                                  fit_hetero = NULL, ...) {
  
  # Validate inputs
  if (!is.matrix(y)) y <- as.matrix(y)
  y_obs <- y[!is.na(y)]
  if (any(y_obs < 0) || any(y_obs != floor(y_obs))) {
    stop("y must contain non-negative integers")
  }
  
  n_subjects <- nrow(y)
  n_time <- ncol(y)
  
  # Validate blocks
  if (missing(blocks) || is.null(blocks)) {
    stop("blocks must be provided for homogeneity testing")
  }
  blocks <- as.integer(blocks)
  if (length(blocks) != n_subjects) {
    stop("blocks must have length equal to nrow(y)")
  }
  
  unique_blocks <- sort(unique(blocks))
  n_blocks <- length(unique_blocks)
  
  if (n_blocks < 2) {
    stop("Need at least 2 groups for homogeneity testing")
  }
  
  # Normalize block labels to 1, 2, ..., G
  block_map <- setNames(seq_along(unique_blocks), unique_blocks)
  blocks_norm <- as.integer(block_map[as.character(blocks)])
  
  # Validate other arguments
  test <- match.arg(test)
  thinning <- match.arg(thinning, c("binom", "pois", "nbinom"))
  innovation <- match.arg(innovation, c("pois", "bell", "nbinom"))
  order <- as.integer(order)
  if (!order %in% c(0L, 1L, 2L)) stop("order must be 0, 1, or 2")
  fit_args <- list(...)
  if (anyNA(y) && is.null(fit_args$na_action)) {
    fit_args$na_action <- "marginalize"
  }
  
  # Group sizes
  group_sizes <- table(blocks_norm)
  group_indices <- lapply(1:n_blocks, function(g) which(blocks_norm == g))
  
  # Fit required models
  # M1: Pooled model (no block effects)
  if (is.null(fit_pooled) && test %in% c("all", "mean")) {
    fit_pooled <- do.call(
      fit_inad,
      c(
        list(y = y, order = order, thinning = thinning, innovation = innovation, blocks = NULL),
        fit_args
      )
    )
  }
  
  # M2: INADFE model (shared dependence, different means via tau)
  if (is.null(fit_inadfe) && test %in% c("mean", "dependence")) {
    fit_inadfe <- do.call(
      fit_inad,
      c(
        list(y = y, order = order, thinning = thinning, innovation = innovation, blocks = blocks_norm),
        fit_args
      )
    )
  }
  
  # M3: Fully heterogeneous (separate fits per group)
  if (is.null(fit_hetero) && test %in% c("all", "dependence")) {
    fit_hetero <- do.call(
      .fit_inad_heterogeneous,
      c(
        list(
          y = y,
          blocks = blocks_norm,
          n_blocks = n_blocks,
          group_indices = group_indices,
          order = order,
          thinning = thinning,
          innovation = innovation
        ),
        fit_args
      )
    )
  }
  
  # Determine null and alternative models based on test type
  if (test == "all") {
    fit_null <- fit_pooled
    fit_alt <- fit_hetero
    null_name <- "M1 (Pooled)"
    alt_name <- "M3 (Heterogeneous)"
  } else if (test == "mean") {
    fit_null <- fit_pooled
    fit_alt <- fit_inadfe
    null_name <- "M1 (Pooled)"
    alt_name <- "M2 (INADFE)"
  } else {  # test == "dependence"
    fit_null <- fit_inadfe
    fit_alt <- fit_hetero
    null_name <- "M2 (INADFE)"
    alt_name <- "M3 (Heterogeneous)"
  }
  
  # Compute LRT statistic
  logL_null <- fit_null$log_l
  logL_alt <- fit_alt$log_l
  lrt_stat <- 2 * (logL_alt - logL_null)
  
  if (lrt_stat < 0) {
    warning("LRT statistic is negative; setting to 0")
    lrt_stat <- 0
  }
  
  # Count parameters
  n_params_null <- .count_params_homogeneity(fit_null, test, n_blocks, 
                                              order, n_time, thinning, innovation,
                                              is_null = TRUE)
  n_params_alt <- .count_params_homogeneity(fit_alt, test, n_blocks, 
                                             order, n_time, thinning, innovation,
                                             is_null = FALSE)
  
  df <- n_params_alt - n_params_null
  
  if (df <= 0) {
    warning("Degrees of freedom <= 0; p-value may not be meaningful")
    df <- max(1, df)
  }
  
  # P-value from chi-square distribution
  p_value <- pchisq(lrt_stat, df = df, lower.tail = FALSE)
  
  # Compute BIC
  bic_null <- -2 * logL_null + n_params_null * log(n_subjects)
  bic_alt <- -2 * logL_alt + n_params_alt * log(n_subjects)
  bic_selected <- if (bic_null <= bic_alt) "null" else "alt"
  
  # Summary table
  table_df <- data.frame(
    model = c(null_name, alt_name),
    logLik = c(logL_null, logL_alt),
    n_params = c(n_params_null, n_params_alt),
    BIC = c(bic_null, bic_alt),
    stringsAsFactors = FALSE
  )
  
  # Assemble output
  result <- list(
    method = "lrt",
    statistic = lrt_stat,
    lrt_stat = lrt_stat,
    df = df,
    p_value = p_value,
    test = test,
    fit_null = fit_null,
    fit_alt = fit_alt,
    bic_null = bic_null,
    bic_alt = bic_alt,
    bic_selected = bic_selected,
    table = table_df,
    settings = list(
      order = order,
      thinning = thinning,
      innovation = innovation,
      n_subjects = n_subjects,
      n_time = n_time,
      n_blocks = n_blocks,
      group_sizes = as.integer(group_sizes)
    )
  )
  
  class(result) <- "test_homogeneity_inad"
  result
}


#' Fit fully heterogeneous INAD model
#'
#' Internal function that fits separate INAD models for each group and
#' combines the log-likelihoods.
#'
#' @param y Data matrix
#' @param blocks Normalized block vector
#' @param n_blocks Number of groups
#' @param group_indices List of subject indices per group
#' @param order AD order
#' @param thinning Thinning type
#' @param innovation Innovation type
#' @param ... Additional arguments passed to fit_inad
#'
#' @return A list mimicking fit_inad output with combined log-likelihood
#'
#' @keywords internal
.fit_inad_heterogeneous <- function(y, blocks, n_blocks, group_indices,
                                     order, thinning, innovation, ...) {
  
  fits <- vector("list", n_blocks)
  total_log_l <- 0
  
  for (g in seq_len(n_blocks)) {
    y_g <- y[group_indices[[g]], , drop = FALSE]
    fits[[g]] <- fit_inad(y_g, order = order, thinning = thinning,
                          innovation = innovation, blocks = NULL, ...)
    total_log_l <- total_log_l + fits[[g]]$log_l
  }
  
  names(fits) <- paste0("group_", seq_len(n_blocks))
  
  # Return a list with combined info
  list(
    log_l = total_log_l,
    fits_by_group = fits,
    settings = list(
      order = order,
      thinning = thinning,
      innovation = innovation,
      n_blocks = n_blocks,
      heterogeneous = TRUE
    )
  )
}


#' Count parameters for homogeneity test
#'
#' @param fit Fitted model
#' @param test Test type
#' @param n_blocks Number of groups
#' @param order AD order
#' @param n_time Number of time points
#' @param thinning Thinning type
#' @param innovation Innovation type
#' @param is_null Whether this is the null model
#'
#' @return Number of free parameters
#'
#' @keywords internal
.count_params_homogeneity <- function(fit, test, n_blocks, order, n_time,
                                       thinning, innovation, is_null) {

  # Base parameter counts (per group, excluding NB innovation size)
  n_alpha <- switch(as.character(order),
                    "0" = 0,
                    "1" = n_time - 1,
                    "2" = 2 * (n_time - 2))
  n_theta <- n_time

  # NB innovation size params: count directly from the fit so this matches
  # what fit_inad() actually produced (length 1 or n_time per group). For
  # heterogeneous (M3) fits, sum across per-group fits.
  count_nb <- function(f) {
    if (innovation != "nbinom") return(0L)
    if (!is.null(f$nb_inno_size)) return(length(f$nb_inno_size))
    if (!is.null(f$fits_by_group)) {
      return(sum(vapply(f$fits_by_group,
                        function(g) length(g$nb_inno_size), integer(1))))
    }
    n_time  # conservative fallback
  }

  n_nb_inno <- count_nb(fit)
  k_single  <- n_alpha + n_theta + n_nb_inno

  if (test == "all") {
    if (is_null) {
      # M1: Pooled - single set of parameters (one shared NB size vector)
      return(k_single)
    } else {
      # M3: Heterogeneous - G sets of alpha+theta, NB sizes already counted
      # across groups via count_nb(fit)
      return(n_blocks * (n_alpha + n_theta) + n_nb_inno)
    }
  } else if (test == "mean") {
    if (is_null) {
      # M1: Pooled
      return(k_single)
    } else {
      # M2: INADFE - shared alpha, theta, and NB size; group-specific tau
      return(n_alpha + n_theta + (n_blocks - 1) + n_nb_inno)
    }
  } else {  # test == "dependence"
    if (is_null) {
      # M2: INADFE
      return(n_alpha + n_theta + (n_blocks - 1) + n_nb_inno)
    } else {
      # M3: Heterogeneous
      return(n_blocks * (n_alpha + n_theta) + n_nb_inno)
    }
  }
}


#' Run all homogeneity tests for INAD
#'
#' Convenience function to run all three homogeneity tests at once and
#' return a summary.
#'
#' @inheritParams test_homogeneity_inad
#'
#' @return A list with class \code{"homogeneity_tests_inad"} containing results
#'   for all three tests and a summary table.
#'
#' @examples
#' \donttest{
#' data("bolus_inad")
#' tests <- run_homogeneity_tests_inad(bolus_inad$y, bolus_inad$blocks,
#'                                      order = 1, thinning = "nbinom",
#'                                      innovation = "bell")
#' print(tests)
#' }
#'
#' @export
run_homogeneity_tests_inad <- function(y, blocks, order = 1,
                                        thinning = "binom", innovation = "pois",
                                        ...) {
  
  # Fit all three models once
  if (!is.matrix(y)) y <- as.matrix(y)
  n_subjects <- nrow(y)
  n_time <- ncol(y)
  fit_args <- list(...)
  if (anyNA(y) && is.null(fit_args$na_action)) {
    fit_args$na_action <- "marginalize"
  }
  
  blocks <- as.integer(blocks)
  unique_blocks <- sort(unique(blocks))
  n_blocks <- length(unique_blocks)
  block_map <- setNames(seq_along(unique_blocks), unique_blocks)
  blocks_norm <- as.integer(block_map[as.character(blocks)])
  group_indices <- lapply(1:n_blocks, function(g) which(blocks_norm == g))
  
  # Fit M1 (Pooled)
  fit_pooled <- do.call(
    fit_inad,
    c(
      list(y = y, order = order, thinning = thinning, innovation = innovation, blocks = NULL),
      fit_args
    )
  )
  
  # Fit M2 (INADFE)
  fit_inadfe <- do.call(
    fit_inad,
    c(
      list(y = y, order = order, thinning = thinning, innovation = innovation, blocks = blocks_norm),
      fit_args
    )
  )
  
  # Fit M3 (Heterogeneous)
  fit_hetero <- do.call(
    .fit_inad_heterogeneous,
    c(
      list(
        y = y,
        blocks = blocks_norm,
        n_blocks = n_blocks,
        group_indices = group_indices,
        order = order,
        thinning = thinning,
        innovation = innovation
      ),
      fit_args
    )
  )
  
  # Run all three tests
  test_all <- test_homogeneity_inad(y, blocks, order = order,
                                    thinning = thinning, innovation = innovation,
                                    test = "all",
                                    fit_pooled = fit_pooled,
                                    fit_hetero = fit_hetero,
                                    na_action = fit_args$na_action)
  
  test_mean <- test_homogeneity_inad(y, blocks, order = order,
                                     thinning = thinning, innovation = innovation,
                                     test = "mean",
                                     fit_pooled = fit_pooled,
                                     fit_inadfe = fit_inadfe,
                                     na_action = fit_args$na_action)
  
  test_dep <- test_homogeneity_inad(y, blocks, order = order,
                                    thinning = thinning, innovation = innovation,
                                    test = "dependence",
                                    fit_inadfe = fit_inadfe,
                                    fit_hetero = fit_hetero,
                                    na_action = fit_args$na_action)
  
  # Create summary table
  summary_table <- data.frame(
    test = c("M1 vs M3 (all)", "M1 vs M2 (mean)", "M2 vs M3 (dependence)"),
    method = c(test_all$method, test_mean$method, test_dep$method),
    statistic = c(test_all$statistic, test_mean$statistic, test_dep$statistic),
    lrt_stat = c(test_all$lrt_stat, test_mean$lrt_stat, test_dep$lrt_stat),
    df = c(test_all$df, test_mean$df, test_dep$df),
    p_value = c(test_all$p_value, test_mean$p_value, test_dep$p_value),
    bic_selected = c(test_all$bic_selected, test_mean$bic_selected, test_dep$bic_selected),
    stringsAsFactors = FALSE
  )
  
  # Model comparison table
  model_table <- data.frame(
    model = c("M1 (Pooled)", "M2 (INADFE)", "M3 (Heterogeneous)"),
    logLik = c(fit_pooled$log_l, fit_inadfe$log_l, fit_hetero$log_l),
    n_params = c(
      .count_params_homogeneity(fit_pooled, "mean", n_blocks, order, n_time, 
                                 thinning, innovation, is_null = TRUE),
      .count_params_homogeneity(fit_inadfe, "dependence", n_blocks, order, n_time, 
                                 thinning, innovation, is_null = TRUE),
      .count_params_homogeneity(fit_hetero, "all", n_blocks, order, n_time, 
                                 thinning, innovation, is_null = FALSE)
    ),
    stringsAsFactors = FALSE
  )
  model_table$BIC <- -2 * model_table$logLik + model_table$n_params * log(n_subjects)
  
  result <- list(
    method = "lrt",
    test_all = test_all,
    test_mean = test_mean,
    test_dependence = test_dep,
    summary_table = summary_table,
    model_table = model_table,
    fits = list(
      pooled = fit_pooled,
      inadfe = fit_inadfe,
      heterogeneous = fit_hetero
    ),
    best_model_bic = model_table$model[which.min(model_table$BIC)],
    settings = list(
      order = order,
      thinning = thinning,
      innovation = innovation,
      n_subjects = n_subjects,
      n_time = n_time,
      n_blocks = n_blocks
    )
  )
  
  class(result) <- "homogeneity_tests_inad"
  result
}


#' Print method for test_homogeneity_inad
#'
#' @param x Object of class \code{test_homogeneity_inad}
#' @param digits Number of digits for printing
#' @param ... Unused
#' @return Invisibly returns \code{x}.
#' @export
print.test_homogeneity_inad <- function(x, digits = 4, ...) {
  stat <- if (!is.null(x$statistic)) x$statistic else x$lrt_stat
  cat("\nLikelihood Ratio Test for Homogeneity (INAD)\n")
  cat("=============================================\n\n")
  
  test_desc <- switch(x$test,
    "all" = "H0: M1 (Pooled) vs H1: M3 (Heterogeneous)",
    "mean" = "H0: M1 (Pooled) vs H1: M2 (INADFE - different means)",
    "dependence" = "H0: M2 (INADFE) vs H1: M3 (Heterogeneous - different dependence)"
  )
  cat(test_desc, "\n\n")
  
  cat("Number of groups:", x$settings$n_blocks, "\n")
  cat("Group sizes:", paste(x$settings$group_sizes, collapse = ", "), "\n")
  cat("Order:", x$settings$order, "\n")
  cat("Thinning:", x$settings$thinning, " Innovation:", x$settings$innovation, "\n\n")
  
  print(x$table, row.names = FALSE, digits = digits)
  
  cat("\nLRT statistic:", round(stat, digits), "\n")
  cat("Degrees of freedom:", x$df, "\n")
  cat("P-value:", format.pval(x$p_value, digits = digits), "\n")
  cat("BIC selects:", ifelse(x$bic_selected == "null", 
                              x$table$model[1], x$table$model[2]), "\n")
  
  invisible(x)
}


#' Print method for homogeneity_tests_inad
#'
#' @param x Object of class \code{homogeneity_tests_inad}
#' @param digits Number of digits for printing
#' @param ... Unused
#' @return Invisibly returns \code{x}.
#' @export
print.homogeneity_tests_inad <- function(x, digits = 4, ...) {
  cat("\nHomogeneity Tests for INAD Models\n")
  cat("==================================\n\n")
  
  cat("Settings: Order", x$settings$order, ",", 
      x$settings$thinning, "thinning,", x$settings$innovation, "innovations\n")
  cat("Groups:", x$settings$n_blocks, "  N:", x$settings$n_subjects, 
      "  T:", x$settings$n_time, "\n\n")
  
  cat("Model Comparison:\n")
  print(x$model_table, row.names = FALSE, digits = digits)
  cat("\n")
  
  cat("Test Results:\n")
  print(x$summary_table, row.names = FALSE, digits = digits)
  cat("\n")
  
  cat("Best model by BIC:", x$best_model_bic, "\n")
  
  invisible(x)
}

Try the antedep package in your browser

Any scripts or data that you put into this service are public.

antedep documentation built on April 25, 2026, 1:06 a.m.