R/jsd_discrete_ci.R

Defines functions jsd_discrete_ci

Documented in jsd_discrete_ci

#' Bootstrap confidence interval for discrete JSD
#'
#' @param x Vector for group 1.
#' @param y Vector for group 2.
#' @param B Number of bootstrap replicates.
#' @param conf_level Confidence level. Defaults to 0.95.
#' @param support Optional support values.
#' @param base Logarithm base. Defaults to 2. Use `exp(1)` for nats.
#' @param eps Small constant for numerical stability.
#' @param add_smoothing Logical; add 1 to each cell count?
#' @param seed Optional random seed.
#' @param na_rm Logical; remove missing values?
#' @param na_rm_failed Logical; drop failed bootstrap draws when summarizing?
#'
#' @return An object of class `"jsd_ci"`.
#' @export
jsd_discrete_ci <- function(x, y,
                            B = 1000,
                            conf_level = 0.95,
                            support = NULL,
                            base = 2,
                            eps = 1e-12,
                            add_smoothing = FALSE,
                            seed = NULL,
                            na_rm = TRUE,
                            na_rm_failed = TRUE) {
  check_base(base)
  check_conf_level(conf_level)

  cleaned <- validate_xy(x, y, min_n = 2, na_rm = na_rm, finite_only = FALSE)
  x <- cleaned$x
  y <- cleaned$y

  if (!is.null(seed)) {
    set.seed(seed)
  }

  support <- make_support(x, y, support = support)

  est_obj <- jsd_discrete(
    x, y,
    support = support,
    base = base,
    eps = eps,
    add_smoothing = add_smoothing,
    na_rm = FALSE
  )

  estimate <- est_obj$estimate
  n <- length(x)
  m <- length(y)

  boot_values <- rep(NA_real_, B)

  for (b in seq_len(B)) {
    xb <- sample(x, size = n, replace = TRUE)
    yb <- sample(y, size = m, replace = TRUE)

    boot_values[b] <- tryCatch(
      jsd_discrete(
        xb, yb,
        support = support,
        base = base,
        eps = eps,
        add_smoothing = add_smoothing,
        na_rm = FALSE
      )$estimate,
      error = function(e) NA_real_
    )
  }

  boot_valid <- if (na_rm_failed) boot_values[is.finite(boot_values)] else boot_values

  if (sum(is.finite(boot_valid)) < 10) {
    warning("Too few valid bootstrap replicates.")
  }

  alpha <- 1 - conf_level
  conf_int <- stats::quantile(
    boot_valid,
    probs = c(alpha / 2, 1 - alpha / 2),
    na.rm = TRUE,
    names = FALSE
  )

  out <- list(
    estimate = estimate,
    conf_int = c(lower = conf_int[1], upper = conf_int[2]),
    conf_level = conf_level,
    type = "discrete",
    method = "PMF bootstrap",
    base = base,
    n_x = length(x),
    n_y = length(y),
    support = support,
    boot_values = boot_values,
    boot_valid_n = sum(is.finite(boot_values)),
    boot_mean = mean(boot_valid, na.rm = TRUE),
    boot_median = stats::median(boot_valid, na.rm = TRUE),
    boot_se = stats::sd(boot_valid, na.rm = TRUE),
    boot_bias = mean(boot_valid, na.rm = TRUE) - estimate,
    settings = list(
      B = B,
      eps = eps,
      add_smoothing = add_smoothing
    )
  )

  class(out) <- c("jsd_ci", "list")
  out
}

Try the jsdtools package in your browser

Any scripts or data that you put into this service are public.

jsdtools documentation built on March 31, 2026, 1:06 a.m.