R/beta_lerouxcar.R

Defines functions beta_lerouxcar

Documented in beta_lerouxcar

#' @title Small Area Estimation using Hierarchical Bayesian Method under Spatial Beta-Leroux CAR Model
#'
#' @description This function gives small area estimator under Spatial Leroux CAR Model. It is implemented to a variable of interest (y) that is assumed to follow a Beta Distribution. The range of data is \eqn{0 < y < 1}.
#'
#' @param formula Formula that describes the fitted model.
#' @param proxmat \eqn{N \times N} spatial binary adjacency matrix with values \code{0} or \code{1} representing the neighborhood structure between areas. The diagonal elements must be \code{0}.
#' @param data The data frame.
#' @param iter.update Number of updates performed during Gibbs sampling. Default is \code{3}.
#' @param iter.mcmc Total number of MCMC iterations per chain. Default is \code{2000}.
#' @param thin Thinning rate for MCMC sampling. Must be a positive integer. Default is \code{1}.
#' @param burn.in Number of burn-in iterations discarded from each MCMC chain. Default is \code{1000}.
#' @param chains Number of parallel MCMC chains. Default is \code{2}.
#' @param n.adapt Number of iterations used for the adaptation phase in JAGS. Default is \code{1000}.
#' @param coef Optional vector containing the mean of the prior distribution of the regression model coefficients.
#' @param var.coef Optional vector containing the variances of the prior distribution of the regression model coefficients.
#' @param tau.v Initial value or shape for the random effect precision. Default is \code{1}.
#' @param seed An integer seed for the random number generator to ensure reproducibility. Default is \code{123}.
#' @param quiet Logical; if \code{TRUE}, suppresses the JAGS terminal output. Default is \code{FALSE}.
#' @param plot Logical; if \code{TRUE}, generates MCMC diagnostic trace, autocorrelation, and density plots. Default is \code{TRUE}.
#' @param keep.fit Logical; if \code{TRUE}, keeps the raw MCMC \code{coda} samples object in the output list. Default is \code{FALSE}.
#'
#' @return This function returns a list with the following objects:
#' \describe{
#'   \item{est}{A dataframe containing the posterior mean estimates, posterior standard deviations, and 95\% credible intervals of the small area means estimated using the Hierarchical Bayesian method.}
#'   \item{randeff}{A dataframe containing the posterior mean estimates, posterior standard deviations, and 95\% credible intervals of the area-specific random effects \eqn{(v)}.}
#'   \item{refvar}{A dataframe containing the posterior mean estimates, posterior standard deviations, and 95\% credible intervals of the area-specific random effect variances \eqn{(a.var)}.}
#'   \item{coefficient}{A dataframe containing the posterior mean estimates, posterior standard deviations, 95\% credible intervals, Rhat convergence diagnostics, and Effective Sample Sizes (ESS) for the regression coefficients \eqn{(\beta)}, the spatial autoregressive parameter \eqn{(\rho)}, and the global precision parameter \eqn{(\phi)}.}
#' }
#'
#' @examples
#' # Load dataset and proximity matrix
#' data(databeta)
#' data(adjacency_mat)
#'
#' \donttest{
#' # Fit the Spatial Beta-Leroux CAR model
#' result <- beta_lerouxcar(
#'   formula = y ~ x1 + x2,
#'   proxmat = adjacency_mat,
#'   data = databeta
#' )
#'
#' # View the estimation results
#' # 1. Small Area Estimates
#' result$est
#' # 2. Estimated area-specific random effects
#' result$randeff
#' # 3. Estimated variance of the random effects
#' result$refvar
#' # 4. Estimated regression coefficients, spatial, and precision parameters
#' result$coefficient
#' }
#'
#' @import rjags
#' @import coda
#' @import stats
#' @import grDevices
#' @import graphics
#'
#' @export beta_lerouxcar
beta_lerouxcar <- function(formula, proxmat, data,
                          iter.update = 3, iter.mcmc = 2000,
                          thin = 1, burn.in = 1000, chains = 2, n.adapt = 1000,
                          coef = NULL, var.coef = NULL, tau.v = 1,
                          seed = 123, quiet = FALSE, plot = TRUE, keep.fit = FALSE) {

  result <- list(est = NA, randeff = NA, refvar = NA, coefficient = NA)

  formuladata <- stats::model.frame(formula, data, na.action = NULL)
  y <- formuladata[, 1, drop = FALSE]

  if (ncol(formuladata) < 2) stop("Formula must include response and at least 1 predictor.")
  if (any(is.na(formuladata[, -1, drop = FALSE]))) stop("Auxiliary variables contain NA values.")

  y_all <- as.numeric(y[, 1])
  N     <- length(y_all)
  if (N < 2) stop("Need at least 2 areas.")
  if (any(y_all[!is.na(y_all)] <= 0) || any(y_all[!is.na(y_all)] >= 1)) {
    stop("Response variable must satisfy 0 < y < 1.")
  }

  if (iter.update < 3) stop("The number of iteration updates must be at least 3.")

  xmat <- stats::model.matrix(formula, data = formuladata)
  X    <- as.matrix(xmat[, -1, drop = FALSE])
  P    <- ncol(X)
  nvar <- P + 1

  W <- as.matrix(proxmat)
  if (any(is.na(W))) stop("Proximity matrix contains NA.")
  if (nrow(W) != N || ncol(W) != N) stop("Proximity matrix must be N x N.")

  D_mat <- diag(rowSums(W))
  R_mat <- D_mat - W
  I     <- diag(1, N)
  O     <- rep(0, N)

  mu_beta  <- if (!is.null(coef)) coef else rep(0, nvar)
  tau_beta <- if (!is.null(var.coef)) 1/var.coef else rep(1, nvar)
  tau.va <- 1
  tau.vb <- 1

  inits_list <- lapply(1:chains, function(c) {
    list(v = rep(0, N), beta = mu_beta, tau_v = tau.v, rho = 0.5, phi = 1,
         .RNG.name = "base::Wichmann-Hill", .RNG.seed = seed + c)
  })

  # Model definitions
  # Model 1: Fully Sampled Data (CAR Leroux)
  model_sampled <- "model {
    for (i in 1:N) {
      y[i] ~ dbeta(shape1[i], shape2[i])
      shape1[i] <- mu[i] * phi
      shape2[i] <- (1 - mu[i]) * phi

      logit(mu[i]) <- beta[1] + inprod(beta[2:(P+1)], X[i, ]) + v[i]
      a.var[i] <- sig_v[i, i]
    }

    for (i in 1:N) {
      for (j in 1:N) {
        Q[i,j] <- tau_v * ((1 - rho) * I[i,j] + rho * R_mat[i,j])
      }
    }

    sig_v <- inverse(Q)
    v ~ dmnorm(O, Q)

    for (k in 1:(P+1)) {
      beta[k] ~ dnorm(mu_beta[k], tau_beta[k])
    }
    tau_v ~ dgamma(tau.va, tau.vb)
    rho ~ dunif(0, 1)
    sigma2_v <- 1 / tau_v

    phi ~ dgamma(4, 0.2)
  }"

  # Model 2: Data with NAs (CAR Leroux)
  model_nonsampled <- "model {
    for (i in 1:N_samp) {
      y_samp[i] ~ dbeta(shape1[i], shape2[i])
      shape1[i] <- mu[idx_samp[i]] * phi
      shape2[i] <- (1 - mu[idx_samp[i]]) * phi
    }

    for (j in 1:N) {
      logit(mu[j]) <- beta[1] + inprod(beta[2:(P+1)], X[j, ]) + v[j]
      a.var[j] <- sig_v[j, j]
    }

    for (i in 1:N) {
      for (j in 1:N) {
        Q[i,j] <- tau_v * ((1 - rho) * I[i,j] + rho * R_mat[i,j])
      }
    }

    sig_v <- inverse(Q)
    v ~ dmnorm(O, Q)

    for (k in 1:(P+1)) {
      beta[k] ~ dnorm(mu_beta[k], tau_beta[k])
    }
    tau_v ~ dgamma(tau.va, tau.vb)
    rho ~ dunif(0, 1)
    sigma2_v <- 1 / tau_v

    phi ~ dgamma(4, 0.2)
  }"

  params <- c("mu", "a.var", "beta", "rho", "tau_v", "sigma2_v", "v", "phi")

  if (!any(is.na(y_all))) {
    for (i in 1:iter.update) {
      dat <- list(N = N, P = P, y = y_all, X = X,
                  R_mat = R_mat, I = I, O = O,
                  mu_beta = mu_beta, tau_beta = tau_beta, tau.va = tau.va, tau.vb = tau.vb)

      jags.m <- rjags::jags.model(file = textConnection(model_sampled), data = dat,
                                  inits = inits_list, n.chains = chains, n.adapt = n.adapt, quiet = quiet)

      samps <- rjags::coda.samples(jags.m, params, n.iter = iter.mcmc, thin = thin,
                                   progress.bar = if(quiet) "none" else "text")
      samps1 <- stats::window(samps, start = start(samps) + burn.in)

      res_sum = summary(samps1)
      beta_temp = res_sum$statistics[grep("^beta\\[", rownames(res_sum$statistics)), 1:2]
      for (k in 1:nvar) {
        mu_beta[k]  = beta_temp[k, 1]
        tau_beta[k] = 1/(beta_temp[k, 2]^2)
      }
      tau_idx = grep("^tau_v", rownames(res_sum$statistics))
      tau.va = res_sum$statistics[tau_idx, 1]^2 / res_sum$statistics[tau_idx, 2]^2
      tau.vb = res_sum$statistics[tau_idx, 1] / res_sum$statistics[tau_idx, 2]^2
    }
  } else {
    idx_samp <- which(!is.na(y_all))
    N_samp   <- length(idx_samp)
    y_samp   <- y_all[idx_samp]

    for (i in 1:iter.update) {
      dat <- list(N = N, P = P, N_samp = N_samp, y_samp = y_samp, idx_samp = idx_samp,
                  X = X,
                  R_mat = R_mat, I = I, O = O,
                  mu_beta = mu_beta, tau_beta = tau_beta, tau.va = tau.va, tau.vb = tau.vb)

      jags.m <- rjags::jags.model(file = textConnection(model_nonsampled), data = dat,
                                  inits = inits_list, n.chains = chains, n.adapt = n.adapt, quiet = quiet)

      samps <- rjags::coda.samples(jags.m, params, n.iter = iter.mcmc, thin = thin,
                                   progress.bar = if(quiet) "none" else "text")
      samps1 <- stats::window(samps, start = start(samps) + burn.in)

      res_sum = summary(samps1)
      beta_temp = res_sum$statistics[grep("^beta\\[", rownames(res_sum$statistics)), 1:2]
      for (k in 1:nvar) {
        mu_beta[k]  = beta_temp[k, 1]
        tau_beta[k] = 1/(beta_temp[k, 2]^2)
      }
      tau_idx = grep("^tau_v", rownames(res_sum$statistics))
      tau.va = res_sum$statistics[tau_idx, 1]^2 / res_sum$statistics[tau_idx, 2]^2
      tau.vb = res_sum$statistics[tau_idx, 1] / res_sum$statistics[tau_idx, 2]^2
    }
  }

  #Output
  res_sum <- summary(samps1)
  ESS <- coda::effectiveSize(samps1)
  if (chains > 1) {
    Rhat_raw <- coda::gelman.diag(samps1, multivariate = FALSE, autoburnin = FALSE)$psrf[, 1]
  } else {
    Rhat_raw <- rep(NA, length(ESS))
  }

  mu_idx <- grep("^mu\\[", rownames(res_sum$statistics))
  estimation <- data.frame(res_sum$statistics[mu_idx, 1:2], res_sum$quantiles[mu_idx, c(1,5)])
  colnames(estimation) <- c("Estimate", "Est.Error", "l-95% CI", "u-95% CI")

  v_idx <- grep("^v\\[", rownames(res_sum$statistics))
  randeff <- data.frame(res_sum$statistics[v_idx, 1:2], res_sum$quantiles[v_idx, c(1,5)])
  colnames(randeff) <- c("Estimate", "Est.Error", "l-95% CI", "u-95% CI")

  a_var_idx <- grep("^a\\.var\\[", rownames(res_sum$statistics))
  refvar <- data.frame(res_sum$statistics[a_var_idx, 1:2], res_sum$quantiles[a_var_idx, c(1,5)])
  colnames(refvar) <- c("Estimate", "Est.Error", "l-95% CI", "u-95% CI")

  b_idx   <- grep("^beta\\[", rownames(res_sum$statistics))
  rho_idx <- grep("^rho", rownames(res_sum$statistics))
  phi_idx <- grep("^phi", rownames(res_sum$statistics))

  coef_stats <- rbind(res_sum$statistics[b_idx, 1:2], res_sum$statistics[rho_idx, 1:2], res_sum$statistics[phi_idx, 1:2])
  coef_quant <- rbind(res_sum$quantiles[b_idx, c(1,5)], res_sum$quantiles[rho_idx, c(1,5)], res_sum$quantiles[phi_idx, c(1,5)])
  coef_rhat  <- c(Rhat_raw[b_idx], Rhat_raw[rho_idx], Rhat_raw[phi_idx])
  coef_ess   <- c(ESS[b_idx], ESS[rho_idx], ESS[phi_idx])

  coefficient <- data.frame(coef_stats, coef_quant, coef_rhat, coef_ess)

  b_varnames <- character(nvar)
  for (i in 1:nvar) {
    b_varnames[i] <- paste0("beta[", i - 1, "]")
  }
  rownames(coefficient) <- c(b_varnames, "rho", "phi")
  colnames(coefficient) <- c("Estimate", "Est.Error", "l-95% CI", "u-95% CI", "Rhat", "ESS")

  result$est         <- estimation
  result$randeff     <- randeff
  result$refvar      <- refvar
  result$coefficient <- coefficient

  if (keep.fit) result$fit <- samps1

  if (plot) {
    plot_idx <- c(b_idx, rho_idx, phi_idx)
    result_mcmc <- samps1[, plot_idx, drop = FALSE]
    coda::varnames(result_mcmc) <- rownames(coefficient)

    oldpar <- graphics::par(no.readonly = TRUE)
    on.exit(graphics::par(oldpar))
    graphics::par(mar = c(2, 2, 2, 2))

    coda::autocorr.plot(result_mcmc, col = "brown2", lwd = 2)
    plot(result_mcmc, col = "brown2", lwd = 2)
  }

  return(result)
}

Try the saeHB.Spatial.Beta package in your browser

Any scripts or data that you put into this service are public.

saeHB.Spatial.Beta documentation built on July 1, 2026, 5:07 p.m.