R/inclusion_prob.R

Defines functions inclusion_prob inclusion_prob_ ta_units unbounded_pi as_stratum

Documented in inclusion_prob

#' Coerce to a factor that represents a stratum
#' @noRd
as_stratum <- function(strata) {
  strata <- as.factor(strata)
  if (anyNA(strata)) {
    stop("cannot have missing strata")
  }
  if (nlevels(strata) < 1L) {
    stop("there must be at least one stratum")
  }
  strata
}

#' Calculate unconstrained inclusion probabilities
#' @noRd
unbounded_pi <- function(x, n) {
  # n == 0 should be a strong zero
  if (n == 0L) {
    rep.int(0, length(x))
  } else {
    x * (n / sum(x))
  }
}

#' Find the units that belong in a TA stratum
#' @noRd
ta_units <- function(x, n, alpha) {
  # partial sorting is not stable, so if x[n] == x[n + 1] after sorting then
  # it is possible for the result to not resolve ties according to x
  # (as documented) when alpha is large enough to make at least one unit with
  # x[n] TA
  ord <- order(x, decreasing = TRUE)
  s <- seq_len(n)
  possible_ta <- rev(ord[s])
  x_ta <- x[possible_ta] # ties are in reverse
  definite_ts <- ord[seq.int(n + 1, length.out = length(x) - n)]

  p <- x_ta * s / (sum(x[definite_ts]) + cumsum(x_ta))
  # the sequence given by p has the following properties
  # 1. if p[k] < 1, then p[k + 1] >= p[k]
  # 2. if p[k] >= 1, then p[k + 1] >= 1
  # consequently, if p[k] >= 1 - alpha, then p[k + m] >= 1 - alpha
  possible_ta[p >= 1 - alpha]
}

#' Calculate inclusion probabilities for a single stratum
#' @noRd
pi <- function(x, n, alpha, cutoff) {
  if (any(x < 0)) {
    stop("sizes must be greater than or equal to 0")
  }
  if (n < 0L) {
    stop("sample size must be greater than or equal to 0")
  }
  if (n > sum(x > 0)) {
    stop(
      "sample size cannot be greater than the number of available units with ",
      "non-zero sizes in the population"
    )
  }
  if (alpha < 0 || alpha > 1) {
    stop("'alpha' must be between 0 and 1")
  }
  if (cutoff <= 0) {
    stop("'cutoff' must be greater than 0")
  }

  ta <- which(x >= cutoff)
  if (length(ta) > n) {
    stop("'n' is not large enough to include all units with 'x' above 'cutoff'")
  }

  x[ta] <- 0
  res <- unbounded_pi(x, n - length(ta))
  if (any(res >= 1 - alpha)) {
    ta2 <- ta_units(x, n - length(ta), alpha)
    x[ta2] <- 0
    ta <- c(ta, ta2)
    res <- unbounded_pi(x, n - length(ta))
  }

  res[ta] <- 1
  res
}

#' Calculate inclusion probabilities by stratum
#' @noRd
inclusion_prob_ <- function(x, n, strata, alpha, cutoff) {
  if (length(x) != length(strata)) {
    stop("the vectors for sizes and strata must be the same length")
  }
  if (length(n) != 1L && length(n) != nlevels(strata)) {
    stop(
      "there must be a single sample size",
      if (nlevels(strata) > 1) " for each stratum"
    )
  }
  if (length(alpha) != 1L && length(alpha) != nlevels(strata)) {
    stop(
      "'alpha' must be a single value",
      if (nlevels(strata) > 1) " or have a value for each stratum"
    )
  }
  if (length(cutoff) != 1L && length(cutoff) != nlevels(strata)) {
    stop(
      "'cutoff' must be a single value",
      if (nlevels(strata) > 1) " or have a value for each stratum"
    )
  }
  Map(pi, split(x, strata), n, alpha, cutoff)
}

#' Calculate inclusion probabilities
#'
#' Calculate stratified (first-order) inclusion probabilities.
#'
#' Within a stratum, the inclusion probability for a unit is given by
#' \eqn{\pi = nx / \sum x}{\pi = n * x / \sum x}. These values can be greater
#' than 1 in practice, and so they are constructed iteratively by taking units
#' with \eqn{\pi \geq 1 - \alpha}{\pi >= 1 - \alpha} (from largest to smallest)
#' and assigning these units an inclusion probability of 1, with the remaining
#' inclusion probabilities recalculated at each step. If \eqn{\alpha > 0}, then
#' any ties among units with the same size are broken by their position.
#'
#' @inheritParams sps
#'
#' @returns
#' A numeric vector of inclusion probabilities for each unit in the population.
#'
#' @seealso
#' [sps()] for drawing a sequential Poisson sample.
#'
#' @examples
#' # Make a population with units of different size
#' x <- c(1:10, 100)
#'
#' # Use the inclusion probabilities to calculate the variance of the
#' # sample size for Poisson sampling
#' pi <- inclusion_prob(x, 5)
#' sum(pi * (1 - pi))
#'
#' @export
inclusion_prob <- function(x,
                           n,
                           strata = gl(1, length(x)),
                           alpha = 1e-3,
                           cutoff = Inf) {
  x <- as.numeric(x)
  n <- as.integer(n)
  strata <- as_stratum(strata)
  alpha <- as.numeric(alpha)
  cutoff <- as.numeric(cutoff)
  unsplit(inclusion_prob_(x, n, strata, alpha, cutoff), strata)
}

Try the sps package in your browser

Any scripts or data that you put into this service are public.

sps documentation built on Oct. 16, 2023, 9:07 a.m.