Nothing
#' Create a Poisson binomial distribution
#'
#' The Poisson binomial distribution is a generalization of the
#' \code{\link[distributions3]{Binomial}} distribution. It is also a sum
#' of \eqn{n} independent Bernoulli experiments. However, the success
#' probabilities can vary between the experiments so that they are
#' not identically distributed.
#'
#' @param ... An arbitrary number of numeric vectors or matrices
#' of success probabilities in `[0, 1]` (with matching number of rows).
#'
#' @return A `PoissonBinomial` object.
#' @export
#'
#' @family discrete distributions
#'
#' @details
#'
#' The Poisson binomial distribution comes up when you consider the number
#' of successes in independent binomial experiments (coin flips) with
#' potentially varying success probabilities.
#'
#' The \code{PoissonBinomial} distribution class in \pkg{distributions3}
#' is mostly based on the \pkg{PoissonBinomial} package, providing fast
#' \pkg{Rcpp} implementations of efficient algorithms. Hence, it is
#' recommended to install the \pkg{PoissonBinomial} package when working
#' with this distribution. However, as a fallback for when the \pkg{PoissonBinomial}
#' package is not installed the methods for the \code{PoissonBinomial}
#' distribution employ a normal approximation.
#'
#' We recommend reading the following documentation on
#' <https://alexpghayes.github.io/distributions3/>, where the math
#' will render with additional detail.
#'
#' In the following, let \eqn{X} be a Poisson binomial random variable with
#' success probabilities \eqn{p_1} to \eqn{p_n}.
#'
#' **Support**: \eqn{\{0, 1, 2, ..., n\}}{{0, 1, 2, ..., n}}
#'
#' **Mean**: \eqn{p_1 + \dots + p_n}{p_1 + ... + p_n}
#'
#' **Variance**: \eqn{p_1 \cdot (1 - p_1) + \dots + p_1 \cdot (1 - p_1)}{p_1 (1 - p_1) + ... + p_n (1 - p_n)}
#'
#' **Probability mass function (p.m.f)**:
#'
#' \deqn{
#' P(X = k) = \sum_A \prod_{i \in A} p_i \prod_{j \in A^C} (1 - p_j)
#' }{
#' P(X = k) = sum_A prod_{i in A} p_i prod_{j in A^C} (1 - p_j)
#' }
#'
#' where the sum is taken over all sets \eqn{A} with \eqn{k} elements from
#' \eqn{\{0, 1, 2, ..., n\}}{{0, 1, 2, ..., n}}. \eqn{A^C} is the complement
#' of \eqn{A}.
#'
#' **Cumulative distribution function (c.d.f)**:
#'
#' \deqn{
#' P(X \le k) = \sum_{i=0}^{\lfloor k \rfloor} P(X = i)
#' }{
#' P(X \le k) = \sum_{i=0}^k P(X = i)
#' }
#'
#' **Moment generating function (m.g.f)**:
#'
#' \deqn{
#' E(e^{tX}) = \prod_{i = 1}^n (1 - p_i + p_i e^t)
#' }{
#' E(e^(tX)) = prod_{i = 1}^n (1 - p_i + p_i e^t)
#' }
#'
#' @examples
#'
#' set.seed(27)
#'
#' X <- PoissonBinomial(0.5, 0.3, 0.8)
#' X
#'
#' mean(X)
#' variance(X)
#' skewness(X)
#' kurtosis(X)
#'
#' random(X, 10)
#'
#' pdf(X, 2)
#' log_pdf(X, 2)
#'
#' cdf(X, 2)
#' quantile(X, 0.8)
#'
#' cdf(X, quantile(X, 0.8))
#' quantile(X, cdf(X, 2))
#'
#' ## equivalent definitions of four Poisson binomial distributions
#' ## each summing up three Bernoulli probabilities
#' p <- cbind(
#' p1 = c(0.1, 0.2, 0.1, 0.2),
#' p2 = c(0.5, 0.5, 0.5, 0.5),
#' p3 = c(0.8, 0.7, 0.9, 0.8))
#' PoissonBinomial(p)
#' PoissonBinomial(p[, 1], p[, 2], p[, 3])
#' PoissonBinomial(p[, 1:2], p[, 3])
#' @export
PoissonBinomial <- function(...) {
d <- list(...)
if (length(d) == 1L && is.null(dim(d))) d[[1L]] <- rbind(d[[1L]])
d <- do.call("cbind", d)
if(any(d < 0) || any(d > 1)) stop("all probabilities must be in [0, 1]")
d <- as.data.frame(d)
names(d) <- paste0("p", seq_along(d))
class(d) <- c("PoissonBinomial", "distribution")
d
}
#' @export
format.PoissonBinomial <- function(x, digits = pmax(3L, getOption("digits") - 3L), cutoff = 4L, ...) {
cl <- class(x)[1L]
if (length(x) < 1L) return(character(0))
n <- names(x)
if (is.null(attr(x, "row.names"))) attr(x, "row.names") <- 1L:length(x)
class(x) <- "data.frame"
if (ncol(x) > cutoff) {
x <- x[, 1L:(cutoff - 1L)]
dots <- "..."
} else {
dots <- NULL
}
f <- sprintf("%s(%s)", cl, apply(
rbind(apply(as.matrix(x), 2L, format, digits = digits, ...)), 1L, function(p)
paste(c(paste(names(x), "=", as.vector(p)), dots), collapse = ", ")))
setNames(f, n)
}
#' @export
mean.PoissonBinomial <- function(x, ...) {
n <- names(x)
rval <- rowSums(as.matrix(x), na.rm = TRUE)
setNames(rval, n)
}
#' @export
variance.PoissonBinomial <- function(x, ...) {
n <- names(x)
x <- as.matrix(x)
rval <- rowSums(x * (1 - x), na.rm = TRUE)
setNames(rval, n)
}
#' @export
skewness.PoissonBinomial <- function(x, ...) {
n <- names(x)
x <- as.matrix(x)
v <- rowSums(x * (1 - x), na.rm = TRUE)
rval <- rowSums(x * (1 - x) * (1 - 2 * x), na.rm = TRUE) / (v^(3/2))
setNames(rval, n)
}
#' @export
kurtosis.PoissonBinomial <- function(x, ...) {
n <- names(x)
x <- as.matrix(x)
v <- rowSums(x * (1 - x), na.rm = TRUE)
rval <- rowSums(x * (1 - x) * (1 - 6 * (1 - x) * x), na.rm = TRUE) / (v^2)
setNames(rval, n)
}
#' Draw a random sample from a PoissonBinomial distribution
#'
#' @inherit PoissonBinomial examples
#'
#' @param x A `PoissonBinomial` object created by a call to [PoissonBinomial()].
#' @param n The number of samples to draw. Defaults to `1L`.
#' @param drop logical. Should the result be simplified to a vector if possible?
#' @param ... Unused. Unevaluated arguments will generate a warning to
#' catch mispellings or other possible errors.
#'
#' @return Integers containing values between `0` and `x$size`.
#' In case of a single distribution object or `n = 1`, either a numeric
#' vector of length `n` (if `drop = TRUE`, default) or a `matrix` with `n` columns
#' (if `drop = FALSE`).
#' @export
#'
random.PoissonBinomial <- function(x, n = 1L, drop = TRUE, ...) {
n <- make_positive_integer(n)
if (n == 0L) {
return(numeric(0L))
}
if(requireNamespace("PoissonBinomial")) {
p <- as.matrix(x)
rval <- numeric(nrow(p))
if (nrow(p) >= 1L) rval <- vapply(1L:nrow(p), function(i) PoissonBinomial::rpbinom(n, probs = p[i,], ...), numeric(n))
rval <- if (n == 1L) as.matrix(rval) else t(rval)
rownames(rval) <- names(x)
colnames(rval) <- paste("r", seq_len(n), sep = "_")
if (n == 1L && drop) rval <- rval[, 1L]
if (length(x) == 1L && drop) rval <- as.vector(rval)
} else {
FUN <- function(at, d) {
p <- as.matrix(d)
r <- matrix(runif(length(p)), nrow = nrow(p), ncol = ncol(p))
rowSums(0 + (r < p), na.rm = TRUE)
}
rval <- apply_dpqr(d = x, FUN = FUN, at = n, type = "random", drop = drop)
}
return(rval)
}
#' Evaluate the probability mass function of a PoissonBinomial distribution
#'
#' @inherit PoissonBinomial examples
#'
#' @param d A `PoissonBinomial` object created by a call to [PoissonBinomial()].
#' @param x A vector of elements whose probabilities you would like to
#' determine given the distribution `d`.
#' @param drop logical. Should the result be simplified to a vector if possible?
#' @param elementwise logical. Should each distribution in \code{d} be evaluated
#' at all elements of \code{x} (\code{elementwise = FALSE}, yielding a matrix)?
#' Or, if \code{d} and \code{x} have the same length, should the evaluation be
#' done element by element (\code{elementwise = TRUE}, yielding a vector)? The
#' default of \code{NULL} means that \code{elementwise = TRUE} is used if the
#' lengths match and otherwise \code{elementwise = FALSE} is used.
#' @param log,... Arguments to be passed to \code{\link[PoissonBinomial]{dpbinom}}
#' or \code{\link[stats]{pnorm}}, respectively.
#' @param verbose logical. Should a warning be issued if the normal approximation
#' is applied because the \pkg{PoissonBinomial} package is not installed?
#'
#' @return In case of a single distribution object, either a numeric
#' vector of length `probs` (if `drop = TRUE`, default) or a `matrix` with
#' `length(x)` columns (if `drop = FALSE`). In case of a vectorized distribution
#' object, a matrix with `length(x)` columns containing all possible combinations.
#' @export
#'
pdf.PoissonBinomial <- function(d, x, drop = TRUE, elementwise = NULL, log = FALSE, verbose = TRUE, ...) {
n <- length(d)
k <- length(x)
if(n > 0L && k > 0L && requireNamespace("PoissonBinomial")) {
## basic properties
rnam <- names(d)
## elementwise evaluation?
if(is.null(elementwise)) elementwise <- k > 1L && k == n && is.null(dim(x))
if(elementwise && k > 1L && k != n) stop(
sprintf("lengths of distribution 'd' and argument 'x' do not match: %s != %s", n, k))
## apply dpbinom to each row of the parameter matrix
p <- as.matrix(d)
if (elementwise) {
rval <- vapply(1L:n, function(i) PoissonBinomial::dpbinom(x[i], probs = p[i,], log = log, ...), numeric(1L))
rval <- matrix(rval, ncol = 1L, dimnames = list(rnam, "d"))
if (drop) rval <- rval[, 1L]
} else {
rval <- vapply(1L:n, function(i) PoissonBinomial::dpbinom(x, probs = p[i,], log = log, ...), numeric(k))
rval <- if (k == 1L) as.matrix(rval) else t(rval)
rownames(rval) <- rnam
colnames(rval) <- paste("d", make_suffix(x, digits = pmax(3L, getOption("digits") - 3L)), sep = "_")
if (k == 1L && drop) rval <- rval[, 1L]
if (n == 1L && drop) rval <- as.vector(rval)
}
} else {
if (n > 0L && k > 0L && verbose) warning("using normal approximation, for exact algorithm install the 'PoissonBinomial' package")
FUN <- function(at, d) {
pn <- if (log) rep.int(-Inf, length(at)) else numeric(length(at))
idx <- abs(at - round(at)) < .Machine$double.eps^0.75
pn <- pnorm(q = at + 0.5, mean = mean(d), sd = sqrt(variance(d))) -
pnorm(q = at - 0.5, mean = mean(d), sd = sqrt(variance(d)))
if (log) pn <- log(pn)
at <- rep_len(at, length(pn))
pn[abs(at - round(at)) >= .Machine$double.eps^0.75 | at < 0 | at >= length(unclass(d))] <- if (log) -Inf else 0
return(pn)
}
rval <- apply_dpqr(d = d, FUN = FUN, at = x, type = "density", drop = drop, elementwise = elementwise)
}
return(rval)
}
#' @rdname pdf.PoissonBinomial
#' @export
log_pdf.PoissonBinomial <- function(d, x, drop = TRUE, elementwise = NULL, ...) {
pdf(d, x, drop = drop, elementwise = elementwise, log = TRUE, ...)
}
#' Evaluate the cumulative distribution function of a PoissonBinomial distribution
#'
#' @inherit PoissonBinomial examples
#'
#' @param d A `PoissonBinomial` object created by a call to [PoissonBinomial()].
#' @param x A vector of elements whose cumulative probabilities you would
#' like to determine given the distribution `d`.
#' @param drop logical. Should the result be simplified to a vector if possible?
#' @param elementwise logical. Should each distribution in \code{d} be evaluated
#' at all elements of \code{x} (\code{elementwise = FALSE}, yielding a matrix)?
#' Or, if \code{d} and \code{x} have the same length, should the evaluation be
#' done element by element (\code{elementwise = TRUE}, yielding a vector)? The
#' default of \code{NULL} means that \code{elementwise = TRUE} is used if the
#' lengths match and otherwise \code{elementwise = FALSE} is used.
#' @param lower.tail,log.p,... Arguments to be passed to
#' \code{\link[PoissonBinomial]{ppbinom}} or \code{\link[stats]{pnorm}}, respectively.
#' @param verbose logical. Should a warning be issued if the normal approximation
#' is applied because the \pkg{PoissonBinomial} package is not installed?
#'
#' @return In case of a single distribution object, either a numeric
#' vector of length `probs` (if `drop = TRUE`, default) or a `matrix` with
#' `length(x)` columns (if `drop = FALSE`). In case of a vectorized distribution
#' object, a matrix with `length(x)` columns containing all possible combinations.
#' @export
#'
cdf.PoissonBinomial <- function(d, x, drop = TRUE, elementwise = NULL, lower.tail = TRUE, log.p = FALSE, verbose = TRUE, ...) {
n <- length(d)
k <- length(x)
if(n > 0L && k > 0L && requireNamespace("PoissonBinomial")) {
## basic properties
rnam <- names(d)
## elementwise evaluation?
if(is.null(elementwise)) elementwise <- k > 1L && k == n && is.null(dim(x))
if(elementwise && k > 1L && k != n) stop(
sprintf("lengths of distribution 'd' and argument 'x' do not match: %s != %s", n, k))
## apply ppbinom to each row of the parameter matrix
p <- as.matrix(d)
if (elementwise) {
rval <- vapply(1L:n, function(i) PoissonBinomial::ppbinom(floor(x[i]), probs = p[i,], lower.tail = lower.tail, log.p = log.p, ...), numeric(1L))
rval <- matrix(rval, ncol = 1L, dimnames = list(rnam, "p"))
if (drop) rval <- rval[, 1L]
} else {
rval <- vapply(1L:n, function(i) PoissonBinomial::ppbinom(floor(x), probs = p[i,], lower.tail = lower.tail, log.p = log.p, ...), numeric(k))
rval <- if (k == 1L) as.matrix(rval) else t(rval)
rownames(rval) <- rnam
colnames(rval) <- paste("p", make_suffix(x, digits = pmax(3L, getOption("digits") - 3L)), sep = "_")
if (k == 1L && drop) rval <- rval[, 1L]
if (n == 1L && drop) rval <- as.vector(rval)
}
} else {
if (n > 0L && k > 0L && verbose) warning("using normal approximation, for exact algorithm install the 'PoissonBinomial' package")
FUN <- function(at, d) {
pn <- pnorm(q = floor(at) + 0.5, mean = mean(d), sd = sqrt(variance(d)), lower.tail = lower.tail, log.p = log.p)
if (lower.tail) {
pn[at < 0] <- if (log.p) -Inf else 0
pn[at >= length(unclass(d))] <- if (log.p) 0 else 1
} else {
pn[at < 0] <- if (log.p) 0 else 1
pn[at >= length(unclass(d))] <- if (log.p) -Inf else 0
}
return(pn)
}
rval <- apply_dpqr(d = d, FUN = FUN, at = x, type = "probability", drop = drop, elementwise = elementwise)
}
return(rval)
}
#' Determine quantiles of a PoissonBinomial distribution
#'
#' `quantile()` is the inverse of `cdf()`.
#'
#' @inherit PoissonBinomial examples
#' @inheritParams random.PoissonBinomial
#'
#' @param probs A vector of probabilities.
#' @param drop logical. Shoul the result be simplified to a vector if possible?
#' @param elementwise logical. Should each distribution in \code{x} be evaluated
#' at all elements of \code{probs} (\code{elementwise = FALSE}, yielding a matrix)?
#' Or, if \code{x} and \code{probs} have the same length, should the evaluation be
#' done element by element (\code{elementwise = TRUE}, yielding a vector)? The
#' default of \code{NULL} means that \code{elementwise = TRUE} is used if the
#' lengths match and otherwise \code{elementwise = FALSE} is used.
#' @param lower.tail,log.p,... Arguments to be passed to
#' \code{\link[PoissonBinomial]{qpbinom}} or \code{\link[stats]{qnorm}}, respectively.
#' @param verbose logical. Should a warning be issued if the normal approximation
#' is applied because the \pkg{PoissonBinomial} package is not installed?
#'
#' @return In case of a single distribution object, either a numeric
#' vector of length `probs` (if `drop = TRUE`, default) or a `matrix` with
#' `length(probs)` columns (if `drop = FALSE`). In case of a vectorized
#' distribution object, a matrix with `length(probs)` columns containing all
#' possible combinations.
#' @export
#'
quantile.PoissonBinomial <- function(x, probs, drop = TRUE, elementwise = NULL, lower.tail = TRUE, log.p = FALSE, verbose = TRUE, ...) {
n <- length(x)
k <- length(probs)
if(n > 0L && k > 0L && requireNamespace("PoissonBinomial")) {
## basic properties
rnam <- names(x)
## elementwise evaluation?
if(is.null(elementwise)) elementwise <- k > 1L && k == n && is.null(dim(probs))
if(elementwise && k > 1L && k != n) stop(
sprintf("lengths of distribution 'x' and argument 'probs' do not match: %s != %s", n, k))
## apply ppbinom to each row of the parameter matrix
p <- as.matrix(x)
if (elementwise) {
rval <- vapply(1L:n, function(i) PoissonBinomial::qpbinom(probs[i], probs = p[i,], lower.tail = lower.tail, log.p = log.p, ...), numeric(1L))
rval <- matrix(rval, ncol = 1L, dimnames = list(rnam, "q"))
if (drop) rval <- rval[, 1L]
} else {
rval <- vapply(1L:n, function(i) PoissonBinomial::qpbinom(probs, probs = p[i,], lower.tail = lower.tail, log.p = log.p, ...), numeric(k))
rval <- if (k == 1L) as.matrix(rval) else t(rval)
rownames(rval) <- rnam
colnames(rval) <- paste("q", make_suffix(probs, digits = pmax(3L, getOption("digits") - 3L)), sep = "_")
if (k == 1L && drop) rval <- rval[, 1L]
if (n == 1L && drop) rval <- as.vector(rval)
}
} else {
if (n > 0L && k > 0L && verbose) warning("using normal approximation, for exact algorithm install the 'PoissonBinomial' package")
FUN <- function(at, d) {
qn <- qnorm(p = at, mean = mean(d), sd = sqrt(variance(d)), lower.tail = lower.tail, log.p = log.p) - 0.5
qn <- ceiling(qn)
qn[qn < 0] <- 0
qn[qn > length(unclass(d))] <- length(unclass(d))
return(qn)
}
rval <- apply_dpqr(d = x, FUN = FUN, at = probs, type = "quantile", drop = drop, elementwise = elementwise)
}
return(rval)
}
#' Return the support of the PoissonBinomial distribution
#'
#' @param d A `PoissonBinomial` object created by a call to [PoissonBinomial()].
#' @param drop logical. Shoul the result be simplified to a vector if possible?
#' @param ... Currently not used.
#'
#' @return A vector of length 2 with the minimum and maximum value of the support.
#'
#' @export
support.PoissonBinomial <- function(d, drop = TRUE, ...) {
rlang::check_dots_used()
min <- rep.int(0L, length(d))
p <- d
class(p) <- "data.frame"
p <- as.matrix(p)
max <- rowSums(!is.na(p))
make_support(min, max, d, drop = drop)
}
#' @exportS3Method
is_discrete.PoissonBinomial <- function(d, ...) {
rlang::check_dots_used()
setNames(rep.int(TRUE, length(d)), names(d))
}
#' @exportS3Method
is_continuous.PoissonBinomial <- function(d, ...) {
rlang::check_dots_used()
setNames(rep.int(FALSE, length(d)), names(d))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.