Nothing
#' Categorical distribution
#'
#' Probability mass function, distribution function, quantile function and random generation
#' for the categorical distribution.
#'
#' @param x,q vector of quantiles.
#' @param p vector of probabilities.
#' @param n number of observations. If \code{length(n) > 1},
#' the length is taken to be the number required.
#' @param prob,log_prob vector of length \eqn{m}, or \eqn{m}-column matrix
#' of non-negative weights (or their logarithms in \code{log_prob}).
#' @param log,log.p logical; if TRUE, probabilities p are given as log(p).
#' @param lower.tail logical; if TRUE (default), probabilities are \eqn{P[X \le x]}
#' otherwise, \eqn{P[X > x]}.
#' @param labels if provided, labeled \code{factor} vector is returned.
#' Number of labels needs to be the same as
#' number of categories (number of columns in prob).
#'
#' @details
#' Probability mass function
#'
#' \deqn{
#' \Pr(X = k) = \frac{w_k}{\sum_{j=1}^m w_j}
#' }{
#' Pr(X = k) = w[k]/sum(w)
#' }
#'
#' Cumulative distribution function
#' \deqn{
#' \Pr(X \le k) = \frac{\sum_{i=1}^k w_i}{\sum_{j=1}^m w_j}
#' }{
#' Pr(X <= k) = sum(w[1:k])/sum(w)
#' }
#'
#' It is possible to sample from categorical distribution parametrized
#' by vector of unnormalized log-probabilities
#' \eqn{\alpha_1,\dots,\alpha_m}{\alpha[1],...,\alpha[m]}
#' without leaving the log space by employing the Gumbel-max trick (Maddison, Tarlow and Minka, 2014).
#' If \eqn{g_1,\dots,g_m}{g[1],...,g[m]} are samples from Gumbel distribution with
#' cumulative distribution function \eqn{F(g) = \exp(-\exp(-g))}{F(g) = exp(-exp(-g))},
#' then \eqn{k = \mathrm{arg\,max}_i \{g_i + \alpha_i\}}{k = argmax(g[i]+\alpha[i])}
#' is a draw from categorical distribution parametrized by
#' vector of probabilities \eqn{p_1,\dots,p_m}{p[1]....,p[m]}, such that
#' \eqn{p_i = \exp(\alpha_i) / [\sum_{j=1}^m \exp(\alpha_j)]}{p[i] = exp(\alpha[i])/sum(exp(\alpha))}.
#' This is implemented in \code{rcatlp} function parametrized by vector of
#' log-probabilities \code{log_prob}.
#'
#' @references
#' Maddison, C. J., Tarlow, D., & Minka, T. (2014). A* sampling.
#' [In:] Advances in Neural Information Processing Systems (pp. 3086-3094).
#' \url{https://arxiv.org/abs/1411.0030}
#'
#' @examples
#'
#' # Generating 10 random draws from categorical distribution
#' # with k=3 categories occuring with equal probabilities
#' # parametrized using a vector
#'
#' rcat(10, c(1/3, 1/3, 1/3))
#'
#' # or with k=5 categories parametrized using a matrix of probabilities
#' # (generated from Dirichlet distribution)
#'
#' p <- rdirichlet(10, c(1, 1, 1, 1, 1))
#' rcat(10, p)
#'
#' x <- rcat(1e5, c(0.2, 0.4, 0.3, 0.1))
#' plot(prop.table(table(x)), type = "h")
#' lines(0:5, dcat(0:5, c(0.2, 0.4, 0.3, 0.1)), col = "red")
#'
#' p <- rdirichlet(1, rep(1, 20))
#' x <- rcat(1e5, matrix(rep(p, 2), nrow = 2, byrow = TRUE))
#' xx <- 0:21
#' plot(prop.table(table(x)))
#' lines(xx, dcat(xx, p), col = "red")
#'
#' xx <- seq(0, 21, by = 0.01)
#' plot(ecdf(x))
#' lines(xx, pcat(xx, p), col = "red", lwd = 2)
#'
#' pp <- seq(0, 1, by = 0.001)
#' plot(ecdf(x))
#' lines(qcat(pp, p), pp, col = "red", lwd = 2)
#'
#' @name Categorical
#' @aliases Categorical
#' @aliases dcat
#'
#' @keywords distribution
#' @concept Univariate
#' @concept Discrete
#'
#' @export
dcat <- function(x, prob, log = FALSE) {
if (is.vector(prob))
prob <- matrix(prob, nrow = 1L)
else if (!is.matrix(prob))
prob <- as.matrix(prob)
cpp_dcat(as.numeric(x), prob, log[1L])
}
#' @rdname Categorical
#' @export
pcat <- function(q, prob, lower.tail = TRUE, log.p = FALSE) {
if (is.vector(prob))
prob <- matrix(prob, nrow = 1L)
else if (!is.matrix(prob))
prob <- as.matrix(prob)
cpp_pcat(as.numeric(q), prob, lower.tail[1L], log.p[1L])
}
#' @rdname Categorical
#' @export
qcat <- function(p, prob, lower.tail = TRUE, log.p = FALSE, labels) {
if (is.vector(prob))
prob <- matrix(prob, nrow = 1L)
else if (!is.matrix(prob))
prob <- as.matrix(prob)
x <- cpp_qcat(p, prob, lower.tail[1L], log.p[1L])
if (!missing(labels)) {
if (length(labels) != ncol(prob))
warning("Wrong number of labels.")
else
return(factor(x, levels = 1:ncol(prob), labels = labels))
}
return(x)
}
#' @rdname Categorical
#' @export
rcat <- function(n, prob, labels) {
if (length(n) > 1) n <- length(n)
if (is.vector(prob)) {
k <- length(prob)
if (anyNA(prob) || any(prob < 0) || length(prob) == 0) {
warning("NAs produced")
x <- rep(NA, n)
} else {
x <- sample.int(k, size = n, replace = TRUE, prob = prob)
}
} else {
k <- ncol(prob)
x <- cpp_rcat(n, prob)
}
if (!missing(labels)) {
if (length(labels) != k)
warning("Wrong number of labels.")
else
return(factor(x, levels = 1:k, labels = labels))
}
return(x)
}
#' @rdname Categorical
#' @export
rcatlp <- function(n, log_prob, labels) {
if (length(n) > 1) n <- length(n)
if (is.vector(log_prob))
log_prob <- matrix(log_prob, nrow = 1L)
x <- cpp_rcatlp(n, log_prob)
k <- ncol(log_prob)
if (!missing(labels)) {
if (length(labels) != k)
warning("Wrong number of labels.")
else
return(factor(x, levels = 1:k, labels = labels))
}
return(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.