R/bint_dist.R
In hint: Tools for Hypothesis Testing Based on Hypergeometric Intersection Distributions

Documented in dbint pbint qbint rbint

###########################################################################
#
# Functions for calculating binomial intersection distributions.
#
# Author: Alex T. Kalinka (alex.t.kalinka@gmail.com)
#
###########################################################################


#### Main user-callable functions that do the dispatching to user-invisible worker functions below. ####
#
# Split into two sets: distribution, quantile, etc functions, and a more formal test function that will
#  take more flexible user input for testing (objects, which will be placed into categories internally).
#
# 1. Density, distribution (probability mass function), quantile, and random generation functions:


.bint.check.params <- function(n, A)
	{
	la <- length(A)
	if(!n>0 ){
		stop("the following constraint must be met:\nn > 0\n", call. = FALSE)
		}
	ll <- list()
	for(i in 1:la){
		if(!A[i]>=0 || !A[i]<=n){
			stop("the following constraints must be met:\n0 <= a <= n\n", call. = FALSE)
			}
		ll[[i]] <- A[i]
		}
	vmin <- max(sum(A)-(la-1)*n,0)
	vmax <- Reduce(min, ll)
	vrange <- vmin:vmax
	return(vrange)
	}


#' The Binomial Intersection Distribution
#' 
#' Density, distribution function, quantile function and random generation for the binomial intersection distribution.
#' 
#' @param n An integer specifying the number of categories in the urns.
#' @param A A vector of integers specifying the numbers of balls drawn from each urn. The length of the vector equals the number of urns.
#' @details The binomial intersection distribution is given by \deqn{  P(X = v|N) = {b \choose v} \left(\prod_{i=1}^{N-1} p_{i}\right)^{v} \left(1 - \prod_{i=1}^{N-1} p_{i}\right)^{b-v} }{ P(X = v|N) = choose(b,v)*((prod_{i=1}^{N-1} p_i)^v) * (1 - prod_{i=1}^{N-1} p_i)^(b-v) } where b gives the sample size which is smallest. This is an approximation for the hypergeometric intersection distribution when \eqn{n}{n} is large and \eqn{b}{b} is small relative to the samples taken from the \eqn{N-1}{N-1} other urns.
#' @name Binomialintersection
NULL
#> NULL

#' @rdname Binomialintersection
#' @param range A vector of integers specifying the intersection sizes for which probabilities (dhint) or cumulative probabilites (phint) should be computed (can be a single number). If range is NULL (default) then probabilities will be returned over the entire range of possible values.
#' @param log Logical. If TRUE, probabilities p are given as log(p). Defaults to FALSE.
#' @export
#' @examples 
#' ## Generate the distribution of intersections sizes:
#' dd <- dbint(20, c(10, 12, 11, 14))
#' ## Restrict the range of intersections.
#' dd <- dbint(20, c(10, 12), range = 0:5)
dbint <- function(n, A, range = NULL, log = FALSE)
	{
	# range is a vector giving intersection sizes for which the user wishes to retrieve probabilities.
	# A is a vector of integers giving sample sizes from each urn (N = length(A)).
	vrange <- .bint.check.params(n, A)
	if(is.null(range)){
		range <- vrange
		}
	la <- length(A)
	rn <- intersect(range, vrange)
	if(length(rn)==0){
		dist <- data.frame(v=range, p=rep(0,length(range)))
	}else{
		dist <- .bint.multi.N(n, A, range = rn)
		}
	if(log){
		dist[,2] <- log(dist[,2])
		}
	return(dist)
	}


#' @rdname Binomialintersection
#' @param vals A vector of integers specifying the intersection sizes for which probabilities (dhint) or cumulative probabilites (phint) should be computed (can be a single number). If range is NULL (default) then probabilities will be returned over the entire range of possible values.
#' @param log.p Logical. If TRUE, probabilities p are given as log(p). Defaults to FALSE.
#' @param upper.tail Logical. If TRUE, probabilities are P(X >= v), else P(X <= v). Defaults to TRUE.
#' @export
#' @examples 
#' ## Generate cumulative probabilities.
#' pp <- pbint(29, c(15, 8), vals = 5)
#' pp <- pbint(29, c(15, 8), vals = 2, upper.tail = FALSE)
pbint <- function(n, A, vals, upper.tail = TRUE, log.p = FALSE)
	{
	# vals are the values of v for which we want cumulative probabilities.
	dist <- dbint(n, A)
	if(log.p){
		dist[,2] <- log(dist[,2])
		}
	vrange <- .bint.check.params(n, A)
	rn <- intersect(vals, vrange)
	if(length(rn)==0){
		pp <- NULL
		for(i in 1:length(vals)){
			if(vals[i] < min(vrange)){
				if(upper.tail){
					pp[i] <- 1
				}else{
					pp[i] <- 0
					}
			}else{
				if(upper.tail){
					pp[i] <- 0
				}else{
					pp[i] <- 1
					}
				}
			}
		pval <- data.frame(v=vals, cum.p=rep(pp, length(vals)))
	}else{
		pv <- NULL
		for(i in 1:length(rn)){
			if(upper.tail){
				inds <- which(dist[,1]==rn[i]):nrow(dist)
			}else{
				inds <- 1:which(dist[,1]==rn[i])
				}
			pv[i] <- sum(dist[inds, 2])
			}
		pval <- data.frame(v=rn, cum.p=pv)
		}
	if(log.p){
		pval[,2] <- log(pval[,2])
		}
	return(pval)
	}


#' @rdname Binomialintersection
#' @param p A probability between 0 and 1.
#' @export
#' @examples 
#' ## Extract quantiles:
#' qq <- qbint(0.15, 23, c(12, 10))
qbint <- function(p, n, A, upper.tail = TRUE, log.p = FALSE)
	{
	# p is a probability.
	if(!p>=0 || !p<=1){
		stop("p must be between 0 and 1\n", call. = FALSE)
		}
	vrange <- .bint.check.params(n, A)
	vals <- vrange
	dist <- dbint(n, A, range=vals)
	pvals <- pbint(n, A, upper.tail=upper.tail, vals=vals)
	inds <- which(pvals[,2]<=p)
	pv <- sum(dist[inds, 2])
	qq <- pvals[max(inds),1]
	if(log.p){
		pv <- log(pv)
		}
	ret <- data.frame(v=qq, cum.p=pv)
	return(ret)
	}


#' @rdname Binomialintersection
#' @param num An integer specifying the number of random numbers to generate. Defaults to 5.
#' @export
#' @examples 
#' ## Generate random samples from Binomial intersection distributions.
#' rr <- rbint(num = 10, 18, c(9, 14))
rbint <- function(num = 5, n, A)
	{
	vrange <- .bint.check.params(n, A)
	probs <- dbint(n, A, range=vrange)
	samp <- sample(vrange, num, prob = probs[,2], replace = TRUE)
	return(samp)
	}


# Worker functions. #

# Binomial approximation for large n.
.bint.multi.N <- function(n, A, range)
	{
	ll <- list(); P <- NULL
	for(i in 1:length(A)){
		ll[[i]] <- A[i]
		P <- append(P, (A[i]/n))
		}
	b <- Reduce(min, ll)
	P <- P[-which(P==(b/n))[1]]
	dist <- NULL
	for(v in range){
		qs <- choose(b,v)*((1-prod(P))^(b-v))*(prod(P))^v		
		
		dist <- append(dist, qs)
		}
	ret <- data.frame(v=range, p=dist)
	return(ret)
	}

Any scripts or data that you put into this service are public.

hint documentation built on Feb. 2, 2022, 5:10 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

hint
Tools for Hypothesis Testing Based on Hypergeometric Intersection Distributions

R/bint_dist.R
In hint: Tools for Hypothesis Testing Based on Hypergeometric Intersection Distributions

Defines functions .bint.multi.N rbint qbint pbint dbint .bint.check.params

Documented in dbint pbint qbint rbint

Try the hint package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

hint Tools for Hypothesis Testing Based on Hypergeometric Intersection Distributions

R/bint_dist.R In hint: Tools for Hypothesis Testing Based on Hypergeometric Intersection Distributions

Defines functions .bint.multi.N rbint qbint pbint dbint .bint.check.params

Documented in dbint pbint qbint rbint

Try the hint package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

hint
Tools for Hypothesis Testing Based on Hypergeometric Intersection Distributions

R/bint_dist.R
In hint: Tools for Hypothesis Testing Based on Hypergeometric Intersection Distributions