Nothing
#' Calculate Qn
#'
#' \code{Qn} calculates \eqn{Q_n}, the posterior probability that \eqn{\lambda}
#' came from the first component of the mixture, given \emph{N = n} (Eq. 6,
#' DuMouchel 1999). \eqn{Q_n} is the mixture fraction for the posterior
#' distribution.
#'
#' @param theta_hat A numeric vector of hyperparameter estimates (likely from
#' \code{\link{autoHyper}} or from directly minimizing
#' \code{\link{negLLsquash}}) ordered as:
#' \eqn{\alpha_1, \beta_1, \alpha_2, \beta_2, P}.
#' @param N A whole number vector of actual counts from
#' \code{\link{processRaw}}.
#' @param E A numeric vector of expected counts from \code{\link{processRaw}}.
#' @return A numeric vector of probabilities.
#'
#' @details The hyperparameter estimates (\code{theta_hat}) are:
#' \itemize{
#' \item{\eqn{\alpha_1, \beta_1}: }{Parameter estimates of the first
#' component of the prior distribution}
#' \item{\eqn{\alpha_2, \beta_2}: }{Parameter estimates of the second
#' component}
#' \item{\eqn{P}: }{Mixture fraction estimate of the prior distribution}
#' }
#'
#' @examples
#' data.table::setDTthreads(2) #only needed for CRAN checks
#' theta_init <- data.frame(
#' alpha1 = c(0.5, 1),
#' beta1 = c(0.5, 1),
#' alpha2 = c(2, 3),
#' beta2 = c(2, 3),
#' p = c(0.1, 0.2)
#' )
#' data(caers)
#' proc <- processRaw(caers)
#' squashed <- squashData(proc, bin_size = 300, keep_pts = 10)
#' squashed <- squashData(squashed, count = 2, bin_size = 13, keep_pts = 10)
#' theta_hat <- autoHyper(data = squashed, theta_init = theta_init)$estimates
#' qn <- Qn(theta_hat, N = proc$N, E = proc$E)
#' head(qn)
#'
#' @references DuMouchel W (1999). "Bayesian Data Mining in Large Frequency
#' Tables, With an Application to the FDA Spontaneous Reporting System."
#' \emph{The American Statistician}, 53(3), 177-190.
#' @family posterior distribution functions
#' @keywords openEBGM
#' @seealso \code{\link{autoHyper}}, \code{\link{exploreHypers}},
#' \code{\link{negLLsquash}}, \code{\link{negLL}},
#' \code{\link{negLLzero}}, and \code{\link{negLLzeroSquash}} for
#' hyperparameter estimation.
#' @seealso \code{\link{processRaw}} for finding counts.
#' @importFrom stats dnbinom
#' @export
Qn <- function(theta_hat, N, E) {
.checkInputs_Qn(theta_hat, N, E)
prob_f1 <- theta_hat[2] / (theta_hat[2] + E) #beta1 / (beta1 + E)
prob_f2 <- theta_hat[4] / (theta_hat[4] + E) #beta2 / (beta2 + E)
f1_NB <- dnbinom(N, size = theta_hat[1], prob = prob_f1) #Eq.5 (1999)
f2_NB <- dnbinom(N, size = theta_hat[3], prob = prob_f2)
P_hat <- theta_hat[5]
Qn_num <- P_hat * f1_NB
Qn_den <- (P_hat * f1_NB) + ((1 - P_hat) * f2_NB)
Qn_num / Qn_den #Eq. 6 (1999)
}
#' Calculate EBGM scores
#'
#' \code{ebgm} calculates the Empirical Bayes Geometric Mean (\emph{EBGM}),
#' which is \sQuote{the geometric mean of the empirical Bayes posterior
#' distribution of the \dQuote{true} \emph{RR}} (DuMouchel 1999, see Eq.11). The
#' \emph{EBGM} is essentially a version of the relative reporting ratio
#' (\emph{RR}) that uses Bayesian shrinkage.
#'
#' @inheritParams Qn
#' @param qn A numeric vector of posterior probabilities that \eqn{\lambda} came
#' from the first component of the mixture, given \emph{N = n} (i.e., the
#' mixture fraction). See function \code{\link{Qn}}.
#' @param digits A scalar whole number that determines the number of decimal
#' places used when rounding the results.
#' @return A numeric vector of EBGM scores.
#'
#' @details The hyperparameter estimates (\code{theta_hat}) are:
#' \itemize{
#' \item{\eqn{\alpha_1, \beta_1}: }{Parameter estimates of the first
#' component of the prior distribution}
#' \item{\eqn{\alpha_2, \beta_2}: }{Parameter estimates of the second
#' component}
#' \item{\eqn{P}: }{Mixture fraction estimate of the prior distribution}
#' }
#'
#' @examples
#' data.table::setDTthreads(2) #only needed for CRAN checks
#' theta_init <- data.frame(
#' alpha1 = c(0.5, 1),
#' beta1 = c(0.5, 1),
#' alpha2 = c(2, 3),
#' beta2 = c(2, 3),
#' p = c(0.1, 0.2)
#' )
#' data(caers)
#' proc <- processRaw(caers)
#' squashed <- squashData(proc, bin_size = 300, keep_pts = 10)
#' squashed <- squashData(squashed, count = 2, bin_size = 13, keep_pts = 10)
#' theta_hat <- autoHyper(data = squashed, theta_init = theta_init)$estimates
#' qn <- Qn(theta_hat, N = proc$N, E = proc$E)
#' proc$EBGM <- ebgm(theta_hat, N = proc$N, E = proc$E, qn = qn)
#' head(proc)
#'
#' @references DuMouchel W (1999). "Bayesian Data Mining in Large Frequency
#' Tables, With an Application to the FDA Spontaneous Reporting System."
#' \emph{The American Statistician}, 53(3), 177-190.
#' @family posterior distribution functions
#' @keywords openEBGM
#' @seealso \code{\link{autoHyper}}, \code{\link{exploreHypers}},
#' \code{\link{negLLsquash}}, \code{\link{negLL}},
#' \code{\link{negLLzero}}, and \code{\link{negLLzeroSquash}} for
#' hyperparameter estimation.
#' @seealso \code{\link{processRaw}} for finding counts.
#' @seealso \code{\link{Qn}} for finding mixture fractions.
#' @export
ebgm <- function(theta_hat, N, E, qn, digits = 2) {
.checkInputs_ebgm(theta_hat, N, E, qn, digits)
# If X ~ gamma(alpha, beta), then E[X] = alpha / beta
# Also, E[ln(X)] = digamma(alpha) - ln(beta)
expectation1 <- digamma(theta_hat[1] + N) - log(theta_hat[2] + E) #Eq.9
expectation2 <- digamma(theta_hat[3] + N) - log(theta_hat[4] + E)
exp_log_lamda <- (qn * expectation1) + ((1 - qn) * expectation2) #Eq.9 (1999)
EBlog2 <- exp_log_lamda / log(2) #Eq.10 (1999)
EBGM <- 2 ^ EBlog2 #Eq.11 (1999)
round(EBGM, digits)
}
#' Find quantiles of the posterior distribution
#'
#' \code{quantBisect} finds the desired quantile of the posterior distribution
#' using the bisection method. Used to create credibility limits.
#'
#' @inheritParams Qn
#' @inheritParams ebgm
#' @param percent A numeric scalar between 1 and 99 for the desired
#' percentile (e.g., 5 for 5th percentile).
#' @param limits A whole number vector of length 2 for the upper and lower
#' bounds of the search space.
#' @param max_iter A whole number scalar for the maximum number of iterations.
#' Used to prevent infinite loops.
#' @return A numeric vector of quantile estimates.
#'
#' @details The hyperparameter estimates (\code{theta_hat}) are:
#' \itemize{
#' \item{\eqn{\alpha_1, \beta_1}: }{Parameter estimates of the first
#' component of the prior distribution}
#' \item{\eqn{\alpha_2, \beta_2}: }{Parameter estimates of the second
#' component}
#' \item{\eqn{P}: }{Mixture fraction estimate of the prior distribution}
#' }
#' @details Although this function can find any quantile of the posterior
#' distribution, it will often be used to calculate the 5th and 95th
#' percentiles to create a 90\% credibility interval.
#' @details The quantile is calculated by solving for \eqn{x} in the general
#' equation \eqn{F(x) = cutoff}, or equivalently, \eqn{F(x) - cutoff = 0},
#' where \eqn{F(x)} is the cumulative distribution function of the posterior
#' distribution and \eqn{cutoff} is the appropriate cutoff level (e.g., 0.05
#' for the 5th percentile).
#' @section Warning:
#' The \code{digits} argument determines the tolerance for the bisection
#' algorithm. The more decimal places you want returned, the longer the run
#' time.
#'
#' @examples
#' data.table::setDTthreads(2) #only needed for CRAN checks
#' theta_init <- data.frame(
#' alpha1 = c(0.5, 1),
#' beta1 = c(0.5, 1),
#' alpha2 = c(2, 3),
#' beta2 = c(2, 3),
#' p = c(0.1, 0.2)
#' )
#' data(caers)
#' proc <- processRaw(caers)
#' squashed <- squashData(proc, bin_size = 300, keep_pts = 10)
#' squashed <- squashData(squashed, count = 2, bin_size = 13, keep_pts = 10)
#' theta_hat <- autoHyper(data = squashed, theta_init = theta_init)$estimates
#' qn <- Qn(theta_hat, N = proc$N, E = proc$E)
#' proc$QUANT_05 <- quantBisect(percent = 5, theta = theta_hat, N = proc$N,
#' E = proc$E, qn = qn)
#' \dontrun{proc$QUANT_95 <- quantBisect(percent = 95, theta = theta_hat,
#' N = proc$N, E = proc$E, qn = qn)}
#' head(proc)
#'
#' @family posterior distribution functions
#' @seealso \url{https://en.wikipedia.org/wiki/Bisection_method}
#' @keywords openEBGM
#' @seealso \code{\link{autoHyper}}, \code{\link{exploreHypers}},
#' \code{\link{negLLsquash}}, \code{\link{negLL}},
#' \code{\link{negLLzero}}, and \code{\link{negLLzeroSquash}} for
#' hyperparameter estimation.
#' @seealso \code{\link{processRaw}} for finding counts.
#' @seealso \code{\link{Qn}} for finding mixture fractions.
#' @importFrom stats pgamma
#' @export
quantBisect <- function(percent, theta_hat, N, E, qn, digits = 2,
limits = c(-100000, 100000), max_iter = 2000) {
.checkInputs_quantBisect(percent, theta_hat, N, E, qn, digits, limits, max_iter)
cdf_error <- function(guess, cut_point, qn, theta_hat, N, E) {
# Difference between F(x) & the "cut point" (e.g. 0.05 for 5th percentile)
cdf1 <- pgamma(guess, shape = theta_hat[1] + N, rate = theta_hat[2] + E)
cdf2 <- pgamma(guess, shape = theta_hat[3] + N, rate = theta_hat[4] + E)
post_cdf <- (qn * cdf1) + ((1 - qn) * cdf2)
post_cdf - cut_point
}
tol <- 0.5 * (10 ^ (-digits))
cut_point <- percent / 100
num_pts <- length(N)
guess_init <- rep(1, num_pts) #null hypothesis: RR = 1
error_init <- cdf_error(guess_init, cut_point, qn, theta_hat, N, E)
is_too_big <- error_init > 0
left <- rep(limits[1], num_pts)
right <- rep(limits[2], num_pts)
left <- ifelse(is_too_big, left, guess_init)
right <- ifelse(is_too_big, guess_init, right)
iter_count <- 1L
while (iter_count <= max_iter) {
mid_pt <- (left + right) / 2
error <- cdf_error(mid_pt, cut_point, qn, theta_hat, N, E)
if (max((right - left) / 2) < tol) {
quantiles <- round(mid_pt, digits)
if (max(quantiles) == limits[2]) {
stop("increase maximum for 'limits'")
}
return(quantiles)
} else {
iter_count <- iter_count + 1
error_left <- cdf_error(left, cut_point, qn, theta_hat, N, E)
left <- ifelse(sign(error_left) == sign(error), mid_pt, left)
right <- ifelse(sign(error_left) == sign(error), right, mid_pt)
}
}
stop("failed to converge -- try adjusting 'limits' or 'max_iter'")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.