#### Packages --------------------------------
library(dplyr)
#' @title
#' Calculate confidence interval for crude incidence rate
#'
#' @description
#' 7 different methods that I found online for calculating the confidence
#' interval for a crude incidence rate. Note that these are all two sided. Most
#' of these come from https://www.openepi.com and their documentation and formulas.
#'
#' Mid-P exact test seems to be the preferred method
#'
#' + "Mid-P exact test" using Miettinen's (1974d) modification, as described in Epidemiologic Analysis with a Programmable Calculator, 1979.
#' + "Fisher's exact test" based on the formula (Armitage,1971; Snedecor & Cochran,1965) as described in Epidemiologic Analysis with a Programmable Calculator, 1979.
#' + "Exact Poisson is the same as the Fisher's exact but calculated differently.
#' + "Normal approximation" to the Poisson distribution as described by Rosner, Fundamentals of Biostatistics (5th Ed).
#' + "Byar approx. Poisson" as described in Rothman and Boice, Epidemiologic Analysis with a Programmable Calculator, 1979.
#' + "Rothman/Greenland" as described in Rothman and Greenland, Modern Epidemiology (2nd Ed).
#' + "CCRB" comes from the follwing website and they do not offer documentation for their methods: http://www2.ccrb.cuhk.edu.hk/stat/confidence%20interval/CI%20for%20single%20rate.htm
#'
#' @param a Number of clinical events
#' @param N Person-time at risk
#' @param pt_units Factor to multiply rate by to get Person-time; default is 100.
#' @param alpha Significance level for two-sided confidence interval
#' @param interval a vector containing the end-points of the interval to be
#' searched for the root. The function `base::uniroot()` is used to solve for
#' some confidence intervals iteratively.
#'
#' @references
#' http://epid.blogspot.com/2012/08/how-to-calculate-confidence-interval-of.html
#' https://www.openepi.com/PDFDocs/PersonTime1Doc.pdf
#' https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm
#'
#' @importFrom dplyr bind_rows
#' @importFrom dplyr mutate_at
#' @importFrom dplyr vars
#' @importFrom stats qchisq
#' @importFrom stats uniroot
#' @importFrom tibble tibble
#'
#'
#' @return
#' A tbl_df
#'
#' @export
#'
#' @examples
#' # options(pillar.sigfig = 3)
#' ir_calc(a = 18,
#' N = 352 + 10.5,
#' alpha = 0.05)
#'
#' #### From https://www.openepi.com --------------------------------
#'
#' # Person-Time Rate and 95% Confidence Intervals
#' # Per 100 Person-Time Units
#' # Number of cases: 18
#' # Person-Time: 362.5
#' #
#' # Lower CL Rate Upper CL
#' # Mid-P exact test 3.035 4.966 7.696
#' # Fisher's exact test 2.943 7.848
#' # Normal approximation 2.672 7.259
#' # Byar approx. Poisson 2.941 7.848
#' # Rothman/Greenland 3.129 7.881
ir_calc <- function(a,
N,
pt_units = 100,
alpha = 0.05,
interval = c(0, 10000000)) {
# options(pillar.sigfig = 6)
dplyr::bind_rows(
ir_calc_mid_p(a, N, pt_units, alpha, interval),
ir_calc_fisher(a, N, pt_units, alpha, interval),
ir_calc_exact_poisson(a, N, pt_units, alpha),
ir_calc_normal(a, N, pt_units, alpha),
ir_calc_byar(a, N, pt_units, alpha),
ir_calc_roth_green(a, N, pt_units, alpha),
ir_calc_ccrb(a, N, pt_units, alpha),
)
}
#### Helper functions --------------------------------
## Mid-P exact test ----------------
ir_calc_mid_p <- function(a,
N,
pt_units = 100,
alpha = 0.05,
interval = c(0, 10000000)) {
# https://www.openepi.com/Menu/OE_Menu.htm
# https://cran.r-project.org/web/packages/exact2x2/vignettes/midpAdjustment.pdf
k <- 0:(a - 1)
lower_bound <- function(x) {
(1 / 2) * exp(-x) * (x ^ a) / factorial(a) +
sum(exp(-x) * (x ^ k) / factorial(k)) - (1 - alpha / 2)
}
upper_bound <- function(x) {
(1 / 2) * exp(-x) * (x ^ a) / factorial(a) +
sum(exp(-x) * (x ^ k) / factorial(k)) - (alpha / 2)
}
tibble::tibble(
method = "Mid-P exact test",
number_of_cases = a,
person_time = N,
rate = a / N,
se = NA_real_,
lower_ci = uniroot(lower_bound,
interval = interval)$root / N,
upper_ci = uniroot(upper_bound,
interval = interval)$root / N
) %>%
mutate_at(.vars = dplyr::vars(rate,
lower_ci,
upper_ci),
.funs = list(~ pt_units * .))
}
# ir_calc_mid_p(a = 18,
# N = 352 + 10.5,
# alpha = 0.05)
## Fisher's exact test ----------------
# Turns out to be the same as the Exact Poisson...
ir_calc_fisher <- function(a,
N,
pt_units = 100,
alpha = 0.05,
interval = c(0, 10000000)) {
k_l <- 0:(a - 1)
k_u <- 0:a
lower_bound <- function(x) {
sum(exp(-x) * (x ^ k_l) / factorial(k_l)) - (1 - alpha / 2)
}
upper_bound <- function(x) {
sum(exp(-x) * (x ^ k_u) / factorial(k_u)) - (alpha / 2)
}
tibble::tibble(
method = "Fisher's exact test",
number_of_cases = a,
person_time = N,
rate = a / N,
se = NA_real_,
lower_ci = uniroot(lower_bound,
interval = interval)$root / N,
upper_ci = uniroot(upper_bound,
interval = interval)$root / N
) %>%
mutate_at(.vars = dplyr::vars(rate,
lower_ci,
upper_ci),
.funs = list(~ pt_units * .))
}
# ir_calc_fisher(a = 18,
# N = 352 + 10.5,
# alpha = 0.05)
## Exact Poisson ----------------
ir_calc_exact_poisson <- function(a,
N,
pt_units = 100,
alpha = 0.05) {
# http://www2.ccrb.cuhk.edu.hk/stat/confidence%20interval/CI%20for%20single%20rate.htm
# https://www.statsdirect.com/help/rates/poisson_rate_ci.htm
# https://www.openepi.com/PersonTime1/PersonTime1.htm
# https://www.openepi.com/PDFDocs/ProportionDoc.pdf
# http://epid.blogspot.com/2012/08/how-to-calculate-confidence-interval-of.html
# Ulm, 1990
tibble::tibble(
method = "Exact Poisson",
number_of_cases = a,
person_time = N,
rate = a / N,
lower_ci = qchisq(p = alpha / 2,
df = 2 * a) / 2,
upper_ci = qchisq(p = 1 - alpha / 2,
df = 2 * (a + 1)) / 2
) %>%
mutate(lower_ci = lower_ci / N,
upper_ci = upper_ci / N) %>%
mutate_at(.vars = dplyr::vars(rate,
lower_ci,
upper_ci),
.funs = list(~ pt_units * .))
}
# ir_calc_exact_poisson(a = 18,
# N = 352 + 10.5,
# alpha = 0.05)
## Normal Approximation ----------------
ir_calc_normal <- function(a,
N,
pt_units = 100,
alpha = 0.05) {
# If a is large enough
# http://epid.blogspot.com/2012/08/how-to-calculate-confidence-interval-of.html
z_score <- qnorm(p = 1 - alpha / 2,
mean = 0,
sd = 1,
lower.tail = TRUE)
rate <- a / N
se <- sqrt(a / N ^ 2)
tibble::tibble(
method = "Normal approximation",
number_of_cases = a,
person_time = N,
rate = rate,
se = se,
lower_ci = rate - z_score * se,
upper_ci = rate + z_score * se
) %>%
mutate_at(.vars = dplyr::vars(rate,
lower_ci,
upper_ci),
.funs = list(~ pt_units * .))
}
# ir_calc_normal(a = 18,
# N = 352 + 10.5,
# alpha = 0.05)
## Byar approx. Poisson ----------------
ir_calc_byar <- function(a,
N,
pt_units = 100,
alpha = 0.05) {
z_score <- qnorm(p = 1 - alpha / 2,
mean = 0,
sd = 1,
lower.tail = TRUE)
lower_ci <- a * (1 - 1 / (9 * a) - (z_score / 3) * sqrt(1 / a)) ^ 3
upper_ci <- (a + 1) * (1 - 1 / (9 * (a + 1)) + (z_score / 3) * sqrt(1 / (a + 1))) ^ 3
tibble::tibble(
method = "Byar approx. Poisson",
number_of_cases = a,
person_time = N,
rate = a / N,
se = NA_real_,
lower_ci = lower_ci / N,
upper_ci = upper_ci / N
) %>%
mutate_at(.vars = dplyr::vars(rate,
lower_ci,
upper_ci),
.funs = list(~ pt_units * .))
}
# ir_calc_byar(a = 18,
# N = 352 + 10.5,
# alpha = 0.05)
## Rothman-Greenland ----------------
ir_calc_roth_green <- function(a,
N,
pt_units = 100,
alpha = 0.05) {
z_score <- qnorm(p = 1 - alpha / 2,
mean = 0,
sd = 1,
lower.tail = TRUE)
rate <- a / N
# se <- sqrt((1 - rate) / a)
se <- sqrt(1 / a)
tibble::tibble(
method = "Rothman-Greenland",
number_of_cases = a,
person_time = N,
rate = rate,
se = se,
lower_ci = exp(log(rate) - (z_score * se)),
upper_ci = exp(log(rate) + (z_score * se))
) %>%
mutate_at(.vars = dplyr::vars(rate,
lower_ci,
upper_ci),
.funs = list(~ pt_units * .))
}
# ir_calc_roth_green(a = 18,
# N = 352 + 10.5,
# alpha = 0.05)
## http://www2.ccrb.cuhk.edu.hk/stat/confidence%20interval/CI%20for%20single%20rate.htm ----------------
# http://epid.blogspot.com/2012/08/how-to-calculate-confidence-interval-of.html
# https://stats.stackexchange.com/questions/301777/how-to-calculate-confidence-interval-of-incidence-rate-under-the-poisson-distrib
ir_calc_ccrb <- function(a,
N,
pt_units = 100,
alpha = 0.05) {
# http://www2.ccrb.cuhk.edu.hk/stat/confidence%20interval/CI%20for%20single%20rate.htm
# https://www.statsdirect.com/help/rates/poisson_rate_ci.htm
# They define their standard error as sqrt((1 - r) / a) where r = a / N with
# no further documentation
z_score <- qnorm(p = 1 - alpha / 2,
mean = 0,
sd = 1,
lower.tail = TRUE)
tibble::tibble(
method = "CCRB",
number_of_cases = a,
person_time = N,
rate = a / N,
se = sqrt((1 - rate) / a),
lower_ci = log(rate) - (z_score * se),
upper_ci = log(rate) + (z_score * se)) %>%
mutate_at(.vars = dplyr::vars(lower_ci, upper_ci),
.funs = list(~ exp(.))) %>%
mutate_at(.vars = dplyr::vars(rate,
lower_ci,
upper_ci),
.funs = list(~ pt_units * .))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.