#' Calculate a normal distribution confidence interval
#'
#' @description Calculate a normal distribution confidence interval.
#'
#' @param interval A scalar between 0 and 1, indicating the width of the
#' interval. For example, use `0.95` to calculate a 95% confidence interval.
#' @param estimate A numeric vector containing the point estimates.
#' @param variance A numeric vector containing the variances.
#'
#' @details This function doesn't need to know which type of epidemiological
#' measure has been passed as `estimate` (e.g., rate, risk). It also assumes
#' that the values of the input arguments `estimate` and `variance` are aligned
#' and that the vectors are the same length.
#'
#' This function is used to construct confidence intervals in [get_spec_rt()] and
#' [get_ds_rt()]. In general, *the normal distribution should not be used to construct
#' confidence intervals when the standard deviation is greater than a third of
#' the mean*. The function throws a warning if this is the case.
#'
#' @return A data frame with columns `lower` and `upper`, which contain the lower
#' and upper limits of a confidence interval, respectively.
#'
#' @references \href{http://documentation.sas.com/doc/en/pgmsascdc/9.4_3.4/statug/statug_stdrate_details.htm}{The STDRATE Procedure}
#'
#' @examples
#' \dontrun{
#' # calculate a single confidence interval
#' get_ci_norm(interval = 0.95, estimate = 10, variance = 4)
#'
#' # calculate confidence intervals around multiple estimates
#' get_ci_norm(interval = 0.95, estimate = c(10, 20), variance = c(4, 9))
#'
#' # use columns of a data frame as inputs
#' df <- data.frame(estimate = c(10, 20, 30), variance = c(4, 9, 16))
#'
#' # using dplyr
#' df %>%
#' dplyr::mutate(get_ci_norm(0.975, estimate, variance))
#'
#' # using base
#' get_ci_norm(interval = 0.975, estimate = df$estimate, variance = df$variance)
#' }
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @seealso [get_ci_lnorm()], [get_ci_pois()], [get_ci_gamma()]
#' @export
get_ci_norm <- function(interval, estimate, variance) {
# TODO integrate normal CI into functions that calculate risk, standardized risk, SMR, rate difference, and risk difference
# check validity of inputs
if (!is.numeric(interval)) {
stop("`interval` must be a scalar between 0 and 1")
}
if (length(interval) != 1) {
stop("`interval` must be a scalar between 0 and 1")
}
if (interval < 0 | 1 < interval) {
stop("`interval` must be a scalar between 0 and 1")
}
if (!is.numeric(estimate)) {
stop("`estimate` must be numeric")
}
if (!is.numeric(variance)) {
stop("`variance` must be numeric and greater than or equal to 0")
}
if (!all(variance >= 0, na.rm = TRUE)) {
stop("`variance` must be numeric and greater than or equal to 0")
}
if (length(estimate) != length(variance)) {
stop("length of `variance` must be equal to length of `estimate`")
}
# define alpha
alpha <- 1 - interval
# check if distribution should be avoided
sd <- sqrt(variance)
threshold <- (1 - 0.997) / 2
mass_below_0 <- stats::pnorm(0, mean = estimate, sd = sd)
if (any(mass_below_0 > threshold, na.rm = TRUE)) {
warning("more than 0.15% of the probability mass lies below 0 for at least one of the estimates, consider using a different probability distribution")
}
# follow SAS implementation
result <- data.frame(
lower = estimate - stats::qnorm(p = 1 - (alpha / 2), mean = 0, sd = 1) * sd,
upper = estimate + stats::qnorm(p = 1 - (alpha / 2), mean = 0, sd = 1) * sd
) %>%
dplyr::select(.data$lower, .data$upper)
# result <- purrr::map2(estimate, sqrt(variance), ~ stats::qnorm(c((alpha/2), 1 - (alpha/2)), .x, .y)) %>%
# data.frame() %>%
# data.table::transpose() %>%
# dplyr::rename(lower = 1, upper = 2)
return(result)
}
#' Calculate a log normal distribution confidence interval
#'
#' @description Calculate a log normal distribution confidence interval.
#'
#' @param interval A scalar between 0 and 1, indicating the width of the
#' interval. For example, use `0.95` to calculate a 95% confidence interval.
#' @param estimate A numeric vector containing the point estimates.
#' @param variance_log A numeric vector containing the variances of the
#' log-transformed `estimate`.
#'
#' @details This function doesn't need to know which type of epidemiological
#' measure has been passed as `estimate` (e.g., rate, risk). It also assumes
#' that the values of the input arguments `estimate` and `variance_log` are aligned
#' and that the vectors are the same length.
#'
#' This function is used to construct confidence intervals in [get_spec_rt()] and
#' [get_ds_rt()].
#'
#' @return A data frame with columns `lower` and `upper`, which contain the lower
#' and upper limits of a confidence interval, respectively.
#'
#' @references \href{http://documentation.sas.com/doc/en/pgmsascdc/9.4_3.4/statug/statug_stdrate_details.htm}{The STDRATE Procedure}
#'
#' @examples
#' \dontrun{
#' # calculate a single confidence interval
#' get_ci_lnorm(interval = 0.95, estimate = 0.2, variance_log = 0.05)
#'
#' # calculate confidence intervals around multiple estimates
#' get_ci_lnorm(interval = 0.95, estimate = c(0.2, 0.4), variance_log = c(0.05, 0.025))
#'
#' # use columns of a data frame as inputs
#' df <- data.frame(estimate = c(0.2, 0.4), variance_log = c(0.05, 0.025))
#'
#' # using dplyr
#' df %>%
#' dplyr::mutate(get_ci_lnorm(0.975, estimate, variance_log))
#'
#' # using base
#' get_ci_lnorm(interval = 0.975, estimate = df$estimate, variance_log = df$variance_log)
#' }
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @seealso [get_ci_norm()], [get_ci_pois()], [get_ci_gamma()]
#' @export
get_ci_lnorm <- function(interval, estimate, variance_log) {
# TODO integrate log normal CI into functions that calculate risk, standardized risk, SMR, rate ratio, and risk ratio
# check validity of inputs
if (!is.numeric(interval)) {
stop("`interval` must be a scalar between 0 and 1")
}
if (length(interval) != 1) {
stop("`interval` must be a scalar between 0 and 1")
}
if (interval < 0 | 1 < interval) {
stop("`interval` must be a scalar between 0 and 1")
}
if (!is.numeric(estimate)) {
stop("`estimate` must be numeric")
}
if (!is.numeric(variance_log)) {
stop("`variance_log` must be numeric and greater than or equal to 0")
}
if (!all(variance_log >= 0, na.rm = TRUE)) {
stop("`variance_log` must be numeric and greater than or equal to 0")
}
if (length(estimate) != length(variance_log)) {
stop("length of `variance_log` must be equal to length of `estimate`")
}
# define alpha
alpha <- 1 - interval
# follow SAS implementation
result <- data.frame(
lower = estimate * exp(-(stats::qnorm(p = 1 - (alpha / 2), mean = 0, sd = 1) * sqrt(variance_log))),
upper = estimate * exp(stats::qnorm(p = 1 - (alpha / 2), mean = 0, sd = 1) * sqrt(variance_log))
)
return(result)
}
#' Calculate a Poisson distribution confidence interval
#'
#' @description Calculate a Poisson distribution confidence interval. This
#' function imitates [stats::qpois()] using [stats::qchisq()] to allow for the
#' continuous extension of the estimate.
#'
#' @param interval A scalar between 0 and 1, indicating the width of the
#' interval. For example, use `0.95` to calculate a 95% confidence interval.
#' @param x A numeric vector of positive integers. It can contain the
#' stratum-specific number of events or the observed number of events in the
#' study population.
#' @param y A vector of positive numbers. It can contain the population-time for
#' each stratum or the expected number of events. The default value is `NULL`.
#'
#' @details When working with rates, pass the stratum-specific
#' number of events as `x` and the population-time for each stratum as `y`.
#' When working with standardized morbidity/mortality ratios, pass the observed
#' number of events as `x` and the expected number of events as `y`. Ensure
#' that the values of `x` and `y` are aligned and that the vectors are the same
#' length.
#'
#' To calculate a confidence interval around count data, pass a vector
#' of counts as `x`.
#'
#' This function is used to construct confidence intervals in [get_spec_rt()].
#'
#' @return A data frame with columns `lower` and `upper`, which contain the lower
#' and upper limits of a confidence interval, respectively.
#'
#' @examples
#' \dontrun{
#' # calculate a single confidence interval
#' get_ci_pois(interval = 0.95, x = 49, y = 52) # rate
#' get_ci_pois(interval = 0.95, x = 11) # count
#'
#' # use columns of a data frame as inputs
#' df <- data.frame(x = c(24, 49), y = c(94, 52))
#'
#' # using dplyr
#' df %>%
#' dplyr::mutate(get_ci_pois(0.95, x, y))
#'
#' # using base
#' get_ci_pois(0.95, df$x, df$y)
#' }
#'
#' @references \href{http://documentation.sas.com/doc/en/pgmsascdc/9.4_3.4/statug/statug_stdrate_details.htm}{The STDRATE Procedure}
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @seealso [get_ci_norm()], [get_ci_lnorm()], [get_ci_gamma()]
#' @export
get_ci_pois <- function(interval, x, y = NULL) {
# TODO integrate Poisson CI into function that calculates SMR
# check validity of inputs
if (!is.numeric(interval)) {
stop("`interval` must be a scalar between 0 and 1")
}
if (length(interval) != 1) {
stop("`interval` must be a scalar between 0 and 1")
}
if (interval < 0 | 1 < interval) {
stop("`interval` must be a scalar between 0 and 1")
}
if (!is.numeric(x)) {
stop("`x` must be a numeric vector of positive integers")
}
if (!all(x %% 1 == 0, na.rm = TRUE)) {
stop("`x` must be a numeric vector of positive integers")
}
if (!all(x > 0, na.rm = TRUE)) {
stop("`x` must be a numeric vector of positive integers")
}
if (!is.null(y)) {
if (!is.numeric(y)) {
stop("`y` must be a numeric vector of positive numbers")
}
if (!all(y > 0, na.rm = TRUE)) {
stop("`y` must be a numeric vector of positive numbers")
}
if (length(x) != length(y)) {
stop("length of `y` is not compatible with that of `x`")
}
} else {
# `x` is count data so recycle `y` to match length of `x`
y <- 1
}
# determine quantiles of interest
alpha <- 1 - interval
q_upper <- 1 - (alpha / 2)
q_lower <- alpha / 2
result <- data.frame(x = x, y = y) %>%
dplyr::mutate(
lower = stats::qchisq(q_lower, df = 2 * x) / (2 * y),
upper = stats::qchisq(q_upper, df = 2 * (x + 1)) / (2 * y)
) %>%
dplyr::select(.data$lower, .data$upper)
return(result)
}
#' Calculate a gamma distribution confidence interval
#'
#' @description Derive an approximate gamma distribution confidence interval
#' using [stats::qchisq()].
#'
#' @param interval A scalar between 0 and 1, indicating the width of the
#' interval. For example, use `0.95` to calculate a 95% confidence interval.
#' @param estimate A numeric vector containing the point estimates.
#' @param weights A list of numeric vectors where each vector contains the
#' weights of each strata in the directly standardized rate calculation.
#' @param variance A numeric vector containing the variances.
#' @param method A string to indicate which method to use to calculate the
#' confidence interval. The default is `"tcz06"`, which refers to the method
#' proposed by Tiwari, Clegg, and Zou (2006). Use `"ff97"` for a more
#' conservative (wider) confidence interval, as proposed by Fay and Feuer (1997).
#'
#' @details This function assumes that the values of the input arguments
#' `estimate`, `weights`, and `variance` are aligned and that they are the same
#' length.
#'
#' This function is used to construct confidence intervals in [get_ds_rt()].
#'
#' @return A data frame with columns `lower` and `upper`, which contain the lower
#' and upper limits of a confidence interval, respectively.
#'
#' @references \href{http://documentation.sas.com/doc/en/pgmsascdc/9.4_3.4/statug/statug_stdrate_details.htm}{The STDRATE Procedure}
#'
#' @examples
#' \dontrun{
#' # calculate a single confidence interval
#' e_1 <- 0.015
#' w_1 <- c(
#' 3.7e-05, 2.2e-05, 2.0e-05, 1.2e-05, 2.3e-05,
#' 7.0e-05, 3.8e-05, 6.0e-05, 1.7e-05, 2.6e-05
#' )
#' v_1 <- 1e-06
#'
#' get_ci_gamma(interval = 0.95, estimate = e_1, weights = list(w_1), variance = v_1)
#' get_ci_gamma(interval = 0.9, estimate = e_1, weights = list(w_1), variance = v_1, method = "ff97")
#'
#' # calculate confidence intervals around multiple estimates
#' e_2 <- 0.004
#' w_2 <- c(
#' 3.4e-06, 2.0e-06, 1.8e-06, 1.1e-06, 2.1e-06,
#' 6.3e-06, 3.5e-06, 5.4e-06, 1.5e-06, 2.3e-06
#' )
#' v_2 <- 1e-08
#'
#' get_ci_gamma(
#' interval = 0.9, estimate = c(e_1, e_2),
#' weights = list(w_1, w_2), variance = c(v_1, v_2)
#' )
#' }
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @seealso [get_ci_norm()], [get_ci_lnorm()], [get_ci_pois()]
#' @export
get_ci_gamma <- function(interval, estimate, weights, variance, method = "tcz06") {
# check validity of inputs
if (!is.numeric(interval)) {
stop("`interval` must be a scalar between 0 and 1")
}
if (length(interval) != 1) {
stop("`interval` must be a scalar between 0 and 1")
}
if (interval < 0 | 1 < interval) {
stop("`interval` must be a scalar between 0 and 1")
}
if (!is.numeric(estimate)) {
stop("`estimate` must be numeric")
}
if (!is.list(weights)) {
stop("`weights` must be a list of numeric vectors")
}
if (!all(sapply(weights, is.numeric))) {
stop("`weights` must be a list of numeric vectors")
}
if (!is.numeric(variance)) {
stop("`variance` must be numeric and greater than or equal to 0")
}
if (!all(variance >= 0, na.rm = TRUE)) {
stop("`variance` must be numeric and greater than or equal to 0")
}
if (stringr::str_detect(method,
stringr::regex("(^ff97)$|^(tcz06)", ignore_case = TRUE),
negate = TRUE
)) {
stop("`method` must be either 'tcz06' or 'ff97'")
}
if (length(unique(purrr::map_dbl(list(estimate, weights, variance), length))) != 1) {
stop("length of `estimate`, `weights`, and `variance` must be the same")
}
# determine quantiles of interest
alpha <- 1 - interval
q_lower <- alpha / 2
q_upper <- 1 - (alpha / 2)
dt <- data.table::data.table(estimate = estimate, variance = variance, weights = weights)
# define variables specific to method chosen by user
if (method == "tcz06") { # default
dt <- dt %>%
dplyr::mutate(
w = purrr::map_dbl(weights, mean), # w_j
w_sq = purrr::map(weights, ~ .x**2) %>%
purrr::map_dbl(mean)
) # w_j**2
} else if (method == "ff97") { # less conservative method
dt <- dt %>%
dplyr::mutate(
w = purrr::map_dbl(weights, max), # w_x
w_sq = .data$w**2
) # w_x**2
}
# construct confidence interval
dt <- dt %>%
dplyr::mutate(
df_lower = (2 * (estimate**2)) / variance,
df_upper = (2 * ((estimate + .data$w)**2)) / (variance + .data$w_sq),
lower = (variance / (2 * estimate)) * stats::qchisq(q_lower, .data$df_lower),
upper = ((variance + .data$w_sq) / (2 * (estimate + .data$w))) * stats::qchisq(q_upper, .data$df_upper)
) %>%
dplyr::select(.data$lower, .data$upper) %>%
as.data.frame() # coerce back to data.frame
return(dt)
}
#' Use regex to detect a probability distribution name
#'
#' @param dist A string provided by the user
#'
#' @return One of "normal", "lognormal", "poisson", or "gamma". If there is no
#' match, an error is thrown.
#'
#' @keywords internal
get_dist_name <- function(dist) {
if (stringr::str_detect(dist, stringr::regex("^normal$", ignore_case = TRUE))) {
name <- "normal"
} else if (stringr::str_detect(dist, stringr::regex("^log(-|\\s)?normal$", ignore_case = TRUE))) {
name <- "lognormal"
} else if (stringr::str_detect(dist, stringr::regex("^poisson$", ignore_case = TRUE))) {
name <- "poisson"
} else if (stringr::str_detect(dist, stringr::regex("^gamma$", ignore_case = TRUE))) {
name <- "gamma"
} else {
stop("string does not match name of probability distribution supported by package")
}
return(name)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.