# NOTICE: This file contains code modified from epiR::epi.2by2 version 1.0-2
# between 20 August 2019 and 22 August 2019.
#
# Many of the changes involve abstracting repetative routines into
# functions, splitting nested calculations into separate variables, and
# modifying variable names. The source lines of the original code are
# noted on the relevant sections of code below.
#' calculate ratios (odds/risk) from a 2x2x2 table.
#'
#' @param x a 2x2x2 table with exposure in rows, outcome in columns, and strata in the third dimension
#' @param measure one of "OR", "RR", or "IRR"
#'
#' @return a data frame with crude and stratified ratios
#' @noRd
#'
#' @examples
#'
#' arr <- c(10, 35, 90, 465, 36, 25, 164, 175)
#' arr <- array(arr,
#' dim = c(2, 2, 2),
#' dimnames = list(
#' risk = c(TRUE, FALSE),
#' outcome = c(TRUE, FALSE),
#' old = c(FALSE, TRUE)
#' )
#' )
#' strata_ratio_table(arr)
strata_ratio_table <- function(x, measure = "OR") {
d <- data_frame_from_2x2(x)
if (measure == "OR") {
data.frame(
exp_cases = d$A_exp_cases,
unexp_cases = d$C_unexp_cases,
cases_odds = d$A_exp_cases / d$C_unexp_cases,
exp_controls = d$B_exp_controls,
unexp_controls = d$D_unexp_controls,
controls_odds = d$B_exp_controls / d$D_unexp_controls
)
} else if (measure == "RR") {
# DIM 1: exposure
# DIM 2: outcome (case, control)
# DIM 3: strata
data.frame(
exp_cases = d$A_exp_cases,
exp_total = d$total_exposed,
exp_risk = (d$A_exp_cases / d$total_exposed) * 100,
unexp_cases = d$C_unexp_cases,
unexp_total = d$total_unexposed,
unexp_risk = (d$C_unexp_cases / d$total_unexposed) * 100
)
} else if (measure == "IRR") {
# DIM 1: exposure
# DIM 2: outcome (outcome, person time)
# DIM 3: strata
data.frame(
exp_cases = d$A_exp_cases,
exp_perstime = d$B_exp_controls,
exp_incidence = (d$A_exp_cases / d$B_exp_controls) * 100,
unexp_cases = d$C_unexp_cases,
unexp_perstime = d$D_unexp_controls,
unexp_incidence = (d$C_unexp_cases / d$D_unexp_controls) * 100
)
} else {
stop(glue::glue("the measure {measure} is not recognised"), call. = FALSE)
}
}
#' get ratio estimates from a 2x2x2 array
#'
#' @param x a 2x2x2 array
#' @param measure one of OR, RR, or IRR
#' @param conf the confidence level
#'
#' @return a matrix with three columns, ratio, lower, and upper
#' @noRd
#'
#' @examples
#' arr <- c(10, 35, 90, 465, 36, 25, 164, 175)
#' arr <- array(arr,
#' dim = c(2, 2, 2),
#' dimnames = list(
#' risk = c(TRUE, FALSE),
#' outcome = c(TRUE, FALSE),
#' old = c(FALSE, TRUE)
#' )
#' )
#' get_ratio_est(arr, "OR")
#' get_ratio_est(arr, "RR")
#' get_ratio_est(arr, "IRR")
get_ratio_est <- function(x, measure = "OR", conf = 0.95, strata_name = "strata") {
d <- data_frame_from_2x2(x)
CS <- get_chisq_pval(x)
has_strata <- nrow(d) > 1
n <- 1L
rnames <- "crude"
if (has_strata) {
n <- n + dim(x)[[3]] + 1L
rnames <- c(rnames, strata = glue::glue("{strata_name}: {dimnames(x)[[3]]}"), "MH")
if (measure != "IRR") {
n <- n + 1L
woolf <- get_woolf_pval(x, measure)
rnames <- c(rnames, "woolf")
} else {
woolf <- NULL
}
MH <- get_mh(x, measure, conf)
} else {
MH <- NULL
woolf <- NULL
}
# Risk ratio:
# A / (A + B) exposed cases / all cases
# ----------- = ------------------------------
# C / (C + D) exposed controls / all controls
ratio <- switch(measure,
RR = ratio_est(d$A_exp_cases,
d$total_exposed,
d$C_unexp_cases,
d$total_unexposed,
measure = "RR",
conf = conf
),
# Odds ratio:
# A / B exposed cases / unexposed cases
# ----- = -------------------------------------
# C / D exposed controls / unexposed controls
OR = ratio_est(d$A_exp_cases,
d$C_unexp_cases,
d$B_exp_controls,
d$D_unexp_controls,
measure = "OR",
conf = conf
),
# Incidence Rate Ratio:
# A / B exposed cases / exposed person-time
# ----- = ----------------------------------------
# C / D exposed controls / unexposed person-time
IRR = ratio_est(d$A_exp_cases,
d$B_exp_controls,
d$C_unexp_cases,
d$D_unexp_controls,
measure = "IRR",
conf = conf
),
)
if (has_strata) {
res <- data.frame(
ratio = numeric(n),
lower = numeric(n),
upper = numeric(n),
p.value = numeric(n)
)
na <- function(x, m = measure) if (is.null(x) && m != "IRR") NA else x
res$ratio <- c(ratio[["ratio"]], MH[["ratio"]], na(woolf[["ratio"]]))
res$lower <- c(ratio[["lower"]], MH[["lower"]], na(woolf[["lower"]]))
res$upper <- c(ratio[["upper"]], MH[["upper"]], na(woolf[["upper"]]))
res$p.value <- c(CS[["p.value"]], NA_real_, na(woolf[["p.value"]]))
} else {
ratio$p.value <- CS$p.value
res <- ratio
}
rownames(res) <- rnames
res
}
#' get chi square p-value from a 2D or 3D array
#'
#' @param x a 2x2 or 2x2x2 table
#'
#' @return a 1 or 3x2 matrix with chi square statistics and p-values
#' @noRd
#'
#' @examples
#' arr <- c(10, 35, 90, 465, 36, 25, 164, 175)
#' arr <- array(arr,
#' dim = c(2, 2, 2),
#' dimnames = list(
#' risk = c(TRUE, FALSE),
#' outcome = c(TRUE, FALSE),
#' old = c(FALSE, TRUE)
#' )
#' )
#' get_chisq_pval(arr)
get_chisq_pval <- function(x) {
ndim <- length(dim(x))
n <- if (ndim == 3L) dim(x)[ndim] + 1L else 1L
rnames <- if (n == 3L) dimnames(x)[[3]] else NULL
# res <- matrix(ncol = 2, nrow = n,
# dimnames = list(c("crude", rnames),
# c("statistic", "p.value")))
res <- data.frame(statistic = numeric(n), p.value = numeric(n))
# No matter the dimensions, we can always get the crude p-value
crude <- stats::chisq.test(apply(x, MARGIN = 1:2, FUN = sum), correct = FALSE)
res$statistic[1] <- crude$statistic
res$p.value[1] <- crude$p.value
# if there are strata, we can apply over that dimension and get the statistic
# for each.
if (ndim == 3) {
strat <- apply(x, MARGIN = 3, FUN = stats::chisq.test, correct = FALSE)
for (i in seq_along(strat)) {
res$statistic[i + 1L] <- strat[[i]]$statistic
res$p.value[i + 1L] <- strat[[i]]$p.value
}
}
res
}
#' calculate ratio estimates for odds, risk, and incidence rate ratios
#'
#' @param N1 first numerator
#' @param D1 first denominator
#' @param N2 second numerator
#' @param D2 second denominator
#' @param measure one of "OR", "RR", or "IRR"
#' @param conf confidence level
#'
#' @return a vector containing the ratio estimate, lower bound, and upper bound
#'
#' @note This was adapted from the code in the epiR::epi.2by2 version 1.0-2.
#'
#' OR is from .funORwald (294--308)
#' RR is from .funRRwald (156--171)
#' IRR is from lines 787--797 (no specific function)
#'
#' Many of the changes involve abstracting repetative routines into functions,
#' splitting nested calculations into separate variables, and modifying
#' variable names.
#'
#' @details This function produces odds ratio, risk ratio, and incidence rate
#' ratio estimates with confidence intervals. I have generalized the methods to
#' use as little code duplication as possible.
#'
#' @noRd
ratio_est <- function(N1, D1, N2, D2, measure = "OR", conf = 0.95) {
z <- get_z(conf)
# inverse numbers for the harmonic
iN1 <- 1 / N1
iD1 <- 1 / D1
iN2 <- 1 / N2
iD2 <- 1 / D2
case <- N1 / D1
control <- N2 / D2
ratio <- case / control
log_ratio <- log(ratio)
log_ratio_var <- switch(measure,
RR = (iN1 - iD1) + (iN2 - iD2),
OR = (iN1 + iD1) + (iN2 + iD2),
IRR = iN1 + iN2
)
if (measure == "IRR") {
A <- N1
B <- D1
C <- N2
D <- D2
alpha <- 1 - ((1 - conf) / 2)
A_quantile <- stats::qf(1 - alpha, 2 * A, 2 * C + 2)
pl <- A / (A + (C + 1) * (1 / A_quantile))
C_quantile <- stats::qf(1 - alpha, 2 * C, 2 * A + 2)
ph <- (A + 1) / (A + 1 + (C / (1 / C_quantile)))
lower_limit <- pl * D / ((1 - pl) * B)
upper_limit <- ph * D / ((1 - ph) * B)
} else {
se_limits <- get_ci_from_var(log_ratio, log_ratio_var, z)
lower_limit <- se_limits[["ll"]]
upper_limit <- se_limits[["ul"]]
}
data.frame(ratio, lower = lower_limit, upper = upper_limit)
}
get_z <- function(conf) {
alpha <- 1 - ((1 - conf) / 2)
z <- stats::qnorm(alpha, mean = 0, sd = 1)
z
}
get_ci_from_var <- function(log_ratio, log_ratio_var, z) {
log_ratio_se <- sqrt(log_ratio_var)
lower_limit <- exp(log_ratio - (z * log_ratio_se))
upper_limit <- exp(log_ratio + (z * log_ratio_se))
data.frame(se = log_ratio_se, ll = lower_limit, ul = upper_limit)
}
# Mantel-Haenszel tests (no p-values)
get_mh <- function(arr, measure = "OR", conf = 0.95) {
switch(measure,
OR = mh_or(arr, conf),
RR = mh_rr(arr, conf),
IRR = mh_irr(arr, conf)
)
}
# These functions are adapted from epiR::epi.2by2 lines 1185--1204
#
# Many of the changes involve abstracting repetative routines into functions,
# splitting nested calculations into separate variables, and modifying
# variable names.
mh_rr <- function(arr, conf = 0.95) {
d <- data_frame_from_2x2(arr)[-1, ] # remove crude estimates
z <- get_z(conf)
## Summary incidence risk ratio (Rothman 2002 p 148 and 152, equation 8-2):
A <- d$A_exp_cases
C <- d$C_unexp_cases
N <- d$total
A_TU <- A * d$total_unexposed
C_TE <- C * d$total_exposed
MH_risk_ratio <- sum(A_TU / N) / sum(C_TE / N)
total_prod <- d$total_cases * d$total_exposed * d$total_unexposed
var_numerator <- sum((total_prod / N^2) - ((A * C) / N))
var_denominator <- sum(A_TU / N) * sum(C_TE / N)
MH_risk_ratio_var <- var_numerator / var_denominator
se_limits <- get_ci_from_var(log(MH_risk_ratio), MH_risk_ratio_var, z)
data.frame(ratio = MH_risk_ratio, lower = se_limits[["ll"]], upper = se_limits[["ul"]])
}
mh_irr <- function(arr, conf = 0.95) {
d <- data_frame_from_2x2(arr)[-1, ]
## Summary incidence rate ratio (Rothman 2002 p 153, equation 8-5):
A <- d$A_exp_cases
B <- d$B_exp_controls
C <- d$C_unexp_cases
D <- d$D_unexp_controls
cases <- d$total_cases # M1
person_time <- d$total_controls # M0
numerator <- sum((A * D) / person_time)
denominator <- sum((C * B) / person_time)
MH_IRR <- numerator / denominator
MH_IRR_var <- sum((cases * B * D) / (person_time^2)) / (numerator * denominator)
se_limits <- get_ci_from_var(log(MH_IRR), MH_IRR_var, get_z(conf))
data.frame(ratio = MH_IRR, lower = se_limits[["ll"]], upper = se_limits[["ul"]])
}
# The M-H statistic for odds ratios already exist in R, so it's just a matter of
# pulling out the correct values
mh_or <- function(arr, conf = 0.95) {
MH <- stats::mantelhaen.test(arr, conf.level = conf)
data.frame(ratio = MH$estimate, lower = MH$conf.int[1], upper = MH$conf.int[2])
}
get_woolf_pval <- function(x, measure = "OR") {
d <- data_frame_from_2x2(x)[-1, ]
nstrata <- dim(x)[[3]]
A <- d$A_exp_cases
B <- d$B_exp_controls
C <- d$C_unexp_cases
D <- d$D_unexp_controls
exposed <- d$total_exposed
unexposed <- d$total_unexposed
# This is taken from lines 1320--1340 of epiR::epi.2by2
#
# Many of the changes involve abstracting repetative routines into functions,
# splitting nested calculations into separate variables, and modifying
# variable names.
if (measure == "RR") {
## Test of homogeneity of risk ratios (Jewell 2004, page 154). First work
## out the Woolf estimate of the adjusted risk ratio (labelled adj_log_est
## here) based on Jewell (2004, page 134):
log_est <- log((A / exposed) / (C / unexposed))
log_est_var <- (B / (A * exposed)) + (D / (C * unexposed))
} else {
## Test of homogeneity of odds ratios (Jewell 2004, page 154). First work
## out the Woolf estimate of the adjusted odds ratio (labelled adj_log_est
## here) based on Jewell (2004, page 129):
log_est <- log(((A + 0.5) * (D + 0.5)) / ((B + 0.5) * (C + 0.5)))
log_est_var <- (1 / (A + 0.5)) + (1 / (B + 0.5)) + (1 / (C + 0.5)) + (1 / (D + 0.5))
}
inv_log_est_var <- 1 / log_est_var
adj_log_est <- sum(inv_log_est_var * log_est) / sum(inv_log_est_var)
## Equation 10.3 from Jewell (2004):
res <- sum(inv_log_est_var * (log_est - adj_log_est)^2)
p_value <- 1 - stats::pchisq(res, df = nstrata - 1)
data.frame(statistic = res, df = nstrata - 1, p.value = p_value)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.