#' @title RR exact CI, TOSST method.
#' @description Estimates confidence interval for the risk ratio or prevented
#' fraction; exact method based on the score statistic (inverts two one-sided
#' tests).
#' @details Estimates confidence intervals based on the score statistic that are
#' 'exact' in the sense of accounting for discreteness. Inverts two one-sided
#' score tests. The score statistic is used to select tail area tables, and
#' the binomial probability is estimated over the tail area by taking the
#' maximum over the nuisance parameter. Algorithm is a simple step search.
#'
#' The data may also be a matrix. In that case `Y` would be entered as
#'
#' `matrix(c(y1, n1-y1, y2, n2-y2), 2, 2, byrow = TRUE)`.
#' @param y Data vector c(y1, n1, y2, n2) where y are the positives, n are the
#' total, and group 1 is compared to group 2 (control or reference group).
#' @param formula Formula of the form `cbind(y, n) ~ x`, where y is the number
#' positive, n is the group size, x is a factor with two levels of treatment.
#' @param data data.frame containing variables of formula.
#' @param compare Text vector stating the factor levels: `compare[1]` is the
#' vaccinate group to which `compare[2]` (control or reference) is compared.
#' @param alpha Complement of the confidence level.
#' @param pf Estimate *RR* or its complement *PF*?
#' @param trace.it Verbose tracking of the iterations?
#' @param iter.max Maximum number of iterations
#' @param converge Convergence criterion
#' @param rnd Number of digits for rounding. Affects display only, not
#' estimates.
#' @param stepstart starting interval for step search
#' @param nuisance.points number of points over which to evaluate nuisance
#' parameter
#' @param gamma parameter for Berger-Boos correction (restricts range of
#' nuisance parameter evaluation)
#' @returns A [rr1] object with the following fields.
#' * `estimate`: vector with point and interval estimate
#' * `estimator`: either `"PF"` or `"RR"`
#' * `y`: data.frame with "y1", "n1", "y2", "n2" values.
#' * `rnd`: how many digits to round the display
#' * `alpha`: complement of confidence level
#' @export
#' @references Koopman PAR, 1984. Confidence intervals for the ratio of two
#' binomial proportions. *Biometrics* 40:513-517.
#'
#' Agresti A, Min Y, 2001. On small-sample confidence intervals for
#' parameters in discrete distribution. *Biometrics* 57: 963-971.
#'
#' Berger RL, Boos DD, 1994. P values maximized over a confidence set for the
#' nuisance parameter. *Journal of the American Statistical Association*
#' 89:214-220.
#' @author [PF-package]
#' @seealso [RRotsst], [rr1]
#'
#' @examples
#' # Both examples represent the same observation, with data entry by vector
#' # and matrix notation.
#'
#' y_vector <- c(4, 24, 12, 28)
#' RRtosst(y_vector)
#'
#' # PF
#' # 95% interval estimates
#'
#' # PF LL UL
#' # 0.611 0.012 0.902
#'
#' y_matrix <- matrix(c(4, 20, 12, 16), 2, 2, byrow = TRUE)
#' # [, 1] [, 2]
#' # [1, ] 4 20
#' # [2, ] 12 16
#'
#' RRtosst(y_matrix)
#'
#' # PF
#' # 95% interval estimates
#'
#' # PF LL UL
#' # 0.611 0.012 0.902
#'
#' require(dplyr)
#' data1 <- data.frame(group = rep(c("treated", "control"), each = 2),
#' y = c(1, 3, 7, 5),
#' n = c(12, 12, 14, 14),
#' cage = rep(paste("cage", 1:2), 2))
#' data2 <- data1 |>
#' group_by(group) |>
#' summarize(sum_y = sum(y),
#' sum_n = sum(n))
#' RRtosst(data = data2, formula = cbind(sum_y, sum_n) ~ group,
#' compare = c("treated", "control"))
#'
#' # PF
#' # 95% interval estimates
#'
#' # PF LL UL
#' # 0.611 0.012 0.902
#' @importFrom stats qbeta
RRtosst <- function(y = NULL,
formula = NULL,
data = NULL,
compare = c("vac", "con"),
alpha = 0.05,
pf = TRUE,
stepstart = .1,
iter.max = 36,
converge = 1e-6,
rnd = 3,
trace.it = FALSE,
nuisance.points = 120,
gamma = 1e-6) {
###########################################
## Error handling for input options
## - y can be matrix or vector (expects formula and data to be NULL)
## - if formula is specified, data is required (expects y is null)
###########################################
.check_3input_cases_freq(data = data, formula = formula, y = y)
# Estimates exact confidence interval by the TOSST method Score statistic
# used to select tail area tables Binomial probability estimated over the
# tail area by taking the maximum over the nuisance parameter
# Written 9/17/07 by Siev
# Functions called by rrcix():
#
# .rr.score.asymp - gets asymptotic interval for starting value of upper
# bound found in this file below
#
# (if want to eliminate calling this function would have to search
# down from r.max)
#
# binci - gets Clopper-Pearson intervals for Berger-Boos method
# included here now, but may be moved to another package
binci <- function(y,
n,
alpha = .05,
show.warnings = FALSE) {
w <- 1 * show.warnings - 1
options(warn = w)
p <- y / n
cpl <- ifelse(y > 0, qbeta(alpha / 2, y, n - y + 1), 0)
cpu <- ifelse(y < n, qbeta(1 - alpha / 2, y + 1, n - y), 1)
out <- cbind(y, n, p, cpl, cpu)
dimnames(out) <-
list(names(y), c("y", "n", "p.hat", "cp low", "cp high"))
options(warn = 0)
return(out)
}
###########################################
## Data reshaping
## - y can be matrix or vector (expects formula and data to be NULL)
## - if formula is specified, data is required (expects y is null)
###########################################
if (is.null(y)) {
# extract from data+formula to vector c(y1, n1, y2, n2)
y <- .extract_freqvec(formula, data, compare)
} else if (is.matrix(y)) {
# Data entry y = c(x2, n2, x1, n1) Vaccinates First (order same but
# subscripts reversed) data vector
y <- c(t(cbind(y[, 1], apply(y, 1, sum))))
# NOTE: the subscripts are reversed compared to the other functions
}
x2 <- y[1] ## vacc
n2 <- y[2] ## vacc
x1 <- y[3] ## con
n1 <- y[4] ## con
p1 <- x1 / n1
p2 <- x2 / n2
rho.mle <- p2 / p1
# itemize all possible tables in omega (17.26)
Y <- data.frame(y1 = rep(0:n1, (n2 + 1)),
y2 = rep(0:n2, rep(n1 + 1, n2 + 1)))
observed <- (seq_len(nrow(Y)))[Y[, 1] == x1 & Y[, 2] == x2]
Y$C <- choose(n1, Y$y1) * choose(n2, Y$y2)
# score statistic - with pi.tilde by quadratic formula
scst <- function(rho, y1, n1, y2, n2) {
pih1 <- y1 / n1 # unrestricted MLE of current data
pih2 <- y2 / n2
if (y1 == 0 && y2 == 0) {
sc <- 0
} else if (y2 == n2) {
sc <- 0
} else {
A <- rho * (n1 + n2)
B <- -(rho * (y1 + n2) + y2 + n1)
C <- y1 + y2
pit1 <- (-B - sqrt(B^2 - 4 * A * C)) / (2 * A)
pit2 <- rho * pit1
sc <- (pih2 - rho * pih1) /
sqrt(rho^2 * pit1 * (1 - pit1) / n1 + pit2 * (1 - pit2) / n2)
}
return(sc)
}
# get Clopper-Pearson intervals for Berger-Boos method
cp <- binci(c(x1, x2), c(n1, n2), alpha = gamma)[, c("cp low", "cp high")]
L1 <- cp[1, 1]
U1 <- cp[1, 2]
L2 <- cp[2, 1]
U2 <- cp[2, 2]
r.min <- L2 / U1
r.max <- U2 / L1
if (rho.mle == 0) {
low <- 0
} else {
# search for lower endpoint
iter <- 0
step <- stepstart
# start above 0 (for quadratic formula)
low <- max(0.0001, r.min)
repeat {
iter <- iter + 1
if (iter > iter.max)
break
if (iter > 1) {
low <- low + step
}
scst_y <- rep(NA, nrow(Y))
for (i in seq_along(scst_y))
scst_y[i] <- scst(low, Y$y1[i], n1, Y$y2[i], n2)
q_set <- Y[scst_y >= scst_y[observed], ]
q_set$n1y1 <- n1 - q_set$y1
q_set$n2y2 <- n2 - q_set$y2
if (gamma > 0)
# Berger-Boos method 17.164
pn <- seq(max(L1, L2 / low), min(U1, U2 / low),
length = nuisance.points)
else
# simple method 17.138
pn <- seq(0, min(1 / low, 1), length = nuisance.points)
if (sum(pn > 1) > 0) {
cat("\nIteration",
iter,
"nuisance parameter outside parameter space\n")
next
}
fy <- rep(NA, nuisance.points)
for (i in 1:nuisance.points) {
pni <- pn[i]
fy[i] <-
sum(
q_set$C * pni^q_set$y1 *
(1 - pni)^q_set$n1y1 *
(low * pni)^q_set$y2 *
(1 - low * pni)^q_set$n2y2
)
}
max.fy <- max(fy)
if (trace.it)
cat("\nIteration", iter, "rho.low", low, "tail", max.fy, "\n")
if (abs(max.fy - (alpha / 2 - gamma / 2)) < converge)
break
if (max.fy > (alpha / 2 - gamma / 2)) {
step <- step / 2
low <- low - step * 2
}
} # end repeat
} # end else
# search for upper endpoint upward from just below asymptotic
# rather than downward from r.max
# get asymptotic interval for starting
# koopman version (slightly narrower interval than mn)
ci.asymp <- .rr.score.asymp(c(x2, n2, x1, n1))
high <- ci.asymp[3] * .9
iter <- 0
step <- stepstart
repeat {
iter <- iter + 1
if (iter > iter.max)
break
if (iter > 1) {
high <- high + step
}
scst_y <- rep(NA, nrow(Y))
for (i in seq_along(scst_y))
scst_y[i] <- scst(high, Y$y1[i], n1, Y$y2[i], n2)
p_set <- Y[scst_y <= scst_y[observed], ]
p_set$n1y1 <- n1 - p_set$y1
p_set$n2y2 <- n2 - p_set$y2
if (gamma > 0)
# Berger-Boos method 17.164
pn <- seq(max(L1, L2 / high),
min(U1, U2 / high),
length = nuisance.points)
else
# simple method 17.138
pn <- seq(0, min(1 / high, 1), length = nuisance.points)
if (sum(pn > 1) > 0) {
cat("\nIteration",
iter,
"nuisance parameter outside parameter space\n")
next
}
fy <- rep(NA, nuisance.points)
for (i in 1:nuisance.points) {
pni <- pn[i]
fy[i] <-
sum(
p_set$C * pni^p_set$y1 *
(1 - pni)^p_set$n1y1 *
(high * pni)^p_set$y2 *
(1 - high * pni)^p_set$n2y2
)
}
max.fy <- max(fy)
if (trace.it)
cat("\nIteration", iter, "rho.high", high, "tail", max.fy, "\n")
if (abs(max.fy - (alpha / 2 - gamma / 2)) < converge)
break
if (max.fy < (alpha / 2 - gamma / 2)) {
step <- step / 2
high <- high - step * 2
}
} # end repeat
int <- c(rho.hat = rho.mle,
low = low,
high = high)
if (!pf) {
names(int) <- c("RR", "LL", "UL")
} else {
int <- 1 - int[c(1, 3, 2)]
names(int) <- c("PF", "LL", "UL")
}
y <- as.data.frame(t(y))
names(y) <- c("y1", "n1", "y2", "n2")
return(rr1$new(
estimate = int,
estimator = ifelse(pf, "PF", "RR"),
y = y,
rnd = rnd,
alpha = alpha
))
}
##
#' Internal function.
#'
#' @param y data
#' @param alpha alpha
#' @param iter.max maximum number of iterations
#' @param converge convergence criterion
#' @param mn boolean whether to calculate MN or use default value of 1.0
#' @export
#' @examples
#' .rr.score.asymp(c(0, 18, 16, 19), mn = FALSE)
#' .rr.score.asymp(c(0, 18, 16, 19), mn = TRUE)
#' @importFrom stats qnorm
.rr.score.asymp <- function(y,
alpha = 0.05,
iter.max = 18.,
converge = 0.0001,
mn = FALSE) {
# asymptotic score interval
# code taken from RRsc()
# choice of either Koopman (mn = FALSE)
# or Miettinenen-Nurminen (mn = TRUE)
# Data entry y = c(x2, n2, x1, n1) Vaccinates First
u_p <- function(p1, p2, n1, n2) {
(1. - p1) / (n1 * p1) + (1. - p2) / (n2 * p2)
}
z_phi <- function(phi, x1, x2, n1, n2, u_p, root, za, MN = FALSE) {
if (MN)
mn <- sqrt((n1 + n2 - 1.) / (n1 + n2))
else
mn <- 1.
p2 <- root(x1, x2, n1, n2, phi)
p1 <- p2 * phi
u <- u_p(p1, p2, n1, n2)
z <- ((x1 - n1 * p1) / (1. - p1)) * sqrt(u) * mn
return(z)
}
root <- function(x1, x2, n1, n2, phi) {
a <- phi * (n1 + n2)
b <- -(phi * (x2 + n1) + x1 + n2)
cc <- x1 + x2
det <- sqrt(b^2. - 4. * a * cc)
rt <- (-b - det) / (2. * a)
return(rt)
}
al2 <- alpha / 2.
z_al2 <- qnorm(al2)
z_ah2 <- qnorm(1. - al2)
zv <- c(z_al2, z_ah2)
x1 <- y[1.]
n1 <- y[2.]
x2 <- y[3.]
n2 <- y[4.]
p1 <- x1 / n1
p2 <- x2 / n2
p1 <- x1 / n1
phi.mle <- p1 / p2
# 0.5 log method
p1 <- (x1 + 0.5) / (n1 + 0.5)
p2 <- (x2 + 0.5) / (n2 + 0.5)
phi <- p1 / p2
v <- sqrt(u_p(p1, p2, (n1 + 0.5), (n2 + 0.5)))
starting <- exp(v * zv + logb(phi))
# Score method
score <- rep(0., length(zv))
for (k in 1.:length(zv)) {
if (k == 1. && x1 == 0.)
score[k] <- 0.
else
if (k == 2. && x2 == 0.) {
score[k] <- Inf
} else {
phi <- c(starting[k], 0.9 * starting[k])
za <- -zv[k]
zz <-
c(
z_phi(phi[1.], x1, x2, n1, n2, u_p, root, za, mn),
z_phi(phi[2.], x1, x2, n1, n2, u_p, root, za, mn)
)
if (abs(za - zz[1.]) > abs(za - zz[2.]))
phi <- rev(phi)
phi_new <- phi[1.]
phi_old <- phi[2.]
iter <- 0.
repeat {
iter <- iter + 1.
if (iter > iter.max)
break
z_new <- z_phi(phi_new, x1, x2, n1, n2, u_p, root, za, mn)
if (abs(za - z_new) < converge)
break
z_old <- z_phi(phi_old, x1, x2, n1, n2, u_p, root, za, mn)
phi <-
exp(logb(phi_old) + logb(phi_new / phi_old) *
((za - z_old) / (z_new - z_old)))
phi_old <- phi_new
phi_new <- phi
}
score[k] <- phi_new
}
}
int <- c(phi.mle, score)
names(int) <- c("point", "LL", "UL")
return(int)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.