#' @title Mantel-Haenszel method, CI for common RR over strata or clusters with
#' sparse data.
#' @description Estimates confidence intervals for the risk ratio or prevented
#' fraction from clustered or stratified data, using a Mantel-Haenszel
#' estimator for sparse data.
#' @details Based on the Mantel-Haenszel (1959) procedure for sparse data
#' developed by Greenland and Robins (1985). The confidence limits are based
#' on asymptotic normality of the log(risk ratio). Agresti and Hartzel (2000)
#' favor this procedure for small, sparse data sets, but they warn that it is
#' less efficient than maximum likelihood for large data sets.
#' @param formula Formula of the form \code{cbind(y, n) ~ x + cluster(w)}, where
#' \code{y} is the number positive, \code{n} is the group size, \code{x} is a
#' factor with two levels of treatment, and \code{w} is a factor indicating
#' the clusters.
#' @param data \code{data.frame} containing variables for formula
#' @param compare Text vector stating the factor levels: `compare[1]` is the
#' vaccinate group to which `compare[2]` (control or reference) is compared.
#' @param Y Matrix of data, \eqn{K \times 4}{K x 4}. Each row is a stratum or
#' cluster. The columns are \eqn{y1, n1, y2, n2}, where the y's are the number
#' of positive in each group, and the n is the total in each group. Group 1
#' corresponds to vaccinates and group 2 are controls or reference. If data
#' entered by formula and dataframe, \code{Y} is generated automatically.
#' @param pf Estimate \emph{RR} or its complement \emph{PF}?
#' @param alpha Complement of the confidence level.
#' @param rnd Number of digits for rounding. Affects display only, not
#' estimates.
#' @return An object of class \code{\link{rr1}} with the following fields.
#'
#' \item{estimate}{vector of point and interval estimates: point estimate,
#' lower confidence limit, upper confidence limit}
#'
#' \item{estimator}{either \code{"PF"} or \code{"RR"}}
#'
#' \item{y}{data.frame of restructured input}
#'
#' \item{rnd}{how many digits to round the display}
#'
#' \item{alpha}{complement of confidence level}
#'
#' @export
#' @note If either all y1's or all y2's are zero, a division by zero may occur,
#' and a NaN returned for some values. \cr \cr Vignette \emph{Examples for
#' Stratified Designs} forthcoming with more examples. \cr Call to this
#' function may be one of two formats: (1) specify \code{data} and
#' \code{formula} or (2) as a matrix \code{Y} \cr \cr \code{RRmh(formula,
#' data, compare = c('b','a'), pf = TRUE, alpha = 0.05, rnd = 3)} \cr \cr
#' \code{RRmh(Y, pf = TRUE, alpha = 0.05, rnd = 3)}
#' @references Mantel N, Haenszel W, 1959. Statistical aspects of the analysis
#' of data from retrospective studies of disease. \emph{Journal of the
#' National Cancer Institute} 22: 719-748. \cr \cr Greenland S, Robins JM,
#' 1985. Estimation of a common effect parameter from sparse follow-up data.
#' \emph{Biometrics} 41: 55-68. Errata, 45: 1323-1324. \cr \cr Agresti A,
#' Hartzel J, 2000. Strategies for comparing treatments on a binary response
#' with multi-centre data. \emph{Statistics in Medicine} 19: 1115-1139. \cr
#' Lachin JM, 2000. \emph{Biostatistical Methods: The Assessment of Relative
#' Risks} (Wiley, New York), Sec. 4.3.1.
#' @author \link{PF-package}
#' @seealso \code{\link{rr1}}
#' @examples
#'
#' ## Table 1 from Gart (1985)
#' ## as data frame
#'
#' # tx group "b" is control
#' RRmh(cbind(y, n) ~ tx + cluster(clus),
#' Table6,
#' compare = c('a', 'b'), pf = FALSE)
#'
#' # RR
#' # 95% interval estimates
#' #
#' # RR LL UL
#' # 2.67 1.37 5.23
#' #
#'
#' ## or as matrix
#' RRmh(Y = table6, pf = FALSE)
#'
#' # RR
#' # 95% interval estimates
#' #
#' # RR LL UL
#' # 2.67 1.37 5.23
################################################################################
#
# Mantel-Haenszel estimate of common risk ratio for K 2x2 tables.
# First draft 5 March 2010. Updated 11 March 2010, 5 Nov 2010.
# Revised 28 Dec 2010 to bring input/output format consistent with David Siev's
# RRstr(). Note that unlike earlier versions, this version does not check the
# data types and dimensions of the inputs.
#
################################################################################
#' @importFrom stats qnorm
RRmh <- function(formula = NULL,
data = NULL,
compare = c("vac", "con"),
Y,
alpha = 0.05,
pf = TRUE,
rnd = 3) {
# convert data to matrix
if (!is.null(formula) && !is.null(data)) {
Y <- .matricize(formula = formula, data = data, compare = compare)$Y
}
colnames(Y) <- c("y1", "n1", "y2", "n2")
rownames(Y) <- paste("Row", seq_len(nrow(Y)), sep = "")
# save data and empirical Rs
Y <- cbind(Y, R.obs = (Y[, 1] / Y[, 2]) / (Y[, 3] / Y[, 4]))
### Innards of the algorithm
zcrit <- qnorm(1 - alpha / 2)
x1vec <- Y[, 1] # vac
n1vec <- Y[, 2] # vac
x2vec <- Y[, 3] # con or ref
n2vec <- Y[, 4] # con or ref
nk <- n1vec + n2vec
# Point estimate (Greenland & Robins, eq. 4. Lachin, eq. 4.17)
numer <- sum(x1vec * n2vec / (nk))
denom <- sum(x2vec * n1vec / (nk))
rr.est <- numer / denom
# variance of log RR (Greenland & Robins, eq. 13)
numer <- ((n1vec * n2vec) * (x1vec + x2vec) - (x1vec * x2vec) * nk) / nk^2
rk <- x1vec * n2vec / nk
sk <- x2vec * n1vec / nk
denom <- sum(rk) * sum(sk)
var.log.rr <- sum(numer) / denom
# Confidence limits
rr.ci.lo <- exp(log(rr.est) - zcrit * sqrt(var.log.rr))
rr.ci.hi <- exp(log(rr.est) + zcrit * sqrt(var.log.rr))
int <- c(rr.est, rr.ci.lo, rr.ci.hi)
if (!pf) {
names(int) <- c("RR", "LL", "UL")
} else {
int <- 1 - int[c(1, 3, 2)]
names(int) <- c("PF", "LL", "UL")
}
return(rr1$new(estimate = int, estimator = ifelse(pf, "PF", "RR"),
y = as.data.frame(Y), rnd = rnd, alpha = alpha))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.