Nothing
##################################################################
# gtreg.sp() function #
##################################################################
# This is the gtreg() function written by Boan Zhang
gtreg.sp <- function(formula, data, groupn, retest = NULL, sens = 1,
spec = 1, linkf = c("logit", "probit", "cloglog"),
method = c("Vansteelandt", "Xie"), sens.ind = NULL,
spec.ind = NULL, start = NULL,
control = gtRegControl(...), ...) {
call <- match.call()
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "groupn"), names(mf), 0)
mf <- mf[c(1, m)]
mf$drop.unused.levels <- TRUE
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")
gr <- model.extract(mf, "groupn")
if (!is.na(pos <- match(deparse(substitute(retest)), names(data))))
retest <- data[, pos]
Y <- model.response(mf, "any")
if (length(dim(Y)) == 1) {
nm <- rownames(Y)
dim(Y) <- NULL
if (!is.null(nm))
names(Y) <- nm
}
X <- if (!is.empty.model(mt))
model.matrix(mt, mf)
else matrix(, NROW(Y), 0)
linkf <- match.arg(linkf)
if ((method <- match.arg(method)) == "Vansteelandt") {
if (!is.null(retest))
warning("Retests cannot be used with Vansteelandt's method.")
fit <- gtreg.fit(Y, X, gr, sens, spec, linkf, start)
}
else {
if (is.null(retest))
fit <- EM(Y, X, gr, sens, spec, linkf, start, control)
else fit <- EM.ret(Y, X, gr, retest, sens, spec, linkf,
sens.ind, spec.ind, start, control)
}
fit <- c(fit, list(call = call, formula = formula, method = method,
link = linkf, terms = mt))
class(fit) <- "gt"
fit
}
##################################################################
# gtreg.halving() function #
##################################################################
gtreg.halving <- function(formula, data, groupn, subg, retest, sens = 1,
spec = 1, linkf = c("logit", "probit", "cloglog"),
sens.ind = NULL, spec.ind = NULL, start = NULL,
control = gtRegControl(...), ...) {
call <- match.call()
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "groupn"), names(mf), 0)
mf <- mf[c(1, m)]
mf$drop.unused.levels <- TRUE
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")
gr <- model.extract(mf, "groupn")
if (!is.na(pos <- match(deparse(substitute(retest)), names(data))))
retest <- data[, pos]
if (!is.na(pos <- match(deparse(substitute(subg)), names(data))))
subg <- data[, pos]
Y <- model.response(mf, "any")
if (length(dim(Y)) == 1) {
nm <- rownames(Y)
dim(Y) <- NULL
if (!is.null(nm))
names(Y) <- nm
}
X <- if (!is.empty.model(mt))
model.matrix(mt, mf)
else matrix(, NROW(Y), 0)
linkf <- match.arg(linkf)
fit <- EM.halving(Y, X, gr, subg, retest, sens, spec, linkf,
sens.ind, spec.ind, start, control)
fit <- c(fit, list(call = call, formula = formula, method = "Xie",
link = linkf, terms = mt))
class(fit) <- "gt"
fit
}
##################################################################
# gtreg.mp() function #
##################################################################
# "mp" refers to matrix pooling, another name for array testing
gtreg.mp <- function(formula, data, coln, rown, arrayn, retest = NULL,
sens = 1, spec = 1,
linkf = c("logit", "probit", "cloglog"),
sens.ind = NULL, spec.ind = NULL, start = NULL,
control = gtRegControl(...), ...) {
call <- match.call()
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "coln", "rown",
"arrayn"), names(mf), 0)
mf <- mf[c(1, m)]
mf$drop.unused.levels <- TRUE
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")
arrayn <- model.extract(mf, "arrayn")
rown <- model.extract(mf, "rown")
coln <- model.extract(mf, "coln")
if (!is.na(pos <- match(deparse(substitute(retest)), names(data))))
retest <- data[, pos]
Y <- model.response(mf, "any")
if (length(dim(Y)) == 1) {
nm <- rownames(Y)
dim(Y) <- NULL
if (!is.null(nm))
names(Y) <- nm
}
X <- if (!is.empty.model(mt))
model.matrix(mt, mf)
else matrix(, NROW(Y), 0)
linkf <- match.arg(linkf)
fit <- EM.mp(Y[, 1], Y[, 2], X, coln, rown, arrayn, retest,
sens, spec, linkf, sens.ind, spec.ind, start, control)
fit <- c(fit, list(call = call, formula = formula, link = linkf,
terms = mt))
class(fit) <- c("gt.mp", "gt")
fit
}
# Supporting functions
##################################################################
# gtreg.fit() function #
##################################################################
gtreg.fit <- function(Y, X, groupn, sens, spec, linkf, start = NULL){
z <- tapply(Y, groupn, tail, n = 1)
num.g <- max(groupn)
K <- ncol(X)
sam <- length(Y)
if (is.null(start)) {
if (K == 1) {
cova.mean <- as.matrix(tapply(X, groupn, mean))
optim.meth <- "BFGS"
}
else {
temp <- by(X, groupn, colMeans)
cova.mean <- do.call(rbind, temp)
optim.meth <- "Nelder-Mead"
}
beta.group <- glm.fit(cova.mean, as.vector(z),
family = binomial(link = linkf))$coefficients
}
else {
beta.group <- start
names(beta.group) <- dimnames(X)[[2]]
optim.meth <- ifelse(K == 1, "BFGS", "Nelder-Mead")
}
logL <- function(beta) {
pijk <- switch(linkf, logit = plogis(X %*% beta),
probit = pnorm(X %*% beta),
cloglog = 1 - exp(-exp(X %*% beta)))
prodp <- tapply(1 - pijk, groupn, prod)
(-1)*sum(z * log(sens + (1 - sens - spec) * prodp) +
(1 - z) * log(1 - sens - (1 - sens - spec) * prodp))
}
mod.fit <- optim(par = beta.group, fn = logL, method = optim.meth,
control = list(trace = 0, maxit = 1000), hessian = TRUE)
if (det(mod.fit$hessian) == 0)
mod.fit <- optim(par = beta.group, fn = logL, method = "SANN",
hessian = TRUE)
logL0 <- function(beta) {
inter <- rep(beta, sam)
pijk <- switch(linkf, logit = plogis(inter), probit = pnorm(inter),
cloglog = 1 - exp(-exp(inter)))
prodp <- tapply(1 - pijk, groupn, prod)
(-1)*sum(z * log(sens + (1 - sens - spec) * prodp) +
(1 - z) * log(1 - sens - (1 - sens - spec) * prodp))
}
mod.fit0 <- optim(par = binomial()$linkfun(mean(z)), fn = logL0,
method = "BFGS", control = list(trace = 0, maxit = 1000))
nulld <- 2 * mod.fit0$value
residd <- 2 * mod.fit$value
xib <- X %*% mod.fit$par
pijk <- switch(linkf, logit = plogis(xib), probit = pnorm(xib),
cloglog = 1 - exp(-exp(xib)))
prodp <- tapply(1 - pijk, groupn, prod)
zhat <- sens + (1 - sens - spec) * prodp
residual <- z - zhat
aic <- residd + 2 * K
if (mod.fit$convergence == 0)
counts <- mod.fit$counts[[1]]
else warning("Maximum number of iterations exceeded.")
list(coefficients = mod.fit$par, hessian = mod.fit$hessian,
fitted.values = zhat, deviance = residd, df.residual = num.g - K,
null.deviance = nulld, df.null = num.g - 1, aic = aic,
counts = counts, residuals = residual, z = z)
}
##################################################################
# gtRegControl() function #
##################################################################
#' @title Auxiliary for controlling group testing regression
#'
#' @description Auxiliary function to control fitting parameters
#' of the EM algorithm used internally in \code{\link{gtReg}}
#' for simple pooling (\kbd{type = "sp"}) with \kbd{method = "Xie"}
#' or for array testing (\kbd{type = "array"}).
#'
#' @param tol convergence criterion.
#' @param n.gibbs the Gibbs sample size to be used in each E step
#' of the EM algorithm, for array testing. The default is 1000.
#' @param n.burnin the number of samples in the burn-in period,
#' for array testing. The default is 20.
#' @param maxit maximum number of iterations in the EM algorithm.
#' @param trace a logical value indicating whether the output should
#' be printed for each iteration. The default is \kbd{FALSE}.
#' @param time a logical value indicating whether the length of time
#' for the model fitting should be printed. The default is \kbd{TRUE}.
#'
#' @return A list with components named as the input arguments.
#'
#' @author This function was originally written as the \code{gt.control}
#' function for the binGroup package. Minor modifications have been
#' made for inclusion in the binGroup2 package.
#'
#' @examples
#' # The default settings:
#' gtRegControl()
gtRegControl <- function(tol = 0.0001, n.gibbs = 1000, n.burnin = 20,
maxit = 500, trace = FALSE, time = TRUE) {
if (!is.numeric(tol) || tol <= 0)
stop("value of 'tol' must be > 0")
if (round(n.gibbs) != n.gibbs || n.gibbs <= 0)
stop("value of 'n.gibbs' must be a positive integer")
if (round(n.burnin) != n.burnin || n.burnin <= 0)
stop("value of 'n.burnin' must be a positive integer")
if (!is.numeric(maxit) || maxit <= 0)
stop("maximum number of iterations must be > 0")
list(tol = tol, n.gibbs = n.gibbs, n.burnin = n.burnin, maxit = maxit,
trace = trace, time = time)
}
##################################################################
# EM() function #
##################################################################
EM <- function(Y, X, groupn, sens, spec, linkf, start = NULL,
control = gtRegControl()) {
if (control$time)
start.time <- proc.time()
z <- tapply(Y, groupn, tail, n = 1)
num.g <- max(groupn)
K <- ncol(X)
if (is.null(start)) {
if (K == 1)
cova.mean <- as.matrix(tapply(X, groupn, mean))
else {
temp <- by(X, groupn, colMeans)
cova.mean <- do.call(rbind, temp)
}
beta.old <- lm.fit(cova.mean, z)$coefficients
}
else beta.old <- start
sam <- length(Y)
vec <- 1:sam
group.sizes <- tapply(Y, groupn, length)
diff <- 1
counts <- 1
extra.loop <- FALSE
next.loop <- TRUE
while (next.loop) {
xib <- X %*% beta.old
pijk <- switch(linkf, logit = plogis(xib),
probit = pnorm(xib), cloglog = 1 - exp(-exp(xib)))
prodp <- tapply(1 - pijk, groupn, prod)
den <- rep((1 - spec) * prodp + sens * (1 - prodp), group.sizes)
den2 <- rep(spec * prodp + (1 - sens) * (1 - prodp), group.sizes)
expect <- rep(NA, times = sam)
for (i in vec) {
if (Y[i] == 0)
expect[i] <- (1 - sens) * pijk[i] / den2[i]
else expect[i] <- sens * pijk[i] / den[i]
}
if (!extra.loop) {
suppress <- function(w)
if (any(grepl("non-integer #successes in a binomial glm", w)))
invokeRestart("muffleWarning")
mod.fit <- withCallingHandlers(glm.fit(X, expect,
family = binomial(link = linkf)),
warning = suppress)
diff <- max(abs((beta.old - mod.fit$coefficients) / beta.old))
beta.old <- mod.fit$coefficients
if (control$trace)
cat("beta is", beta.old, "\tdiff is", diff, "\n")
counts <- counts + 1
if (diff <= control$tol || counts > control$maxit)
extra.loop <- TRUE
}
else next.loop <- FALSE
}
erf <- 2 * pijk - 1
pt1 <- switch(linkf, logit = -exp(xib) / (1 + exp(xib))^2,
probit = sqrt(2) * xib * exp(-xib^2 / 2) /
(sqrt(pi) * (1 - erf)) - 2 * exp(-xib^2) /
(pi * (1 - erf)^2), cloglog = -exp(xib))
pt2 <- switch(linkf, logit = 0,
probit = (8 * exp(-xib^2 / 2) * erf +
2 * xib * sqrt(2 * pi) * erf^2 -
2 * xib * sqrt(2 * pi)) * exp(-xib^2 / 2) /
((1 + erf)^2 * pi * (1 - erf)^2),
cloglog = -(exp(xib - exp(xib)) + exp(2 * xib - exp(xib)) -
exp(xib)) / (exp(-exp(xib)) - 1)^2)
nm <- pt1 + expect * pt2
sign1 <- as.vector(sign(nm))
nn <- as.vector(sqrt(abs(nm)))
x2 <- X * nn
m <- (t(x2) %*% (sign1 * x2))
b <- array(NA, c(K, K, sum(group.sizes^2)))
p <- 1
for (i in vec) for (j in vec[groupn == groupn[i]]) {
wii <- ifelse(i == j, expect[i] - expect[i]^2,
expect[i] * (pijk[j] - expect[j]))
coe <- switch(linkf, logit = 1,
probit = 8 * exp(-(xib[i]^2 + xib[j]^2) / 2) /
((1 - erf[i]^2) * (1 - erf[j]^2) * pi),
cloglog = exp(xib[i] + xib[j]) /
((exp(-exp(xib[i])) - 1) * (exp(-exp(xib[j])) - 1)))
b[, , p] <- wii * coe * X[i, ] %*% t(X[j, ])
p <- p + 1
}
m1 <- apply(b, c(1, 2), sum)
H <- -(m + m1)
zhat <- sens + (1 - sens - spec) * prodp
residual <- z - zhat
residd <- -2 * sum(z * log(zhat) + (1 - z) * log(1 - zhat))
logL0 <- function(beta) {
inter <- rep(beta, sam)
pijk <- switch(linkf, logit = plogis(inter), probit = pnorm(inter),
cloglog = 1 - exp(-exp(inter)))
prodp <- tapply(1 - pijk, groupn, prod)
(-1)*sum(z * log(sens + (1 - sens - spec) * prodp) +
(1 - z) * log(1 - sens - (1 - sens - spec) * prodp))
}
mod.fit0 <- optim(par = binomial()$linkfun(mean(z)), fn = logL0,
method = "BFGS", control = list(trace = 0, maxit = 1000))
nulld <- 2 * mod.fit0$value
aic <- residd + 2 * K
if (diff > control$tol && counts > control$maxit)
warning("EM algorithm did not converge.")
if (control$time) {
end.time <- proc.time()
save.time <- end.time - start.time
cat("\n Number of minutes running:", round(save.time[3] / 60, 2), "\n \n")
}
list(coefficients = beta.old, hessian = H, fitted.values = zhat,
deviance = residd, df.residual = num.g - K, null.deviance = nulld,
df.null = num.g - 1, aic = aic, counts = counts - 1,
residuals = residual, z = z)
}
##################################################################
# EM.ret() function #
##################################################################
EM.ret <- function(Y, X, groupn, ret, sens, spec, linkf,
sens.ind, spec.ind,
start = NULL, control = gtRegControl()) {
if (control$time)
start.time <- proc.time()
if (is.null(sens.ind))
sens.ind <- sens
if (is.null(spec.ind))
spec.ind <- spec
z <- tapply(Y, groupn, tail, n = 1)
num.g <- max(groupn)
K <- ncol(X)
if (is.null(start)) {
if (K == 1)
cova.mean <- as.matrix(tapply(X, groupn, mean))
else {
temp <- by(X, groupn, colMeans)
cova.mean <- do.call(rbind, temp)
}
beta.old <- lm.fit(cova.mean, z)$coefficients
}
else beta.old <- start
sam <- length(Y)
vec <- 1:sam
group.sizes <- tapply(Y, groupn, length)
diff <- 1
counts <- 1
extra.loop <- FALSE
next.loop <- TRUE
a0 <- ifelse(ret == 1, sens.ind, 1 - sens.ind)
a1 <- ifelse(ret == 0, spec.ind, 1 - spec.ind)
while (next.loop) {
xib <- X %*% beta.old
pijk <- switch(linkf, logit = plogis(xib),
probit = pnorm(xib), cloglog = 1 - exp(-exp(xib)))
erf <- 2 * pijk - 1
prodp <- tapply(1 - pijk, groupn, prod)
den2 <- rep(spec * prodp + (1 - sens) * (1 - prodp),
group.sizes)
expect <- rep(NA, times = sam)
i <- 1
while (i <= sam) {
if (Y[i] == 0)
expect[i] <- (1 - sens) * pijk[i] / den2[i]
else {
vec1 <- vec[groupn == groupn[i]]
mb2 <- 1
for (l in vec1) {
temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
mb2 <- mb2 * temp
}
null <- 1
for (l in vec1) {
temp <- a1[l] * (1 - pijk[l])
null <- null * temp
}
den <- mb2 * sens + null * (1 - sens - spec)
for (l1 in vec1) {
temp <- a0[l1] * pijk[l1] + a1[l1] * (1 - pijk[l1])
num <- mb2 / temp * a0[l1] * pijk[l1] * sens
expect[l1] <- num / den
}
i <- l1
}
i <- i + 1
}
expect[expect > 1] <- 1
expect[expect < 0] <- 0
if (!extra.loop) {
suppress <- function(w)
if (any(grepl("non-integer #successes in a binomial glm", w)))
invokeRestart("muffleWarning")
mod.fit <- withCallingHandlers(glm.fit(X, expect,
family = binomial(link = linkf)),
warning = suppress)
diff <- max(abs((beta.old - mod.fit$coefficients) / beta.old))
beta.old <- mod.fit$coefficients
if (control$trace)
cat("beta is", beta.old, "\tdiff is", diff, "\n")
counts <- counts + 1
if (diff <= control$tol || counts > control$maxit)
extra.loop <- TRUE
}
else next.loop <- FALSE
}
pt1 <- switch(linkf, logit = -exp(xib) / (1 + exp(xib))^2,
probit = sqrt(2) * xib * exp(-xib^2 / 2) /
(sqrt(pi) * (1 - erf)) - 2 * exp(-xib^2) /
(pi * (1 - erf)^2), cloglog = -exp(xib))
pt2 <- switch(linkf, logit = 0,
probit = (8 * exp(-xib^2 / 2) * erf +
2 * xib * sqrt(2 * pi) * erf^2 -
2 * xib * sqrt(2 * pi)) * exp(-xib^2 / 2) /
((1 + erf)^2 * pi * (1 - erf)^2),
cloglog = -(exp(xib - exp(xib)) + exp(2 * xib - exp(xib)) -
exp(xib)) / (exp(-exp(xib)) - 1)^2)
nm <- pt1 + expect * pt2
sign1 <- as.vector(sign(nm))
nn <- as.vector(sqrt(abs(nm)))
x2 <- X * nn
m <- (t(x2) %*% (sign1 * x2))
m1 <- 0
for (i in vec) {
vec1 <- vec[groupn == groupn[i]]
if (Y[i] == 0) {
for (j in vec1) {
coe <- switch(linkf, logit = 1,
probit = 8 * exp(-(xib[i]^2 + xib[j]^2) / 2) /
((1 - erf[i]^2) * (1 - erf[j]^2) * pi),
cloglog = exp(xib[i] + xib[j]) /
((exp(-exp(xib[i])) - 1) * (exp(-exp(xib[j])) - 1)))
wii <- ifelse(i == j, expect[i] - expect[i]^2,
expect[i] * (pijk[j] - expect[j]))
tim <- wii * coe * X[i, ] %*% t(X[j, ])
m1 <- m1 + tim
}
}
else {
for (j in vec1) {
temp <- a0[j] * pijk[j] + a1[j] * (1 - pijk[j])
eii <- expect[i] / temp * a0[j] * pijk[j]
wii <- ifelse(i == j, expect[i] - expect[i]^2,
eii - expect[i] * expect[j])
coe <- switch(linkf, logit = 1,
probit = 8 * exp(-(xib[i]^2 + xib[j]^2) / 2) /
((1 - erf[i]^2) * (1 - erf[j]^2) * pi),
cloglog = exp(xib[i] + xib[j]) /
((exp(-exp(xib[i])) - 1) * (exp(-exp(xib[j])) - 1)))
tim <- wii * coe * X[i, ] %*% t(X[j, ])
m1 <- m1 + tim
}
}
}
H <- -(m + m1)
zhat <- sens + (1 - sens - spec) * prodp
residual <- z - zhat
logl <- 0
for (grn in 1:num.g) {
if (z[grn] == 1) {
vec1 <- vec[groupn == grn]
mb2 <- 1
for (l in vec1) {
temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
mb2 <- mb2 * temp
}
null <- 1
for (l in vec1) {
temp <- a1[l] * (1 - pijk[l])
null <- null * temp
}
prob1 <- mb2 * sens + null * (1 - sens - spec)
} else prob1 <- 1 - zhat[grn]
logl <- logl - log(prob1)
}
aic <- 2 * logl + 2 * K
if (diff > control$tol && counts > control$maxit)
warning("EM algorithm did not converge.")
if (control$time) {
end.time <- proc.time()
save.time <- end.time - start.time
cat("\n Number of minutes running:", round(save.time[3] / 60, 2), "\n \n")
}
list(coefficients = beta.old, hessian = H, fitted.values = zhat,
deviance = 2 * logl, aic = aic, counts = counts - 1,
residuals = residual, z = z)
}
##################################################################
# EM.mp() function #
##################################################################
EM.mp <- function(col.resp, row.resp, X, coln, rown, sqn, ret, sens,
spec, linkf, sens.ind, spec.ind, start = NULL,
control = gtRegControl()) {
if (control$time)
start.time <- proc.time()
if (is.null(sens.ind))
sens.ind <- sens
if (is.null(spec.ind))
spec.ind <- spec
len <- max(sqn)
diff <- 1
counts <- 1
sam <- length(sqn)
col.groupn <- coln[sqn == 1]
if (len > 1) {
for (i in 2:len) {
temp <- max(col.groupn) + coln[sqn == i]
col.groupn <- c(col.groupn, temp)
}
}
if (is.null(start)) {
mod.fit <- try(gtreg.fit(col.resp, X, col.groupn,
sens, spec, linkf))
if (inherits(mod.fit, "try-error")) {
row.groupn <- rown[sqn == 1]
if (len > 1) {
for (i in 2:len) {
temp <- max(row.groupn) + rown[sqn == i]
row.groupn <- c(row.groupn, temp)
}
}
mod.fit <- gtreg.fit(row.resp, X, row.groupn,
sens, spec, linkf)
}
beta.old <- mod.fit$coefficients
}
else beta.old <- start
extra.loop <- FALSE
next.loop <- TRUE
while (next.loop) {
xib <- X %*% beta.old
pijk.all <- switch(linkf, logit = plogis(xib),
probit = pnorm(xib), cloglog = 1 - exp(-exp(xib)))
expect.all <- numeric(0)
mat2 <- index <- 0
erf <- 2 * pijk.all - 1
for (arrayn in 1:len) {
index.r <- index.c <- vector("logical", length = sam)
for (i in 1:sam) {
if (rown[i] == 1 && sqn[i] == arrayn)
index.c[i] <- TRUE
else index.c[i] <- FALSE
if (coln[i] == 1 && sqn[i] == arrayn)
index.r[i] <- TRUE
else index.r[i] <- FALSE
}
n.row <- max(rown[index.r])
n.col <- max(coln[index.c])
rowresp <- row.resp[index.r]
colresp <- col.resp[index.c]
index <- max(index) + 1:(n.row * n.col)
if (!is.null(ret)) {
re.ind <- na.omit(cbind(coln[sqn == arrayn],
rown[sqn == arrayn], ret[sqn == arrayn]))
re <- ifelse(re.ind[, 3] == 1, sens.ind, 1 - sens.ind)
re1 <- ifelse(re.ind[, 3] == 0, spec.ind, 1 - spec.ind)
}
pijk <- matrix(pijk.all[sqn == arrayn], nrow = n.row)
a <- ifelse(rowresp == 1, sens, 1 - sens)
b <- ifelse(colresp == 1, sens, 1 - sens)
a1 <- ifelse(rowresp == 0, spec, 1 - spec)
b1 <- ifelse(colresp == 0, spec, 1 - spec)
mat <- array(NA, c(n.row, n.col, control$n.gibbs))
y <- matrix(0, nrow = n.row, ncol = n.col)
for (k in 1:(control$n.gibbs + control$n.burnin)) {
l <- 1
for (j in 1:n.col) for (i in 1:n.row) {
num <- a[i] * b[j] * pijk[i, j]
den.r <- ifelse(sum(y[i, ]) - y[i, j] > 0,
a[i], a1[i])
den.c <- ifelse(sum(y[, j]) - y[i, j] > 0,
b[j], b1[j])
den2 <- den.r * den.c * (1 - pijk[i, j])
if (!is.null(ret)) {
if (l <= length(re) && j == re.ind[l, 1] &&
i == re.ind[l, 2]) {
num <- num * re[l]
den2 <- den2 * re1[l]
l <- l + 1
}
}
den <- num + den2
if (den != 0) {
cond.p <- num/den
y[i, j] <- rbinom(1, 1, cond.p)
}
else y[i, j] <- 0
}
if (k > control$n.burnin) {
mat[, , k - control$n.burnin] <- y
vec <- as.vector(y)
if (extra.loop)
for (i1 in index[vec == 1]) for (j1 in index[vec == 1]) {
bq <- switch(linkf, logit = 1,
probit = 8 * exp(-(xib[i1]^2 + xib[j1]^2) / 2) /
((1 - erf[i1]^2) * (1 - erf[j1]^2) * pi),
cloglog = exp(xib[i1] + xib[j1]) /
((exp(-exp(xib[i1])) - 1) *
(exp(-exp(xib[j1])) - 1))) *
X[i1, ] %*% t(X[j1, ])
mat2 <- mat2 + bq
}
}
}
expect.m <- apply(mat, c(1, 2), mean)
expect <- as.vector(expect.m)
expect.all <- c(expect.all, expect)
}
if (!extra.loop) {
suppress <- function(w)
if (any(grepl("non-integer #successes in a binomial glm", w)))
invokeRestart("muffleWarning")
mod.fit <- withCallingHandlers(glm.fit(X, expect.all,
family = binomial(link = linkf)),
warning = suppress)
diff <- max(abs((beta.old - mod.fit$coefficients) / beta.old))
beta.old <- mod.fit$coefficients
if (control$trace)
cat("beta is", beta.old, "\tdiff is", diff, "\n")
counts <- counts + 1
if (diff <= control$tol || counts > control$maxit)
extra.loop <- TRUE
}
else next.loop <- FALSE
}
index <- 0
first <- mat2 / control$n.gibbs
second <- 0
for (arrayn in 1:len) {
n.row <- max(rown[sqn == arrayn])
n.col <- max(coln[sqn == arrayn])
index <- max(index) + 1:(n.row * n.col)
expect <- expect.all[index]
for (i1 in index) for (j1 in index) {
coe <- switch(linkf, logit = 1,
probit = 8 * exp(-(xib[i1]^2 + xib[j1]^2) / 2) /
((1 - erf[i1]^2) * (1 - erf[j1]^2) * pi),
cloglog = exp(xib[i1] + xib[j1]) /
((exp(-exp(xib[i1])) - 1) * (exp(-exp(xib[j1])) - 1)))
tim <- expect.all[i1] * expect.all[j1] * coe * X[i1,] %*% t(X[j1, ])
second <- second + tim
}
}
m1 <- first - second
pt1 <- switch(linkf, logit = -exp(xib) / (1 + exp(xib))^2,
probit = sqrt(2) * xib * exp(-xib^2 / 2) /
(sqrt(pi) * (1 - erf)) - 2 * exp(-xib^2) /
(pi * (1 - erf)^2), cloglog = -exp(xib))
pt2 <- switch(linkf, logit = 0,
probit = (8 * exp(-xib^2 / 2) * erf +
2 * xib * sqrt(2 * pi) * erf^2 -
2 * xib * sqrt(2 * pi)) * exp(-xib^2 / 2) /
((1 + erf)^2 * pi * (1 - erf)^2),
cloglog = -(exp(xib - exp(xib)) +
exp(2 * xib - exp(xib)) - exp(xib)) /
(exp(-exp(xib)) - 1)^2)
nm <- pt1 + expect.all * pt2
sign1 <- as.vector(sign(nm))
nn <- as.vector(sqrt(abs(nm)))
x2 <- X * nn
m <- (t(x2) %*% (sign1 * x2))
H <- -(m + m1)
if (diff > control$tol && counts > control$maxit)
warning("EM algorithm did not converge.")
if (control$time) {
end.time <- proc.time()
save.time <- end.time - start.time
cat("\n Number of minutes running:", round(save.time[3] / 60, 2), "\n \n")
}
list(coefficients = beta.old, hessian = H,
Gibbs.sample.size = control$n.gibbs, counts = counts - 1)
}
##################################################################
# EM.halving() function #
##################################################################
EM.halving <- function(Y, X, groupn, subg, ret, sens, spec, linkf,
sens.ind, spec.ind,
start = NULL, control = gtRegControl()) {
if (control$time)
start.time <- proc.time()
if (is.null(sens.ind))
sens.ind <- sens
if (is.null(spec.ind))
spec.ind <- spec
z <- tapply(Y, groupn, tail, n = 1)
num.g <- max(groupn)
K <- ncol(X)
if (is.null(start)) {
if (K == 1)
cova.mean <- as.matrix(tapply(X, groupn, mean))
else {
temp <- by(X, groupn, colMeans)
cova.mean <- do.call(rbind, temp)
}
beta.old <- lm.fit(cova.mean, z)$coefficients
} else beta.old <- start
sam <- length(Y)
vec <- 1:sam
group.sizes <- tapply(Y, groupn, length)
diff <- 1
counts <- 1
extra.loop <- FALSE
next.loop <- TRUE
a0 <- ifelse(ret == 1, sens.ind, 1 - sens.ind)
a1 <- ifelse(ret == 0, spec.ind, 1 - spec.ind)
while (next.loop) {
xib <- X %*% beta.old
pijk <- switch(linkf, logit = plogis(xib),
probit = pnorm(xib),
cloglog = 1 - exp(-exp(xib)))
erf <- 2 * pijk - 1
prodp <- tapply(1 - pijk, groupn, prod)
den2 <- rep(spec * prodp + (1 - sens) * (1 - prodp),
group.sizes)
expect <- rep(NA, times = sam)
i <- 1
while (i <= sam) {
if (Y[i] == 0)
expect[i] <- (1 - sens) * pijk[i] / den2[i]
else {
if (subg[i] == 0) {
vec1 <- vec[groupn == groupn[i]]
gs <- length(vec1)
sub1 <- vec1[1:ceiling(gs / 2)]
sub2 <- vec1[(ceiling(gs / 2) + 1):gs]
if (subg[vec1[gs]] == 0) {
den <- (1 - spec) * spec^2 * prod(1 - pijk[sub1]) *
prod(1 - pijk[sub2]) +
spec * (1 - sens) * sens * prod(1 - pijk[sub1]) *
(1 - prod(1 - pijk[sub2])) +
spec * (1 - sens) * sens * prod(1 - pijk[sub2]) *
(1 - prod(1 - pijk[sub1])) +
(1 - sens)^2 * sens * (1 - prod(1 - pijk[sub1])) *
(1 - prod(1 - pijk[sub2]))
ab1 <- (1 - sens) * sens *
(spec * prod(1 - pijk[sub2]) +
(1 - sens) * (1 - prod(1 - pijk[sub2])))
ab2 <- (1 - sens) * sens *
(spec * prod(1 - pijk[sub1]) +
(1 - sens) * (1 - prod(1 - pijk[sub1])))
for (l1 in sub1) {
expect[l1] <- ab1 * pijk[l1] / den
}
for (l1 in sub2) {
expect[l1] <- ab2 * pijk[l1] / den
}
}
if (subg[vec1[gs]] == 1) {
mb2 <- 1
for (l in sub2) {
temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
mb2 <- mb2 * temp
}
null <- 1
for (l in sub2) {
temp <- a1[l] * (1 - pijk[l])
null <- null * temp
}
den <- (1 - spec)^2 * spec * null * prod(1 - pijk[sub1]) +
(1 - spec) * (1 - sens) * sens * null *
(1 - prod(1 - pijk[sub1])) +
spec * sens^2 * (mb2 - null) * prod(1 - pijk[sub1]) +
(1 - sens) * sens^2 * (mb2 - null) * (1 - prod(1 - pijk[sub1]))
ab1 <- (1 - sens) * sens * (mb2 * sens + null * (1 - sens - spec))
for (l1 in sub1) {
expect[l1] <- ab1 * pijk[l1] / den
}
for (l1 in sub2) {
temp <- a0[l1] * pijk[l1] + a1[l1] * (1 - pijk[l1])
num <- mb2 / temp * a0[l1] * pijk[l1] * sens^2 *
(spec * prod(1 - pijk[sub1]) +
(1 - sens) * (1 - prod(1 - pijk[sub1])))
expect[l1] <- num / den
}
}
i <- l1
} else {
vec1 <- vec[groupn == groupn[i]]
gs <- length(vec1)
sub1 <- vec1[1:ceiling(gs / 2)]
sub2 <- vec1[(ceiling(gs / 2) + 1):gs]
if (subg[vec1[gs]] == 0) {
mb2 <- 1
for (l in sub1) {
temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
mb2 <- mb2 * temp
}
null <- 1
for (l in sub1) {
temp <- a1[l] * (1 - pijk[l])
null <- null * temp
}
den <- (1 - spec)^2 * spec * null * prod(1 - pijk[sub2]) +
(1 - spec) * (1 - sens) * sens * null *
(1 - prod(1 - pijk[sub2])) +
spec * sens^2 * (mb2 - null) * prod(1 - pijk[sub2]) +
(1 - sens) * sens^2 * (mb2 - null) * (1 - prod(1 - pijk[sub2]))
ab1 <- (1 - sens) * sens * (mb2 * sens + null * (1 - sens - spec))
for (l1 in sub1) {
temp <- a0[l1] * pijk[l1] + a1[l1] * (1 - pijk[l1])
num <- mb2 / temp * a0[l1] * pijk[l1] * sens^2 *
(spec * prod(1 - pijk[sub2]) +
(1 - sens) * (1 - prod(1 - pijk[sub2])))
expect[l1] <- num / den
}
for (l1 in sub2) {
expect[l1] <- ab1 * pijk[l1] / den
}
}
if (subg[vec1[gs]] == 1) {
mb2 <- 1
for (l in sub1) {
temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
mb2 <- mb2 * temp
}
null <- 1
for (l in sub1) {
temp <- a1[l] * (1 - pijk[l])
null <- null * temp
}
mb2a <- 1
for (l in sub2) {
temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
mb2a <- mb2a * temp
}
nulla <- 1
for (l in sub2) {
temp <- a1[l] * (1 - pijk[l])
nulla <- nulla * temp
}
den <- (1 - spec)^3 * null * nulla +
(1 - spec) * sens^2 * null * (mb2a - nulla) +
(1 - spec) * sens^2 * (mb2 - null) * nulla +
sens^3 * (mb2 - null) * (mb2a - nulla)
for (l1 in sub1) {
temp <- a0[l1] * pijk[l1] + a1[l1] * (1 - pijk[l1])
num <- mb2 / temp * a0[l1] * pijk[l1] * sens^2 *
(mb2a * sens + nulla * (1 - sens - spec))
expect[l1] <- num / den
}
for (l1 in sub2) {
temp <- a0[l1] * pijk[l1] + a1[l1] * (1 - pijk[l1])
num <- mb2a / temp * a0[l1] * pijk[l1] * sens^2 *
(mb2 * sens + null * (1 - sens - spec))
expect[l1] <- num / den
}
}
i <- l1
}
}
i <- i + 1
}
expect[expect > 1] <- 1
expect[expect < 0] <- 0
if (!extra.loop) {
suppress <- function(w)
if (any(grepl("non-integer #successes in a binomial glm", w)))
invokeRestart("muffleWarning")
mod.fit <- withCallingHandlers(glm.fit(X, expect,
family = binomial(link = linkf)),
warning = suppress)
diff <- max(abs((beta.old - mod.fit$coefficients) / beta.old))
beta.old <- mod.fit$coefficients
if (control$trace)
cat("beta is", beta.old, "\tdiff is", diff, "\n")
counts <- counts + 1
if (diff <= control$tol || counts > control$maxit)
extra.loop <- TRUE
}
else next.loop <- FALSE
}
pt1 <- switch(linkf, logit = -exp(xib)/(1 + exp(xib))^2,
probit = sqrt(2) * xib * exp(-xib^2 / 2) /
(sqrt(pi) * (1 - erf)) - 2 * exp(-xib^2) /
(pi * (1 - erf)^2), cloglog = -exp(xib))
pt2 <- switch(linkf, logit = 0,
probit = (8 * exp(-xib^2 / 2) * erf +
2 * xib * sqrt(2 * pi) * erf^2 -
2 * xib * sqrt(2 * pi)) * exp(-xib^2 / 2) /
((1 + erf)^2 * pi * (1 - erf)^2),
cloglog = -(exp(xib - exp(xib)) + exp(2 * xib - exp(xib)) -
exp(xib))/(exp(-exp(xib)) - 1)^2)
nm <- pt1 + expect * pt2
sign1 <- as.vector(sign(nm))
nn <- as.vector(sqrt(abs(nm)))
x2 <- X * nn
m <- (t(x2) %*% (sign1 * x2))
m1 <- 0
i <- 1
while (i <= sam) {
vec1 <- vec[groupn == groupn[i]]
gs <- length(vec1)
if (Y[i] == 0) {
for (j in vec1) {
wii <- ifelse(i == j, expect[i] - expect[i]^2,
expect[i] * (pijk[j] - expect[j]))
tim <- wii * X[i, ] %*% t(X[j, ])
m1 <- m1 + tim
}
} else {
sub1 <- vec1[1:ceiling(gs / 2)]
sub2 <- vec1[(ceiling(gs / 2) + 1):gs]
for (i in sub1) {
for (j in sub1) {
if (subg[j] == 0) {
eii <- expect[i] * pijk[j]
} else {
temp <- a0[j] * pijk[j] + a1[j] * (1 - pijk[j])
eii <- expect[i] / temp * a0[j] * pijk[j]
}
wii <- ifelse(i == j, expect[i] - expect[i]^2,
eii - expect[i] * expect[j])
tim <- wii * X[i, ] %*% t(X[j, ])
m1 <- m1 + tim
}
for (j in sub2) {
if (subg[j] == 0) {
temp <- spec * prod(1 - pijk[sub2]) +
(1 - sens) * (1 - prod(1 - pijk[sub2]))
eii <- expect[i] * (1 - sens) * pijk[j] / temp
} else {
mb2a <- 1
for (l in sub2) {
temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
mb2a <- mb2a * temp
}
nulla <- 1
for (l in sub2) {
temp <- a1[l] * (1 - pijk[l])
nulla <- nulla * temp
}
temp <- a0[j] * pijk[j] + a1[j] * (1 - pijk[j])
tempa <- mb2a * sens + nulla * (1 - sens - spec)
eii <- expect[i] / tempa * sens * a0[j] * pijk[j] * mb2a / temp
}
wii <- ifelse(i == j, expect[i] - expect[i]^2,
eii - expect[i] * expect[j])
tim <- wii * X[i, ] %*% t(X[j, ])
m1 <- m1 + tim
}
}
for (i in sub2) {
for (j in sub1) {
if (subg[j] == 0) {
temp <- spec * prod(1 - pijk[sub1]) +
(1 - sens) * (1 - prod(1 - pijk[sub1]))
eii <- expect[i] * (1 - sens) * pijk[j] / temp
} else {
mb2 <- 1
for (l in sub1) {
temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
mb2 <- mb2 * temp
}
null <- 1
for (l in sub1) {
temp <- a1[l] * (1 - pijk[l])
null <- null * temp
}
temp <- a0[j] * pijk[j] + a1[j] * (1 - pijk[j])
tempa <- mb2 * sens + null * (1 - sens - spec)
eii <- expect[i] / tempa * sens * a0[j] * pijk[j] * mb2 / temp
}
wii <- ifelse(i == j, expect[i] - expect[i]^2,
eii - expect[i] * expect[j])
tim <- wii * X[i, ] %*% t(X[j, ])
m1 <- m1 + tim
}
for (j in sub2) {
if (subg[j] == 0) {
eii <- expect[i] * pijk[j]
} else {
temp <- a0[j] * pijk[j] + a1[j] * (1 - pijk[j])
eii <- expect[i] / temp * a0[j] * pijk[j]
}
wii <- ifelse(i == j, expect[i] - expect[i]^2,
eii - expect[i] * expect[j])
tim <- wii * X[i, ] %*% t(X[j, ])
m1 <- m1 + tim
}
}
}
i <- i + 1
}
H <- -(m + m1)
zhat <- sens + (1 - sens - spec) * prodp
residual <- z - zhat
logl <- 0
for (grn in 1:num.g) {
if (z[grn] == 1) {
vec1 <- vec[groupn == grn]
gs <- length(vec1)
sub1 <- vec1[1:ceiling(gs / 2)]
sub2 <- vec1[(ceiling(gs / 2) + 1):gs]
if (subg[vec1[1]] == 0) {
if (subg[vec1[gs]] == 0) {
prob1 <- (1 - spec) * spec^2 * prod(1 - pijk[sub1]) *
prod(1 - pijk[sub2]) +
spec * (1 - sens) * sens * prod(1 - pijk[sub1]) *
(1 - prod(1 - pijk[sub2])) +
spec * (1 - sens) * sens * prod(1 - pijk[sub2]) *
(1 - prod(1 - pijk[sub1])) +
(1 - sens)^2 * sens * (1 - prod(1 - pijk[sub1])) *
(1 - prod(1 - pijk[sub2]))
}
if (subg[vec1[gs]] == 1) {
mb2 <- 1
for (l in sub2) {
temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
mb2 <- mb2 * temp
}
null <- 1
for (l in sub2) {
temp <- a1[l] * (1 - pijk[l])
null <- null * temp
}
prob1 <- (1 - spec)^2 * spec * null * prod(1 - pijk[sub1]) +
(1 - spec) * (1 - sens) * sens * null *
(1 - prod(1 - pijk[sub1])) +
spec * sens^2 * (mb2 - null) * prod(1 - pijk[sub1]) +
(1 - sens) * sens^2 * (mb2 - null) * (1 - prod(1 - pijk[sub1]))
}
} else {
if (subg[vec1[gs]] == 0) {
mb2 <- 1
for (l in sub1) {
temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
mb2 <- mb2 * temp
}
null <- 1
for (l in sub1) {
temp <- a1[l] * (1 - pijk[l])
null <- null * temp
}
prob1 <- (1 - spec)^2 * spec * null * prod(1 - pijk[sub2]) +
(1 - spec) * (1 - sens) * sens * null *
(1 - prod(1 - pijk[sub2])) +
spec * sens^2 * (mb2 - null) * prod(1 - pijk[sub2]) +
(1 - sens) * sens^2 * (mb2 - null) * (1 - prod(1 - pijk[sub2]))
}
if (subg[vec1[gs]] == 1) {
mb2 <- 1
for (l in sub1) {
temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
mb2 <- mb2 * temp
}
null <- 1
for (l in sub1) {
temp <- a1[l] * (1 - pijk[l])
null <- null * temp
}
mb2a <- 1
for (l in sub2) {
temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
mb2a <- mb2a * temp
}
nulla <- 1
for (l in sub2) {
temp <- a1[l] * (1 - pijk[l])
nulla <- nulla * temp
}
prob1 <- (1 - spec)^3 * null * nulla +
(1 - spec) * sens^2 * null * (mb2a - nulla) +
(1 - spec) * sens^2 * (mb2 - null) * nulla +
sens^3 * (mb2 - null) * (mb2a - nulla)
}
}
} else prob1 <- 1 - zhat[grn]
logl <- logl - log(prob1)
}
aic <- 2 * logl + 2 * K
if (diff > control$tol && counts > control$maxit)
warning("EM algorithm did not converge.")
if (control$time) {
end.time <- proc.time()
save.time <- end.time - start.time
cat("\n Number of minutes running:", round(save.time[3] / 60, 2), "\n \n")
}
list(coefficients = beta.old, hessian = H, fitted.values = zhat,
deviance = 2 * logl, aic = aic,
counts = counts - 1, residuals = residual, z = z)
}
##################################################################
# gtReg() function #
##################################################################
#' @title Fitting group testing regression models
#'
#' @description Fits the group testing regression model specified
#' through a symbolic description of the linear predictor and
#' descriptions of the group testing setting. This function allows
#' for fitting regression models with simple pooling, halving, or array
#' testing data.
#'
#' @param type \kbd{"sp"} for simple pooling (Dorfman testing with
#' or without retests), \kbd{"halving"} for halving
#' protocol, or \kbd{"array"} for array testing. See 'Details' for
#' descriptions of the group testing algorithms.
#' @param formula an object of class "formula" (or one that
#' can be coerced to that class); a symbolic description of
#' the model to be fitted. The details of model specification
#' are under 'Details'.
#' @param data an optional data frame, list, or environment
#' (or object coercible by \kbd{as.data.frame} to a data frame)
#' containing the variables in the model. If not found in data,
#' the variables are taken from \kbd{environment(formula)},
#' typically the environment from which \code{gtReg} is called.
#' @param groupn a vector, list, or data frame of the group
#' numbers that designates individuals to groups (for use with
#' simple pooling, \kbd{type = "sp"}, or the halving protocol,
#' \kbd{type = "halving"}).
#' @param subg a vector, list, or data frame of the group numbers
#' that designates individuals to subgroups (for use with the
#' halving protocol, \kbd{type = "halving"}).
#' @param coln a vector, list, or data frame that specifies the
#' column group number for each sample (for use with array
#' testing, \kbd{type = "array"}).
#' @param rown a vector, list, or data frame that specifies the
#' row group number for each sample (for use with array testing,
#' \kbd{type = "array"}).
#' @param arrayn a vector, list, or data frame that specifies the
#' array number for each sample (for use with array testing,
#' \kbd{type = "array"}).
#' @param retest a vector, list, or data frame of individual
#' retest results. Default value is \kbd{NULL} for no retests.
#' See 'Details' for details on how to specify \kbd{retest}.
#' @param sens sensitivity of the test. Default value is set
#' to 1.
#' @param spec specificity of the test. Default value is set
#' to 1.
#' @param linkf a character string specifying one of the three
#' link functions for a binomial model: \kbd{"logit"} (default),
#' \kbd{"probit"}, or \kbd{"cloglog"}.
#' @param method the method to fit the regression model.
#' Options include \kbd{"Vansteelandt"} (default) or \kbd{"Xie"}.
#' The \kbd{"Vansteelandt"} option finds estimates by directly
#' maximizing the likelihood function based on the group responses,
#' while the \kbd{"Xie"} option uses the EM algorithm to
#' maximize the likelihood function in terms of the unobserved
#' individual responses.
#' @param sens.ind sensitivity of the individual retests. If NULL,
#' set to be equal to \kbd{sens}.
#' @param spec.ind specificity of the individual retests. If NULL,
#' set to be equal to \kbd{spec}.
#' @param start starting values for the parameters in the linear
#' predictor.
#' @param control a list of parameters for controlling the fitting
#' process in method \kbd{"Xie"}. These parameters will be passed
#' to the \code{\link{gtRegControl}} function for use.
#' @param ... arguments to be passed to \code{\link{gtRegControl}} by
#' default. See argument \kbd{control}.
#'
#' @details With simple pooling and halving, a typical predictor
#' has the form \kbd{groupresp ~ covariates} where \kbd{groupresp}
#' is the (numeric) group response vector. With array testing,
#' individual samples are placed in a matrix-like grid where
#' samples are pooled within each row and within each column.
#' This leads to two kinds of group responses: row and column
#' group responses. Thus, a typical predictor has the form
#' \kbd{cbind(col.resp, row.resp) ~ covariates}, where
#' \kbd{col.resp} is the (numeric) column group response vector
#' and \kbd{row.resp} is the (numeric) row group response vector.
#' For all methods, \kbd{covariates} is a series of terms which
#' specifies a linear predictor for individual responses.
#' Note that it is actually the unobserved individual responses,
#' not the observed group responses, which are modeled by the
#' covariates. When denoting group responses (\kbd{groupresp},
#' \kbd{col.resp}, and \kbd{row.resp}), a 0 denotes a negative
#' response and a 1 denotes a positive response, where the
#' probability of an individual positive response is being
#' modeled directly.
#'
#' A terms specification of the form
#' \kbd{first + second} indicates all the terms in \kbd{first}
#' together with all the terms in \kbd{second} with duplicates
#' removed. A specification of the form \kbd{first:second}
#' indicates the set of terms obtained by taking the interactions
#' of all terms in \kbd{first} with all terms in \kbd{second}.
#' The specification \kbd{first*second} indicates the cross of
#' \kbd{first} and \kbd{second}. This is the same as \kbd{first +
#' second + first:second}. The terms in the formula will be
#' re-ordered so that main effects come first, followed by the
#' interactions, all second-order, all third-order, and so on;
#' to avoid this, pass a terms object as the formula.
#'
#' For simple pooling (\kbd{type = "sp"}), the functions \kbd{gtreg.fit},
#' \kbd{EM}, and \kbd{EM.ret}, where the first corresponds to Vansteelandt's
#' method described in Vansteelandt et al. (2000) and the last two correspond
#' to Xie's method described in Xie (2001), are called to carry out the
#' model fitting. The \kbd{gtreg.fit} function uses the \kbd{optim}
#' function with default method \kbd{"Nelder-Mead"} to maximize
#' the likelihood function of the observed group responses.
#' If this optimization method produces a Hessian matrix of all
#' zero elements, the \kbd{"SANN"} method in \kbd{optim} is
#' employed to find the coefficients and Hessian matrix. For
#' the \kbd{"SANN"} method, the number of iterations in \kbd{optim}
#' is set to be 10000. For the background on the use of \kbd{optim},
#' see \kbd{help(optim)}.
#'
#' The \kbd{EM} and \kbd{EM.ret} functions apply Xie's EM
#' algorithm to the likelihood function written in terms of the
#' unobserved individual responses; the functions use \kbd{glm.fit}
#' to update the parameter estimates within each M step. The
#' \kbd{EM} function is used when there are no retests and
#' \kbd{EM.ret} is used when individual retests are available.
#' Thus, within the \kbd{retest} argument, individual observations
#' in observed positive groups are 0 (negative) or 1 (positive);
#' the remaining individual observations are \kbd{NA}s, meaning
#' that no retest is performed for them. Retests cannot be used
#' with Vansteelandt's method; a warning message will be given
#' in this case, and the individual retests will be ignored in
#' the model fitting. There could be slight differences in the
#' estimates between Vansteelandt's and Xie's methods (when
#' retests are not available) due to different convergence criteria.
#'
#' With simple pooling (i.e., Dorfman testing, two-stage hierarchical
#' testing), each individual appears in exactly one pool. When only the
#' group responses are observed, the null degrees of freedom are the number
#' of groups minus 1 and the residual degrees of freedom are the number of
#' groups minus the number of parameters. When individual retests are
#' observed too, it is an open research question for what the degrees of
#' freedom and the deviance for the null model should be; therefore, the
#' degrees of freedom and \kbd{null.deviance} will not be displayed.
#'
#' Under the halving protocol, the \kbd{EM.halving} function
#' applies Xie's EM algorithm to the
#' likelihood function written in terms of the unobserved
#' individual responses; the functions use \kbd{glm.fit} to update
#' the parameter estimates within each M step. In the halving
#' protocol, if the initial group tests positive, it is split
#' into two subgroups. The two subgroups are subsequently tested
#' and if either subgroup tests positive, the third and final
#' step is to test all individuals within the subgroup. Thus,
#' within \kbd{subg}, subgroup responses in observed positive
#' groups are 0 (negative) or 1 (positive); the remaining
#' subgroup responses are \kbd{NA}s, meaning that no tests are
#' performed for them. The individual retests are similarly coded.
#'
#' With array testing (also known as matrix pooling), the
#' \kbd{EM.mp} function applies Xie's
#' EM algorithm to the likelihood function written in terms of the
#' unobserved individual responses. In each E step, the Gibbs
#' sampling technique is used to estimate the conditional
#' probabilities. Because of the large number of Gibbs samples
#' needed to achieve convergence, the model fitting process could
#' be quite slow, especially when multiple positive rows and
#' columns are observed. In this case, we can either increase the
#' Gibbs sample size to help achieve convergence or loosen the
#' convergence criteria by increasing \kbd{tol} at the expense
#' of perhaps poorer estimates. If follow-up retests are performed,
#' the retest results going into the model will help achieve
#' convergence faster with the same Gibbs sample size and
#' convergence criteria. In each M step, we use \kbd{glm.fit} to
#' update the parameter estimates.
#'
#' For simple pooling, \kbd{retest} provides individual retest
#' results for Dorfman's retesting procedure. Under the halving
#' protocol, \kbd{retest} provides individual retest results
#' within a subgroup that tests positive. The \kbd{retest}
#' argument provides individual retest results, where a 0
#' denotes negative and 1 denotes positive status. An \kbd{NA}
#' denotes that no retest is performed for that individual.
#' The default value is \kbd{NULL} for no retests.
#'
#' For simple pooling, \kbd{control} provides parameters for
#' controlling the fitting process in the \kbd{"Xie"} method only.
#'
#' \kbd{gtReg} returns an object of class \kbd{"gtReg"}.
#' The function \kbd{summary} (i.e., \code{\link{summary.gtReg}}
#' is used to obtain or print a summary of the results.
#' The group testing function \kbd{predict} (i.e.,
#' \code{\link{predict.gtReg}}) is used to make predictions
#' on \kbd{"gtReg"} objects.
#'
#' @return An object of class \kbd{"gtReg"}, a list which may include:
#' \item{coefficients}{a named vector of coefficients.}
#' \item{hessian}{estimated Hessian matrix of the negative
#' log-likelihood function. This serves as an estimate of the
#' information matrix.}
#' \item{residuals}{the response residuals. This is the difference
#' of the observed group responses and the fitted group
#' responses. Not included for array testing.}
#' \item{fitted.values}{the fitted mean values of group responses.
#' Not included for array testing.}
#' \item{deviance}{the deviance between the fitted model and the
#' saturated model. Not included for array testing.}
#' \item{aic}{Akaike's Information Criterion. This is minus twice
#' the maximized log-likelihood plus twice the number of
#' coefficients. Not included for array testing.}
#' \item{null.deviance}{the deviance for the null model,
#' comparable with \kbd{deviance}. The null model will
#' include only the intercept, if there is one in the model.
#' Provided for simple pooling, \kbd{type = "sp"}, only.}
#' \item{counts}{the number of iterations in \kbd{optim}
#' (Vansteelandt's method) or the number of iterations in the
#' EM algorithm (Xie's method, halving, and array testing).}
#' \item{Gibbs.sample.size}{the number of Gibbs samples
#' generated in each E step. Provided for array testing,
#' \kbd{type = "array"}, only.}
#' \item{df.residual}{the residual degrees of freedom.
#' Provided for simple pooling, \kbd{type = "sp"}, only.}
#' \item{df.null}{the residual degrees of freedom for the null model.
#' Provided for simple pooling, \kbd{type = "sp"}, only.}
#' \item{z}{the vector of group responses. Not included for array testing.}
#' \item{call}{the matched call.}
#' \item{formula}{the formula supplied.}
#' \item{terms}{the terms object used.}
#' \item{method}{the method (\kbd{"Vansteelandt"} or \kbd{"Xie"})
#' used to fit the model. For the halving protocol, the
#' \kbd{"Xie"} method is used. Not included for array testing.}
#' \item{link}{the link function used in the model.}
#'
#' @author The majority of this function was originally written as
#' \kbd{gtreg.sp}, \kbd{gtreg.halving}, and \kbd{gtreg.mp} by Boan Zhang
#' for the \code{binGroup} package. Minor modifications have been made for
#' inclusion of the functions in the \code{binGroup2} package.
#'
#' @references
#' \insertRef{Vansteelandt2000}{binGroup2}
#'
#' \insertRef{Xie2001}{binGroup2}
#'
#' @seealso \code{\link{gtSim}} for simulation of data in the
#' group testing form to be used by \kbd{gtReg},
#' \code{\link{summary.gtReg}} and \code{\link{predict.gtReg}}
#' for \kbd{gtreg} methods.
#'
#' @examples
#' data(hivsurv)
#' fit1 <- gtReg(type = "sp", formula = groupres ~ AGE + EDUC.,
#' data = hivsurv, groupn = gnum, sens = 0.9,
#' spec = 0.9, method = "Xie")
#' fit1
#'
#' set.seed(46)
#' gt.data <- gtSim(type = "sp", par = c(-12, 0.2),
#' size1 = 700, size2 = 5)
#' fit2 <- gtReg(type = "sp", formula = gres ~ x, data = gt.data,
#' groupn = groupn)
#' fit2
#'
#' set.seed(21)
#' gt.data <- gtSim(type = "sp", par = c(-12, 0.2),
#' size1 = 700, size2 = 6, sens = 0.95, spec = 0.95,
#' sens.ind = 0.98, spec.ind = 0.98)
#' fit3 <- gtReg(type = "sp", formula = gres ~ x, data = gt.data,
#' groupn = groupn, retest = retest, method = "Xie",
#' sens = 0.95, spec = 0.95, sens.ind = 0.98,
#' spec.ind = 0.98, trace = TRUE)
#' summary(fit3)
#'
#' set.seed(46)
#' gt.data <- gtSim(type = "halving", par = c(-6, 0.1), gshape = 17,
#' gscale = 1.4, size1 = 5000, size2 = 5,
#' sens = 0.95, spec = 0.95)
#' fit4 <- gtReg(type = "halving", formula = gres ~ x,
#' data = gt.data, groupn = groupn, subg = subgroup,
#' retest = retest, sens = 0.95, spec = 0.95,
#' start = c(-6, 0.1), trace = TRUE)
#' summary(fit4)
#'
#' # 5x6 and 4x5 array
#' \donttest{set.seed(9128)
#' sa1a <- gtSim(type = "array", par = c(-7, 0.1), size1 = c(5, 4),
#' size2 = c(6, 5), sens = 0.95, spec = 0.95)
#' sa1 <- sa1a$dframe
#' fit5 <- gtReg(type = "array",
#' formula = cbind(col.resp, row.resp) ~ x,
#' data = sa1, coln = coln, rown = rown,
#' arrayn = arrayn, sens = 0.95, spec = 0.95,
#' tol = 0.005, n.gibbs = 2000, trace = TRUE)
#' fit5
#' summary(fit5)}
#'
# Brianna Hitt - 01-07-2020
# Brianna Hitt - 06-30-2012
# Changed class of all results to "gtReg" (instead of "gt" or "gt.mp")
# Brianna Hitt - 3 November 2023
# Added checks to ensure sensitivity and specificity values are between 0 and 1
gtReg <- function(type = "sp", formula, data, groupn = NULL,
subg = NULL, coln = NULL, rown = NULL, arrayn = NULL,
retest = NULL, sens = 1, spec = 1,
linkf = c("logit", "probit", "cloglog"),
method = c("Vansteelandt", "Xie"),
sens.ind = NULL, spec.ind = NULL, start = NULL,
control = gtRegControl(...), ...) {
if (sens < 0 | sens > 1) {
stop("Please provide sensitivity values between 0 and 1.\n")
}
if (!is.null(sens.ind)) {
if (sens.ind < 0 | sens.ind > 1) {
stop("Please provide sensitivity values between 0 and 1.\n")
}
}
if (spec < 0 | spec > 1) {
stop("Please provide specificity values between 0 and 1.\n")
}
if (!is.null(spec.ind)) {
if (spec.ind < 0 | spec.ind > 1) {
stop("Please provide specificity values between 0 and 1.\n")
}
}
call <- match.call()
mf <- match.call(expand.dots = FALSE)
if (type %in% c("sp", "halving")) {
m <- match(c("formula", "data", "groupn"), names(mf), 0)
} else if (type == "array") {
m <- match(c("formula", "data", "coln", "rown", "arrayn"),
names(mf), 0)
}
mf <- mf[c(1,m)]
mf$drop.unused.levels <- TRUE
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")
if (type %in% c("sp", "halving")) {
gr <- model.extract(mf, "groupn")
} else if (type == "array") {
arrayn <- model.extract(mf, "arrayn")
rown <- model.extract(mf, "rown")
coln <- model.extract(mf, "coln")
}
if (!is.na(pos <- match(deparse(substitute(retest)), names(data))))
retest <- data[, pos]
if (type == "halving") {
if (!is.na(pos <- match(deparse(substitute(subg)), names(data))))
subg <- data[, pos]
}
Y <- model.response(mf, "any")
if (length(dim(Y)) == 1) {
nm <- rownames(Y)
dim(Y) <- NULL
if (!is.null(nm))
names(Y) <- nm
}
X <- if (!is.empty.model(mt))
model.matrix(mt, mf)
else matrix(, NROW(Y), 0)
linkf <- match.arg(linkf)
if (type == "sp") {
if ((method <- match.arg(method)) == "Vansteelandt") {
if (!is.null(retest))
warning("Retests cannot be used with Vansteelandt's method.")
fit <- gtreg.fit(Y, X, gr, sens, spec, linkf, start)
}
else {
if (is.null(retest))
fit <- EM(Y, X, gr, sens, spec, linkf, start, control)
else fit <- EM.ret(Y, X, gr, retest, sens, spec, linkf,
sens.ind, spec.ind, start, control)
}
fit <- c(fit, list(call = call, formula = formula, method = method,
link = linkf, terms = mt))
# class(fit) <- "gt"
} else if (type == "halving") {
fit <- EM.halving(Y, X, gr, subg, retest, sens, spec, linkf,
sens.ind, spec.ind, start, control)
fit <- c(fit, list(call = call, formula = formula, method = "Xie",
link = linkf, terms = mt))
# class(fit) <- "gt"
} else if (type == "array") {
fit <- EM.mp(Y[, 1], Y[, 2], X, coln, rown, arrayn, retest,
sens, spec, linkf, sens.ind, spec.ind, start, control)
fit <- c(fit, list(call = call, formula = formula, link = linkf,
terms = mt))
# class(fit) <- c("gt.mp", "gt")
}
# class(fit) <- c(class(fit), "gtReg")
class(fit) <- "gtReg"
fit
}
# Print function
##################################################################
# print.gtReg() function #
##################################################################
#' @title Print method for \kbd{gtReg}
#'
#' @description Print method for objects obtained by \code{\link{gtReg}}.
#'
#' @param x An object of class "gtReg" created by \code{\link{gtReg}}.
#' @param digits digits for rounding.
#' @param ... currently not used.
#'
#' @return A print out of the function call, coefficients, and the null and
#' residual deviance and degrees of freedom.
#'
#' @author This function was originally written by Boan Zhang as the
#' \kbd{print.gt} function for the \code{binGroup} package. Minor modifications
#' were made for inclusion in the \code{binGroup2} package.
print.gtReg <- function (x,
digits = max(3, getOption("digits") - 3), ...) {
cat("\nCall:\n")
cat(paste(deparse(x$call), sep = "\n", collapse = "\n"),
"\n\n", sep = "")
if (length(coef(x))) {
cat("Coefficients:\n")
print.default(format(coef(x), digits = digits), print.gap = 2,
quote = FALSE)
}
else cat("No coefficients\n")
if (!is.null(x$df.null)) {
cat("\nDegrees of Freedom:", x$df.null, "\tTotal (i.e. Null); ",
x$df.residual, "Residual\n")
cat("Null Deviance:\t ",
format(signif(x$null.deviance, digits)),
"\nResidual Deviance: ",
format(signif(x$deviance, digits)),
"\tAIC:", format(signif(x$aic, digits)), "\n")
}
invisible(x)
}
# Residuals function
##################################################################
# residuals.gtReg() function #
##################################################################
#' @title Extract model residuals from a fitted group testing model
#'
#' @description Extract model residuals from objects of class "gtReg" returned
#' by \code{\link{gtReg}}.
#'
#' @param object An object of class "gtReg", created by \code{\link{gtReg}},
#' from which the model residuals are to be extracted.
#' @param type The type of residuals which should be returned. Options include
#' "\kbd{deviance}" (default), "\kbd{pearson}", and "\kbd{response}".
#' @param ... currently not used.
#'
#' @return Residuals of group responses extracted from the object \kbd{object}.
#'
#' @author This function was originally written by Boan Zhang as the
#' \kbd{residuals.gt} function for the \code{binGroup} package.
#'
#' @examples
#' data(hivsurv)
#' fit1 <- gtReg(formula = groupres ~ AGE * EDUC.,
#' data = hivsurv, groupn = gnum,
#' linkf = "probit")
#' residuals(object = fit1, type = "pearson")
#' residuals(object = fit1, type = "deviance")
residuals.gtReg <- function (object,
type = c("deviance", "pearson", "response"), ...) {
type <- match.arg(type)
r <- object$residuals
zhat <- object$fitted.values
z <- object$z
res <- switch(type, response = r,
pearson = r/sqrt(zhat * (1 - zhat)),
deviance = sqrt(-2 * (z * log(zhat) +
(1 - z) * log(1 - zhat))) * sign(z - zhat))
res
}
# Coefficients function
##################################################################
# coefficients.gtReg() function #
##################################################################
#' @title Extract coefficients from a fitted group testing model
#'
#' @description Extract coefficients from objects of class "gtReg" returned
#' by \code{\link{gtReg}}.
#'
#' @param object An object of class "gtReg", created by \code{\link{gtReg}},
#' from which the coefficients are to be extracted.
#' @param digits digits for rounding.
#' @param ... not currently used.
#'
#' @return Model coefficients extracted from the object \kbd{object}.
#'
#' @author Brianna D. Hitt
#'
#' @examples
#' data(hivsurv)
#' fit1 <- gtReg(formula = groupres ~ AGE * EDUC.,
#' data = hivsurv, groupn = gnum,
#' linkf = "probit")
#' coefficients(object = fit1)
coef.gtReg <- function (object, digits = max(3, getOption("digits") - 3),
...) {
signif(object$coefficients, digits)
}
###############################################################################
#' @rdname coef.gtReg
coefficients.gtReg <- coef.gtReg
# Formula function
##################################################################
# formula.gtReg() function #
##################################################################
#' @title Extract the model formula from a fitted group testing model
#'
#' @description Extract the model formula from objects of class "gtReg"
#' returned by \code{\link{gtReg}}.
#'
#' @param x An object of class "gtReg", created by \code{\link{gtReg}},
#' from which the model formula is to be extracted.
#' @param ... not currently used.
#'
#' @return Model formula extracted from the object \kbd{object}.
#'
#' @author Brianna D. Hitt
#'
#' @examples
#' data(hivsurv)
#' fit1 <- gtReg(formula = groupres ~ AGE * EDUC.,
#' data = hivsurv, groupn = gnum,
#' linkf = "probit")
#' formula(x = fit1)
formula.gtReg <- function (x, ...) {
x$formula
}
# Variance-covariance function
##################################################################
# vcov.gtReg() function #
##################################################################
# #' @title Calculate variance-covariance matrix for a fitted group testing model
# #'
# #' @description Returns the variance-covariance matrix of the main parameters
# #' of a fitted model object of class "gtReg" returned by \code{\link{gtReg}}.
# #' The "main" parameters of the model correspond to those returned by
# #' \code{\link{coef.gtReg}}, and typically do not contain a nuisance scale
# #' parameter.
# #'
# #' @param object A fitted model object of class "gtReg", created by
# #' \code{\link{gtReg}}, from which the variance-covariance matrix will be
# #' calculated.
# #' @param type The type of residuals which should be used to calculate the
# #' variance-covariance matrix. Options include
# #' "\kbd{deviance}" (default), "\kbd{pearson}", and "\kbd{response}".
# #' @param ... currently not used.
# #'
# #' @return A matrix of the estimated covariances between the parameter
# #' estimates in the linear or non-linear predictor of the model. This should
# #' have row and column names corresponding to the parameter names given by the
# #' \code{\link{coef.gtReg}} method.
# #'
# #' @author Brianna D. Hitt
# #'
# #' @examples
# #' data(hivsurv)
# #' fit1 <- gtReg(formula = groupres ~ AGE * EDUC.,
# #' data = hivsurv, groupn = gnum, linkf = "probit")
# #' vcov.gtReg(object = fit1)
#
# vcov.gtReg <- function (object,
# type = c("deviance", "pearson", "response"), ...) {
# # type <- match.arg(type)
# # r <- object$residuals
# # zhat <- object$fitted.values
# # z <- object$z
# # res <- switch(type, response = r,
# # pearson = r/sqrt(zhat * (1 - zhat)),
# # deviance = sqrt(-2 * (z * log(zhat) +
# # (1 - z) * log(1 - zhat))) * sign(z - zhat))
# # res
# }
#
# Summary function
##################################################################
# summary.gtReg() function #
##################################################################
#' @title Summary method for \kbd{gtReg}
#'
#' @description Produce a summary list for objects of class
#' \kbd{"gtReg"} returned by \code{\link{gtReg}}.
#'
#' @param object a fitted object of class \kbd{"gtReg"}.
#' @param ... currently not used.
#'
#' @details The \kbd{coefficients} component of the results gives a matrix
#' containing the estimated coefficients and their estimated standard errors.
#' The third column is their ratio, labeled
#' \kbd{z ratio} using Wald tests. A fourth column gives the
#' two-tailed p-value corresponding to the z-ratio based on a
#' Wald test. Note that it is possible that there are no residual
#' degrees of freedom from which to estimate, in which case the
#' estimate is \kbd{NaN}.
#'
#' @return \kbd{summary.gtReg} returns an object of class
#' \kbd{"summary.gtReg"}, a list containing:
#' \item{call}{the component from \kbd{object}.}
#' \item{link}{the component from \kbd{object}.}
#' \item{deviance}{the component from \kbd{object},
#' for simple pooling (\kbd{type = "sp"} in \code{\link{gtReg}}) only.}
#' \item{aic}{the component from \kbd{object},
#' for simple pooling (\kbd{type = "sp"} in \code{\link{gtReg}}) only.}
#' \item{df.residual}{the component from \kbd{object},
#' for simple pooling (\kbd{type = "sp"} in \code{\link{gtReg}}) only.}
#' \item{null.deviance}{the component from \kbd{object},
#' for simple pooling (\kbd{type = "sp"} in \code{\link{gtReg}}) only.}
#' \item{df.null}{the component from \kbd{object},
#' for simple pooling (\kbd{type = "sp"} in \code{\link{gtReg}}) only.}
#' \item{deviance.resid}{the deviance residuals,
#' for simple pooling (\kbd{type = "sp"} in \code{\link{gtReg}}) only.}
#' \item{coefficients}{the matrix of coefficients, standard errors,
#' z-values, and p-values. Aliased coefficients are omitted.}
#' \item{counts}{the component from \kbd{object}.}
#' \item{method}{the component from \kbd{object},
#' for simple pooling (\kbd{type = "sp"} in \code{\link{gtReg}}) only.}
#' \item{Gibbs.sample.size}{the component from \kbd{object},
#' for array testing (\kbd{type = "array"} in \code{\link{gtReg}}) only.}
#' \item{cov.mat}{the estimated covariance matrix of the estimated
#' coefficients.}
#'
#' @author The majority of this function was originally written as
#' \code{summary.gt} and \code{summary.gt.mp} by Boan Zhang for the
#' \code{binGroup} package. Minor modifications were made to the function
#' for inclusion in the \code{binGroup2} package.
#'
#' @seealso \code{\link{gtReg}} for creating an object of class
#' \kbd{"gtReg"}.
#'
#' @examples
#' data(hivsurv)
#' fit1 <- gtReg(type = "sp",
#' formula = groupres ~ AGE + EDUC.,
#' data = hivsurv, groupn = gnum,
#' sens = 0.9, spec = 0.9,
#' method = "Xie")
#' summary(fit1)
#'
#' # 5x6 and 4x5 array
#' set.seed(9128)
#' sa2a <- gtSim(type = "array", par = c(-7, 0.1),
#' size1 = c(5, 4), size2 = c(6, 5),
#' sens = 0.95, spec = 0.95)
#' sa2 <- sa2a$dframe
#' \donttest{
#' fit2 <- gtReg(type = "array",
#' formula = cbind(col.resp, row.resp) ~ x,
#' data = sa2, coln = coln, rown = rown,
#' arrayn = arrayn, sens = 0.95, spec = 0.95,
#' linkf = "logit", n.gibbs = 1000, tol = 0.005)
#' summary(fit2)}
# Brianna Hitt - 06-30-2021
# Updated gtReg() so that class of returned results is "gtReg"
# Updated summary.gtReg() so that the matrix pooling results can still be
# identified ("Gibbs.sample.size" %in% names(object) instead of "gt.mp"
# %in% class(object))
summary.gtReg <- function(object, ...) {
coef.p <- object$coefficients
cov.mat <- solve(object$hessian)
dimnames(cov.mat) <- list(names(coef.p), names(coef.p))
var.cf <- diag(cov.mat)
s.err <- sqrt(var.cf)
zvalue <- coef.p / s.err
dn <- c("Estimate", "Std. Error")
pvalue <- 2 * pnorm(-abs(zvalue))
coef.table <- cbind(coef.p, s.err, zvalue, pvalue)
dimnames(coef.table) <- list(names(coef.p), c(dn, "z value", "Pr(>|z|)"))
# if ("gt.mp" %in% class(object)) {
if ("Gibbs.sample.size" %in% names(object)) {
keep <- match(c("call", "link", "Gibbs.sample.size", "counts"),
names(object), 0)
ans <- c(object[keep], list(coefficients = coef.table,
cov.mat = cov.mat))
} else{
keep <- match(c("call", "link", "aic", "deviance", "df.residual",
"null.deviance", "df.null", "counts", "method", "z"),
names(object), 0)
ans <- c(object[keep], list(coefficients = coef.table,
deviance.resid = residuals(object,
type = "deviance"),
cov.mat = cov.mat))
}
class(ans) <- "summary.gtReg"
ans
}
# Print method for summary function
##################################################################
# print.summary.gtReg() function #
##################################################################
#' @title Print method for \kbd{summary.gtReg}
#'
#' @description Print method for objects obtained by
#' \code{\link{summary.gtReg}}.
#'
#' @param x An object of class "summary.gtReg" created by
#' \code{\link{summary.gtReg}}.
#' @param digits digits for rounding.
#' @param signif.stars a logical value indicating whether significance
#' stars should be shown.
#' @param ... Additional arguments to be passed to \code{printCoefmat}.
#'
#' @return A print out of the function call, deviance residuals (for
#' simple pooling and halving only), coefficients, null and
#' residual deviance and degrees of freedom (for simple pooling only),
#' AIC (for simple pooling and halving only), number of
#' Gibbs samples (for array testing only), and the number of
#' iterations.
#'
#' @author This function combines code from
#' \code{print.summary.gt} and
#' \code{print.summary.gt.mp}, written by Boan Zhang
#' for the \code{binGroup} package. Minor modifications were
#' made for inclusion in the \code{binGroup2} package.
print.summary.gtReg <- function(x, digits = max(3, getOption("digits") - 3),
signif.stars = getOption("show.signif.stars"),
...) {
obj <- x
cat("\nCall:\n")
cat(paste(deparse(obj$call), sep = "\n", collapse = "\n"),
"\n\n", sep = "")
# for simple pooling and array testing only
if ("deviance.resid" %in% names(obj)) {
cat("Deviance Residuals: \n")
if (length(obj$z) > 5) {
obj$deviance.resid <- quantile(obj$deviance.resid, na.rm = TRUE)
names(obj$deviance.resid) <- c("Min", "1Q", "Median",
"3Q", "Max")
}
print.default(obj$deviance.resid, digits = digits, na.print = "",
print.gap = 2)
}
cat("\nCoefficients:\n")
coefs <- obj$coefficients
printCoefmat(coefs, digits = digits, signif.stars = signif.stars,
na.print = "NA", ...)
if (!is.null(unlist(obj["df.null"]))) {
cat("\n",
apply(cbind(paste(format(c("Null", "Residual"), justify = "right"),
"deviance:"),
format(unlist(obj[c("null.deviance", "deviance")]),
digits = 4),
" on", format(unlist(obj[c("df.null", "df.residual")])),
" degrees of freedom\n"), 1, paste,
collapse = " "), sep = "")
}
if ("method" %in% names(obj)) {
if (obj$method == "Vansteelandt") {
cat("AIC: ", format(obj$aic, digits = 4),
"\n\n", "Number of iterations in optim(): ",
obj$counts, "\n", sep = "")
} else {
cat("AIC: ", format(obj$aic, digits = 4),
"\n\n", "Number of iterations in EM: ",
obj$counts, "\n", sep = "")
}
}
if ("Gibbs.sample.size" %in% names(obj)) {
cat("\nNumber of Gibbs samples generated in each E step: ",
obj$Gibbs.sample.size, "\n",
"Number of iterations in EM algorithm: ",
obj$counts, "\n", sep = "")
}
cat("\n")
invisible(obj)
}
# Other regression-related functions
##################################################################
# predict.gtReg() function #
##################################################################
# Brianna Hitt - 04.03.2022 - added attributed class of predict.gtReg()
# for use with print.predict.gtReg() function
#' @title Predict method for \kbd{gtReg}
#'
#' @description Obtains predictions for individual observations and
#' optionally computes the standard errors of those predictions from
#' objects of class \kbd{"gtReg"} returned by \code{\link{gtReg}}.
#'
#' @param object a fitted object of class \kbd{"gtReg"}.
#' @param newdata an optional data frame in which to look for
#' variables with which to predict. If omitted, the fitted linear
#' predictors are used.
#' @param type the type of prediction required. The \kbd{"link"}
#' option is on the scale of the linear predictors. The \kbd{"response"}
#' option is on the scale of the response variable. Thus, for the
#' logit model, the \kbd{"link"} predictions are of log-odds
#' (probabilities on the logit scale) and \kbd{type = "response"}
#' gives the predicted probabilities.
#' @param se.fit a logical value indicating whether standard errors
#' are required.
#' @param conf.level the confidence level of the interval for the
#' predicted values.
#' @param na.action a function determining what should be done with
#' missing values in \kbd{newdata}. The default is to predict \kbd{NA}.
#' @param ... currently not used.
#'
#' @details If \kbd{newdata} is omitted, the predictions are based
#' on the data used for the fit. When \kbd{newdata} is present and
#' contains missing values, how the missing values will be dealt with
#' is determined by the \kbd{na.action} argument. In this case, if
#' \kbd{na.action = na.omit}, omitted cases will not appear, whereas
#' if \kbd{na.action = na.exclude}, omitted cases will appear (in
#' predictions and standard errors) with value \kbd{NA}.
#'
#' @return If \kbd{se = FALSE}, a vector or matrix of predictions. If
#' \kbd{se = TRUE}, a list containing:
#' \item{fit}{predictions.}
#' \item{se.fit}{estimated standard errors.}
#' \item{lower}{the lower bound of the confidence interval,
#' if calculated (i.e., \kbd{conf.level} is specified).}
#' \item{upper}{the upper bound of the confidence interval,
#' if calculated (i.e., \kbd{conf.level} is specified).}
#'
#' @author Boan Zhang
#'
#' @examples
#' data(hivsurv)
#' fit1 <- gtReg(formula = groupres ~ AGE + EDUC.,
#' data = hivsurv, groupn = gnum,
#' sens = 0.9, spec = 0.9,
#' linkf = "logit", method = "V")
#' pred.data <- data.frame(AGE = c(15, 25, 30),
#' EDUC. = c(1, 3, 2))
#' predict(object = fit1, newdata = pred.data,
#' type = "link", se.fit = TRUE)
#' predict(object = fit1, newdata = pred.data,
#' type = "response", se.fit = TRUE,
#' conf.level = 0.9)
#' predict(object = fit1, type = "response",
#' se.fit = TRUE, conf.level = 0.9)
predict.gtReg <- function(object, newdata, type = c("link", "response"),
se.fit = FALSE, conf.level = NULL,
na.action = na.pass, ...) {
tt <- terms(object)
Terms <- delete.response(tt)
if (missing(newdata) || is.null(newdata)) {
m <- model.frame(object)
newd <- model.matrix(Terms, m)
}
else {
m <- model.frame(Terms, newdata, na.action = na.action)
newd <- model.matrix(Terms, m)
}
type <- match.arg(type)
lin.pred <- as.vector(newd %*% object$coefficients)
link <- object$link
res <- switch(link, logit = plogis(lin.pred), probit = pnorm(lin.pred),
cloglog = 1 - exp(-exp(lin.pred)))
if (type == "response")
pred <- res
else pred <- lin.pred
if (se.fit) {
cov <- solve(object$hessian)
var.lin.pred <- diag(newd %*% cov %*% t(newd))
var.res <- switch(link, logit = exp(2 * lin.pred) / (1 + exp(lin.pred))^4,
probit = dnorm(lin.pred)^2,
cloglog = (exp(-exp(lin.pred)) *
exp(lin.pred))^2) * var.lin.pred
if (type == "response")
se <- sqrt(var.res)
else se <- sqrt(var.lin.pred)
if (!is.null(conf.level)) {
alpha <- 1 - conf.level
lower <- lin.pred - qnorm(1 - alpha / 2) * sqrt(var.lin.pred)
upper <- lin.pred + qnorm(1 - alpha / 2) * sqrt(var.lin.pred)
res.lower <- switch(link, logit = plogis(lower),
probit = pnorm(lower),
cloglog = 1 - exp(-exp(lower)))
res.upper <- switch(link, logit = plogis(upper),
probit = pnorm(upper),
cloglog = 1 - exp(-exp(upper)))
if (type == "response") {
lwr <- res.lower
upr <- res.upper
}
else {
lwr <- lower
upr <- upper
}
}
}
names(pred) <- 1:length(lin.pred)
if (!is.null(conf.level)) {
out <- list(fit = pred, se.fit = se, lower = lwr, upper = upr)
class(out) <- c("predict.gtReg", "list")
}
else if (se.fit) {
out <- list(fit = pred, se.fit = se)
class(out) <- c("predict.gtReg", "list")
}
else {
out <- pred
class(out) <- c("predict.gtReg", "numeric")
}
out
}
# Print method for predict function
##################################################################
# print.predict.gtReg() function #
##################################################################
#' @title Print method for \kbd{predict.gtReg}
#'
#' @description Print method for objects obtained by
#' \code{\link{predict.gtReg}}.
#'
#' @param x An object of class "predict.gtReg" created by
#' \code{\link{predict.gtReg}}.
#' @param digits digits for rounding.
#' @param ... not currently used.
#'
#' @return A matrix of predictions with rows corresponding to each observation
#' in \kbd{newdata} (if provided) or each observation in the data set used
#' for the fit. The columns correspond to the predictions (\kbd{fit}), the
#' estimated standard errors (\kbd{se.fit}), the lower bound of the confidence
#' interval (\kbd{lower}), and the upper bound of the confidence interval
#' (\kbd{upper}).
#' If \kbd{conf.level} is not specified, the \kbd{lower} and \kbd{upper}
#' columns will not be included. If \kbd{se = FALSE}, the \kbd{se.fit} column
#' will not be included.
#'
#' @author Brianna Hitt
print.predict.gtReg <- function(x, digits = max(3, getOption("digits") - 3),
...) {
if (inherits(x, "list")) {
if (length(x) == 4) {
print(data.frame(fit = signif(x$fit, digits),
se.fit = signif(x$se.fit, digits),
lower = signif(x$lower, digits),
upper = signif(x$upper, digits)))
}
else if (length(x) == 2) {
print(data.frame(fit = signif(x$fit, digits),
se.fit = signif(x$se.fit, digits)))
}
}
else {
print(data.frame(fit = signif(as.vector(x), digits)))
}
}
#
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.