# R/gtReg.R

##################################################################
# gtreg.sp() function                                            #
##################################################################
# This is the gtreg() function written by Boan Zhang

gtreg.sp <- function(formula, data, groupn, retest = NULL, sens = 1,
                     spec = 1, linkf = c("logit", "probit", "cloglog"),
                     method = c("Vansteelandt", "Xie"), sens.ind = NULL,
                     spec.ind = NULL, start = NULL,
                     control = gtRegControl(...), ...) {

  call <- match.call()
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "groupn"), names(mf), 0)
  mf <- mf[c(1, m)]
  mf$drop.unused.levels <- TRUE
  mf[[1]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())
  mt <- attr(mf, "terms")
  gr <- model.extract(mf, "groupn")
  if (!is.na(pos <- match(deparse(substitute(retest)), names(data))))
    retest <- data[, pos]
  Y <- model.response(mf, "any")
  if (length(dim(Y)) == 1) {
    nm <- rownames(Y)
    dim(Y) <- NULL
    if (!is.null(nm))
      names(Y) <- nm
  }
  X <- if (!is.empty.model(mt))
    model.matrix(mt, mf)
  else matrix(, NROW(Y), 0)
  linkf <- match.arg(linkf)
  if ((method <- match.arg(method)) == "Vansteelandt") {
    if (!is.null(retest))
      warning("Retests cannot be used with Vansteelandt's method.")
    fit <- gtreg.fit(Y, X, gr, sens, spec, linkf, start)
  }
  else {
    if (is.null(retest))
      fit <- EM(Y, X, gr, sens, spec, linkf, start, control)
    else fit <-  EM.ret(Y, X, gr, retest, sens, spec, linkf,
                        sens.ind, spec.ind, start, control)
  }
  fit <- c(fit, list(call = call, formula = formula, method = method,
                     link = linkf, terms = mt))
  class(fit) <- "gt"
  fit

}




##################################################################
# gtreg.halving() function                                       #
##################################################################

gtreg.halving <- function(formula, data, groupn, subg, retest, sens = 1,
                          spec = 1, linkf = c("logit", "probit", "cloglog"),
                          sens.ind = NULL, spec.ind = NULL, start = NULL,
                          control = gtRegControl(...), ...) {

  call <- match.call()
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "groupn"), names(mf), 0)
  mf <- mf[c(1, m)]
  mf$drop.unused.levels <- TRUE
  mf[[1]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())
  mt <- attr(mf, "terms")
  gr <- model.extract(mf, "groupn")
  if (!is.na(pos <- match(deparse(substitute(retest)), names(data))))
    retest <- data[, pos]
  if (!is.na(pos <- match(deparse(substitute(subg)), names(data))))
    subg <- data[, pos]
  Y <- model.response(mf, "any")
  if (length(dim(Y)) == 1) {
    nm <- rownames(Y)
    dim(Y) <- NULL
    if (!is.null(nm))
      names(Y) <- nm
  }
  X <- if (!is.empty.model(mt))
    model.matrix(mt, mf)
  else matrix(, NROW(Y), 0)
  linkf <- match.arg(linkf)
  fit <-  EM.halving(Y, X, gr, subg, retest, sens, spec, linkf,
                     sens.ind, spec.ind, start, control)
  fit <- c(fit, list(call = call, formula = formula, method = "Xie",
                     link = linkf, terms = mt))
  class(fit) <- "gt"
  fit

}




##################################################################
# gtreg.mp() function                                            #
##################################################################
# "mp" refers to matrix pooling, another name for array testing

gtreg.mp <- function(formula, data, coln, rown, arrayn, retest = NULL,
                     sens = 1, spec = 1,
                     linkf = c("logit", "probit", "cloglog"),
                     sens.ind = NULL, spec.ind = NULL, start = NULL,
                     control = gtRegControl(...), ...) {
  call <- match.call()
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "coln", "rown",
               "arrayn"), names(mf), 0)
  mf <- mf[c(1, m)]
  mf$drop.unused.levels <- TRUE
  mf[[1]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())
  mt <- attr(mf, "terms")
  arrayn <- model.extract(mf, "arrayn")
  rown <- model.extract(mf, "rown")
  coln <- model.extract(mf, "coln")
  if (!is.na(pos <- match(deparse(substitute(retest)), names(data))))
    retest <- data[, pos]
  Y <- model.response(mf, "any")
  if (length(dim(Y)) == 1) {
    nm <- rownames(Y)
    dim(Y) <- NULL
    if (!is.null(nm))
      names(Y) <- nm
  }
  X <- if (!is.empty.model(mt))
    model.matrix(mt, mf)
  else matrix(, NROW(Y), 0)
  linkf <- match.arg(linkf)
  fit <- EM.mp(Y[, 1], Y[, 2], X, coln, rown, arrayn, retest,
               sens, spec, linkf, sens.ind, spec.ind, start, control)
  fit <- c(fit, list(call = call, formula = formula, link = linkf,
                     terms = mt))
  class(fit) <- c("gt.mp", "gt")
  fit
}




# Supporting functions
##################################################################
# gtreg.fit() function                                           #
##################################################################
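# Vansteelandt's method: directly maximizes the likelihood of the observed
# group responses with optim() (BFGS when there is a single coefficient,
# Nelder-Mead otherwise, and SANN as a fallback if the Hessian is singular).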

gtreg.fit <- function(Y, X, groupn, sens, spec, linkf, start = NULL){
  z <- tapply(Y, groupn, tail, n = 1)
  num.g <- max(groupn)
  K <- ncol(X)
  sam <- length(Y)
  if (is.null(start)) {
    if (K == 1) {
      cova.mean <- as.matrix(tapply(X, groupn, mean))
      optim.meth <- "BFGS"
    }
    else {
      temp <- by(X, groupn, colMeans)
      cova.mean <- do.call(rbind, temp)
      optim.meth <- "Nelder-Mead"
    }
    beta.group <- glm.fit(cova.mean, as.vector(z),
                          family = binomial(link = linkf))$coefficients
  }
  else {
    beta.group <- start
    names(beta.group) <- dimnames(X)[[2]]
    optim.meth <- ifelse(K == 1, "BFGS", "Nelder-Mead")
  }
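  # Negative log-likelihood of the observed group responses z: a group tests
  # positive with probability sens + (1 - sens - spec) * prod(1 - pijk) over
  # the individuals in the group, where pijk is each individual's response
  # probability under the chosen link.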
  logL <- function(beta) {
    pijk <- switch(linkf, logit = plogis(X %*% beta),
                   probit = pnorm(X %*% beta),
                   cloglog = 1 - exp(-exp(X %*% beta)))
    prodp <- tapply(1 - pijk, groupn, prod)
    (-1)*sum(z * log(sens + (1 - sens - spec) * prodp) +
               (1 - z) * log(1 - sens - (1 - sens - spec) * prodp))
  }
  mod.fit <- optim(par = beta.group, fn = logL, method = optim.meth,
                   control = list(trace = 0, maxit = 1000), hessian = TRUE)
  if (det(mod.fit$hessian) == 0)
    mod.fit <- optim(par = beta.group, fn = logL, method = "SANN",
                     hessian = TRUE)
  logL0 <- function(beta) {
    inter <- rep(beta, sam)
    pijk <- switch(linkf, logit = plogis(inter), probit = pnorm(inter),
                   cloglog = 1 - exp(-exp(inter)))
    prodp <- tapply(1 - pijk, groupn, prod)
    (-1)*sum(z * log(sens + (1 - sens - spec) * prodp) +
               (1 - z) * log(1 - sens - (1 - sens - spec) * prodp))
  }
  mod.fit0 <- optim(par = binomial()$linkfun(mean(z)), fn = logL0,
                    method = "BFGS", control = list(trace = 0, maxit = 1000))
  nulld <- 2 * mod.fit0$value
  residd <- 2 * mod.fit$value
  xib <- X %*% mod.fit$par
  pijk <- switch(linkf, logit = plogis(xib), probit = pnorm(xib),
                 cloglog = 1 - exp(-exp(xib)))
  prodp <- tapply(1 - pijk, groupn, prod)
  zhat <- sens + (1 - sens - spec) * prodp
  residual <- z - zhat
  aic <- residd + 2 * K
  counts <- mod.fit$counts[[1]]
  if (mod.fit$convergence != 0)
    warning("Maximum number of iterations exceeded.")
  list(coefficients = mod.fit$par, hessian = mod.fit$hessian,
       fitted.values = zhat, deviance = residd, df.residual = num.g - K,
       null.deviance = nulld, df.null = num.g - 1, aic = aic,
       counts = counts, residuals = residual, z = z)
}




##################################################################
# gtRegControl() function                                        #
##################################################################

#' @title Auxiliary for controlling group testing regression
#'
#' @description Auxiliary function to control fitting parameters
#' of the EM algorithm used internally in \code{\link{gtReg}}
#' for simple pooling (\kbd{type = "sp"}) with \kbd{method = "Xie"}
#' or for array testing (\kbd{type = "array"}).
#'
#' @param tol the convergence criterion. The default is 0.0001.
#' @param n.gibbs the Gibbs sample size to be used in each E step
#' of the EM algorithm, for array testing. The default is 1000.
#' @param n.burnin the number of samples in the burn-in period,
#' for array testing. The default is 20.
#' @param maxit the maximum number of iterations in the EM algorithm.
#' The default is 500.
#' @param trace a logical value indicating whether the output should
#' be printed for each iteration. The default is \kbd{FALSE}.
#' @param time a logical value indicating whether the length of time
#' for the model fitting should be printed. The default is \kbd{TRUE}.
#'
#' @return A list with components named as the input arguments.
#'
#' @author This function was originally written as the \code{gt.control}
#' function for the binGroup package. Minor modifications have been
#' made for inclusion in the binGroup2 package.
#'
#' @examples
#' # The default settings:
#' gtRegControl()
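#'
#' # A looser convergence criterion with iteration tracing:
#' gtRegControl(tol = 0.001, trace = TRUE)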

gtRegControl <- function(tol = 0.0001, n.gibbs = 1000, n.burnin = 20,
                         maxit = 500, trace = FALSE, time = TRUE) {
  if (!is.numeric(tol) || tol <= 0)
    stop("value of 'tol' must be > 0")
  if (round(n.gibbs) != n.gibbs || n.gibbs <= 0)
    stop("value of 'n.gibbs' must be a positive integer")
  if (round(n.burnin) != n.burnin || n.burnin <= 0)
    stop("value of 'n.burnin' must be a positive integer")
  if (!is.numeric(maxit) || maxit <= 0)
    stop("maximum number of iterations must be > 0")
  list(tol = tol, n.gibbs = n.gibbs, n.burnin = n.burnin, maxit = maxit,
       trace = trace, time = time)
}
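
# A usage sketch (illustration only, mirroring the hivsurv example in the
# gtReg documentation): control parameters can be supplied either through
# gtReg's "..." argument or via an explicit gtRegControl() call, e.g.
#   gtReg(type = "sp", formula = groupres ~ AGE + EDUC., data = hivsurv,
#         groupn = gnum, sens = 0.9, spec = 0.9, method = "Xie",
#         control = gtRegControl(tol = 0.001, trace = TRUE))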




##################################################################
# EM() function                                                  #
##################################################################
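# Xie's EM algorithm for simple pooling without retests. Each E step computes
# the conditional expectation of every unobserved individual response given
# its group's test result; each M step updates the coefficients with glm.fit.
# The Hessian of the log-likelihood is assembled after convergence.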
EM <- function(Y, X, groupn, sens, spec, linkf, start = NULL,
               control = gtRegControl()) {
  if (control$time)
    start.time <- proc.time()
  z <- tapply(Y, groupn, tail, n = 1)
  num.g <- max(groupn)
  K <- ncol(X)
  if (is.null(start)) {
    if (K == 1)
      cova.mean <- as.matrix(tapply(X, groupn, mean))
    else {
      temp <- by(X, groupn, colMeans)
      cova.mean <- do.call(rbind, temp)
    }
    beta.old <- lm.fit(cova.mean, z)$coefficients
  }
  else beta.old <- start
  sam <- length(Y)
  vec <- 1:sam
  group.sizes <- tapply(Y, groupn, length)
  diff <- 1
  counts <- 1
  extra.loop <- FALSE
  next.loop <- TRUE
  while (next.loop) {
    xib <- X %*% beta.old
    pijk <- switch(linkf, logit = plogis(xib),
                   probit = pnorm(xib), cloglog = 1 - exp(-exp(xib)))
    prodp <- tapply(1 - pijk, groupn, prod)
    den <- rep((1 - spec) * prodp + sens * (1 - prodp), group.sizes)
    den2 <- rep(spec * prodp + (1 - sens) * (1 - prodp), group.sizes)
    expect <- rep(NA, times = sam)
    for (i in vec) {
      if (Y[i] == 0)
        expect[i] <- (1 - sens) * pijk[i] / den2[i]
      else expect[i] <- sens * pijk[i] / den[i]
    }
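    # M step: refit the working binomial GLM to the conditional expectations.
    # Warnings about non-integer successes are expected here (the responses
    # are expectations rather than 0/1 outcomes) and are muffled. The final
    # pass (extra.loop) only refreshes the E-step quantities for the Hessian.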
    if (!extra.loop) {
      suppress <- function(w)
        if (any(grepl("non-integer #successes in a binomial glm", w)))
          invokeRestart("muffleWarning")
      mod.fit <- withCallingHandlers(glm.fit(X, expect,
                                             family = binomial(link = linkf)),
                                     warning = suppress)
      diff <- max(abs((beta.old - mod.fit$coefficients) / beta.old))
      beta.old <- mod.fit$coefficients
      if (control$trace)
        cat("beta is", beta.old, "\tdiff is", diff, "\n")
      counts <- counts + 1
      if (diff <= control$tol || counts > control$maxit)
        extra.loop <- TRUE
    }
    else next.loop <- FALSE
  }
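  # Build the Hessian of the log-likelihood: pt1 and pt2 carry link-function
  # derivative terms, and b accumulates within-group cross products weighted
  # by conditional (co)variance-type terms for the individual latent responses.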
  erf <- 2 * pijk - 1
  pt1 <- switch(linkf, logit = -exp(xib) / (1 + exp(xib))^2,
                probit = sqrt(2) * xib * exp(-xib^2 / 2) /
                  (sqrt(pi) * (1 - erf)) - 2 * exp(-xib^2) /
                  (pi * (1 - erf)^2), cloglog = -exp(xib))
  pt2 <- switch(linkf, logit = 0,
                probit = (8 * exp(-xib^2 / 2) * erf +
                            2 * xib * sqrt(2 * pi) * erf^2 -
                            2 * xib * sqrt(2 * pi)) * exp(-xib^2 / 2) /
                  ((1 + erf)^2 * pi * (1 - erf)^2),
                cloglog = -(exp(xib - exp(xib)) + exp(2 * xib - exp(xib)) -
                              exp(xib)) / (exp(-exp(xib)) - 1)^2)
  nm <- pt1 + expect * pt2
  sign1 <- as.vector(sign(nm))
  nn <- as.vector(sqrt(abs(nm)))
  x2 <- X * nn
  m <- (t(x2) %*% (sign1 * x2))
  b <- array(NA, c(K, K, sum(group.sizes^2)))
  p <- 1
  for (i in vec) for (j in vec[groupn == groupn[i]]) {
    wii <- ifelse(i == j, expect[i] - expect[i]^2,
                  expect[i] * (pijk[j] - expect[j]))
    coe <- switch(linkf, logit = 1,
                  probit = 8 * exp(-(xib[i]^2 + xib[j]^2) / 2) /
                    ((1 - erf[i]^2) * (1 - erf[j]^2) * pi),
                  cloglog = exp(xib[i] + xib[j]) /
                    ((exp(-exp(xib[i])) - 1) * (exp(-exp(xib[j])) - 1)))
    b[, , p] <- wii * coe * X[i, ] %*% t(X[j, ])
    p <- p + 1
  }
  m1 <- apply(b, c(1, 2), sum)
  H <- -(m + m1)
  zhat <- sens + (1 - sens - spec) * prodp
  residual <- z - zhat
  residd <- -2 * sum(z * log(zhat) + (1 - z) * log(1 - zhat))
  logL0 <- function(beta) {
    inter <- rep(beta, sam)
    pijk <- switch(linkf, logit = plogis(inter), probit = pnorm(inter),
                   cloglog = 1 - exp(-exp(inter)))
    prodp <- tapply(1 - pijk, groupn, prod)
    (-1)*sum(z * log(sens + (1 - sens - spec) * prodp) +
               (1 - z) * log(1 - sens - (1 - sens - spec) * prodp))
  }
  mod.fit0 <- optim(par = binomial()$linkfun(mean(z)), fn = logL0,
                    method = "BFGS", control = list(trace = 0, maxit = 1000))
  nulld <- 2 * mod.fit0$value
  aic <- residd + 2 * K
  if (diff > control$tol && counts > control$maxit)
    warning("EM algorithm did not converge.")
  if (control$time) {
    end.time <- proc.time()
    save.time <- end.time - start.time
    cat("\n Number of minutes running:", round(save.time[3] / 60, 2), "\n \n")
  }
  list(coefficients = beta.old, hessian = H, fitted.values = zhat,
       deviance = residd, df.residual = num.g - K, null.deviance = nulld,
       df.null = num.g - 1, aic = aic, counts = counts - 1,
       residuals = residual, z = z)
}




##################################################################
# EM.ret() function                                              #
##################################################################
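# Xie's EM algorithm for simple pooling when individual retests are available
# for positive groups. Retest results enter the E step through a0 and a1,
# which encode the individual-test sensitivity (sens.ind) and specificity
# (spec.ind).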

EM.ret <- function(Y, X, groupn, ret, sens, spec, linkf,
                   sens.ind, spec.ind,
                   start = NULL, control = gtRegControl()) {
  if (control$time)
    start.time <- proc.time()
  if (is.null(sens.ind))
    sens.ind <- sens
  if (is.null(spec.ind))
    spec.ind <- spec
  z <- tapply(Y, groupn, tail, n = 1)
  num.g <- max(groupn)
  K <- ncol(X)
  if (is.null(start)) {
    if (K == 1)
      cova.mean <- as.matrix(tapply(X, groupn, mean))
    else {
      temp <- by(X, groupn, colMeans)
      cova.mean <- do.call(rbind, temp)
    }
    beta.old <- lm.fit(cova.mean, z)$coefficients
  }
  else beta.old <- start
  sam <- length(Y)
  vec <- 1:sam
  group.sizes <- tapply(Y, groupn, length)
  diff <- 1
  counts <- 1
  extra.loop <- FALSE
  next.loop <- TRUE
  a0 <- ifelse(ret == 1, sens.ind, 1 - sens.ind)
  a1 <- ifelse(ret == 0, spec.ind, 1 - spec.ind)
  while (next.loop) {
    xib <- X %*% beta.old
    pijk <- switch(linkf, logit = plogis(xib),
                   probit = pnorm(xib), cloglog = 1 - exp(-exp(xib)))
    erf <- 2 * pijk - 1
    prodp <- tapply(1 - pijk, groupn, prod)
    den2 <- rep(spec * prodp + (1 - sens) * (1 - prodp),
                group.sizes)
    expect <- rep(NA, times = sam)
    i <- 1
    while (i <= sam) {
      if (Y[i] == 0)
        expect[i] <- (1 - sens) * pijk[i] / den2[i]
      else {
        vec1 <- vec[groupn == groupn[i]]
        mb2 <- 1
        for (l in vec1) {
          temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
          mb2 <- mb2 * temp
        }
        null <- 1
        for (l in vec1) {
          temp <- a1[l] * (1 - pijk[l])
          null <- null * temp
        }
        den <- mb2 * sens + null * (1 - sens - spec)
        for (l1 in vec1) {
          temp <- a0[l1] * pijk[l1] + a1[l1] * (1 - pijk[l1])
          num <- mb2 / temp * a0[l1] * pijk[l1] * sens
          expect[l1] <- num / den
        }
        i <- l1
      }
      i <- i + 1
    }
    expect[expect > 1] <- 1
    expect[expect < 0] <- 0
    if (!extra.loop) {
      suppress <- function(w)
        if (any(grepl("non-integer #successes in a binomial glm", w)))
          invokeRestart("muffleWarning")
      mod.fit <- withCallingHandlers(glm.fit(X, expect,
                                             family = binomial(link = linkf)),
                                     warning = suppress)
      diff <- max(abs((beta.old - mod.fit$coefficients) / beta.old))
      beta.old <- mod.fit$coefficients
      if (control$trace)
        cat("beta is", beta.old, "\tdiff is", diff, "\n")
      counts <- counts + 1
      if (diff <= control$tol || counts > control$maxit)
        extra.loop <- TRUE
    }
    else next.loop <- FALSE
  }
  pt1 <- switch(linkf, logit = -exp(xib) / (1 + exp(xib))^2,
                probit = sqrt(2) * xib * exp(-xib^2 / 2) /
                  (sqrt(pi) * (1 - erf)) - 2 * exp(-xib^2) /
                  (pi * (1 - erf)^2), cloglog = -exp(xib))
  pt2 <- switch(linkf, logit = 0,
                probit = (8 * exp(-xib^2 / 2) * erf +
                            2 * xib * sqrt(2 * pi) * erf^2 -
                            2 * xib * sqrt(2 * pi)) * exp(-xib^2 / 2) /
                  ((1 + erf)^2 * pi * (1 - erf)^2),
                cloglog = -(exp(xib - exp(xib)) + exp(2 * xib - exp(xib)) -
                              exp(xib)) / (exp(-exp(xib)) - 1)^2)
  nm <- pt1 + expect * pt2
  sign1 <- as.vector(sign(nm))
  nn <- as.vector(sqrt(abs(nm)))
  x2 <- X * nn
  m <- (t(x2) %*% (sign1 * x2))
  m1 <- 0
  for (i in vec) {
    vec1 <- vec[groupn == groupn[i]]
    if (Y[i] == 0) {
      for (j in vec1) {
        coe <- switch(linkf, logit = 1,
                      probit = 8 * exp(-(xib[i]^2 + xib[j]^2) / 2) /
                        ((1 - erf[i]^2) * (1 - erf[j]^2) * pi),
                      cloglog = exp(xib[i] + xib[j]) /
                        ((exp(-exp(xib[i])) - 1) * (exp(-exp(xib[j])) - 1)))
        wii <- ifelse(i == j, expect[i] - expect[i]^2,
                      expect[i] * (pijk[j] - expect[j]))
        tim <- wii * coe * X[i, ] %*% t(X[j, ])
        m1 <- m1 + tim
      }
    }
    else {
      for (j in vec1) {
        temp <- a0[j] * pijk[j] + a1[j] * (1 - pijk[j])
        eii <- expect[i] / temp * a0[j] * pijk[j]
        wii <- ifelse(i == j, expect[i] - expect[i]^2,
                      eii - expect[i] * expect[j])
        coe <- switch(linkf, logit = 1,
                      probit = 8 * exp(-(xib[i]^2 + xib[j]^2) / 2) /
                        ((1 - erf[i]^2) * (1 - erf[j]^2) * pi),
                      cloglog = exp(xib[i] + xib[j]) /
                        ((exp(-exp(xib[i])) - 1) * (exp(-exp(xib[j])) - 1)))
        tim <- wii * coe * X[i, ] %*% t(X[j, ])
        m1 <- m1 + tim
      }
    }
  }
  H <- -(m + m1)
  zhat <- sens + (1 - sens - spec) * prodp
  residual <- z - zhat
  logl <- 0
  for (grn in 1:num.g) {
    if (z[grn] == 1) {
      vec1 <- vec[groupn == grn]
      mb2 <- 1
      for (l in vec1) {
        temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
        mb2 <- mb2 * temp
      }
      null <- 1
      for (l in vec1) {
        temp <- a1[l] * (1 - pijk[l])
        null <- null * temp
      }
      prob1 <- mb2 * sens + null * (1 - sens - spec)
    } else prob1 <- 1 - zhat[grn]
    logl <- logl - log(prob1)
  }
  aic <- 2 * logl + 2 * K
  if (diff > control$tol && counts > control$maxit)
    warning("EM algorithm did not converge.")
  if (control$time) {
    end.time <- proc.time()
    save.time <- end.time - start.time
    cat("\n Number of minutes running:", round(save.time[3] / 60, 2), "\n \n")
  }
  list(coefficients = beta.old, hessian = H, fitted.values = zhat,
       deviance = 2 * logl, aic = aic, counts = counts - 1,
       residuals = residual, z = z)
}




##################################################################
# EM.mp() function                                               #
##################################################################
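# Xie's EM algorithm for array testing (matrix pooling). Each E step uses
# Gibbs sampling (control$n.gibbs draws after control$n.burnin burn-in draws)
# to approximate the conditional expectations of the latent individual
# responses given the row, column, and retest results; each M step calls
# glm.fit.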

EM.mp <- function(col.resp, row.resp, X, coln, rown, sqn, ret, sens,
                  spec, linkf, sens.ind, spec.ind, start = NULL,
                  control = gtRegControl()) {
  if (control$time)
    start.time <- proc.time()
  if (is.null(sens.ind))
    sens.ind <- sens
  if (is.null(spec.ind))
    spec.ind <- spec
  len <- max(sqn)
  diff <- 1
  counts <- 1
  sam <- length(sqn)
  col.groupn <- coln[sqn == 1]
  if (len > 1) {
    for (i in 2:len) {
      temp <- max(col.groupn) + coln[sqn == i]
      col.groupn <- c(col.groupn, temp)
    }
  }
  if (is.null(start)) {
    mod.fit <- try(gtreg.fit(col.resp, X, col.groupn,
                             sens, spec, linkf))
    if (inherits(mod.fit, "try-error")) {
      row.groupn <- rown[sqn == 1]
      if (len > 1) {
        for (i in 2:len) {
          temp <- max(row.groupn) + rown[sqn == i]
          row.groupn <- c(row.groupn, temp)
        }
      }
      mod.fit <- gtreg.fit(row.resp, X, row.groupn,
                           sens, spec, linkf)
    }
    beta.old <- mod.fit$coefficients
  }
  else beta.old <- start
  extra.loop <- FALSE
  next.loop <- TRUE
  while (next.loop) {
    xib <- X %*% beta.old
    pijk.all <- switch(linkf, logit = plogis(xib),
                       probit = pnorm(xib), cloglog = 1 - exp(-exp(xib)))
    expect.all <- numeric(0)
    mat2 <- index <- 0
    erf <- 2 * pijk.all - 1
    for (arrayn in 1:len) {
      index.r <- index.c <- vector("logical", length = sam)
      for (i in 1:sam) {
        if (rown[i] == 1 && sqn[i] == arrayn)
          index.c[i] <- TRUE
        else index.c[i] <- FALSE
        if (coln[i] == 1 && sqn[i] == arrayn)
          index.r[i] <- TRUE
        else index.r[i] <- FALSE
      }
      n.row <- max(rown[index.r])
      n.col <- max(coln[index.c])
      rowresp <- row.resp[index.r]
      colresp <- col.resp[index.c]
      index <- max(index) + 1:(n.row * n.col)
      if (!is.null(ret)) {
        re.ind <- na.omit(cbind(coln[sqn == arrayn],
                                rown[sqn == arrayn], ret[sqn == arrayn]))
        re <- ifelse(re.ind[, 3] == 1, sens.ind, 1 - sens.ind)
        re1 <- ifelse(re.ind[, 3] == 0, spec.ind, 1 - spec.ind)
      }
      pijk <- matrix(pijk.all[sqn == arrayn], nrow = n.row)
      a <- ifelse(rowresp == 1, sens, 1 - sens)
      b <- ifelse(colresp == 1, sens, 1 - sens)
      a1 <- ifelse(rowresp == 0, spec, 1 - spec)
      b1 <- ifelse(colresp == 0, spec, 1 - spec)
      mat <- array(NA, c(n.row, n.col, control$n.gibbs))
      y <- matrix(0, nrow = n.row, ncol = n.col)
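      # Gibbs sampler for the E step: sweep through the cells of this array,
      # drawing each latent individual status from its full conditional given
      # the row/column responses, any retest result, and the current draws
      # for the other cells; draws after the burn-in period are stored in mat.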
      for (k in 1:(control$n.gibbs + control$n.burnin)) {
        l <- 1
        for (j in 1:n.col) for (i in 1:n.row) {
          num <- a[i] * b[j] * pijk[i, j]
          den.r <- ifelse(sum(y[i, ]) - y[i, j] > 0,
                          a[i], a1[i])
          den.c <- ifelse(sum(y[, j]) - y[i, j] > 0,
                          b[j], b1[j])
          den2 <- den.r * den.c * (1 - pijk[i, j])
          if (!is.null(ret)) {
            if (l <= length(re) && j == re.ind[l, 1] &&
                i == re.ind[l, 2]) {
              num <- num * re[l]
              den2 <- den2 * re1[l]
              l <- l + 1
            }
          }
          den <- num + den2
          if (den != 0) {
            cond.p <- num/den
            y[i, j] <- rbinom(1, 1, cond.p)
          }
          else y[i, j] <- 0
        }
        if (k > control$n.burnin) {
          mat[, , k - control$n.burnin] <- y
          vec <- as.vector(y)
          if (extra.loop)
            for (i1 in index[vec == 1]) for (j1 in index[vec == 1]) {
              bq <- switch(linkf, logit = 1,
                           probit = 8 * exp(-(xib[i1]^2 + xib[j1]^2) / 2) /
                             ((1 - erf[i1]^2) * (1 - erf[j1]^2) * pi),
                           cloglog = exp(xib[i1] + xib[j1]) /
                             ((exp(-exp(xib[i1])) - 1) *
                                (exp(-exp(xib[j1])) - 1))) *
                X[i1, ] %*% t(X[j1, ])
              mat2 <- mat2 + bq
            }
        }
      }
      expect.m <- apply(mat, c(1, 2), mean)
      expect <- as.vector(expect.m)
      expect.all <- c(expect.all, expect)
    }
    if (!extra.loop) {
      suppress <- function(w)
        if (any(grepl("non-integer #successes in a binomial glm", w)))
          invokeRestart("muffleWarning")
      mod.fit <- withCallingHandlers(glm.fit(X, expect.all,
                                             family = binomial(link = linkf)),
                                     warning = suppress)
      diff <- max(abs((beta.old - mod.fit$coefficients) / beta.old))
      beta.old <- mod.fit$coefficients
      if (control$trace)
        cat("beta is", beta.old, "\tdiff is", diff, "\n")
      counts <- counts + 1
      if (diff <= control$tol || counts > control$maxit)
        extra.loop <- TRUE
    }
    else next.loop <- FALSE
  }
  index <- 0
  first <- mat2 / control$n.gibbs
  second <- 0
  for (arrayn in 1:len) {
    n.row <- max(rown[sqn == arrayn])
    n.col <- max(coln[sqn == arrayn])
    index <- max(index) + 1:(n.row * n.col)
    expect <- expect.all[index]
    for (i1 in index) for (j1 in index) {
      coe <- switch(linkf, logit = 1,
                    probit = 8 * exp(-(xib[i1]^2 + xib[j1]^2) / 2) /
                      ((1 - erf[i1]^2) * (1 - erf[j1]^2) * pi),
                    cloglog = exp(xib[i1] + xib[j1]) /
                      ((exp(-exp(xib[i1])) - 1) * (exp(-exp(xib[j1])) - 1)))
      tim <- expect.all[i1] * expect.all[j1] * coe * X[i1, ] %*% t(X[j1, ])
      second <- second + tim
    }
  }
  m1 <- first - second
  pt1 <- switch(linkf, logit = -exp(xib) / (1 + exp(xib))^2,
                probit = sqrt(2) * xib * exp(-xib^2 / 2) /
                  (sqrt(pi) * (1 - erf)) - 2 * exp(-xib^2) /
                  (pi * (1 - erf)^2), cloglog = -exp(xib))
  pt2 <- switch(linkf, logit = 0,
                probit = (8 * exp(-xib^2 / 2) * erf +
                            2 * xib * sqrt(2 * pi) * erf^2 -
                            2 * xib * sqrt(2 * pi)) * exp(-xib^2 / 2) /
                  ((1 + erf)^2 * pi * (1 - erf)^2),
                cloglog = -(exp(xib - exp(xib)) +
                              exp(2 * xib - exp(xib)) - exp(xib)) /
                  (exp(-exp(xib)) - 1)^2)
  nm <- pt1 + expect.all * pt2
  sign1 <- as.vector(sign(nm))
  nn <- as.vector(sqrt(abs(nm)))
  x2 <- X * nn
  m <- (t(x2) %*% (sign1 * x2))
  H <- -(m + m1)
  if (diff > control$tol && counts > control$maxit)
    warning("EM algorithm did not converge.")
  if (control$time) {
    end.time <- proc.time()
    save.time <- end.time - start.time
    cat("\n Number of minutes running:", round(save.time[3] / 60, 2), "\n \n")
  }
  list(coefficients = beta.old, hessian = H,
       Gibbs.sample.size = control$n.gibbs, counts = counts - 1)
}



##################################################################
# EM.halving() function                                          #
##################################################################
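# Xie's EM algorithm for the halving protocol. For a positive initial group,
# the E step conditions on the two subgroup responses (subg) and on any
# individual retests (ret) within positive subgroups; each M step calls
# glm.fit.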

EM.halving <- function(Y, X, groupn, subg, ret, sens, spec, linkf,
                       sens.ind, spec.ind,
                       start = NULL, control = gtRegControl()) {
  if (control$time)
    start.time <- proc.time()
  if (is.null(sens.ind))
    sens.ind <- sens
  if (is.null(spec.ind))
    spec.ind <- spec
  z <- tapply(Y, groupn, tail, n = 1)
  num.g <- max(groupn)
  K <- ncol(X)
  if (is.null(start)) {
    if (K == 1)
      cova.mean <- as.matrix(tapply(X, groupn, mean))
    else {
      temp <- by(X, groupn, colMeans)
      cova.mean <- do.call(rbind, temp)
    }
    beta.old <- lm.fit(cova.mean, z)$coefficients
  } else beta.old <- start
  sam <- length(Y)
  vec <- 1:sam
  group.sizes <- tapply(Y, groupn, length)
  diff <- 1
  counts <- 1
  extra.loop <- FALSE
  next.loop <- TRUE
  a0 <- ifelse(ret == 1, sens.ind, 1 - sens.ind)
  a1 <- ifelse(ret == 0, spec.ind, 1 - spec.ind)
  while (next.loop) {
    xib <- X %*% beta.old
    pijk <- switch(linkf, logit = plogis(xib),
                   probit = pnorm(xib),
                   cloglog = 1 - exp(-exp(xib)))
    erf <- 2 * pijk - 1
    prodp <- tapply(1 - pijk, groupn, prod)
    den2 <- rep(spec * prodp + (1 - sens) * (1 - prodp),
                group.sizes)
    expect <- rep(NA, times = sam)
    i <- 1
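    # E step: individuals in negative initial groups use the simple-pooling
    # expectation; for positive initial groups, the expectation conditions on
    # the two subgroup responses (via subg) and on any individual retests
    # (via a0/a1), with separate cases for each combination of subgroup outcomes.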
    while (i <= sam) {
      if (Y[i] == 0)
        expect[i] <- (1 - sens) * pijk[i] / den2[i]
      else {
        if (subg[i] == 0) {
          vec1 <- vec[groupn == groupn[i]]
          gs <- length(vec1)
          sub1 <- vec1[1:ceiling(gs / 2)]
          sub2 <- vec1[(ceiling(gs / 2) + 1):gs]
          if (subg[vec1[gs]] == 0) {
            den <- (1 - spec) * spec^2 * prod(1 - pijk[sub1]) *
              prod(1 - pijk[sub2]) +
              spec * (1 - sens) * sens * prod(1 - pijk[sub1]) *
              (1 - prod(1 - pijk[sub2])) +
              spec * (1 - sens) * sens * prod(1 - pijk[sub2]) *
              (1 - prod(1 - pijk[sub1])) +
              (1 - sens)^2 * sens * (1 - prod(1 - pijk[sub1])) *
              (1 - prod(1 - pijk[sub2]))
            ab1 <- (1 - sens) * sens *
              (spec * prod(1 - pijk[sub2]) +
                 (1 - sens) * (1 - prod(1 - pijk[sub2])))
            ab2 <- (1 - sens) * sens *
              (spec * prod(1 - pijk[sub1]) +
                 (1 - sens) * (1 - prod(1 - pijk[sub1])))
            for (l1 in sub1) {
              expect[l1] <- ab1 * pijk[l1] / den
            }
            for (l1 in sub2) {
              expect[l1] <- ab2 * pijk[l1] / den
            }
          }
          if (subg[vec1[gs]] == 1) {
            mb2 <- 1
            for (l in sub2) {
              temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
              mb2 <- mb2 * temp
            }
            null <- 1
            for (l in sub2) {
              temp <- a1[l] * (1 - pijk[l])
              null <- null * temp
            }
            den <- (1 - spec)^2 * spec * null * prod(1 - pijk[sub1]) +
              (1 - spec) * (1 - sens) * sens * null *
              (1 - prod(1 - pijk[sub1])) +
              spec * sens^2 * (mb2 - null) * prod(1 - pijk[sub1]) +
              (1 - sens) * sens^2 * (mb2 - null) * (1 - prod(1 - pijk[sub1]))
            ab1 <- (1 - sens) * sens * (mb2 * sens + null * (1 - sens - spec))
            for (l1 in sub1) {
              expect[l1] <- ab1 * pijk[l1] / den
            }
            for (l1 in sub2) {
              temp <- a0[l1] * pijk[l1] + a1[l1] * (1 - pijk[l1])
              num <- mb2 / temp * a0[l1] * pijk[l1] * sens^2 *
                (spec * prod(1 - pijk[sub1]) +
                   (1 - sens) * (1 - prod(1 - pijk[sub1])))
              expect[l1] <- num / den
            }
          }
          i <- l1
        } else {
          vec1 <- vec[groupn == groupn[i]]
          gs <- length(vec1)
          sub1 <- vec1[1:ceiling(gs / 2)]
          sub2 <- vec1[(ceiling(gs / 2) + 1):gs]
          if (subg[vec1[gs]] == 0) {
            mb2 <- 1
            for (l in sub1) {
              temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
              mb2 <- mb2 * temp
            }
            null <- 1
            for (l in sub1) {
              temp <- a1[l] * (1 - pijk[l])
              null <- null * temp
            }
            den <- (1 - spec)^2 * spec * null * prod(1 - pijk[sub2]) +
              (1 - spec) * (1 - sens) * sens * null *
              (1 - prod(1 - pijk[sub2])) +
              spec * sens^2 * (mb2 - null) * prod(1 - pijk[sub2]) +
              (1 - sens) * sens^2 * (mb2 - null) * (1 - prod(1 - pijk[sub2]))
            ab1 <- (1 - sens) * sens * (mb2 * sens + null * (1 - sens - spec))
            for (l1 in sub1) {
              temp <- a0[l1] * pijk[l1] + a1[l1] * (1 - pijk[l1])
              num <- mb2 / temp * a0[l1] * pijk[l1] * sens^2 *
                (spec * prod(1 - pijk[sub2]) +
                   (1 - sens) * (1 - prod(1 - pijk[sub2])))
              expect[l1] <- num / den
            }
            for (l1 in sub2) {
              expect[l1] <- ab1 * pijk[l1] / den
            }
          }
          if (subg[vec1[gs]] == 1) {
            mb2 <- 1
            for (l in sub1) {
              temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
              mb2 <- mb2 * temp
            }
            null <- 1
            for (l in sub1) {
              temp <- a1[l] * (1 - pijk[l])
              null <- null * temp
            }
            mb2a <- 1
            for (l in sub2) {
              temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
              mb2a <- mb2a * temp
            }
            nulla <- 1
            for (l in sub2) {
              temp <- a1[l] * (1 - pijk[l])
              nulla <- nulla * temp
            }
            den <- (1 - spec)^3 * null * nulla +
              (1 - spec) * sens^2 * null * (mb2a - nulla) +
              (1 - spec) * sens^2 * (mb2 - null) * nulla +
              sens^3 * (mb2 - null) * (mb2a - nulla)
            for (l1 in sub1) {
              temp <- a0[l1] * pijk[l1] + a1[l1] * (1 - pijk[l1])
              num <- mb2 / temp * a0[l1] * pijk[l1] * sens^2 *
                (mb2a * sens + nulla * (1 - sens - spec))
              expect[l1] <- num / den
            }
            for (l1 in sub2) {
              temp <- a0[l1] * pijk[l1] + a1[l1] * (1 - pijk[l1])
              num <- mb2a / temp * a0[l1] * pijk[l1] * sens^2 *
                (mb2 * sens + null * (1 - sens - spec))
              expect[l1] <- num / den
            }
          }
          i <- l1
        }
      }
      i <- i + 1
    }
    expect[expect > 1] <- 1
    expect[expect < 0] <- 0
    if (!extra.loop) {
      suppress <- function(w)
        if (any(grepl("non-integer #successes in a binomial glm", w)))
          invokeRestart("muffleWarning")
      mod.fit <- withCallingHandlers(glm.fit(X, expect,
                                             family = binomial(link = linkf)),
                                     warning = suppress)
      diff <- max(abs((beta.old - mod.fit$coefficients) / beta.old))
      beta.old <- mod.fit$coefficients
      if (control$trace)
        cat("beta is", beta.old, "\tdiff is", diff, "\n")
      counts <- counts + 1
      if (diff <= control$tol || counts > control$maxit)
        extra.loop <- TRUE
    }
    else next.loop <- FALSE
  }
  pt1 <- switch(linkf, logit = -exp(xib) / (1 + exp(xib))^2,
                probit = sqrt(2) * xib * exp(-xib^2 / 2) /
                  (sqrt(pi) * (1 - erf)) - 2 * exp(-xib^2) /
                  (pi * (1 - erf)^2), cloglog = -exp(xib))
  pt2 <- switch(linkf, logit = 0,
                probit = (8 * exp(-xib^2 / 2) * erf +
                            2 * xib * sqrt(2 * pi) * erf^2 -
                            2 * xib * sqrt(2 * pi)) * exp(-xib^2 / 2) /
                  ((1 + erf)^2 * pi * (1 - erf)^2),
                cloglog = -(exp(xib - exp(xib)) + exp(2 * xib - exp(xib)) -
                              exp(xib)) / (exp(-exp(xib)) - 1)^2)
  nm <- pt1 + expect * pt2
  sign1 <- as.vector(sign(nm))
  nn <- as.vector(sqrt(abs(nm)))
  x2 <- X * nn
  m <- (t(x2) %*% (sign1 * x2))
  m1 <- 0
  i <- 1
  while (i <= sam) {
    vec1 <- vec[groupn == groupn[i]]
    gs <- length(vec1)
    if (Y[i] == 0) {
      for (j in vec1) {
        wii <- ifelse(i == j, expect[i] - expect[i]^2,
                      expect[i] * (pijk[j] - expect[j]))
        tim <- wii * X[i, ] %*% t(X[j, ])
        m1 <- m1 + tim
      }
    } else {
      sub1 <- vec1[1:ceiling(gs / 2)]
      sub2 <- vec1[(ceiling(gs / 2) + 1):gs]
      for (i in sub1) {
        for (j in sub1) {
          if (subg[j] == 0) {
            eii <- expect[i] * pijk[j]
          } else {
            temp <- a0[j] * pijk[j] + a1[j] * (1 - pijk[j])
            eii <- expect[i] / temp * a0[j] * pijk[j]
          }
          wii <- ifelse(i == j, expect[i] - expect[i]^2,
                        eii - expect[i] * expect[j])
          tim <- wii * X[i, ] %*% t(X[j, ])
          m1 <- m1 + tim
        }
        for (j in sub2) {
          if (subg[j] == 0) {
            temp <- spec * prod(1 - pijk[sub2]) +
              (1 - sens) * (1 - prod(1 - pijk[sub2]))
            eii <- expect[i] * (1 - sens) * pijk[j] / temp
          } else {
            mb2a <- 1
            for (l in sub2) {
              temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
              mb2a <- mb2a * temp
            }
            nulla <- 1
            for (l in sub2) {
              temp <- a1[l] * (1 - pijk[l])
              nulla <- nulla * temp
            }
            temp <- a0[j] * pijk[j] + a1[j] * (1 - pijk[j])
            tempa <- mb2a * sens + nulla * (1 - sens - spec)
            eii <- expect[i] / tempa * sens * a0[j] * pijk[j] * mb2a / temp
          }
          wii <- ifelse(i == j, expect[i] - expect[i]^2,
                        eii - expect[i] * expect[j])
          tim <- wii * X[i, ] %*% t(X[j, ])
          m1 <- m1 + tim
        }
      }
      for (i in sub2) {
        for (j in sub1) {
          if (subg[j] == 0) {
            temp <- spec * prod(1 - pijk[sub1]) +
              (1 - sens) * (1 - prod(1 - pijk[sub1]))
            eii <- expect[i] * (1 - sens) * pijk[j] / temp
          } else {
            mb2 <- 1
            for (l in sub1) {
              temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
              mb2 <- mb2 * temp
            }
            null <- 1
            for (l in sub1) {
              temp <- a1[l] * (1 - pijk[l])
              null <- null * temp
            }
            temp <- a0[j] * pijk[j] + a1[j] * (1 - pijk[j])
            tempa <- mb2 * sens + null * (1 - sens - spec)
            eii <- expect[i] / tempa * sens * a0[j] * pijk[j] * mb2 / temp
          }
          wii <- ifelse(i == j, expect[i] - expect[i]^2,
                        eii - expect[i] * expect[j])
          tim <- wii * X[i, ] %*% t(X[j, ])
          m1 <- m1 + tim
        }
        for (j in sub2) {
          if (subg[j] == 0) {
            eii <- expect[i] * pijk[j]
          } else {
            temp <- a0[j] * pijk[j] + a1[j] * (1 - pijk[j])
            eii <- expect[i] / temp * a0[j] * pijk[j]
          }
          wii <- ifelse(i == j, expect[i] - expect[i]^2,
                        eii - expect[i] * expect[j])
          tim <- wii * X[i, ] %*% t(X[j, ])
          m1 <- m1 + tim
        }
      }
    }
    i <- i + 1
  }
  H <- -(m + m1)
  zhat <- sens + (1 - sens - spec) * prodp
  residual <- z - zhat
  logl <- 0
  for (grn in 1:num.g) {
    if (z[grn] == 1) {
      vec1 <- vec[groupn == grn]
      gs <- length(vec1)
      sub1 <- vec1[1:ceiling(gs / 2)]
      sub2 <- vec1[(ceiling(gs / 2) + 1):gs]
      if (subg[vec1[1]] == 0) {
        if (subg[vec1[gs]] == 0) {
          prob1 <- (1 - spec) * spec^2 * prod(1 - pijk[sub1]) *
            prod(1 - pijk[sub2]) +
            spec * (1 - sens) * sens * prod(1 - pijk[sub1]) *
            (1 - prod(1 - pijk[sub2])) +
            spec * (1 - sens) * sens * prod(1 - pijk[sub2]) *
            (1 - prod(1 - pijk[sub1])) +
            (1 - sens)^2 * sens * (1 - prod(1 - pijk[sub1])) *
            (1 - prod(1 - pijk[sub2]))
        }
        if (subg[vec1[gs]] == 1) {
          mb2 <- 1
          for (l in sub2) {
            temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
            mb2 <- mb2 * temp
          }
          null <- 1
          for (l in sub2) {
            temp <- a1[l] * (1 - pijk[l])
            null <- null * temp
          }
          prob1 <- (1 - spec)^2 * spec * null * prod(1 - pijk[sub1]) +
            (1 - spec) * (1 - sens) * sens * null *
            (1 - prod(1 - pijk[sub1])) +
            spec * sens^2 * (mb2 - null) * prod(1 - pijk[sub1]) +
            (1 - sens) * sens^2 * (mb2 - null) * (1 - prod(1 - pijk[sub1]))
        }
      } else {
        if (subg[vec1[gs]] == 0) {
          mb2 <- 1
          for (l in sub1) {
            temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
            mb2 <- mb2 * temp
          }
          null <- 1
          for (l in sub1) {
            temp <- a1[l] * (1 - pijk[l])
            null <- null * temp
          }
          prob1 <- (1 - spec)^2 * spec * null * prod(1 - pijk[sub2]) +
            (1 - spec) * (1 - sens) * sens * null *
            (1 - prod(1 - pijk[sub2])) +
            spec * sens^2 * (mb2 - null) * prod(1 - pijk[sub2]) +
            (1 - sens) * sens^2 * (mb2 - null) * (1 - prod(1 - pijk[sub2]))
        }
        if (subg[vec1[gs]] == 1) {
          mb2 <- 1
          for (l in sub1) {
            temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
            mb2 <- mb2 * temp
          }
          null <- 1
          for (l in sub1) {
            temp <- a1[l] * (1 - pijk[l])
            null <- null * temp
          }
          mb2a <- 1
          for (l in sub2) {
            temp <- a0[l] * pijk[l] + a1[l] * (1 - pijk[l])
            mb2a <- mb2a * temp
          }
          nulla <- 1
          for (l in sub2) {
            temp <- a1[l] * (1 - pijk[l])
            nulla <- nulla * temp
          }
          prob1 <- (1 - spec)^3 * null * nulla +
            (1 - spec) * sens^2 * null * (mb2a - nulla) +
            (1 - spec) * sens^2 * (mb2 - null) * nulla +
            sens^3 * (mb2 - null) * (mb2a - nulla)
        }
      }
    } else prob1 <- 1 - zhat[grn]
    logl <- logl - log(prob1)
  }
  aic <- 2 * logl + 2 * K
  if (diff > control$tol && counts > control$maxit)
    warning("EM algorithm did not converge.")
  if (control$time) {
    end.time <- proc.time()
    save.time <- end.time - start.time
    cat("\n Number of minutes running:", round(save.time[3] / 60, 2), "\n \n")
  }
  list(coefficients = beta.old, hessian = H, fitted.values = zhat,
       deviance = 2 * logl, aic = aic,
       counts = counts - 1, residuals = residual, z = z)
}




##################################################################
# gtReg() function                                               #
##################################################################

#' @title Fitting group testing regression models
#'
#' @description Fits the group testing regression model specified
#' through a symbolic description of the linear predictor and
#' descriptions of the group testing setting. This function allows
#' for fitting regression models with simple pooling, halving, or array
#' testing data.
#'
#' @param type \kbd{"sp"} for simple pooling (Dorfman testing with
#' or without retests), \kbd{"halving"} for the halving
#' protocol, or \kbd{"array"} for array testing. See 'Details' for
#' descriptions of the group testing algorithms.
#' @param formula an object of class "formula" (or one that
#' can be coerced to that class); a symbolic description of
#' the model to be fitted. The details of model specification
#' are under 'Details'.
#' @param data an optional data frame, list, or environment
#' (or object coercible by \kbd{as.data.frame} to a data frame)
#' containing the variables in the model. If not found in data,
#' the variables are taken from \kbd{environment(formula)},
#' typically the environment from which \code{gtReg} is called.
#' @param groupn a vector, list, or data frame of the group
#' numbers that assigns individuals to groups (for use with
#' simple pooling, \kbd{type = "sp"}, or the halving protocol,
#' \kbd{type = "halving"}).
#' @param subg a vector, list, or data frame of the group numbers
#' that assigns individuals to subgroups (for use with the
#' halving protocol, \kbd{type = "halving"}).
#' @param coln a vector, list, or data frame that specifies the
#' column group number for each sample (for use with array
#' testing, \kbd{type = "array"}).
#' @param rown a vector, list, or data frame that specifies the
#' row group number for each sample (for use with array testing,
#' \kbd{type = "array"}).
#' @param arrayn a vector, list, or data frame that specifies the
#' array number for each sample (for use with array testing,
#' \kbd{type = "array"}).
#' @param retest a vector, list, or data frame of individual
#' retest results. Default value is \kbd{NULL} for no retests.
#' See 'Details' for details on how to specify \kbd{retest}.
#' @param sens sensitivity of the test. Default value is set
#' to 1.
#' @param spec specificity of the test. Default value is set
#' to 1.
#' @param linkf a character string specifying one of the three
#' link functions for a binomial model: \kbd{"logit"} (default),
#' \kbd{"probit"}, or \kbd{"cloglog"}.
#' @param method the method to fit the regression model.
#' Options include \kbd{"Vansteelandt"} (default) or \kbd{"Xie"}.
#' The \kbd{"Vansteelandt"} option finds estimates by directly
#' maximizing the likelihood function based on the group responses,
#' while the \kbd{"Xie"} option uses the EM algorithm to
#' maximize the likelihood function in terms of the unobserved
#' individual responses.
#' @param sens.ind sensitivity of the individual retests. If \kbd{NULL},
#' set to be equal to \kbd{sens}.
#' @param spec.ind specificity of the individual retests. If \kbd{NULL},
#' set to be equal to \kbd{spec}.
#' @param start starting values for the parameters in the linear
#' predictor.
#' @param control a list of parameters for controlling the fitting
#' process in method \kbd{"Xie"}. These parameters will be passed
#' to the \code{\link{gtRegControl}} function for use.
#' @param ... arguments to be passed to \code{\link{gtRegControl}} by
#' default. See argument \kbd{control}.
#'
#' @details With simple pooling and halving, a typical predictor
#' has the form \kbd{groupresp ~ covariates} where \kbd{groupresp}
#' is the (numeric) group response vector. With array testing,
#' individual samples are placed in a matrix-like grid where
#' samples are pooled within each row and within each column.
#' This leads to two kinds of group responses: row and column
#' group responses. Thus, a typical predictor has the form
#' \kbd{cbind(col.resp, row.resp) ~ covariates}, where
#' \kbd{col.resp} is the (numeric) column group response vector
#' and \kbd{row.resp} is the (numeric) row group response vector.
#' For all methods, \kbd{covariates} is a series of terms which
#' specifies a linear predictor for individual responses.
#' Note that it is actually the unobserved individual responses,
#' not the observed group responses, which are modeled by the
#' covariates. When denoting group responses (\kbd{groupresp},
#' \kbd{col.resp}, and \kbd{row.resp}), a 0 denotes a negative
#' response and a 1 denotes a positive response, where the
#' probability of an individual positive response is being
#' modeled directly.
#'
#' A terms specification of the form
#' \kbd{first + second} indicates all the terms in \kbd{first}
#' together with all the terms in \kbd{second} with duplicates
#' removed. A specification of the form \kbd{first:second}
#' indicates the set of terms obtained by taking the interactions
#' of all terms in \kbd{first} with all terms in \kbd{second}.
#' The specification \kbd{first*second} indicates the cross of
#' \kbd{first} and \kbd{second}. This is the same as \kbd{first +
#' second + first:second}. The terms in the formula will be
#' re-ordered so that main effects come first, followed by the
#' interactions, all second-order, all third-order, and so on;
#' to avoid this, pass a terms object as the formula.
#'
#' For simple pooling (\kbd{type = "sp"}), the model is fit by the
#' functions \kbd{gtreg.fit}, \kbd{EM}, and \kbd{EM.ret}; the first
#' implements Vansteelandt's method described in Vansteelandt et al. (2000),
#' and the last two implement Xie's method described in Xie (2001).
#' The \kbd{gtreg.fit} function uses the \kbd{optim}
#' function with default method \kbd{"Nelder-Mead"} to maximize
#' the likelihood function of the observed group responses.
#' If this optimization method produces a Hessian matrix of all
#' zero elements, the \kbd{"SANN"} method in \kbd{optim} is
#' employed to find the coefficients and Hessian matrix. For
#' the \kbd{"SANN"} method, the number of iterations in \kbd{optim}
#' is set to be 10000. For the background on the use of \kbd{optim},
#' see \kbd{help(optim)}.
#'
#' The \kbd{EM} and \kbd{EM.ret} functions apply Xie's EM
#' algorithm to the likelihood function written in terms of the
#' unobserved individual responses; the functions use \kbd{glm.fit}
#' to update the parameter estimates within each M step. The
#' \kbd{EM} function is used when there are no retests and
#' \kbd{EM.ret} is used when individual retests are available.
#' Thus, within the \kbd{retest} argument, individual observations
#' in observed positive groups are 0 (negative) or 1 (positive);
#' the remaining individual observations are \kbd{NA}s, meaning
#' that no retest is performed for them. Retests cannot be used
#' with Vansteelandt's method; a warning message will be given
#' in this case, and the individual retests will be ignored in
#' the model fitting. There could be slight differences in the
#' estimates between Vansteelandt's and Xie's methods (when
#' retests are not available) due to different convergence criteria.
#'
#' With simple pooling (i.e., Dorfman testing, two-stage hierarchical
#' testing), each individual appears in exactly one pool. When only the
#' group responses are observed, the null degrees of freedom are the number
#' of groups minus 1 and the residual degrees of freedom are the number of
#' groups minus the number of parameters. When individual retests are
#' also observed, it is an open research question what the degrees of
#' freedom and the deviance for the null model should be; therefore, the
#' degrees of freedom and \kbd{null.deviance} are not displayed.
#'
#' Under the halving protocol, the \kbd{EM.halving} function
#' applies Xie's EM algorithm to the
#' likelihood function written in terms of the unobserved
#' individual responses; the function uses \kbd{glm.fit} to update
#' the parameter estimates within each M step. In the halving
#' protocol, if the initial group tests positive, it is split
#' into two subgroups. The two subgroups are subsequently tested
#' and if either subgroup tests positive, the third and final
#' step is to test all individuals within the subgroup. Thus,
#' within \kbd{subg}, subgroup responses in observed positive
#' groups are 0 (negative) or 1 (positive); the remaining
#' subgroup responses are \kbd{NA}s, meaning that no tests are
#' performed for them. The individual retests are similarly coded.
#'
#' With array testing (also known as matrix pooling), the
#' \kbd{EM.mp} function applies Xie's
#' EM algorithm to the likelihood function written in terms of the
#' unobserved individual responses. In each E step, the Gibbs
#' sampling technique is used to estimate the conditional
#' probabilities. Because of the large number of Gibbs samples
#' needed to achieve convergence, the model fitting process could
#' be quite slow, especially when multiple positive rows and
#' columns are observed. In this case, we can either increase the
#' Gibbs sample size to help achieve convergence or loosen the
#' convergence criteria by increasing \kbd{tol} at the expense
#' of perhaps poorer estimates. If follow-up retests are performed,
#' the retest results going into the model will help achieve
#' convergence faster with the same Gibbs sample size and
#' convergence criteria. In each M step, we use \kbd{glm.fit} to
#' update the parameter estimates.
#'
#' For simple pooling, \kbd{retest} provides individual retest
#' results for Dorfman's retesting procedure. Under the halving
#' protocol, \kbd{retest} provides individual retest results
#' within a subgroup that tests positive. In both cases, a 0
#' denotes a negative retest result and a 1 denotes a positive
#' retest result, and an \kbd{NA} denotes that no retest is
#' performed for that individual. The default value is \kbd{NULL}
#' for no retests.
#'
#' For simple pooling, \kbd{control} provides parameters for
#' controlling the fitting process in the \kbd{"Xie"} method only.
#'
#' \kbd{gtReg} returns an object of class \kbd{"gtReg"}.
#' The function \kbd{summary} (i.e., \code{\link{summary.gtReg}})
#' is used to obtain or print a summary of the results.
#' The group testing function \kbd{predict} (i.e.,
#' \code{\link{predict.gtReg}}) is used to make predictions
#' on \kbd{"gtReg"} objects.
#'
#' @return An object of class \kbd{"gtReg"}, a list which may include:
#' \item{coefficients}{a named vector of coefficients.}
#' \item{hessian}{estimated Hessian matrix of the negative
#' log-likelihood function. This serves as an estimate of the
#' information matrix.}
#' \item{residuals}{the response residuals. This is the difference
#' of the observed group responses and the fitted group
#' responses. Not included for array testing.}
#' \item{fitted.values}{the fitted mean values of group responses.
#' Not included for array testing.}
#' \item{deviance}{the deviance between the fitted model and the
#' saturated model. Not included for array testing.}
#' \item{aic}{Akaike's Information Criterion. This is minus twice
#' the maximized log-likelihood plus twice the number of
#' coefficients. Not included for array testing.}
#' \item{null.deviance}{the deviance for the null model,
#' comparable with \kbd{deviance}. The null model will
#' include only the intercept, if there is one in the model.
#' Provided for simple pooling, \kbd{type = "sp"}, only.}
#' \item{counts}{the number of iterations in \kbd{optim}
#' (Vansteelandt's method) or the number of iterations in the
#' EM algorithm (Xie's method, halving, and array testing).}
#' \item{Gibbs.sample.size}{the number of Gibbs samples
#' generated in each E step. Provided for array testing,
#' \kbd{type = "array"}, only.}
#' \item{df.residual}{the residual degrees of freedom.
#' Provided for simple pooling, \kbd{type = "sp"}, only.}
#' \item{df.null}{the residual degrees of freedom for the null model.
#' Provided for simple pooling, \kbd{type = "sp"}, only.}
#' \item{z}{the vector of group responses. Not included for array testing.}
#' \item{call}{the matched call.}
#' \item{formula}{the formula supplied.}
#' \item{terms}{the terms object used.}
#' \item{method}{the method (\kbd{"Vansteelandt"} or \kbd{"Xie"})
#' used to fit the model. For the halving protocol, the
#' \kbd{"Xie"} method is used. Not included for array testing.}
#' \item{link}{the link function used in the model.}
#'
#' @author The majority of this function was originally written as
#' \kbd{gtreg.sp}, \kbd{gtreg.halving}, and \kbd{gtreg.mp} by Boan Zhang
#' for the \code{binGroup} package. Minor modifications have been made for
#' inclusion of the functions in the \code{binGroup2} package.
#'
#' @references
#' \insertRef{Vansteelandt2000}{binGroup2}
#'
#' \insertRef{Xie2001}{binGroup2}
#'
#' @seealso \code{\link{gtSim}} for simulation of group testing
#' data to be used by \kbd{gtReg}, and \code{\link{summary.gtReg}}
#' and \code{\link{predict.gtReg}} for \kbd{gtReg} methods.
#'
#' @examples
#' data(hivsurv)
#' fit1 <- gtReg(type = "sp", formula = groupres ~ AGE + EDUC.,
#'               data = hivsurv, groupn = gnum, sens = 0.9,
#'               spec = 0.9, method = "Xie")
#' fit1
#'
#' set.seed(46)
#' gt.data <- gtSim(type = "sp", par = c(-12, 0.2),
#'                  size1 = 700, size2 = 5)
#' fit2 <- gtReg(type = "sp", formula = gres ~ x, data = gt.data,
#'               groupn = groupn)
#' fit2
#'
#' set.seed(21)
#' gt.data <- gtSim(type = "sp", par = c(-12, 0.2),
#'                  size1 = 700, size2 = 6, sens = 0.95, spec = 0.95,
#'                  sens.ind = 0.98, spec.ind = 0.98)
#' fit3 <- gtReg(type = "sp", formula = gres ~ x, data = gt.data,
#'               groupn = groupn, retest = retest, method = "Xie",
#'               sens = 0.95, spec = 0.95, sens.ind = 0.98,
#'               spec.ind = 0.98, trace = TRUE)
#' summary(fit3)
#'
#' set.seed(46)
#' gt.data <- gtSim(type = "halving", par = c(-6, 0.1), gshape = 17,
#'                  gscale = 1.4, size1 = 5000, size2 = 5,
#'                  sens = 0.95, spec = 0.95)
#' fit4 <- gtReg(type = "halving", formula = gres ~ x,
#'               data = gt.data, groupn = groupn, subg = subgroup,
#'               retest = retest, sens = 0.95, spec = 0.95,
#'               start = c(-6, 0.1), trace = TRUE)
#' summary(fit4)
#'
#' # 5x6 and 4x5 array
#' \donttest{set.seed(9128)
#' sa1a <- gtSim(type = "array", par = c(-7, 0.1), size1 = c(5, 4),
#'               size2 = c(6, 5), sens = 0.95, spec = 0.95)
#' sa1 <- sa1a$dframe
#' fit5 <- gtReg(type = "array",
#'               formula = cbind(col.resp, row.resp) ~ x,
#'               data = sa1, coln = coln, rown = rown,
#'               arrayn = arrayn, sens = 0.95, spec = 0.95,
#'               tol = 0.005, n.gibbs = 2000, trace = TRUE)
#' fit5
#' summary(fit5)}
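#'
#' # The estimated Hessian returned by gtReg() can be used to obtain
#' # standard errors directly, as summary() does internally;
#' # a minimal sketch using fit1 from above:
#' sqrt(diag(solve(fit1$hessian)))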
#'


# Brianna Hitt - 01-07-2020

# Brianna Hitt - 06-30-2021
# Changed class of all results to "gtReg" (instead of "gt" or "gt.mp")

# Brianna Hitt - 3 November 2023
#   Added checks to ensure sensitivity and specificity values are between 0 and 1

gtReg <- function(type = "sp", formula, data, groupn = NULL,
                  subg = NULL, coln = NULL, rown = NULL, arrayn = NULL,
                  retest = NULL, sens = 1, spec = 1,
                  linkf = c("logit", "probit", "cloglog"),
                  method = c("Vansteelandt", "Xie"),
                  sens.ind = NULL, spec.ind = NULL, start = NULL,
                  control = gtRegControl(...), ...) {

  if (sens < 0 | sens > 1) {
    stop("Please provide sensitivity values between 0 and 1.\n")
  }

  if (!is.null(sens.ind)) {
    if (sens.ind < 0 | sens.ind > 1) {
      stop("Please provide sensitivity values between 0 and 1.\n")
    }
  }

  if (spec < 0 | spec > 1) {
    stop("Please provide specificity values between 0 and 1.\n")
  }

  if (!is.null(spec.ind)) {
    if (spec.ind < 0 | spec.ind > 1) {
      stop("Please provide specificity values between 0 and 1.\n")
    }
  }


  call <- match.call()
  mf <- match.call(expand.dots = FALSE)

  if (type %in% c("sp", "halving")) {
    m <- match(c("formula", "data", "groupn"), names(mf), 0)
  } else if (type == "array") {
    m <- match(c("formula", "data", "coln", "rown", "arrayn"),
               names(mf), 0)
  } else {
    stop("Please specify a valid type: 'sp', 'halving', or 'array'.\n")
  }

  mf <- mf[c(1, m)]
  mf$drop.unused.levels <- TRUE
  mf[[1]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())
  mt <- attr(mf, "terms")

  if (type %in% c("sp", "halving")) {
    gr <- model.extract(mf, "groupn")
  } else if (type == "array") {
    arrayn <- model.extract(mf, "arrayn")
    rown <- model.extract(mf, "rown")
    coln <- model.extract(mf, "coln")
  }

  if (!is.na(pos <- match(deparse(substitute(retest)), names(data))))
    retest <- data[, pos]
  if (type == "halving") {
    if (!is.na(pos <- match(deparse(substitute(subg)), names(data))))
      subg <- data[, pos]
  }

  Y <- model.response(mf, "any")
  if (length(dim(Y)) == 1) {
    nm <- rownames(Y)
    dim(Y) <- NULL
    if (!is.null(nm))
      names(Y) <- nm
  }
  X <- if (!is.empty.model(mt))
    model.matrix(mt, mf)
  else matrix(, NROW(Y), 0)
  linkf <- match.arg(linkf)
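  # Dispatch to the appropriate fitting routine: gtreg.fit() (Vansteelandt)
  # or EM()/EM.ret() (Xie) for simple pooling, EM.halving() for halving,
  # and EM.mp() for array testing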

  if (type == "sp") {
    if ((method <- match.arg(method)) == "Vansteelandt") {
      if (!is.null(retest))
        warning("Retests cannot be used with Vansteelandt's method.")
      fit <- gtreg.fit(Y, X, gr, sens, spec, linkf, start)
    }
    else {
      if (is.null(retest))
        fit <- EM(Y, X, gr, sens, spec, linkf, start, control)
      else fit <-  EM.ret(Y, X, gr, retest, sens, spec, linkf,
                          sens.ind, spec.ind, start, control)
    }
    fit <- c(fit, list(call = call, formula = formula, method = method,
                       link = linkf, terms = mt))
    # class(fit) <- "gt"
  } else if (type == "halving") {
    fit <-  EM.halving(Y, X, gr, subg, retest, sens, spec, linkf,
                       sens.ind, spec.ind, start, control)
    fit <- c(fit, list(call = call, formula = formula, method = "Xie",
                       link = linkf, terms = mt))
    # class(fit) <- "gt"
  } else if (type == "array") {
    fit <- EM.mp(Y[, 1], Y[, 2], X, coln, rown, arrayn, retest,
                 sens, spec, linkf, sens.ind, spec.ind, start, control)
    fit <- c(fit, list(call = call, formula = formula, link = linkf,
                       terms = mt))
    # class(fit) <- c("gt.mp", "gt")
  }

  # class(fit) <- c(class(fit), "gtReg")
  class(fit) <- "gtReg"
  fit
}




# Print function
##################################################################
# print.gtReg() function                                         #
##################################################################

#' @title Print method for \kbd{gtReg}
#'
#' @description Print method for objects obtained by \code{\link{gtReg}}.
#'
#' @param x An object of class "gtReg" created by \code{\link{gtReg}}.
#' @param digits digits for rounding.
#' @param ... currently not used.
#'
#' @return A print out of the function call, coefficients, and the null and
#' residual deviance and degrees of freedom.
#'
#' @author This function was originally written by Boan Zhang as the
#' \kbd{print.gt} function for the \code{binGroup} package. Minor modifications
#' were made for inclusion in the \code{binGroup2} package.
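#'
#' @examples
#' # A minimal illustration, mirroring the gtReg() examples above
#' data(hivsurv)
#' fit1 <- gtReg(type = "sp", formula = groupres ~ AGE + EDUC.,
#'               data = hivsurv, groupn = gnum, sens = 0.9,
#'               spec = 0.9, method = "Xie")
#' print(fit1)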

print.gtReg <- function (x,
                         digits = max(3, getOption("digits") - 3), ...) {

  cat("\nCall:\n")
  cat(paste(deparse(x$call), sep = "\n", collapse = "\n"),
      "\n\n", sep = "")

  if (length(coef(x))) {
    cat("Coefficients:\n")
    print.default(format(coef(x), digits = digits), print.gap = 2,
                  quote = FALSE)
  }
  else cat("No coefficients\n")

  if (!is.null(x$df.null)) {
    cat("\nDegrees of Freedom:", x$df.null, "\tTotal (i.e. Null); ",
        x$df.residual, "Residual\n")
    cat("Null Deviance:\t   ",
        format(signif(x$null.deviance, digits)),
        "\nResidual Deviance: ",
        format(signif(x$deviance, digits)),
        "\tAIC:", format(signif(x$aic, digits)), "\n")
  }

  invisible(x)
}




# Residuals function
##################################################################
# residuals.gtReg() function                                     #
##################################################################

#' @title Extract model residuals from a fitted group testing model
#'
#' @description Extract model residuals from objects of class "gtReg" returned
#' by \code{\link{gtReg}}.
#'
#' @param object An object of class "gtReg", created by \code{\link{gtReg}},
#' from which the model residuals are to be extracted.
#' @param type The type of residuals which should be returned. Options include
#' "\kbd{deviance}" (default), "\kbd{pearson}", and "\kbd{response}".
#' @param ... currently not used.
#'
#' @return Residuals of group responses extracted from the object \kbd{object}.
#'
#' @author This function was originally written by Boan Zhang as the
#' \kbd{residuals.gt} function for the \code{binGroup} package.
#'
#' @examples
#' data(hivsurv)
#' fit1 <- gtReg(formula = groupres ~ AGE * EDUC.,
#'               data = hivsurv, groupn = gnum,
#'               linkf = "probit")
#' residuals(object = fit1, type = "pearson")
#' residuals(object = fit1, type = "deviance")

residuals.gtReg <- function (object,
                             type = c("deviance", "pearson", "response"), ...) {
  type <- match.arg(type)
  r <- object$residuals
  zhat <- object$fitted.values
  z <- object$z
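  # response: observed minus fitted group responses; pearson: response
  # residuals standardized by the binomial variance; deviance: signed square
  # root of each group's contribution to the deviance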
  res <- switch(type, response = r,
                pearson = r/sqrt(zhat * (1 - zhat)),
                deviance = sqrt(-2 * (z * log(zhat) +
                                        (1 - z) * log(1 - zhat))) * sign(z - zhat))
  res
}




# Coefficients function
##################################################################
# coefficients.gtReg() function                                  #
##################################################################

#' @title Extract coefficients from a fitted group testing model
#'
#' @description Extract coefficients from objects of class "gtReg" returned
#' by \code{\link{gtReg}}.
#'
#' @param object An object of class "gtReg", created by \code{\link{gtReg}},
#' from which the coefficients are to be extracted.
#' @param digits digits for rounding.
#' @param ... not currently used.
#'
#' @return Model coefficients extracted from the object \kbd{object}.
#'
#' @author Brianna D. Hitt
#'
#' @examples
#' data(hivsurv)
#' fit1 <- gtReg(formula = groupres ~ AGE * EDUC.,
#'               data = hivsurv, groupn = gnum,
#'               linkf = "probit")
#' coefficients(object = fit1)
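#'
#' # coef() and coefficients() return the same rounded coefficients
#' coef(object = fit1)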

coef.gtReg <- function (object, digits = max(3, getOption("digits") - 3),
                        ...) {

  signif(object$coefficients, digits)
}




###############################################################################
#' @rdname coef.gtReg
coefficients.gtReg <- coef.gtReg





# Formula function
##################################################################
# formula.gtReg() function                                       #
##################################################################

#' @title Extract the model formula from a fitted group testing model
#'
#' @description Extract the model formula from objects of class "gtReg"
#' returned by \code{\link{gtReg}}.
#'
#' @param x An object of class "gtReg", created by \code{\link{gtReg}},
#' from which the model formula is to be extracted.
#' @param ... not currently used.
#'
#' @return Model formula extracted from the object \kbd{object}.
#'
#' @author Brianna D. Hitt
#'
#' @examples
#' data(hivsurv)
#' fit1 <- gtReg(formula = groupres ~ AGE * EDUC.,
#'               data = hivsurv, groupn = gnum,
#'               linkf = "probit")
#' formula(x = fit1)

formula.gtReg <- function (x, ...) {
  x$formula
}




# Variance-covariance function
##################################################################
# vcov.gtReg() function                                          #
##################################################################

# #' @title Calculate variance-covariance matrix for a fitted group testing model
# #'
# #' @description Returns the variance-covariance matrix of the main parameters
# #' of a fitted model object of class "gtReg" returned by \code{\link{gtReg}}.
# #' The "main" parameters of the model correspond to those returned by
# #' \code{\link{coef.gtReg}}, and typically do not contain a nuisance scale
# #' parameter.
# #'
# #' @param object A fitted model object of class "gtReg", created by
# #' \code{\link{gtReg}}, from which the variance-covariance matrix will be
# #' calculated.
# #' @param type The type of residuals which should be used to calculate the
# #' variance-covariance matrix. Options include
# #' "\kbd{deviance}" (default), "\kbd{pearson}", and "\kbd{response}".
# #' @param ... currently not used.
# #'
# #' @return A matrix of the estimated covariances between the parameter
# #' estimates in the linear or non-linear predictor of the model. This should
# #' have row and column names corresponding to the parameter names given by the
# #' \code{\link{coef.gtReg}} method.
# #'
# #' @author Brianna D. Hitt
# #'
# #' @examples
# #' data(hivsurv)
# #' fit1 <- gtReg(formula = groupres ~ AGE * EDUC.,
# #' data = hivsurv, groupn = gnum, linkf = "probit")
# #' vcov.gtReg(object = fit1)
#
# vcov.gtReg <- function (object,
#                         type = c("deviance", "pearson", "response"), ...) {
#   # type <- match.arg(type)
#   # r <- object$residuals
#   # zhat <- object$fitted.values
#   # z <- object$z
#   # res <- switch(type, response = r,
#   #               pearson = r/sqrt(zhat * (1 - zhat)),
#   #               deviance = sqrt(-2 * (z * log(zhat) +
#   #                                       (1 - z) * log(1 - zhat))) * sign(z - zhat))
#   # res
# }
#
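
# A minimal sketch (not part of the package) of what a vcov() method could
# look like, based on the inverse Hessian already used by summary.gtReg()
# and predict.gtReg():
# vcov.gtReg <- function (object, ...) {
#   cov.mat <- solve(object$hessian)
#   dimnames(cov.mat) <- list(names(object$coefficients),
#                             names(object$coefficients))
#   cov.mat
# }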



# Summary function
##################################################################
# summary.gtReg() function                                       #
##################################################################

#' @title Summary method for \kbd{gtReg}
#'
#' @description Produce a summary list for objects of class
#' \kbd{"gtReg"} returned by \code{\link{gtReg}}.
#'
#' @param object a fitted object of class \kbd{"gtReg"}.
#' @param ... currently not used.
#'
#' @details The \kbd{coefficients} component of the results gives a matrix
#' containing the estimated coefficients and their estimated standard errors.
#' The third column, labeled \kbd{z value}, is the ratio of each coefficient
#' to its standard error (a Wald statistic). The fourth column gives the
#' two-tailed p-value corresponding to the z value based on a Wald test.
#' Note that it is possible for there to be no residual degrees of freedom
#' from which to estimate the standard errors, in which case the estimates
#' are \kbd{NaN}. The coefficient matrix can be extracted from the returned
#' object; see the examples below.
#'
#' @return \kbd{summary.gtReg} returns an object of class
#' \kbd{"summary.gtReg"}, a list containing:
#' \item{call}{the component from \kbd{object}.}
#' \item{link}{the component from \kbd{object}.}
#' \item{deviance}{the component from \kbd{object},
#' for simple pooling (\kbd{type = "sp"} in \code{\link{gtReg}}) only.}
#' \item{aic}{the component from \kbd{object},
#' for simple pooling (\kbd{type = "sp"} in \code{\link{gtReg}}) only.}
#' \item{df.residual}{the component from \kbd{object},
#' for simple pooling (\kbd{type = "sp"} in \code{\link{gtReg}}) only.}
#' \item{null.deviance}{the component from \kbd{object},
#' for simple pooling (\kbd{type = "sp"} in \code{\link{gtReg}}) only.}
#' \item{df.null}{the component from \kbd{object},
#' for simple pooling (\kbd{type = "sp"} in \code{\link{gtReg}}) only.}
#' \item{deviance.resid}{the deviance residuals,
#' for simple pooling (\kbd{type = "sp"} in \code{\link{gtReg}}) only.}
#' \item{coefficients}{the matrix of coefficients, standard errors,
#' z-values, and p-values. Aliased coefficients are omitted.}
#' \item{counts}{the component from \kbd{object}.}
#' \item{method}{the component from \kbd{object},
#' for simple pooling (\kbd{type = "sp"} in \code{\link{gtReg}}) only.}
#' \item{Gibbs.sample.size}{the component from \kbd{object},
#' for array testing (\kbd{type = "array"} in \code{\link{gtReg}}) only.}
#' \item{cov.mat}{the estimated covariance matrix of the estimated
#' coefficients.}
#'
#' @author The majority of this function was originally written as
#' \code{summary.gt} and \code{summary.gt.mp} by Boan Zhang for the
#' \code{binGroup} package. Minor modifications were made to the function
#' for inclusion in the \code{binGroup2} package.
#'
#' @seealso \code{\link{gtReg}} for creating an object of class
#' \kbd{"gtReg"}.
#'
#' @examples
#' data(hivsurv)
#' fit1 <- gtReg(type = "sp",
#'               formula = groupres ~ AGE + EDUC.,
#'               data = hivsurv, groupn = gnum,
#'               sens = 0.9, spec = 0.9,
#'               method = "Xie")
#' summary(fit1)
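#'
#' # The Wald coefficient table described in Details can be extracted
#' # from the summary object directly:
#' summary(fit1)$coefficients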
#'
#' # 5x6 and 4x5 array
#' set.seed(9128)
#' sa2a <- gtSim(type = "array", par = c(-7, 0.1),
#'               size1 = c(5, 4), size2 = c(6, 5),
#'               sens = 0.95, spec = 0.95)
#' sa2 <- sa2a$dframe
#' \donttest{
#' fit2 <- gtReg(type = "array",
#'               formula = cbind(col.resp, row.resp) ~ x,
#'               data = sa2, coln = coln, rown = rown,
#'               arrayn = arrayn, sens = 0.95, spec = 0.95,
#'               linkf = "logit", n.gibbs = 1000, tol = 0.005)
#' summary(fit2)}

# Brianna Hitt - 06-30-2021
# Updated gtReg() so that class of returned results is "gtReg"
# Updated summary.gtReg() so that the matrix pooling results can still be
#   identified ("Gibbs.sample.size" %in% names(object) instead of "gt.mp"
#   %in% class(object))

summary.gtReg <- function(object, ...) {
  coef.p <- object$coefficients
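  # Invert the estimated Hessian of the negative log-likelihood to obtain
  # the estimated covariance matrix of the coefficient estimates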
  cov.mat <- solve(object$hessian)
  dimnames(cov.mat) <- list(names(coef.p), names(coef.p))
  var.cf <- diag(cov.mat)
  s.err <- sqrt(var.cf)
  zvalue <- coef.p / s.err
  dn <- c("Estimate", "Std. Error")
  pvalue <- 2 * pnorm(-abs(zvalue))
  coef.table <- cbind(coef.p, s.err, zvalue, pvalue)
  dimnames(coef.table) <- list(names(coef.p), c(dn, "z value", "Pr(>|z|)"))

  # if ("gt.mp" %in% class(object)) {
  if ("Gibbs.sample.size" %in% names(object)) {
    keep <- match(c("call", "link", "Gibbs.sample.size", "counts"),
                  names(object), 0)
    ans <- c(object[keep], list(coefficients = coef.table,
                                cov.mat = cov.mat))
  } else{
    keep <- match(c("call", "link", "aic", "deviance", "df.residual",
                    "null.deviance", "df.null", "counts", "method", "z"),
                  names(object), 0)
    ans <- c(object[keep], list(coefficients = coef.table,
                                deviance.resid = residuals(object,
                                                           type = "deviance"),
                                cov.mat = cov.mat))
  }
  class(ans) <- "summary.gtReg"
  ans
}




# Print method for summary function
##################################################################
# print.summary.gtReg() function                                 #
##################################################################

#' @title Print method for \kbd{summary.gtReg}
#'
#' @description Print method for objects obtained by
#' \code{\link{summary.gtReg}}.
#'
#' @param x An object of class "summary.gtReg" created by
#' \code{\link{summary.gtReg}}.
#' @param digits digits for rounding.
#' @param signif.stars a logical value indicating whether significance
#' stars should be shown.
#' @param ... Additional arguments to be passed to \code{printCoefmat}.
#'
#' @return A print out of the function call, deviance residuals (for
#' simple pooling and halving only), coefficients, null and
#' residual deviance and degrees of freedom (for simple pooling only),
#' AIC (for simple pooling and halving only), number of
#' Gibbs samples (for array testing only), and the number of
#' iterations.
#'
#' @author This function combines code from
#' \code{print.summary.gt} and
#' \code{print.summary.gt.mp}, written by Boan Zhang
#' for the \code{binGroup} package. Minor modifications were
#' made for inclusion in the \code{binGroup2} package.
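#'
#' @examples
#' # A minimal illustration, mirroring the summary.gtReg() examples
#' data(hivsurv)
#' fit1 <- gtReg(type = "sp", formula = groupres ~ AGE + EDUC.,
#'               data = hivsurv, groupn = gnum, sens = 0.9,
#'               spec = 0.9, method = "Xie")
#' print(summary(fit1))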

print.summary.gtReg <- function(x, digits = max(3, getOption("digits") - 3),
                                signif.stars = getOption("show.signif.stars"),
                                ...) {

  obj <- x
  cat("\nCall:\n")
  cat(paste(deparse(obj$call), sep = "\n", collapse = "\n"),
      "\n\n", sep = "")

  # for simple pooling and halving only
  if ("deviance.resid" %in% names(obj)) {
    cat("Deviance Residuals: \n")
    if (length(obj$z) > 5) {
      obj$deviance.resid <- quantile(obj$deviance.resid, na.rm = TRUE)
      names(obj$deviance.resid) <- c("Min", "1Q", "Median",
                                     "3Q", "Max")
    }
    print.default(obj$deviance.resid, digits = digits, na.print = "",
                  print.gap = 2)
  }

  cat("\nCoefficients:\n")
  coefs <- obj$coefficients
  printCoefmat(coefs, digits = digits, signif.stars = signif.stars,
               na.print = "NA", ...)

  if (!is.null(unlist(obj["df.null"]))) {
    cat("\n",
        apply(cbind(paste(format(c("Null", "Residual"), justify = "right"),
                          "deviance:"),
                    format(unlist(obj[c("null.deviance", "deviance")]),
                           digits = 4),
                    " on", format(unlist(obj[c("df.null", "df.residual")])),
                    " degrees of freedom\n"), 1, paste,
              collapse = " "), sep = "")
  }

  if ("method" %in% names(obj)) {
    if (obj$method == "Vansteelandt") {
      cat("AIC: ", format(obj$aic, digits = 4),
          "\n\n", "Number of iterations in optim(): ",
          obj$counts, "\n", sep = "")
    } else {
      cat("AIC: ", format(obj$aic, digits = 4),
          "\n\n", "Number of iterations in EM: ",
          obj$counts, "\n", sep = "")
    }
  }

  if ("Gibbs.sample.size" %in% names(obj)) {
    cat("\nNumber of Gibbs samples generated in each E step: ",
        obj$Gibbs.sample.size, "\n",
        "Number of iterations in EM algorithm: ",
        obj$counts, "\n", sep = "")
  }

  cat("\n")
  invisible(obj)
}




# Other regression-related functions
##################################################################
# predict.gtReg() function                                       #
##################################################################

# Brianna Hitt - 04.03.2022 - added a class attribute to the output of
# predict.gtReg() for use with the print.predict.gtReg() function

#' @title Predict method for \kbd{gtReg}
#'
#' @description Obtains predictions for individual observations and
#' optionally computes the standard errors of those predictions from
#' objects of class \kbd{"gtReg"} returned by \code{\link{gtReg}}.
#'
#' @param object a fitted object of class \kbd{"gtReg"}.
#' @param newdata an optional data frame in which to look for
#' variables with which to predict. If omitted, the fitted linear
#' predictors are used.
#' @param type the type of prediction required. The \kbd{"link"}
#' option is on the scale of the linear predictors. The \kbd{"response"}
#' option is on the scale of the response variable. Thus, for the
#' logit model, the \kbd{"link"} predictions are of log-odds
#' (probabilities on the logit scale) and \kbd{type = "response"}
#' gives the predicted probabilities.
#' @param se.fit a logical value indicating whether standard errors
#' are required.
#' @param conf.level the confidence level of the interval for the
#' predicted values.
#' @param na.action a function determining what should be done with
#' missing values in \kbd{newdata}. The default is to predict \kbd{NA}.
#' @param ... currently not used.
#'
#' @details If \kbd{newdata} is omitted, the predictions are based
#' on the data used for the fit. When \kbd{newdata} is present and
#' contains missing values, how the missing values will be dealt with
#' is determined by the \kbd{na.action} argument. In this case, if
#' \kbd{na.action = na.omit}, omitted cases will not appear, whereas
#' if \kbd{na.action = na.exclude}, omitted cases will appear (in
#' predictions and standard errors) with value \kbd{NA}.
#'
#' @return If \kbd{se.fit = FALSE}, a vector or matrix of predictions. If
#' \kbd{se.fit = TRUE}, a list containing:
#' \item{fit}{predictions.}
#' \item{se.fit}{estimated standard errors.}
#' \item{lower}{the lower bound of the confidence interval,
#' if calculated (i.e., \kbd{conf.level} is specified).}
#' \item{upper}{the upper bound of the confidence interval,
#' if calculated (i.e., \kbd{conf.level} is specified).}
#'
#' @author Boan Zhang
#'
#' @examples
#' data(hivsurv)
#' fit1 <- gtReg(formula = groupres ~ AGE + EDUC.,
#'               data = hivsurv, groupn = gnum,
#'               sens = 0.9, spec = 0.9,
#'               linkf = "logit", method = "V")
#' pred.data <- data.frame(AGE = c(15, 25, 30),
#'                         EDUC. = c(1, 3, 2))
#' predict(object = fit1, newdata = pred.data,
#'         type = "link", se.fit = TRUE)
#' predict(object = fit1, newdata = pred.data,
#'         type = "response", se.fit = TRUE,
#'         conf.level = 0.9)
#' predict(object = fit1, type = "response",
#'         se.fit = TRUE, conf.level = 0.9)

predict.gtReg <- function(object, newdata, type = c("link", "response"),
                          se.fit = FALSE, conf.level = NULL,
                          na.action = na.pass, ...) {
  tt <- terms(object)
  Terms <- delete.response(tt)
  if (missing(newdata) || is.null(newdata)) {
    m <- model.frame(object)
    newd <- model.matrix(Terms, m)
  }
  else {
    m <- model.frame(Terms, newdata, na.action = na.action)
    newd <- model.matrix(Terms, m)
  }
  type <- match.arg(type)
  lin.pred <- as.vector(newd %*% object$coefficients)
  link <- object$link
  res <- switch(link, logit = plogis(lin.pred), probit = pnorm(lin.pred),
                cloglog = 1 - exp(-exp(lin.pred)))
  if (type == "response")
    pred <- res
  else pred <- lin.pred
  if (se.fit) {
    cov <- solve(object$hessian)
    var.lin.pred <- diag(newd %*% cov %*% t(newd))
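    # Delta method: scale the variance of the linear predictor by the
    # squared derivative of the inverse link (mean) function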
    var.res <- switch(link, logit = exp(2 * lin.pred) / (1 + exp(lin.pred))^4,
                      probit = dnorm(lin.pred)^2,
                      cloglog = (exp(-exp(lin.pred)) *
                                   exp(lin.pred))^2) * var.lin.pred
    if (type == "response")
      se <- sqrt(var.res)
    else se <- sqrt(var.lin.pred)
    if (!is.null(conf.level)) {
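      # Wald interval computed on the linear predictor scale and then
      # transformed through the inverse link for the response scale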
      alpha <- 1 - conf.level
      lower <- lin.pred - qnorm(1 - alpha / 2) * sqrt(var.lin.pred)
      upper <- lin.pred + qnorm(1 - alpha / 2) * sqrt(var.lin.pred)
      res.lower <- switch(link, logit = plogis(lower),
                          probit = pnorm(lower),
                          cloglog = 1 - exp(-exp(lower)))
      res.upper <- switch(link, logit = plogis(upper),
                          probit = pnorm(upper),
                          cloglog = 1 - exp(-exp(upper)))
      if (type == "response") {
        lwr <- res.lower
        upr <- res.upper
      }
      else {
        lwr <- lower
        upr <- upper
      }
    }
  }
  names(pred) <- 1:length(lin.pred)
  if (!is.null(conf.level)) {
    out <- list(fit = pred, se.fit = se, lower = lwr, upper = upr)
    class(out) <- c("predict.gtReg", "list")
  }
  else if (se.fit) {
    out <- list(fit = pred, se.fit = se)
    class(out) <- c("predict.gtReg", "list")
  }
  else {
    out <- pred
    class(out) <- c("predict.gtReg", "numeric")
  }

  out
}




# Print method for predict function
##################################################################
# print.predict.gtReg() function                                 #
##################################################################

#' @title Print method for \kbd{predict.gtReg}
#'
#' @description Print method for objects obtained by
#' \code{\link{predict.gtReg}}.
#'
#' @param x An object of class "predict.gtReg" created by
#' \code{\link{predict.gtReg}}.
#' @param digits digits for rounding.
#' @param ... not currently used.
#'
#' @return A data frame of predictions with rows corresponding to each
#' observation in \kbd{newdata} (if provided) or each observation in the
#' data set used for the fit. The columns correspond to the predictions
#' (\kbd{fit}), the estimated standard errors (\kbd{se.fit}), the lower
#' bound of the confidence interval (\kbd{lower}), and the upper bound of
#' the confidence interval (\kbd{upper}).
#' If \kbd{conf.level} is not specified, the \kbd{lower} and \kbd{upper}
#' columns will not be included. If \kbd{se.fit = FALSE}, the \kbd{se.fit}
#' column will not be included.
#'
#' @author Brianna Hitt
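#'
#' @examples
#' # A minimal illustration, mirroring the predict.gtReg() examples
#' data(hivsurv)
#' fit1 <- gtReg(formula = groupres ~ AGE + EDUC.,
#'               data = hivsurv, groupn = gnum,
#'               sens = 0.9, spec = 0.9,
#'               linkf = "logit", method = "V")
#' pred1 <- predict(object = fit1, type = "response",
#'                  se.fit = TRUE, conf.level = 0.9)
#' print(pred1)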

print.predict.gtReg <- function(x, digits = max(3, getOption("digits") - 3),
                                ...) {

  if (inherits(x, "list")) {
    if (length(x) == 4) {
      print(data.frame(fit = signif(x$fit, digits),
                 se.fit = signif(x$se.fit, digits),
                 lower = signif(x$lower, digits),
                 upper = signif(x$upper, digits)))
    }
    else if (length(x) == 2) {
      print(data.frame(fit = signif(x$fit, digits),
                 se.fit = signif(x$se.fit, digits)))
    }
  }
  else {
    print(data.frame(fit = signif(as.vector(x), digits)))
  }
}


#
