R/fitNB2.R

Defines functions snbinom fitNB2

Documented in fitNB2 snbinom

#' Internal function: Fits a NB2 regression via maximum likelihood with log-link
#' for mean and dispersion parameter.
#'
#' Internal fitting function for NB2 regression models. Used for fitting the
#' starting values of the one-step ML estimators in \code{\link{walsNB}}. Only
#' works with log-link so far, no other links tested.
#'
#' @param X Design matrix.
#' @param Y Count response vector.
#' @param family Object of class \code{"\link[WALS]{familyNBWALS}"} generated by
#' \code{\link[WALS]{negbinWALS}}.
#' @param control List of parameters for controlling the optimization process.
#' Use \code{\link[WALS]{controlNB}} to generate the list.
#'
#' @details
#' The available parameters for controlling the optimization are documented in
#' \code{\link[WALS]{controlNB}}.
#'
#' @returns A list with the following elements
#' \item{coefficients}{fitted coefficients from NB2 regression}
#' \item{theta}{fitted dispersion parameter from NB2 regression}
#' \item{convergence}{0 indicates successful completion. All error codes except
#' for \code{99} are generated by \code{\link[stats]{optim}}. Possible error
#' codes are
#' \describe{
#'   \item{\code{1}}{indicates that the iteration limit \code{maxit} had been reached.}
#'   \item{\code{10}}{degeneracy of the Nelder-Mead simplex.}
#'   \item{\code{51}}{warning from "L-BFGS-B" method; see component \code{message}
#'   for further details.}
#'   \item{\code{52}}{error from "L-BFGS-B" method; see component \code{message}
#'   for further details.}
#'   \item{\code{99}}{(only possible if \code{controlNB(initMASS = TRUE)}) indicates
#'    convergence issues in \code{\link[MASS]{glm.nb}}.}
#'  }
#' }
#' \item{ll}{log-likelihood of fitted NB2 regression model}
#' \item{message}{If \code{controlNB(initMASS = FALSE)}, character string
#'   giving any additional information returned by the optimizer, else \code{NULL}.}
#' \item{start}{If \code{controlNB(initMASS = FALSE)}, contains a vector with the
#'   starting values used for \code{\link[stats]{optim}}.}
#'
#' @seealso [controlNB], [negbinWALS], \link[MASS]{glm.nb}, [optim].
#'
fitNB2 <- function(X, Y, family, control = controlNB()) {

  # ignore starting values in control, use MASS::glm.nb
  if (control$initMASS) { # reduce memory, limit output by model = FALSE, x = FALSE ...
    fit <- MASS::glm.nb(Y ~ -1 + X, model = FALSE, x = FALSE, y = FALSE,
                        control = glm.control(maxit = control$controlOptim$maxit,
                                              epsilon = control$epsilonMASS))

    # Remove 'X' at beginning of variable names --> delete first character
    fittedCoefs <- coef(fit)
    names(fittedCoefs) <- sub('.', '', names(fittedCoefs))

    out <- list(coefficients = fittedCoefs, theta = fit$theta,
                # generate error code 99 if IWLS algo in glm.nb failed
                convergence = if (fit$converged) 0 else 99,
                ll = logLik(fit), message = NULL, start = NULL)
    return(out)
  }

  ## starting values
  startBeta <- control$start$mu
  startLogTheta <- control$start$logTheta

  ## default starting values

  # use poisson regression for regression coefs
  if (is.null(startBeta)) startBeta <- glm.fit(X, Y, family = poisson(family$link))$coefficient

  # maximize loglik wrt theta given regression coefs
  if (is.null(startLogTheta)) {
    if (control$initThetaMASS) {
      mu <- family$linkinv(X %*% startBeta)
      startLogTheta <- log(MASS::theta.ml(Y, mu, n = length(Y),
                                          limit = control$controlOptim$maxit,
                                          eps = control$eps))
    } else {
      startLogTheta <- 0
    }
  }

  start <- c(startBeta, startLogTheta)


  ## loglik and gradient
  nllfun <- function(parms, k) {
    mu <- family$linkinv(X %*% parms[1:k])
    theta <- exp(parms[k + 1]) # optimize wrt log theta --> unconstrained

    return(-sum(dnbinom(Y, size = theta, mu = mu, log = TRUE))) # negative loglik
  }

  ngrad <- function(parms, k) { # negative gradient because minimize
    eta <- X %*% parms[1:k]
    mu <- family$linkinv(eta)
    theta <- exp(parms[k + 1])
    gr <- snbinom(Y, mu = mu, size = theta)
    gr <- cbind(gr[, 1] * family$mu.eta(eta)[,,drop = TRUE] * X, gr[, 2] * theta)
    return(-colSums(gr))
  }

  k <- ncol(X)

  ## ML estimation
  fit <- optim(fn = nllfun, gr = ngrad, par = start, k = k,
               method = control$method, control = control$controlOptim)
  out <- list(coefficients = fit$par[1:k], theta = exp(fit$par[k + 1]),
              convergence = fit$convergence, ll = -fit$value, message = fit$message,
              start = start)
  return(out)
}

#' Internal function: first derivatives of NB2 PMF
#'
#' First derivatives of NB2 PMF used in \code{\link[WALS]{fitNB2}}. Code is
#' taken from the function \code{snbinom()} in the \code{countreg} package
#' version 0.2-1 (2023-06-13) \insertCite{countreg}{WALS}.
#'
#' @param x Vector of quantiles.
#' @param mu Vector of means.
#' @param size Vector of dispersion parameter. If a scalar is given, the value
#' is recycled.
#' @param parameter Specifies which parameter the derivative is taken for.
#' \code{parameter = c("mu", "size")} returns a matrix with derivatives
#' for both parameters.
#' @param drop If \code{TRUE}, drops empty dimensions of return using
#' \code{\link[base]{drop}}. If \code{FALSE} does not apply \code{\link[base]{drop}}.
#'
#' @returns A vector or matrix containing the first derivatives.
#'
#' @references
#' \insertAllCited{}
#'
snbinom <- function(x, mu, size, parameter = c("mu", "size"), drop = TRUE) {
  parameter <- sapply(parameter, function(x) match.arg(x, c("mu", "size")))
  s <- cbind(
    if ("mu" %in% parameter) x/mu - (x + size)/(mu + size) else NULL,
    if ("size" %in% parameter) digamma(x + size) - digamma(size) +
      log(size) + 1 - log(mu + size) - (x + size) / (mu + size) else NULL
  )
  colnames(s) <- c("mu", "size")[c("mu", "size") %in% parameter]
  s[(x < 0) | (abs(x - round(x)) > sqrt(.Machine$double.eps)), ] <- 0
  if (drop & NCOL(s) < 2L) drop(s) else s
}

Try the WALS package in your browser

Any scripts or data that you put into this service are public.

WALS documentation built on June 22, 2024, 9:42 a.m.