Nothing
#' Internal function: Fits a NB2 regression via maximum likelihood with log-link
#' for mean and dispersion parameter.
#'
#' Internal fitting function for NB2 regression models. Used for fitting the
#' starting values of the one-step ML estimators in \code{\link{walsNB}}. Only
#' works with log-link so far, no other links tested.
#'
#' @param X Design matrix.
#' @param Y Count response vector.
#' @param family Object of class \code{"\link[WALS]{familyNBWALS}"} generated by
#' \code{\link[WALS]{negbinWALS}}.
#' @param control List of parameters for controlling the optimization process.
#' Use \code{\link[WALS]{controlNB}} to generate the list.
#'
#' @details
#' The available parameters for controlling the optimization are documented in
#' \code{\link[WALS]{controlNB}}.
#'
#' @returns A list with the following elements
#' \item{coefficients}{fitted coefficients from NB2 regression}
#' \item{theta}{fitted dispersion parameter from NB2 regression}
#' \item{convergence}{0 indicates successful completion. All error codes except
#' for \code{99} are generated by \code{\link[stats]{optim}}. Possible error
#' codes are
#' \describe{
#' \item{\code{1}}{indicates that the iteration limit \code{maxit} had been reached.}
#' \item{\code{10}}{degeneracy of the Nelder-Mead simplex.}
#' \item{\code{51}}{warning from "L-BFGS-B" method; see component \code{message}
#' for further details.}
#' \item{\code{52}}{error from "L-BFGS-B" method; see component \code{message}
#' for further details.}
#' \item{\code{99}}{(only possible if \code{controlNB(initMASS = TRUE)}) indicates
#' convergence issues in \code{\link[MASS]{glm.nb}}.}
#' }
#' }
#' \item{ll}{log-likelihood of fitted NB2 regression model}
#' \item{message}{If \code{controlNB(initMASS = FALSE)}, character string
#' giving any additional information returned by the optimizer, else \code{NULL}.}
#' \item{start}{If \code{controlNB(initMASS = FALSE)}, contains a vector with the
#' starting values used for \code{\link[stats]{optim}}.}
#'
#' @seealso [controlNB], [negbinWALS], \link[MASS]{glm.nb}, [optim].
#'
fitNB2 <- function(X, Y, family, control = controlNB()) {
# ignore starting values in control, use MASS::glm.nb
if (control$initMASS) { # reduce memory, limit output by model = FALSE, x = FALSE ...
fit <- MASS::glm.nb(Y ~ -1 + X, model = FALSE, x = FALSE, y = FALSE,
control = glm.control(maxit = control$controlOptim$maxit,
epsilon = control$epsilonMASS))
# Remove 'X' at beginning of variable names --> delete first character
fittedCoefs <- coef(fit)
names(fittedCoefs) <- sub('.', '', names(fittedCoefs))
out <- list(coefficients = fittedCoefs, theta = fit$theta,
# generate error code 99 if IWLS algo in glm.nb failed
convergence = if (fit$converged) 0 else 99,
ll = logLik(fit), message = NULL, start = NULL)
return(out)
}
## starting values
startBeta <- control$start$mu
startLogTheta <- control$start$logTheta
## default starting values
# use poisson regression for regression coefs
if (is.null(startBeta)) startBeta <- glm.fit(X, Y, family = poisson(family$link))$coefficient
# maximize loglik wrt theta given regression coefs
if (is.null(startLogTheta)) {
if (control$initThetaMASS) {
mu <- family$linkinv(X %*% startBeta)
startLogTheta <- log(MASS::theta.ml(Y, mu, n = length(Y),
limit = control$controlOptim$maxit,
eps = control$eps))
} else {
startLogTheta <- 0
}
}
start <- c(startBeta, startLogTheta)
## loglik and gradient
nllfun <- function(parms, k) {
mu <- family$linkinv(X %*% parms[1:k])
theta <- exp(parms[k + 1]) # optimize wrt log theta --> unconstrained
return(-sum(dnbinom(Y, size = theta, mu = mu, log = TRUE))) # negative loglik
}
ngrad <- function(parms, k) { # negative gradient because minimize
eta <- X %*% parms[1:k]
mu <- family$linkinv(eta)
theta <- exp(parms[k + 1])
gr <- snbinom(Y, mu = mu, size = theta)
gr <- cbind(gr[, 1] * family$mu.eta(eta)[,,drop = TRUE] * X, gr[, 2] * theta)
return(-colSums(gr))
}
k <- ncol(X)
## ML estimation
fit <- optim(fn = nllfun, gr = ngrad, par = start, k = k,
method = control$method, control = control$controlOptim)
out <- list(coefficients = fit$par[1:k], theta = exp(fit$par[k + 1]),
convergence = fit$convergence, ll = -fit$value, message = fit$message,
start = start)
return(out)
}
#' Internal function: first derivatives of NB2 PMF
#'
#' First derivatives of NB2 PMF used in \code{\link[WALS]{fitNB2}}. Code is
#' taken from the function \code{snbinom()} in the \code{countreg} package
#' version 0.2-1 (2023-06-13) \insertCite{countreg}{WALS}.
#'
#' @param x Vector of quantiles.
#' @param mu Vector of means.
#' @param size Vector of dispersion parameter. If a scalar is given, the value
#' is recycled.
#' @param parameter Specifies which parameter the derivative is taken for.
#' \code{parameter = c("mu", "size")} returns a matrix with derivatives
#' for both parameters.
#' @param drop If \code{TRUE}, drops empty dimensions of return using
#' \code{\link[base]{drop}}. If \code{FALSE} does not apply \code{\link[base]{drop}}.
#'
#' @returns A vector or matrix containing the first derivatives.
#'
#' @references
#' \insertAllCited{}
#'
snbinom <- function(x, mu, size, parameter = c("mu", "size"), drop = TRUE) {
parameter <- sapply(parameter, function(x) match.arg(x, c("mu", "size")))
s <- cbind(
if ("mu" %in% parameter) x/mu - (x + size)/(mu + size) else NULL,
if ("size" %in% parameter) digamma(x + size) - digamma(size) +
log(size) + 1 - log(mu + size) - (x + size) / (mu + size) else NULL
)
colnames(s) <- c("mu", "size")[c("mu", "size") %in% parameter]
s[(x < 0) | (abs(x - round(x)) > sqrt(.Machine$double.eps)), ] <- 0
if (drop & NCOL(s) < 2L) drop(s) else s
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.