Nothing
#' Extended Family Objects for Models
#'
#' Objects of class \code{"familyWALS"} inherit from \code{"\link[stats]{family}"}
#' and extend those with (transformation) functions required for
#' \code{\link[WALS]{walsGLM}} and \code{\link[WALS]{walsNB}}.
#'
#' @param link Specifies the model link function. Typically a character string
#' or an object of class \code{"link-glm"} generated by
#' \code{\link[stats]{make.link}}. See \code{\link[stats]{family}} for more details.
#' Currently, only a limited number of links are supported. See below for more
#' details.
#' @param object The function \code{familyWALS()} extracts the family objects stored
#' in \code{"walsGLM"} objects.
#' @param ... Further arguments passed to methods.
#'
#' The \code{negbinWALS()} family currently only accepts \code{"log"}, while
#' \code{negbinFixedWALS()} supports both \code{"log"} and \code{"canonical"}.
#'
#' @details
#' \code{familyWALS()} is a generic function that extracts the family used in
#' \code{"walsGLM"} objects.
#'
#' \code{negbinFixedWALS()} creates the \code{"familyWALS"} object for negative
#' binomial distribution type 2 (NB2) with fixed dispersion parameter. It extends
#' \code{\link[WALS]{negativeBinomial}}.
#'
#' \code{negbinWALS()} creates objects of the specialized class \code{"familyNBWALS"}
#' which inherits from \code{"familyWALS"} and \code{"family"}. It constructs the
#' \code{"familyNBWALS"} object for the negative binomial distribution type 2 (NB2)
#' with variable dispersion parameter by extending \code{\link[WALS]{negativeBinomial}}
#' and \code{negbinFixedWALS} with functions required in \code{\link[WALS]{walsNB}.}
#' **\code{negbinWALS} should only be used in \code{\link[WALS]{walsNBfit}} and
#' not in \code{\link[WALS]{walsGLM}} because the NB2 with variable dispersion
#' parameter is not a GLM!**
#'
#' ### Supported links
#' Currently, \code{binomialWALS()} and \code{poissonWALS()} only support their
#' canonical links, i.e. \code{"logit"} and \code{"log"}, respectively.
#' \code{negbinFixedWALS()} supports both, the \code{"canonical"} link and
#' the \code{"log"} link, however, the first is not recommended due to numerical
#' difficulties in the fitting process. In contrast, \code{negbinWALS()} only
#' supports the \code{"log"} link.
#'
#' @returns An object of class \code{"familyWALS"} to be used in
#' \code{\link[WALS]{walsGLM}} that inherits from \code{"\link[stats]{family}"}.
#' This is a list that contains elements returned from the corresponding family
#' function that it extends. Additionally, the following elements are available:
#' \item{theta.eta}{function. Derivative of the canonical parameter \eqn{\theta}
#' with respect to the linear link \eqn{\eta}, i.e. \eqn{d \theta / d \eta}.}
#' \item{psi}{function. \eqn{\psi} defined on p. 3 of \insertCite{deluca2018glm}{WALS}.}
#' \item{initializeY}{function. Preprocesses the response, e.g. in
#' \code{binomialWALS()} it transforms factors to numeric 0s and 1s.}
#' \item{transformY}{function. Transforms the response to \eqn{\bar{y}}.
#' See eq. (5) in \insertCite{deluca2018glm}{WALS} for GLMs and
#' \insertCite{huynhwalsnb}{WALS} for \code{negbinWALS()} used in
#' \code{\link[WALS]{walsNB}}.}
#' \item{transformX}{function. Transforms the regressors to \eqn{\bar{X}_1} and
#' \eqn{\bar{X}_2}, respectively. See eq. (5) in \insertCite{deluca2018glm}{WALS}
#' for GLMs and \insertCite{huynhwalsnb}{WALS} for \code{negbinWALS()} used in
#' \code{\link[WALS]{walsNB}}.}
#' \item{density}{function. The probability density/mass function of the family.}
#'
#' \code{poissonWALS()} and \code{negbinFixedWALS()} return objects of class
#' \code{"familyWALScount"} that inherit from \code{"familyWALS"} and
#' \code{"family"}. These are lists that contain the same elements as
#' \code{"familyWALS"} objects described above.
#'
#' @references
#' \insertAllCited{}
#'
#' @seealso [family], [walsGLM].
#'
#' @examples
#' ## Use in walsGLM():
#' data("NMES1988", package = "AER")
#' NMES1988 <- na.omit(NMES1988)
#' fitPoisson <- walsGLM(emergency ~ health + chronic + age + gender |
#' I((age^2)/10) + married + region, family = poissonWALS(),
#' data = NMES1988, prior = laplace())
#' summary(fitPoisson)
#'
#' ## Plot derivatives of binomialWALS() with default 'logit' link:
#' bi <- binomialWALS()
#' plot(bi$mu.eta, from = -10, to = 10)
#' plot(bi$theta.eta, from = -10, to = 10) # constant. logit is canonical link
#'
#' @aliases familyWALScount
#' @export
familyWALS <- function(object, ...) UseMethod("familyWALS", object)
#' Extends \code{poisson} family from \code{stats} with transformations required
#' for \code{walsGLM}.
#' @rdname familyWALS
#' @export
poissonWALS <- function(link = "log") {
# copy default family
fam <- poisson(link)
if (fam$link == "log") {
# Canonical link
# d theta/ d eta as function of linear link eta
fam$theta.eta <- function(etaStart) rep(1.0, length.out = length(etaStart))
fam$psi <- function(etaStart) fam$variance(fam$linkinv(etaStart))
# could insert specialized versions of transformX and transformY here
# if they are numerically better
} else stop(sprintf("%s link not implemented yet", fam$link))
fam$initializeY <- function(y) {
# only checks y
if (any(y < 0)) stop("negative values not allowed for the 'Poisson' family")
return(y)
}
## functions for Xbar_1, Xbar_2 and ybar
# does nothing with y, just passes it
fam$transformX <- function(X, etaStart, y = NULL) transformX(X, etaStart,
fam$psi(etaStart))
fam$transformY <- function(y, X1bar, X2bar, beta1, beta2, etaStart) {
y <- fam$initializeY(y)
transformY(y, X1bar, X2bar, beta1, beta2, etaStart,
vBar = fam$theta.eta(etaStart), muBar = fam$linkinv(etaStart),
psiBar = fam$psi(etaStart))
}
fam$density <- function(x, eta, log = FALSE, ...) {
mu <- fam$linkinv(eta)
return(dpois(x, lambda = mu, log = log))
}
# Support for R < 4.3.0, so summary.walsGLM works
if (is.null(fam$dispersion)) fam$dispersion <- 1
class(fam) <- c("familyWALScount", "familyWALS", class(fam))
return(fam)
}
#' Extends \code{binomial} family from \code{stats} with transformations required
#' for \code{walsGLM}.
#' @rdname familyWALS
#' @export
binomialWALS <- function(link = "logit") {
fam <- binomial(link)
if (fam$link == "logit") {
# Canonical link
fam$theta.eta <- function(etaStart) rep(1.0, length.out = length(etaStart))
fam$psi <- function(etaStart) fam$variance(fam$linkinv(etaStart))
} else stop(sprintf("%s link not implemented yet", fam$link))
fam$initializeY <- function(y) {
# convert factor to numeric
if (is.factor(y)) y <- y != levels(y)[1L]
return(y)
}
## functions for Xbar_1, Xbar_2 and ybar
# does nothing with y, just passes it
fam$transformX <- function(X, etaStart, y = NULL) transformX(X, etaStart,
fam$psi(etaStart))
fam$transformY <- function(y, X1bar, X2bar, beta1, beta2, etaStart) {
y <- fam$initializeY(y)
transformY(y, X1bar, X2bar, beta1, beta2, etaStart,
vBar = fam$theta.eta(etaStart), muBar = fam$linkinv(etaStart),
psiBar = fam$psi(etaStart))
}
fam$density <- function(x, eta, log = FALSE, ...) {
mu <- fam$linkinv(eta)
return(dbinom(x, size = 1, prob = mu, log = log, ...))
}
# Support for R < 4.3.0, so summary.walsGLM works
if (is.null(fam$dispersion)) fam$dispersion <- 1
class(fam) <- c("familyWALS", class(fam))
return(fam)
}
#' Negative binomial family
#'
#' Reconstruct family object for negative binomial type 2 (NB2) with fixed
#' scale parameter theta. Analogous to \code{\link[MASS]{negative.binomial}} in
#' \code{MASS} \insertCite{mass2002}{WALS} but \code{MASS} uses non-canonical link.
#'
#' @param theta dispersion parameter of NB2, always larger than 0.
#' @param link specifies link function, currently only "log" and "canonical"
#' are supported.
#'
#' @references
#' \insertAllCited{}
#'
#' @seealso [family], [familyWALS], [negbinWALS], [negbinFixedWALS].
negativeBinomial <- function(theta, link = "log") {
if (link == "canonical") {
# implementing functions required in glm.fit
linkfun <- function(mu) log(mu) - log(mu + theta)
variance <- function(mu) mu*(1 + mu/theta)
linkinv <- function(eta) theta/(exp(-eta) - 1)
mu.eta <- function(eta) theta * (exp(-eta) / ((exp(-eta) - 1)^2))
## alternatively (maybe more numerically stable)
# mu.eta <- function(eta) theta / (exp(eta) - 2 + exp(-eta))
valideta <- function(eta) TRUE
validmu <- function(mu) all(is.finite(mu)) && all(mu > 0)
## copied from MASS
aic <- function(y, n, mu, wt, dev) {
term <- ( (y + theta) * log(mu + theta) - y * log(mu) + lgamma(y + 1)
- theta * log(theta) + lgamma(theta) - lgamma(theta + y) )
return(2 * sum(term * wt))
}
dev.resids <- function(y, mu, wt) {
return(2 * wt * (y * log(pmax(1, y)/mu) - (y + theta) * log((y + theta)/(mu + theta))))
}
# initializes mustart and n for IRLS in glm.fit
initialize <- expression({
if (any(y < 0))
stop("negative values not allowed for the negative binomial family")
n <- rep(1, nobs)
mustart <- y + (y == 0)/6
})
famname <- paste("Negative Binomial(", format(round(theta, 4)), ")",
sep = "")
return(structure(list(family = famname, link = link, linkfun = linkfun,
linkinv = linkinv, variance = variance,
dev.resids = dev.resids, aic = aic, mu.eta = mu.eta,
initialize = initialize, validmu = validmu,
valideta = valideta, simulate = NULL),
class = "family"))
} else {
return(MASS::negative.binomial(theta = theta, link = link))
}
}
#' NB2 family with fixed dispersion parameter and additional functions
#' @rdname familyWALS
#' @export
negbinFixedWALS <- function(scale, link) {
alpha <- log(scale)
fam <- negativeBinomial(theta = scale, link = link)
fam$initializeY <- function(y) {
# only checks for valid values
if (any(y < 0)) stop("negative values not allowed for the negative binomial family")
return(y)
}
if (fam$link == "canonical") {
# Canonical link
# d theta/ d eta as function of linear link eta
fam$theta.eta <- function(etaStart) rep(1.0, length.out = length(etaStart))
fam$psi <- function(etaStart) scale * (exp(-etaStart) / ((exp(-etaStart) - 1)^2))
## alternatively (maybe more numerically stable)
# fam$psi <- function(etaStart) scale / (exp(etaStart) - 2 + exp(-etaStart))
fam$transformX <- function(X, etaStart, y = NULL) {
# does nothing with y, just passes it
# as.vector to allow broadcasting along columns
return(as.vector( (sqrt(scale)*exp(0.5*etaStart)) / (1 - exp(etaStart)) ) * X)
}
fam$transformY <- function(y, X1bar, X2bar, beta1, beta2, etaStart) {
y <- fam$initializeY(y)
return( X1bar %*% beta1 + X2bar %*% beta2
+ (exp(-0.5)/sqrt(scale)) * (y*(1 - exp(etaStart)) - scale*exp(etaStart)) )
}
} else if (fam$link == "log") {
# d theta / d eta as function of linear link eta and y
fam$theta.eta <- function(eta) scale / (exp(eta) + scale)
fam$psi <- function(eta, y) {
return(exp( eta + alpha + ( log(y + scale) - 2.0*log(exp(eta) + scale) ) ))
}
fam$transformX <- function(X, eta, y) { # transform X to Xbar
# the first term in front of X is sqrt(psi). Explicitly compute it
# instead of using out$psi so we can pull sqrt() into exp() and avoid
# squaring the denominator.
return(as.numeric( (exp(0.5*(eta + alpha)) * sqrt(y + scale)) /
(exp(eta) + scale) ) * X)
}
fam$transformY <- function(y, X1bar, X2bar, beta1, beta2, etaStart) {
y <- fam$initializeY(y)
return( X1bar %*% beta1 + X2bar %*% beta2
+ exp(-0.5*etaStart) * sqrt( scale / (scale + y))
* (y - exp(etaStart)) )
}
} else stop(sprintf("%s link not implemented yet", fam$link))
fam$density <- function(x, eta, log = FALSE, ...) {
mu <- fam$linkinv(eta)
return(dnbinom(x, size = scale, mu = mu, log = log))
}
class(fam) <- c("familyWALScount", "familyWALS", class(fam))
return(fam)
}
#' NB2 family with variable scale parameter and additional functions
#' @rdname familyWALS
#' @aliases familyNBWALS
#' @param scale dispersion parameter of NB2 to be used, always larger than 0.
#'
#' @returns \code{negbinWALS()} creates an object of class \code{"familyNBWALS"}
#' (only used internally in \code{\link[WALS]{walsNB}}) that inherits from
#' \code{"familyWALScount"}, \code{"familyWALS"} and \code{"\link[stats]{family}"}.
#' This is a list that contains all elements returned from \code{negbinFixed}
#' and the elements described above for objects of class \code{"familyWALS"}.
#' Additionally contains the following elements with functions required in
#' \code{\link[WALS]{walsNB}} that are described in
#' \insertCite{huynhwalsnb}{WALS}:
#' \item{q}{function. Computes \eqn{\bar{q}}.}
#' \item{g}{function. Computes \eqn{\bar{g}}.}
#' \item{transformY0}{function. Computes \eqn{\bar{y}_0}.}
#' \item{t}{function. Computes \eqn{\bar{t}}.}
#' \item{epsilon}{function. Computes \eqn{\bar{\epsilon}}.}
#' \item{epsiloninv}{function. Computes \eqn{\bar{\epsilon}^{-1}}.}
#' \item{kappaSum}{function. Computes \eqn{\bar{\kappa}^{\top} \mathbf{1}}.}
#' \item{computeAlpha}{function. Computes the log-dispersion parameter
#' \eqn{\log(\rho)} given (model-averaged) estimates of the regression
#' coefficients of the transformed regressors \eqn{\gamma_{1}} and \eqn{\gamma_{2}}.}
#'
#' @export
negbinWALS <- function(scale, link) {
alpha <- log(scale)
out <- negbinFixedWALS(scale, link)
if (link == "log") {
out$q <- function(eta, y) { # q_i = C'(y-muStart)
return(exp(eta - 2*log(exp(eta) + exp(alpha))) * (y - exp(eta)))
}
out$g <- function() return(scale)
out$transformY0 <- function(y, X1bar, X2bar, beta1, beta2, eta) {
y <- out$initializeY(y)
mu <- exp(eta)
uplus <- ((y - mu) * ( exp(0.5*(alpha - eta - log(y + scale))) *
((mu*(1.0 - alpha) + scale)/(mu + scale)) ))
return(X1bar %*% beta1 + X2bar %*% beta2 + uplus)
}
out$t <- function(eta, y) {
n <- length(y)
mu <- exp(eta)
sum1scale <- sum((y - mu) / (exp(2.0*eta - alpha) + 2.0*exp(eta) + exp(alpha)))
sum2scale <- sum(mu / (exp(eta - alpha) + 1))
g2ksum <- (sum1scale + sum2scale +
scale * (sum(trigamma(y + scale)) - n*trigamma(scale)))
return(scale * ( out$kappaSum(eta, y)*(1.0 - alpha) - sum(out$q(eta,y)*eta)) -
alpha*(g2ksum))
}
out$epsilon <- function(eta, y) {
return(1.0 / out$epsiloninv(eta, y))
}
out$epsiloninv <- function(eta, y) {
n <- length(y)
mu <- exp(eta)
sum1scale <- sum((y - mu) / (exp(2.0*eta - alpha) + 2.0*exp(eta) + exp(alpha)))
sum2 <- sum((2.0*mu - y) / (mu + scale))
return(sum1scale + sum2 + (sum(digamma(y + scale)) - n*digamma(scale)) -
sum(log(mu + scale)) +
scale*(sum(trigamma(y + scale)) - n*trigamma(scale)) + n * alpha)
}
out$kappaSum <- function(eta, y){
n <- length(y)
mu <- exp(eta)
return(sum((mu - y)/(mu + scale)) - (sum(log(mu + scale)) - n*alpha) +
(sum(digamma(y + scale)) - n * digamma(scale)))
}
out$computeAlpha <- function(gamma1, gamma2, Z1, Z2, y, eta, q) {
kappaSum <- out$kappaSum(eta, y)
sum2 <- out$epsilon(eta, y) * sum(q * (Z1 %*% gamma1 + Z2 %*% gamma2))
return((sum(q*eta) - kappaSum) / out$epsiloninv(eta, y) + alpha - sum2)
}
} else {
stop("link not implemented")
}
class(out) <- c("familyNBWALS", class(out))
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.