R/familyWALS.R

Defines functions negbinWALS negbinFixedWALS negativeBinomial binomialWALS poissonWALS familyWALS

Documented in binomialWALS familyWALS negativeBinomial negbinFixedWALS negbinWALS poissonWALS

#' Extended Family Objects for Models
#'
#' Objects of class \code{"familyWALS"} inherit from \code{"\link[stats]{family}"}
#' and extend those with (transformation) functions required for
#' \code{\link[WALS]{walsGLM}} and \code{\link[WALS]{walsNB}}.
#'
#' @param link Specifies the model link function. Typically a character string
#' or an object of class \code{"link-glm"} generated by
#' \code{\link[stats]{make.link}}. See \code{\link[stats]{family}} for more details.
#' Currently, only a limited number of links are supported. See below for more
#' details.
#' @param object The function \code{familyWALS()} extracts the family objects stored
#' in \code{"walsGLM"} objects.
#' @param ... Further arguments passed to methods.
#'
#' The \code{negbinWALS()} family currently only accepts \code{"log"}, while
#' \code{negbinFixedWALS()} supports both \code{"log"} and \code{"canonical"}.
#'
#' @details
#' \code{familyWALS()} is a generic function that extracts the family used in
#' \code{"walsGLM"} objects.
#'
#' \code{negbinFixedWALS()} creates the \code{"familyWALS"} object for negative
#' binomial distribution type 2 (NB2) with fixed dispersion parameter. It extends
#' \code{\link[WALS]{negativeBinomial}}.
#'
#' \code{negbinWALS()} creates objects of the specialized class \code{"familyNBWALS"}
#' which inherits from \code{"familyWALS"} and \code{"family"}. It constructs the
#' \code{"familyNBWALS"} object for the negative binomial distribution type 2 (NB2)
#' with variable dispersion parameter by extending \code{\link[WALS]{negativeBinomial}}
#' and \code{negbinFixedWALS} with functions required in \code{\link[WALS]{walsNB}.}
#' **\code{negbinWALS} should only be used in \code{\link[WALS]{walsNBfit}} and
#' not in \code{\link[WALS]{walsGLM}} because the NB2 with variable dispersion
#' parameter is not a GLM!**
#'
#' ### Supported links
#' Currently, \code{binomialWALS()} and \code{poissonWALS()} only support their
#' canonical links, i.e. \code{"logit"} and \code{"log"}, respectively.
#' \code{negbinFixedWALS()} supports both, the \code{"canonical"} link and
#' the \code{"log"} link, however, the first is not recommended due to numerical
#' difficulties in the fitting process. In contrast, \code{negbinWALS()} only
#' supports the \code{"log"} link.
#'
#' @returns An object of class \code{"familyWALS"} to be used in
#' \code{\link[WALS]{walsGLM}} that inherits from \code{"\link[stats]{family}"}.
#' This is a list that contains elements returned from the corresponding family
#' function that it extends. Additionally, the following elements are available:
#' \item{theta.eta}{function. Derivative of the canonical parameter \eqn{\theta}
#' with respect to the linear link \eqn{\eta}, i.e. \eqn{d \theta / d \eta}.}
#' \item{psi}{function. \eqn{\psi} defined on p. 3 of \insertCite{deluca2018glm}{WALS}.}
#' \item{initializeY}{function. Preprocesses the response, e.g. in
#' \code{binomialWALS()} it transforms factors to numeric 0s and 1s.}
#' \item{transformY}{function. Transforms the response to \eqn{\bar{y}}.
#' See eq. (5) in \insertCite{deluca2018glm}{WALS} for GLMs and
#' \insertCite{huynhwalsnb}{WALS} for \code{negbinWALS()} used in
#' \code{\link[WALS]{walsNB}}.}
#' \item{transformX}{function. Transforms the regressors to \eqn{\bar{X}_1} and
#' \eqn{\bar{X}_2}, respectively. See eq. (5) in \insertCite{deluca2018glm}{WALS}
#' for GLMs and \insertCite{huynhwalsnb}{WALS} for \code{negbinWALS()} used in
#' \code{\link[WALS]{walsNB}}.}
#' \item{density}{function. The probability density/mass function of the family.}
#'
#' \code{poissonWALS()} and \code{negbinFixedWALS()} return objects of class
#' \code{"familyWALScount"} that inherit from \code{"familyWALS"} and
#' \code{"family"}. These are lists that contain the same elements as
#' \code{"familyWALS"} objects described above.
#'
#' @references
#' \insertAllCited{}
#'
#' @seealso [family], [walsGLM].
#'
#' @examples
#' ## Use in walsGLM():
#' data("NMES1988", package = "AER")
#' NMES1988 <- na.omit(NMES1988)
#' fitPoisson <- walsGLM(emergency ~ health + chronic + age + gender |
#'                        I((age^2)/10) + married + region, family = poissonWALS(),
#'                       data = NMES1988, prior = laplace())
#' summary(fitPoisson)
#'
#' ## Plot derivatives of binomialWALS() with default 'logit' link:
#' bi <- binomialWALS()
#' plot(bi$mu.eta, from = -10, to = 10)
#' plot(bi$theta.eta, from = -10, to = 10) # constant. logit is canonical link
#'
#' @aliases familyWALScount
#' @export
familyWALS <- function(object, ...) UseMethod("familyWALS", object)


#' Extends \code{poisson} family from \code{stats} with transformations required
#' for \code{walsGLM}.
#' @rdname familyWALS
#' @export
poissonWALS <- function(link = "log") {
  # copy default family
  fam <- poisson(link)


  if (fam$link == "log") {
    # Canonical link

    # d theta/ d eta as function of linear link eta
    fam$theta.eta <- function(etaStart) rep(1.0, length.out = length(etaStart))
    fam$psi <- function(etaStart) fam$variance(fam$linkinv(etaStart))

    # could insert specialized versions of transformX and transformY here
    # if they are numerically better

  } else stop(sprintf("%s link not implemented yet", fam$link))

  fam$initializeY <- function(y) {
    # only checks y
    if (any(y < 0)) stop("negative values not allowed for the 'Poisson' family")
    return(y)
  }

  ## functions for Xbar_1, Xbar_2 and ybar
  # does nothing with y, just passes it
  fam$transformX <- function(X, etaStart, y = NULL) transformX(X, etaStart,
                                                               fam$psi(etaStart))
  fam$transformY <- function(y, X1bar, X2bar, beta1, beta2, etaStart) {
    y <- fam$initializeY(y)
    transformY(y, X1bar, X2bar, beta1, beta2, etaStart,
               vBar = fam$theta.eta(etaStart), muBar = fam$linkinv(etaStart),
               psiBar = fam$psi(etaStart))
  }

  fam$density <- function(x, eta, log = FALSE, ...) {
    mu <- fam$linkinv(eta)
    return(dpois(x, lambda = mu, log = log))
  }

  # Support for R < 4.3.0, so summary.walsGLM works
  if (is.null(fam$dispersion)) fam$dispersion <- 1

  class(fam) <- c("familyWALScount", "familyWALS", class(fam))
  return(fam)
}

#' Extends \code{binomial} family from \code{stats} with transformations required
#' for \code{walsGLM}.
#' @rdname familyWALS
#' @export
binomialWALS <- function(link = "logit") {
  fam <- binomial(link)

  if (fam$link == "logit") {
    # Canonical link

    fam$theta.eta <- function(etaStart) rep(1.0, length.out = length(etaStart))
    fam$psi <- function(etaStart) fam$variance(fam$linkinv(etaStart))

  } else stop(sprintf("%s link not implemented yet", fam$link))

  fam$initializeY <- function(y) {
    # convert factor to numeric
    if (is.factor(y)) y <- y != levels(y)[1L]
    return(y)
  }

  ## functions for Xbar_1, Xbar_2 and ybar
  # does nothing with y, just passes it
  fam$transformX <- function(X, etaStart, y = NULL) transformX(X, etaStart,
                                                               fam$psi(etaStart))
  fam$transformY <- function(y, X1bar, X2bar, beta1, beta2, etaStart) {
    y <- fam$initializeY(y)
    transformY(y, X1bar, X2bar, beta1, beta2, etaStart,
               vBar = fam$theta.eta(etaStart), muBar = fam$linkinv(etaStart),
               psiBar = fam$psi(etaStart))
  }

  fam$density <- function(x, eta, log = FALSE, ...) {
    mu <- fam$linkinv(eta)
    return(dbinom(x, size = 1, prob = mu, log = log, ...))
  }

  # Support for R < 4.3.0, so summary.walsGLM works
  if (is.null(fam$dispersion)) fam$dispersion <- 1

  class(fam) <- c("familyWALS", class(fam))
  return(fam)
}


#' Negative binomial family
#'
#' Reconstruct family object for negative binomial type 2 (NB2) with fixed
#' scale parameter theta. Analogous to \code{\link[MASS]{negative.binomial}} in
#' \code{MASS} \insertCite{mass2002}{WALS} but \code{MASS} uses non-canonical link.
#'
#' @param theta dispersion parameter of NB2, always larger than 0.
#' @param link specifies link function, currently only "log" and "canonical"
#' are supported.
#'
#' @references
#' \insertAllCited{}
#'
#' @seealso [family], [familyWALS], [negbinWALS], [negbinFixedWALS].
negativeBinomial <- function(theta, link = "log") {
  if (link == "canonical") {
    # implementing functions required in glm.fit
    linkfun <- function(mu) log(mu) - log(mu + theta)
    variance <- function(mu) mu*(1 + mu/theta)
    linkinv <- function(eta) theta/(exp(-eta) - 1)
    mu.eta <- function(eta) theta * (exp(-eta) / ((exp(-eta) - 1)^2))
    ## alternatively (maybe more numerically stable)
    # mu.eta <- function(eta) theta / (exp(eta) - 2 + exp(-eta))
    valideta <- function(eta) TRUE
    validmu <- function(mu) all(is.finite(mu)) && all(mu > 0)

    ## copied from MASS
    aic <- function(y, n, mu, wt, dev) {
      term <- ( (y + theta) * log(mu + theta) - y * log(mu) + lgamma(y + 1)
                - theta * log(theta) + lgamma(theta) - lgamma(theta + y) )
      return(2 * sum(term * wt))
    }

    dev.resids <- function(y, mu, wt) {
      return(2 * wt * (y * log(pmax(1, y)/mu) - (y + theta) * log((y + theta)/(mu + theta))))
    }

    # initializes mustart and n for IRLS in glm.fit
    initialize <- expression({
      if (any(y < 0))
        stop("negative values not allowed for the negative binomial family")
      n <- rep(1, nobs)
      mustart <- y + (y == 0)/6
    })


    famname <- paste("Negative Binomial(", format(round(theta, 4)), ")",
                     sep = "")
    return(structure(list(family = famname, link = link, linkfun = linkfun,
                   linkinv = linkinv, variance = variance,
                   dev.resids = dev.resids, aic = aic, mu.eta = mu.eta,
                   initialize = initialize, validmu = validmu,
                   valideta = valideta, simulate = NULL),
              class = "family"))

  } else {
    return(MASS::negative.binomial(theta = theta, link = link))
  }
}



#' NB2 family with fixed dispersion parameter and additional functions
#' @rdname familyWALS
#' @export
negbinFixedWALS <- function(scale, link) {
  alpha <- log(scale)
  fam <- negativeBinomial(theta = scale, link = link)

  fam$initializeY <- function(y) {
    # only checks for valid values
    if (any(y < 0)) stop("negative values not allowed for the negative binomial family")
    return(y)
  }

  if (fam$link == "canonical") {
    # Canonical link

    # d theta/ d eta as function of linear link eta
    fam$theta.eta <- function(etaStart) rep(1.0, length.out = length(etaStart))

    fam$psi <- function(etaStart) scale * (exp(-etaStart) / ((exp(-etaStart) - 1)^2))

    ## alternatively (maybe more numerically stable)
    # fam$psi <- function(etaStart) scale / (exp(etaStart) - 2 + exp(-etaStart))

    fam$transformX <- function(X, etaStart, y = NULL) {
      # does nothing with y, just passes it
      # as.vector to allow broadcasting along columns
      return(as.vector( (sqrt(scale)*exp(0.5*etaStart)) / (1 - exp(etaStart)) ) * X)
    }
    fam$transformY <- function(y, X1bar, X2bar, beta1, beta2, etaStart) {
      y <- fam$initializeY(y)
      return( X1bar %*% beta1 + X2bar %*% beta2
              + (exp(-0.5)/sqrt(scale)) * (y*(1 - exp(etaStart)) - scale*exp(etaStart)) )
    }
  } else if (fam$link == "log") {

    # d theta / d eta as function of linear link eta and y
    fam$theta.eta <- function(eta) scale / (exp(eta) + scale)

    fam$psi <- function(eta, y) {
      return(exp( eta + alpha + ( log(y + scale) - 2.0*log(exp(eta) + scale) ) ))
    }

    fam$transformX <- function(X, eta, y) { # transform X to Xbar
      # the first term in front of X is sqrt(psi). Explicitly compute it
      # instead of using out$psi so we can pull sqrt() into exp() and avoid
      # squaring the denominator.
      return(as.numeric( (exp(0.5*(eta + alpha)) * sqrt(y + scale)) /
                           (exp(eta) + scale) ) * X)
    }

    fam$transformY <- function(y, X1bar, X2bar, beta1, beta2, etaStart) {
      y <- fam$initializeY(y)
      return( X1bar %*% beta1 + X2bar %*% beta2
              + exp(-0.5*etaStart) * sqrt( scale / (scale + y))
              * (y - exp(etaStart)) )
    }

  } else stop(sprintf("%s link not implemented yet", fam$link))


  fam$density <- function(x, eta, log = FALSE, ...) {
    mu <- fam$linkinv(eta)
    return(dnbinom(x, size = scale, mu = mu, log = log))
  }

  class(fam) <- c("familyWALScount", "familyWALS", class(fam))
  return(fam)
}

#' NB2 family with variable scale parameter and additional functions
#' @rdname familyWALS
#' @aliases familyNBWALS
#' @param scale dispersion parameter of NB2 to be used, always larger than 0.
#'
#' @returns \code{negbinWALS()} creates an object of class \code{"familyNBWALS"}
#' (only used internally in \code{\link[WALS]{walsNB}}) that inherits from
#' \code{"familyWALScount"}, \code{"familyWALS"} and \code{"\link[stats]{family}"}.
#' This is a list that contains all elements returned from \code{negbinFixed}
#' and the elements described above for objects of class \code{"familyWALS"}.
#' Additionally contains the following elements with functions required in
#' \code{\link[WALS]{walsNB}} that are described in
#' \insertCite{huynhwalsnb}{WALS}:
#' \item{q}{function. Computes \eqn{\bar{q}}.}
#' \item{g}{function. Computes \eqn{\bar{g}}.}
#' \item{transformY0}{function. Computes \eqn{\bar{y}_0}.}
#' \item{t}{function. Computes \eqn{\bar{t}}.}
#' \item{epsilon}{function. Computes \eqn{\bar{\epsilon}}.}
#' \item{epsiloninv}{function. Computes \eqn{\bar{\epsilon}^{-1}}.}
#' \item{kappaSum}{function. Computes \eqn{\bar{\kappa}^{\top} \mathbf{1}}.}
#' \item{computeAlpha}{function. Computes the log-dispersion parameter
#' \eqn{\log(\rho)} given (model-averaged) estimates of the regression
#' coefficients of the transformed regressors \eqn{\gamma_{1}} and \eqn{\gamma_{2}}.}
#'
#' @export
negbinWALS <- function(scale, link) {
  alpha <- log(scale)
  out <- negbinFixedWALS(scale, link)

  if (link == "log") {
    out$q <- function(eta, y) { # q_i = C'(y-muStart)
      return(exp(eta - 2*log(exp(eta) + exp(alpha))) * (y - exp(eta)))
    }

    out$g <- function() return(scale)

    out$transformY0 <- function(y, X1bar, X2bar, beta1, beta2, eta) {
      y <- out$initializeY(y)
      mu <- exp(eta)
      uplus <- ((y - mu) * ( exp(0.5*(alpha - eta - log(y + scale))) *
        ((mu*(1.0 - alpha) + scale)/(mu + scale))  ))

      return(X1bar %*% beta1 + X2bar %*% beta2 + uplus)
    }

    out$t <- function(eta, y) {
      n <- length(y)
      mu <- exp(eta)
      sum1scale <- sum((y - mu) / (exp(2.0*eta - alpha) + 2.0*exp(eta) + exp(alpha)))
      sum2scale <- sum(mu / (exp(eta - alpha) + 1))
      g2ksum <- (sum1scale + sum2scale +
                   scale * (sum(trigamma(y + scale)) - n*trigamma(scale)))

      return(scale * ( out$kappaSum(eta, y)*(1.0 - alpha) - sum(out$q(eta,y)*eta)) -
               alpha*(g2ksum))
    }

    out$epsilon <- function(eta, y) {
      return(1.0 / out$epsiloninv(eta, y))
    }

    out$epsiloninv <- function(eta, y) {
      n <- length(y)
      mu <- exp(eta)
      sum1scale <- sum((y - mu) / (exp(2.0*eta - alpha) + 2.0*exp(eta) + exp(alpha)))
      sum2 <- sum((2.0*mu - y) / (mu + scale))

      return(sum1scale + sum2 + (sum(digamma(y + scale)) - n*digamma(scale)) -
        sum(log(mu + scale)) +
        scale*(sum(trigamma(y + scale)) - n*trigamma(scale)) + n * alpha)
    }

    out$kappaSum <- function(eta, y){
      n <- length(y)
      mu <- exp(eta)
      return(sum((mu - y)/(mu + scale)) - (sum(log(mu + scale)) - n*alpha) +
        (sum(digamma(y + scale)) - n * digamma(scale)))
    }

    out$computeAlpha <- function(gamma1, gamma2, Z1, Z2, y, eta, q) {
      kappaSum <- out$kappaSum(eta, y)
      sum2 <- out$epsilon(eta, y) * sum(q * (Z1 %*% gamma1 + Z2 %*% gamma2))

      return((sum(q*eta) - kappaSum) / out$epsiloninv(eta, y) + alpha - sum2)
    }

  } else {
    stop("link not implemented")
  }


  class(out) <- c("familyNBWALS", class(out))
  return(out)
}

Try the WALS package in your browser

Any scripts or data that you put into this service are public.

WALS documentation built on June 22, 2024, 9:42 a.m.