R/zerotrunc.R
In countreg: Count Data Regression

Documented in coef.zerotrunc estfun.zerotrunc extractAIC.zerotrunc fitted.zerotrunc getSummary.zerotrunc logLik.zerotrunc model.frame.zerotrunc model.matrix.zerotrunc nobs.zerotrunc predict.zerotrunc predprob.zerotrunc print.summary.zerotrunc print.zerotrunc residuals.zerotrunc summary.zerotrunc terms.zerotrunc vcov.zerotrunc zerotrunc zerotrunc.control

zerotrunc <- function(formula, data, subset, na.action, weights, offset,
                      dist = c("poisson", "negbin", "geometric"), theta = Inf,
	   	      control = zerotrunc.control(...),
		      model = TRUE, y = TRUE, x = FALSE, ...)
{
  ## set up likelihood components
  llPoisson <- function(parms) {
    mu <- as.vector(exp(X %*% parms + offset))
    sum(weights * (
      dpois(Y, lambda = mu, log = TRUE) - ppois(0, lambda = mu, lower.tail = FALSE, log.p = TRUE)
    ))
  }

  llNegBin <- function(parms) {
    ## parameters
    mu <- as.vector(exp(X %*% parms[1:k] + offset))
    theta <- exp(parms[k+1])
    sum(weights * suppressWarnings(
      dnbinom(Y, size = theta, mu = mu, log = TRUE) - 
      pnbinom(0, size = theta, mu = mu, lower.tail = FALSE, log.p = TRUE)
    ))
  }

  llGeom <- function(parms) llNegBin(c(parms, 0))

  llNBFixed <- function(parms) llNegBin(c(parms, log(theta)))

  gradPoisson <- function(parms) {
    eta <- as.vector(X %*% parms + offset)
    mu <- exp(eta)
    colSums(((Y - mu) - exp(ppois(0, lambda = mu, log.p = TRUE) -
      ppois(0, lambda = mu, lower.tail = FALSE, log.p = TRUE) + eta)) * weights * X)
  }
  
  gradGeom <- function(parms) {
    eta <- as.vector(X %*% parms + offset)
    mu <- exp(eta)      
    colSums(((Y - mu * (Y + 1)/(mu + 1)) -
      exp(pnbinom(0, mu = mu, size = 1, log.p = TRUE) -
        pnbinom(0, mu = mu, size = 1, lower.tail = FALSE, log.p = TRUE) -
	log(mu + 1) + eta)) * weights * X)
  }

  gradNegBin <- function(parms) {
    eta <- as.vector(X %*% parms[1:k] + offset)
    mu <- exp(eta)      
    theta <- exp(parms[k+1])
    logratio <- pnbinom(0, mu = mu, size = theta, log.p = TRUE) -
        pnbinom(0, mu = mu, size = theta, lower.tail = FALSE, log.p = TRUE)
    rval <- colSums(((Y - mu * (Y + theta)/(mu + theta)) -
      exp(logratio + log(theta) - log(mu + theta) + eta)) * weights * X)
    rval2 <- sum((digamma(Y + theta) - digamma(theta) +    
      log(theta) - log(mu + theta) + 1 - (Y + theta)/(mu + theta) +
      exp(logratio) * (log(theta) - log(mu + theta) + 1 - theta/(mu + theta))) * weights) * theta
    c(rval, rval2)
  }  
  
  gradNBFixed <- function(parms) {
    eta <- as.vector(X %*% parms + offset)
    mu <- exp(eta)      
    logratio <- pnbinom(0, mu = mu, size = theta, log.p = TRUE) -
        pnbinom(0, mu = mu, size = theta, lower.tail = FALSE, log.p = TRUE)
    colSums(((Y - mu * (Y + theta)/(mu + theta)) -
      exp(logratio + log(theta) - log(mu + theta) + eta)) * weights * X)
  }

  ## dist/theta processing
  if(!missing(theta)) {
    if(!missing(dist)) warning("supply either 'theta' or 'dist' but not both, 'dist' is ignored")
    if(!is.null(theta) && theta <= 0) stop("theta has to be > 0")
    dist <- if(is.null(theta)) {
      "negbin"
    } else if(!is.finite(theta)) {
      "poisson"
    } else if(isTRUE(all.equal(theta, 1))) {
      "geometric"
    } else {
      "negbin-fixed"
    }
  } else {
    dist <- match.arg(dist)
    theta <- switch(dist,
      "poisson" = Inf,
      "geometric" = 1,
      "negbin" = NULL)
  }

  ## collect likelihood components
  llfun <- switch(dist,
                  "poisson" = llPoisson,
	          "geometric" = llGeom,
	          "negbin" = llNegBin,
		  "negbin-fixed" = llNBFixed)
  grad <- switch(dist,
                 "poisson" = gradPoisson,
		 "geometric" = gradGeom,
		 "negbin" = gradNegBin,
		 "negbin-fixed" = gradNBFixed)

  ## call and formula
  cl <- match.call()
  if(missing(data)) data <- environment(formula)
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "subset", "na.action", "weights", "offset"), names(mf), 0)
  mf <- mf[c(1, m)]
  mf$drop.unused.levels <- TRUE
  
  ## call model.frame()
  mf[[1]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())
  
  ## extract terms, model matrices, response
  mt <- terms(formula, data = data)
  X <- model.matrix(mt, mf)
  Y <- model.response(mf, "numeric")

  ## sanity checks
  if(length(Y) < 1) stop("empty model")
  if(any(Y < 0)) stop("invalid dependent variable, negative counts")
  if(any(Y == 0)) stop("invalid dependent variable, no zeros allowed")  
  if(!isTRUE(all.equal(as.vector(Y), as.integer(round(Y + 0.001)))))
    stop("invalid dependent variable, non-integer values")
  Y <- as.integer(round(Y + 0.001))
  
  ## convenience variables
  n <- length(Y)
  k <- NCOL(X)

  ## weights and offset
  weights <- model.weights(mf)
  if(is.null(weights)) weights <- 1
  if(length(weights) == 1) weights <- rep(weights, n)
  weights <- as.vector(weights)
  names(weights) <- rownames(mf)
  
  offset <- model.offset(mf)
  if(is.null(offset)) offset <- 0
  if(length(offset) == 1) offset <- rep(offset, n)
  offset <- as.vector(offset)

  ## starting values
  start <- control$start
  start_theta <- NULL
  if(!is.null(start)) {
    if(dist != "negbin") {
      start_theta <- NULL
      if(length(start) != k) {
        warning("invalid starting values, model coefficients not correctly specified")
        start <- NULL
      }
    } else {
      if(length(start) == k + 1) {
        start_theta <- start[k+1]
        start <- start[1:k]
      } else if(length(start) == k) {
        start_theta <- NULL
      } else {
   	warning("invalid starting values, model coefficients not correctly specified")
   	start <- start_theta <- NULL  
      }
    }
  }
  
  ## default starting values
  if(is.null(start)) start <- glm.fit(X, Y, family = poisson(), weights = weights, offset = offset)$coefficients
  if(is.null(start_theta)) start_theta <- 1
  if(dist == "negbin") start <- c(start, log(start_theta))

  ## model fitting
  ## control parameters
  method <- control$method
  hessian <- control$hessian
  ocontrol <- control
  control$method <- control$hessian <- control$start <- NULL

  ## ML estimation
  fit <- optim(fn = llfun, gr = grad, par = start,
    method = method, hessian = hessian, control = control)

  ## coefficients
  cf <- fit$par[1:k]
  if(dist == "negbin") theta <- as.vector(exp(fit$par[k + 1]))
  names(cf) <- colnames(X)

  ## covariances
  vc <- if(hessian) {
    tryCatch(-solve(as.matrix(fit$hessian)),
      error = function(e) {
        warning(e$message, call = FALSE)
        matrix(NA_real_, nrow = k + (dist == "negbin"), ncol = k + (dist == "negbin"))
      })
  } else {
    matrix(NA_real_, nrow = k + (dist == "negbin"), ncol = k + (dist == "negbin"))
  }
  if(dist == "negbin") {
    SE.logtheta <- as.vector(sqrt(diag(vc)[k + 1]))
    vc <- vc[-(k+1), -(k+1), drop = FALSE]
  } else {
    SE.logtheta <- NULL
  }
  colnames(vc) <- rownames(vc) <- colnames(X)

  ## fitted and residuals
  mu <- exp(X %*% cf + offset)[,1]
  p0 <- if(dist == "poisson") ppois(0, lambda = mu, lower.tail = FALSE, log.p = TRUE)
    else pnbinom(0, size = theta, mu = mu, lower.tail = FALSE, log.p = TRUE)
  Yhat <- exp(log(mu) - p0)
  res <- sqrt(weights) * (Y - Yhat)

  ## effective observations
  nobs <- sum(weights > 0) ## = n - sum(weights == 0)

  rval <- list(coefficients = cf,
    residuals = res,
    fitted.values = Yhat,
    optim = fit,
    method = method,
    control = control,
    start = start,
    weights = if(identical(as.vector(weights), rep(1, n))) NULL else weights,
    offset = if(identical(offset, rep(0, n))) NULL else offset,
    n = nobs,
    df.null = nobs - 1 - (dist == "negbin"),
    df.residual = nobs - k - (dist == "negbin"),
    terms = mt,
    theta = theta,
    SE.logtheta = SE.logtheta,
    loglik = fit$value,
    vcov = vc,
    dist = dist,
    converged = fit$convergence < 1,
    call = cl,
    formula = formula,
    levels = .getXlevels(mt, mf),
    contrasts = attr(X, "contrasts")
  )
  if(model) rval$model <- mf
  if(y) rval$y <- Y
  if(x) rval$x <- X
      
  class(rval) <- "zerotrunc"
  return(rval)
}

zerotrunc.control <- function(method = "BFGS", maxit = 10000, start = NULL, ...) {
  rval <- list(method = method, maxit = maxit, start = start)
  rval <- c(rval, list(...))
  if(!is.null(rval$fnscale)) warning("fnscale must not be modified")
  rval$fnscale <- -1
  if(!is.null(rval$hessian)) warning("hessian must not be modified")
  rval$hessian <- TRUE
  if(is.null(rval$reltol)) rval$reltol <- .Machine$double.eps^(1/1.6)
  rval
}

coef.zerotrunc <- function(object, ...) {
  object$coefficients
}

vcov.zerotrunc <- function(object, ...) {
  object$vcov
}

logLik.zerotrunc <- function(object, ...) {
  structure(object$loglik, df = object$n - object$df.residual, nobs = object$n, class = "logLik")
}

nobs.zerotrunc <- function(object, ...) object$n

print.zerotrunc <- function(x, digits = max(3, getOption("digits") - 3), ...)
{
  if(x$dist == "negbin-fixed") {
    dist <- "negbin"
    fixed <- TRUE
  } else {
    dist <- x$dist
    fixed <- FALSE
  }

  cat("\nCall:", deparse(x$call, width.cutoff = floor(getOption("width") * 0.85)), "", sep = "\n")
  
  if(!x$converged) {
    cat("model did not converge\n")
  } else {
    cat(paste("Coefficients (truncated ", dist, " with log link):\n", sep = ""))
    print.default(format(x$coefficients, digits = digits), print.gap = 2, quote = FALSE)
    cat("\n")
    if(dist == "negbin") cat(paste(ifelse(fixed, "Theta (fixed) =", "Theta ="),
      round(x$theta, digits), "\n\n"))
  }
  
  invisible(x)
}

summary.zerotrunc <- function(object,...)
{
  ## deviance residuals
  object$residuals <- residuals(object, type = "deviance")

  ## compute z statistics
  cf <- object$coefficients
  se <- sqrt(diag(object$vcov))
  k <- length(cf)
  
  if(object$dist == "negbin") {
    cf <- c(cf, "Log(theta)" = as.vector(log(object$theta)))
    se <- c(se, object$SE.logtheta)
  }
  zstat <- cf/se
  pval <- 2*pnorm(-abs(zstat))
  cf <- cbind(cf, se, zstat, pval)
  colnames(cf) <- c("Estimate", "Std. Error", "z value", "Pr(>|z|)")
  object$coefficients <- cf

  ## number of iterations
  object$iterations <- tail(na.omit(object$optim$count), 1)
  
  ## delete some slots
  object$fitted.values <- object$terms <- object$model <- object$y <-
    object$x <- object$levels <- object$contrasts <- object$start <- NULL

  ## return
  class(object) <- "summary.zerotrunc"
  object
}

print.summary.zerotrunc <- function(x, digits = max(3, getOption("digits") - 3), ...)
{
  cat("\nCall:", deparse(x$call, width.cutoff = floor(getOption("width") * 0.85)), "", sep = "\n")
  
  if(!x$converged) {
    cat("model did not converge\n")
  } else {

    if(x$dist == "negbin-fixed") {
      dist <- "negbin"
      fixed <- TRUE
    } else {
      dist <- x$dist
      fixed <- FALSE
    }

    cat("Deviance residuals:\n")
    print(structure(quantile(x$residuals),
      names = c("Min", "1Q", "Median", "3Q", "Max")), digits = digits, ...)  

    cat(paste("\nCoefficients (truncated ", dist, " with log link):\n", sep = ""))
    printCoefmat(x$coefficients, digits = digits, ...)
  
    if(dist == "negbin") cat(paste(ifelse(fixed, "\nTheta (fixed) =", "\nTheta ="),
      round(x$theta, digits)))
    cat(paste("\nNumber of iterations in", x$method, "optimization:", x$iterations, "\n"))
    cat("Log-likelihood:", formatC(x$loglik, digits = digits), "on", x$n - x$df.residual, "Df\n")
  }
  
  invisible(x)
}

terms.zerotrunc <- function(x, ...) {
  x$terms
}

model.frame.zerotrunc <- function(formula, ...) {
  if(!is.null(formula$model)) return(formula$model)
  NextMethod()
}

model.matrix.zerotrunc <- function(object, ...) {
  rval <- if(!is.null(object$x)) object$x
    else model.matrix(object$terms, model.frame(object), contrasts = object$contrasts)
  return(rval)
}

predict.zerotrunc <- function(object, newdata, type = c("response", "prob", "count", "zero"),
  na.action = na.pass, ...)
{
    type <- match.arg(type)

    ## if no new data supplied
    if(missing(newdata)) {
      if(type != "response") {
        if(!is.null(object$x)) {
	  X <- object$x
	} else if(!is.null(object$model)) {
          X <- model.matrix(object$terms, object$model, contrasts = object$contrasts)
	} else {
	  stop("predicted probabilities cannot be computed with missing newdata")
	}
	offset <- if(is.null(object$offset)) rep(0, NROW(X)) else object$offset
      } else {
        return(object$fitted.values)
      }
    } else {
      mf <- model.frame(delete.response(object$terms), newdata, na.action = na.action, xlev = object$levels)
      X <- model.matrix(delete.response(object$terms), mf, contrasts = object$contrasts)
      offset <- if(!is.null(off.num <- attr(object$terms, "offset"))) 
          eval(attr(object$terms, "variables")[[off.num + 1]], newdata)
        else if(!is.null(object$offset)) eval(object$call$offset, newdata)
      if(is.null(offset)) offset <- rep(0, NROW(X))
    }

    mu <- exp(X %*% object$coefficients + offset)[,1]
    p0 <- if(object$dist == "poisson") ppois(0, lambda = mu, lower.tail = FALSE, log.p = TRUE)
      else pnbinom(0, size = object$theta, mu = mu, lower.tail = FALSE, log.p = TRUE)
    
    if(type == "response") rval <- exp(log(mu) - p0)
    if(type == "count") rval <- mu
    if(type == "zero") rval <- exp(p0)

    ## predicted probabilities
    if(type == "prob") {
      y <- if(!is.null(object$y)) object$y else model.response(model.frame(object))
      yUnique <- 1:max(y)
      nUnique <- length(yUnique)
      rval <- matrix(NA, nrow = length(mu), ncol = nUnique)
      dimnames(rval) <- list(rownames(X), yUnique)
      
      if(object$dist == "poisson") {
        for(i in 1:nUnique) rval[,i] <- exp(dpois(yUnique[i], lambda = mu, log = TRUE) - p0)
      } else {
        for(i in 1:nUnique) rval[,i] <- exp(dnbinom(yUnique[i], mu = mu, size = object$theta, log = TRUE) - p0)
      }
    }
   
    rval
}

fitted.zerotrunc <- function(object, ...) {
  object$fitted.values
}

residuals.zerotrunc <- function(object, type = c("deviance", "pearson", "response"), ...) {

  type <- match.arg(type)
  res <- object$residuals
  wts <- object$weights
  if(is.null(wts)) wts <- 1
  
  switch(type,
  
  "response" = {
    return(res)
  },
  
  "pearson" = {
    mu <- predict(object, type = "count")
    p0 <- mu/object$fitted.values
    theta1 <- 1/object$theta
    vv <- (mu + (1 + 1/object$theta - 1/p0) * mu^2) / p0
    return(res/sqrt(vv))
  },
  
  "deviance" = {
    yhat <- object$fitted.values
    y <- yhat + object$residuals/sqrt(wts)
    mu <- predict(object, type = "count")
    theta <- object$theta
    
    if(object$dist == "poisson") {
      mu2y <- function(mu) mu / ifelse(mu > 0, ppois(0, lambda = mu, lower.tail = FALSE), 1)
      y2mu <- function(y) {
        yunique <- sort(unique(y))
	munique <- sapply(yunique,
	  function(z) uniroot(function(mu) z - mu2y(mu), interval = c(0, z))$root)
	munique[factor(y, levels = yunique)]
      }      
      ll <- function(mu) {
  	dpois(y, lambda = mu, log = TRUE) -
        ppois(0, lambda = mu, lower.tail = FALSE, log.p = TRUE)
      }
    } else {
      mu2y <- function(mu) mu / ifelse(mu > 0, pnbinom(0, size = theta, mu = mu, lower.tail = FALSE), 1)
      y2mu <- function(y) {
        yunique <- sort(unique(y))
	munique <- sapply(yunique,
	  function(z) uniroot(function(mu) z - mu2y(mu), interval = c(0, z))$root)
	munique[factor(y, levels = yunique)]
      }
      ll <- function(mu) {
        dnbinom(y, size = theta, mu = mu, log = TRUE) - 
  	pnbinom(0, size = theta, mu = mu, lower.tail = FALSE, log.p = TRUE)
      }
    }
    
    return(sqrt(wts) * sign(y - yhat) * sqrt(2 * abs(ll(y2mu(y)) - ll(mu))))
  })
}

predprob.zerotrunc <- function(obj, ...){
    predict(obj, type = "prob", ...)
}

extractAIC.zerotrunc <- function(fit, scale = NULL, k = 2, ...) {
  c(attr(logLik(fit), "df"), AIC(fit, k = k))
}

estfun.zerotrunc <- function(x, ...) {
  ## extract data
  Y <- if(is.null(x$y)) model.response(model.frame(x)) else x$y
  X <- model.matrix(x)
  beta <- coef(x)
  theta <- x$theta
  offset <- if(is.null(x$offset)) 0 else x$offset
  wts <- weights(x)
  if(is.null(wts)) wts <- 1

  ## count component: working residuals
  eta <- as.vector(X %*% beta + offset)
  mu <- exp(eta)

  wres <- as.numeric(Y > 0) * switch(x$dist,
    "poisson" = {
      (Y - mu) - exp(ppois(0, lambda = mu, log.p = TRUE) -
        ppois(0, lambda = mu, lower.tail = FALSE, log.p = TRUE) + eta)    
    },
    "geometric" = {
      (Y - mu * (Y + 1)/(mu + 1)) - exp(pnbinom(0, mu = mu, size = 1, log.p = TRUE) -
        pnbinom(0, mu = mu, size = 1, lower.tail = FALSE, log.p = TRUE) - log(mu + 1) + eta)
    },
    "negbin" = {
      (Y - mu * (Y + theta)/(mu + theta)) - exp(pnbinom(0, mu = mu, size = theta, log.p = TRUE) -
        pnbinom(0, mu = mu, size = theta, lower.tail = FALSE, log.p = TRUE) +
	log(theta) - log(mu + theta) + eta)
    })

  ## compute gradient from data
  rval <- cbind(wres * wts * X)
  colnames(rval) <- names(beta)
  rownames(rval) <- rownames(X)
  return(rval)
}

getSummary.zerotrunc <- function(obj, alpha = 0.05, ...) {
  ## extract summary object
  s <- summary(obj)
  
  ## coefficient matrix and confidence interval
  ## compute confidence interval manually to include Log(theta)
  cf <- cbind(s$coefficients,
    s$coefficients[, 1] + qnorm(alpha/2) * s$coefficients[, 2],
    s$coefficients[, 1] + qnorm(1 - alpha/2) * s$coefficients[, 2])
  colnames(cf) <- c("est", "se", "stat", "p", "lwr", "upr")

  ## further summary statistics
  sstat <- c(
    "theta" = s$theta,
    "N" = nobs(obj),
    "logLik" = as.vector(logLik(obj)),
    "AIC" = AIC(obj),
    "BIC" = BIC(obj))

  ## return everything
  return(list(
    coef = cf,
    sumstat = sstat,
    contrasts = obj$contrasts,
    xlevels = obj$levels,
    call = obj$call
  ))
}