R/ztpreg.R
In ipeglim: Imprecise Inferential Framework on Statistical Reasoning

Documented in ztpreg

#' @title Zero-truncated Poisson and Negative Binomial Regression models with
#' Maximum likelihood estimation
#'
#' @description Zero-truncated Poisson and Negative Bionomial regression models are
#' used for estimating the unknown population size using a single
#' registration file in the stuides of Heijden (2003) and Cruffy
#' (2008).
#' Zero-truncated Negative Binomial is an alternative of
#' zero-truncated Poisson model when an overdispersion is suspected.
#'
#' @param formula a symbolic description of the model to be fit.
#' @param data a data frame containg the variables in the model.
#' @param dist a description of sampling model to be used in the model.
#' @param method the method to be used in fitting model.  The default
#' method uses \code{BFGS} in the \code{optim} function.
#' @param hessian logical, Should a numerically differentiated Hessian
#' matrix about regression parameters be returned?
#' @param ztrunc logical, Is the sampling model used in the model
#' assumed that zeros are truncated?
#' @param ... other arguments
#' 
#' @references
#' Peter GM van der Heijden, Rami Bustami, Maarten JLF
#' Cruyff, Godfried Engbersen and Hans C van Houwelingen (2003), Point
#' and interval estimation of the population size using the truncated
#' Poisson regression model, \emph{Statistical Modelling}, \bold{3},
#' pp. 305-322. 
#'
#' Cruyff MJ and van der Heijden (2008) Point and interval
#' estimation of the population size using a zero-truncated negative
#' binomial regression model, \emph{Biometrical Journal}, \bold{50}(6),
#' pp. 1035-1050. 
#' 
#' Dankmar Boehning and Peter G. M. van der Heijden (2009),
#' A Covarite adjustment for zero-truncated approaches to estimating
#' the size of hidden and elusive populations, \emph{The Annals of Applied
#' Statistics}, \bold{3}(2), pp. 595-610.
#'
#' @examples
#' # 
#' # dat <- simulateYX(N=1e3, Xreg=FALSE, param=2, ztrunc=TRUE)
#' # y <- dat$y
#'
#' # m <- ztpr(formula=y~1, ztrunc=TRUE, dist="poisson")
#' # fit <- summary(m, HT.est=TRUE, LM.test=TRUE)
#' # mhat <- exp(fit$cfs)
#' # print(fit$LM.chisq)
#' # print(c(fit$N, fit$cil, fit$ciu))
#'
#' 
#' @author Chel Hee Lee <\email{gnustats@@gmail.com}>
#' @return An object of class \code{ztpr} with components including
#' \item{formula}{formula used to be fitted,}
#' \item{converged}{integer code which indicates a successful
#' completion of optimization process,}
#' \item{niters}{integer that indicates a number of iterations until
#' convergence to estimates,}
#' \item{cfs}{estimated regression coefficients,}
#' \item{vcv}{estimated variance-covariance matrix of regression
#' coefficients which is obtained by the inverse of Hessian matrix}
#' \item{llk}{value of log-likelihood function at \code{cfs}.}
#' 
#' @seealso \code{\link{optim}}, \code{\link{glm.fit}}
#' @section TODO:
#' \itemize{
#' \item Currently, \code{predict}, \code{extractAIC}, \code{residuals},
#' \code{fitted}, \code{residual plot}, \code{deviance} are not
#' supported.
#' \item Support another method of 'L-BFGS-B'.
#' }
#' @keywords regression zero-truncation
#' @export
ztpreg <- function(formula, data, dist=c("poisson", "nbinom"), method=c("BFGS", "L-BFGS-B"), hessian=TRUE, ztrunc=TRUE, ...){

  # sanity check
  stopifnot(!missing(formula), !missing(dist))
  if(missing(method)) method <- "BFGS"

  mf <- match.call(expand.dots = FALSE)  
  if(missing(data)) data <- environment(formula)
  m <- match(c("formula", "data"), names(mf), 0)
  mf <- mf[c(1, m)]
  mf$drop.unused.levels <- TRUE
  
  ## call model.frame()
  mf[[1]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())
  
  ## extract terms, model matrices, response
  mt <- attr(mf, "terms")
  y <- model.response(mf, "any")
  X <- model.matrix(mt, mf)
#  if(is.empty.model(mt)) X <- matrix(0, nrow=nrow(X), ncol=ncol(X))
    
  n <- nrow(X)
  p <- ncol(X)
  shape <- switch(dist, "nbinom"=NULL, "poisson"=Inf)
  
  fnllpoisson <- function(beta){
    mu <- as.vector(exp(X%*%beta))
    lscaler <- ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=TRUE)
    llik <- sum(dpois(x=y, lambda=mu, log=TRUE)-lscaler)
  }
  
  grllpoisson <- function(beta){
    mu <- as.vector(exp(X%*%beta))
    ldens0 <- ppois(q=0, lambda=mu, lower.tail=TRUE, log.p=TRUE)
    lscaler <- ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=TRUE)
    delta <- exp(ldens0 - lscaler + log(mu))
    gr <- colSums((y-mu-delta)*X)
  }

  fnllnbinom <- function(beta){
    mu <- as.vector(exp(X%*%beta[1:p]))
    shape <- exp(beta[p+1])
    ldens0 <- suppressWarnings(dnbinom(x=y, mu=mu, size=shape, log=TRUE))
    lscaler <- suppressWarnings(pnbinom(q=0, mu=mu, size=shape, lower.tail=FALSE, log.p=TRUE))
    gr <- sum(ldens0-lscaler)
  } # warnings are caused by shape=Inf (i.e., Poisson)
  
  grllnbinom <- function(beta){
    eta <- as.vector(X%*%beta[1:p])
    mu <- exp(eta)
    shape <- exp(beta[p+1])
    lratio <- pnbinom(q=0, mu=mu, size=shape, lower.tail=TRUE, log.p=TRUE)-pnbinom(q=0, mu=mu, size=shape, lower.tail=FALSE, log.p=TRUE)
    gr1 <- colSums(((y - mu*(y+shape)/(mu+shape)) - exp(lratio+log(shape)-log(mu+shape)+eta))*X)
    gr2 <- sum((digamma(y+shape) - digamma(shape) + log(shape) - log(mu+shape) + 1 - (y+shape)/(mu+shape) + exp(lratio)*(log(shape) - log(mu+shape) + 1 - shape/(mu+shape))))*shape
    gr <- c(gr1, gr2)
  }

  fnllik <- switch(dist, "poisson"=fnllpoisson, "nbinom"=fnllnbinom)
  grllik <- switch(dist, "poisson"=grllpoisson, "nbinom"=grllnbinom)

  init <- glm(formula=formula, data=data, family=poisson(link="log"))$coef
  if(dist=="nbinom") init <- c(init, 0)
    
  fit <- optim(par=init, fn=fnllik, gr=grllik, method=method, hessian=hessian, control=list(fnscale=-1))
  converged <- fit$convergence < 1
  niters <- fit$counts[1]
  llk <- fit$value

  cfs <- fit$par
  if(dist=="nbinom") names(cfs)[p+1] <- "log(shape)"
  vcv <- -solve(as.matrix(fit$hessian))
  # when 'dist==nbinom', vcv is a singular matrix as shown in the below:
  # Error in solve.default(as.matrix(fit$hessian)) : 
  # Lapack routine dgesv: system is exactly singular: U[1,1] = 0

  if(dist=="nbinom") colnames(vcv) <- rownames(vcv) <- names(cfs)

  rvals <- list(formula=formula, y=y, X=X, dist=dist, start=init, niters=niters, llk=llk, method=method,
  converged=converged, cfs=cfs, vcv=vcv, hessian=fit$hessian, se.check=all(diag(vcv)>0))
      
  class(rvals) <- c("ztpreg")
  return(rvals)
}
NULL