Nothing
#' @title Zero-truncated Poisson and Negative Binomial Regression models with
#' Maximum likelihood estimation
#'
#' @description Zero-truncated Poisson and Negative Bionomial regression models are
#' used for estimating the unknown population size using a single
#' registration file in the stuides of Heijden (2003) and Cruffy
#' (2008).
#' Zero-truncated Negative Binomial is an alternative of
#' zero-truncated Poisson model when an overdispersion is suspected.
#'
#' @param formula a symbolic description of the model to be fit.
#' @param data a data frame containg the variables in the model.
#' @param dist a description of sampling model to be used in the model.
#' @param method the method to be used in fitting model. The default
#' method uses \code{BFGS} in the \code{optim} function.
#' @param hessian logical, Should a numerically differentiated Hessian
#' matrix about regression parameters be returned?
#' @param ztrunc logical, Is the sampling model used in the model
#' assumed that zeros are truncated?
#' @param ... other arguments
#'
#' @references
#' Peter GM van der Heijden, Rami Bustami, Maarten JLF
#' Cruyff, Godfried Engbersen and Hans C van Houwelingen (2003), Point
#' and interval estimation of the population size using the truncated
#' Poisson regression model, \emph{Statistical Modelling}, \bold{3},
#' pp. 305-322.
#'
#' Cruyff MJ and van der Heijden (2008) Point and interval
#' estimation of the population size using a zero-truncated negative
#' binomial regression model, \emph{Biometrical Journal}, \bold{50}(6),
#' pp. 1035-1050.
#'
#' Dankmar Boehning and Peter G. M. van der Heijden (2009),
#' A Covarite adjustment for zero-truncated approaches to estimating
#' the size of hidden and elusive populations, \emph{The Annals of Applied
#' Statistics}, \bold{3}(2), pp. 595-610.
#'
#' @examples
#' #
#' # dat <- simulateYX(N=1e3, Xreg=FALSE, param=2, ztrunc=TRUE)
#' # y <- dat$y
#'
#' # m <- ztpr(formula=y~1, ztrunc=TRUE, dist="poisson")
#' # fit <- summary(m, HT.est=TRUE, LM.test=TRUE)
#' # mhat <- exp(fit$cfs)
#' # print(fit$LM.chisq)
#' # print(c(fit$N, fit$cil, fit$ciu))
#'
#'
#' @author Chel Hee Lee <\email{gnustats@@gmail.com}>
#' @return An object of class \code{ztpr} with components including
#' \item{formula}{formula used to be fitted,}
#' \item{converged}{integer code which indicates a successful
#' completion of optimization process,}
#' \item{niters}{integer that indicates a number of iterations until
#' convergence to estimates,}
#' \item{cfs}{estimated regression coefficients,}
#' \item{vcv}{estimated variance-covariance matrix of regression
#' coefficients which is obtained by the inverse of Hessian matrix}
#' \item{llk}{value of log-likelihood function at \code{cfs}.}
#'
#' @seealso \code{\link{optim}}, \code{\link{glm.fit}}
#' @section TODO:
#' \itemize{
#' \item Currently, \code{predict}, \code{extractAIC}, \code{residuals},
#' \code{fitted}, \code{residual plot}, \code{deviance} are not
#' supported.
#' \item Support another method of 'L-BFGS-B'.
#' }
#' @keywords regression zero-truncation
#' @export
ztpreg <- function(formula, data, dist=c("poisson", "nbinom"), method=c("BFGS", "L-BFGS-B"), hessian=TRUE, ztrunc=TRUE, ...){
# sanity check
stopifnot(!missing(formula), !missing(dist))
if(missing(method)) method <- "BFGS"
mf <- match.call(expand.dots = FALSE)
if(missing(data)) data <- environment(formula)
m <- match(c("formula", "data"), names(mf), 0)
mf <- mf[c(1, m)]
mf$drop.unused.levels <- TRUE
## call model.frame()
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
## extract terms, model matrices, response
mt <- attr(mf, "terms")
y <- model.response(mf, "any")
X <- model.matrix(mt, mf)
# if(is.empty.model(mt)) X <- matrix(0, nrow=nrow(X), ncol=ncol(X))
n <- nrow(X)
p <- ncol(X)
shape <- switch(dist, "nbinom"=NULL, "poisson"=Inf)
fnllpoisson <- function(beta){
mu <- as.vector(exp(X%*%beta))
lscaler <- ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=TRUE)
llik <- sum(dpois(x=y, lambda=mu, log=TRUE)-lscaler)
}
grllpoisson <- function(beta){
mu <- as.vector(exp(X%*%beta))
ldens0 <- ppois(q=0, lambda=mu, lower.tail=TRUE, log.p=TRUE)
lscaler <- ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=TRUE)
delta <- exp(ldens0 - lscaler + log(mu))
gr <- colSums((y-mu-delta)*X)
}
fnllnbinom <- function(beta){
mu <- as.vector(exp(X%*%beta[1:p]))
shape <- exp(beta[p+1])
ldens0 <- suppressWarnings(dnbinom(x=y, mu=mu, size=shape, log=TRUE))
lscaler <- suppressWarnings(pnbinom(q=0, mu=mu, size=shape, lower.tail=FALSE, log.p=TRUE))
gr <- sum(ldens0-lscaler)
} # warnings are caused by shape=Inf (i.e., Poisson)
grllnbinom <- function(beta){
eta <- as.vector(X%*%beta[1:p])
mu <- exp(eta)
shape <- exp(beta[p+1])
lratio <- pnbinom(q=0, mu=mu, size=shape, lower.tail=TRUE, log.p=TRUE)-pnbinom(q=0, mu=mu, size=shape, lower.tail=FALSE, log.p=TRUE)
gr1 <- colSums(((y - mu*(y+shape)/(mu+shape)) - exp(lratio+log(shape)-log(mu+shape)+eta))*X)
gr2 <- sum((digamma(y+shape) - digamma(shape) + log(shape) - log(mu+shape) + 1 - (y+shape)/(mu+shape) + exp(lratio)*(log(shape) - log(mu+shape) + 1 - shape/(mu+shape))))*shape
gr <- c(gr1, gr2)
}
fnllik <- switch(dist, "poisson"=fnllpoisson, "nbinom"=fnllnbinom)
grllik <- switch(dist, "poisson"=grllpoisson, "nbinom"=grllnbinom)
init <- glm(formula=formula, data=data, family=poisson(link="log"))$coef
if(dist=="nbinom") init <- c(init, 0)
fit <- optim(par=init, fn=fnllik, gr=grllik, method=method, hessian=hessian, control=list(fnscale=-1))
converged <- fit$convergence < 1
niters <- fit$counts[1]
llk <- fit$value
cfs <- fit$par
if(dist=="nbinom") names(cfs)[p+1] <- "log(shape)"
vcv <- -solve(as.matrix(fit$hessian))
# when 'dist==nbinom', vcv is a singular matrix as shown in the below:
# Error in solve.default(as.matrix(fit$hessian)) :
# Lapack routine dgesv: system is exactly singular: U[1,1] = 0
if(dist=="nbinom") colnames(vcv) <- rownames(vcv) <- names(cfs)
rvals <- list(formula=formula, y=y, X=X, dist=dist, start=init, niters=niters, llk=llk, method=method,
converged=converged, cfs=cfs, vcv=vcv, hessian=fit$hessian, se.check=all(diag(vcv)>0))
class(rvals) <- c("ztpreg")
return(rvals)
}
NULL
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.