zi.fit <- function(x, y, z, weights, offset, dist, link, control){
ziPoisson <- function(parms, trunc.start = FALSE) {
mu <- as.vector(exp(X %*% parms[1:kx] + offsetx))
if (trunc.start)
phi <- rep(0, length(mu))
else phi <- as.vector(linkinv(Z %*% parms[(kx + 1):(kx + kz)] + offsetz))
loglik0 <- log(phi + exp(log(1 - phi) - mu))
loglik1 <- log(1 - phi) + dpois(Y, lambda = mu, log = TRUE)
if (trunc.start){
sum(weights[Y1] * loglik1[Y1]) -
sum(weights[Y1] * log(1 - exp(loglik0[Y1])))
} else sum(weights[Y0] * loglik0[Y0]) + sum(weights[Y1] * loglik1[Y1])
}
ziNegBin <- function(parms, trunc.start = FALSE) {
mu <- as.vector(exp(X %*% parms[1:kx] + offsetx))
if (trunc.start)
phi <- rep(0, length(mu))
else phi <- as.vector(linkinv(Z %*% parms[(kx + 1):(kx + kz)] + offsetz))
theta <- exp(parms[(kx + kz) + 1])
loglik0 <- log(
phi + exp(
log(1 - phi) +
suppressWarnings(dnbinom(0, size = theta, mu = mu, log = TRUE))
)
)
loglik1 <- log(1 - phi) +
suppressWarnings(dnbinom(Y, size = theta, mu = mu, log = TRUE))
if (trunc.start){
sum(weights[Y1] * loglik1[Y1]) -
sum(weights[Y1] * log(1 - exp(loglik0[Y1])))
} else {
sum(weights[Y0] * loglik0[Y0]) + sum(weights[Y1] * loglik1[Y1])
}
}
ziGeom <- function(parms, trunc.start = FALSE){
ziNegBin(c(parms, 0), trunc.start)
}
countGradPoisson <- function(parms) {
eta <- as.vector(X %*% parms[1:kx] + offsetx)[Y1]
mu <- exp(eta)
colSums(
(
(Y[Y1] - mu) -
exp(
ppois(0, lambda = mu, log.p = TRUE) -
ppois(0, lambda = mu, lower.tail = FALSE, log.p = TRUE) + eta
)
) * weights[Y1] * X[Y1, , drop = FALSE]
)
}
countGradGeom <- function(parms) {
eta <- as.vector(X %*% parms[1:kx] + offsetx)[Y1]
mu <- exp(eta)
colSums(
(
(Y[Y1] - mu * (Y[Y1] + 1)/(mu + 1)) -
exp(
pnbinom(0, mu = mu, size = 1, log.p = TRUE) -
pnbinom(0, mu = mu, size = 1, lower.tail = FALSE, log.p = TRUE) -
log(mu + 1) + eta
)
) * weights[Y1] * X[Y1, , drop = FALSE]
)
}
countGradNegBin <- function(parms) {
eta <- as.vector(X %*% parms[1:kx] + offsetx)[Y1]
mu <- exp(eta)
theta <- exp(parms[kx + 1])
logratio <- pnbinom(0, mu = mu, size = theta, log.p = TRUE) -
pnbinom(0, mu = mu, size = theta, lower.tail = FALSE,
log.p = TRUE)
rval <- colSums(
(
(Y[Y1] - mu * (Y[Y1] + theta)/(mu + theta)) -
exp(logratio + log(theta) - log(mu + theta) + eta)
) * weights[Y1] * X[Y1, , drop = FALSE]
)
rval2 <- sum(
(
digamma(Y[Y1] + theta) - digamma(theta) + log(theta) - log(mu + theta) +
1 - (Y[Y1] + theta)/(mu + theta) + exp(logratio) *
(log(theta) - log(mu + theta) + 1 - theta/(mu + theta))
) * weights[Y1]) * theta
c(rval, rval2)
}
gradPoisson <- function(parms) {
eta <- as.vector(X %*% parms[1:kx] + offsetx)
mu <- exp(eta)
etaz <- as.vector(Z %*% parms[(kx + 1):(kx + kz)] + offsetz)
muz <- linkinv(etaz)
clogdens0 <- -mu
dens0 <- muz * (1 - as.numeric(Y1)) + exp(log(1 - muz) + clogdens0)
wres_count <- ifelse(
Y1, Y - mu, -exp(-log(dens0) + log(1 - muz) + clogdens0 + log(mu))
)
wres_zero <- ifelse(
Y1, -1/(1 - muz) * linkobj$mu.eta(etaz),
(linkobj$mu.eta(etaz) - exp(clogdens0) * linkobj$mu.eta(etaz))/dens0
)
colSums(cbind(wres_count * weights * X, wres_zero * weights * Z))
}
gradGeom <- function(parms) {
eta <- as.vector(X %*% parms[1:kx] + offsetx)
mu <- exp(eta)
etaz <- as.vector(Z %*% parms[(kx + 1):(kx + kz)] + offsetz)
muz <- linkinv(etaz)
clogdens0 <- dnbinom(0, size = 1, mu = mu, log = TRUE)
dens0 <- muz * (1 - as.numeric(Y1)) + exp(log(1 - muz) + clogdens0)
wres_count <- ifelse(
Y1, Y - mu * (Y + 1)/(mu + 1),
-exp(-log(dens0) + log(1 - muz) + clogdens0 - log(mu + 1) + log(mu))
)
wres_zero <- ifelse(
Y1, -1/(1 - muz) * linkobj$mu.eta(etaz),
(linkobj$mu.eta(etaz) - exp(clogdens0) * linkobj$mu.eta(etaz))/dens0
)
colSums(cbind(wres_count * weights * X, wres_zero * weights * Z))
}
gradNegBin <- function(parms) {
eta <- as.vector(X %*% parms[1:kx] + offsetx)
mu <- exp(eta)
etaz <- as.vector(Z %*% parms[(kx + 1):(kx + kz)] + offsetz)
muz <- linkinv(etaz)
theta <- exp(parms[(kx + kz) + 1])
clogdens0 <- dnbinom(0, size = theta, mu = mu, log = TRUE)
dens0 <- muz * (1 - as.numeric(Y1)) + exp(log(1 - muz) + clogdens0)
wres_count <- ifelse(
Y1, Y - mu * (Y + theta)/(mu + theta),
-exp(-log(dens0) + log(1 - muz) + clogdens0 + log(theta) -
log(mu + theta) + log(mu))
)
wres_zero <- ifelse(
Y1, -1/(1 - muz) * linkobj$mu.eta(etaz),
(linkobj$mu.eta(etaz) - exp(clogdens0) * linkobj$mu.eta(etaz))/dens0
)
wres_theta <- theta * ifelse(
Y1,
digamma(Y + theta) - digamma(theta) + log(theta) - log(mu + theta) + 1 -
(Y + theta)/(mu + theta),
exp(-log(dens0) + log(1 - muz) + clogdens0) *
(log(theta) - log(mu + theta) + 1 - theta/(mu + theta))
)
colSums(
cbind(wres_count * weights * X, wres_zero * weights * Z, wres_theta)
)
}
loglikfun <- switch(
dist, poisson = ziPoisson, geometric = ziGeom, negbin = ziNegBin
)
gradfun <- switch(
dist, poisson = gradPoisson, geometric = gradGeom, negbin = gradNegBin
)
linkobj <- make.link(link)
linkinv <- linkobj$linkinv
X <- x
Z <- z
Y <- y
Y <- as.integer(round(Y + 0.001))
n <- length(Y)
kx <- NCOL(X)
kz <- NCOL(Z)
Y0 <- Y <= 0
Y1 <- Y > 0
offsetx <- offset$count
if(is.null(offsetx)) offsetx <- 0
if(length(offsetx) == 1) offsetx <- rep.int(offsetx, n)
offsetx <- as.vector(offsetx)
offsetz <- offset$zero
if(is.null(offsetz)) offsetz <- 0
if(length(offsetz) == 1) offsetz <- rep.int(offsetz, n)
offsetz <- as.vector(offsetz)
start <- control$start
method <- control$method
hessian <- control$hessian
ocontrol <- control
control$method <- control$hessian <- control$EM <- control$start <- NULL
if(is.null(start)){
model_zero <- glm.fit(
Z, as.integer(Y0), weights = weights, family = binomial(link = link),
offset = offsetz
)
countloglikfun <- function(parms, ...){
loglikfun(
c(parms[1:kx], rep(0, kz), parms[-(1:kx)]), trunc.start = TRUE
)
}
countgradfun <- switch(
dist, poisson = countGradPoisson, geometric = countGradGeom,
negbin = countGradNegBin
)
lmstart <- lm.wfit(
X[Y1, , drop = FALSE], log(Y[Y1]) - offsetx[Y1], weights[Y1]
)$coefficients
lmstart <- ifelse(is.na(lmstart), 0, lmstart)
fit <- tryCatch(
optim(
par = c(lmstart, if (dist == "negbin") 0 else NULL),
fn = countloglikfun, gr = countgradfun,
method = method, control = control, hessian = FALSE,
),
error = function(e) list(convergence = 1)
)
if(fit$convergence > 0){
model_count <- glm.fit(
X, Y, family = poisson(), weights = weights, offset = offsetx
)
start <- list(
count = model_count$coefficients, zero = model_zero$coefficients
)
if(dist == "negbin") start$theta <- 1
} else {
start <- list(count = fit$par[1:kx], zero = model_zero$coefficients)
if (length(fit$par) > kx) start$theta <- exp(fit$par[-(1:kx)])
}
if(ocontrol$EM & dist == "poisson"){
mui <- model_count$fitted
probi <- model_zero$fitted
probi <- probi/(probi + (1 - probi) * dpois(0, mui))
probi[Y1] <- 0
ll_new <- loglikfun(c(start$count, start$zero))
ll_old <- 2 * ll_new
while (abs((ll_old - ll_new)/ll_old) > control$reltol){
ll_old <- ll_new
model_count <- glm.fit(
X, Y, weights = weights * (1 - probi), offset = offsetx,
family = poisson(), start = start$count
)
model_zero <- suppressWarnings(
glm.fit(
Z, probi, weights = weights, offset = offsetz,
family = binomial(link = link), start = start$zero
)
)
mui <- model_count$fitted
probi <- model_zero$fitted
probi <- probi/(probi + (1 - probi) * dpois(0, mui))
probi[Y1] <- 0
start <- list(count = model_count$coefficients,
zero = model_zero$coefficients)
ll_new <- loglikfun(c(start$count, start$zero))
}
}
if(ocontrol$EM & dist == "geometric"){
mui <- model_count$fitted
probi <- model_zero$fitted
probi <- probi/(probi + (1 - probi) * dnbinom(0, size = 1, mu = mui))
probi[Y1] <- 0
ll_new <- loglikfun(c(start$count, start$zero))
ll_old <- 2 * ll_new
while (abs((ll_old - ll_new)/ll_old) > control$reltol) {
ll_old <- ll_new
model_count <- suppressWarnings(
glm.fit(
X, Y, weights = weights * (1 - probi), offset = offsetx,
family = MASS::negative.binomial(1), start = start$count
)
)
model_zero <- suppressWarnings(
glm.fit(
Z, probi, weights = weights, offset = offsetz,
family = binomial(link = link), start = start$zero
)
)
start <- list(count = model_count$coefficients,
zero = model_zero$coefficients)
mui <- model_count$fitted
probi <- model_zero$fitted
probi <- probi/(probi + (1 - probi) * dnbinom(0, size = 1, mu = mui))
probi[Y1] <- 0
ll_new <- loglikfun(c(start$count, start$zero))
}
}
if (ocontrol$EM & dist == "negbin") {
mui <- model_count$fitted
probi <- model_zero$fitted
probi <- probi /
(probi + (1 - probi) * dnbinom(0, size = start$theta, mu = mui))
probi[Y1] <- 0
ll_new <- loglikfun(c(start$count, start$zero, log(start$theta)))
ll_old <- 2 * ll_new
offset <- offsetx
while (abs((ll_old - ll_new)/ll_old) > control$reltol) {
ll_old <- ll_new
model_count <- suppressWarnings(
MASS::glm.nb(
Y ~ 0 + X + offset(offset), weights = weights *
(1 - probi), start = start$count, init.theta = start$theta
)
)
model_zero <- suppressWarnings(
glm.fit(
Z, probi, weights = weights, offset = offsetz,
family = binomial(link = link), start = start$zero
)
)
start <- list(count = model_count$coefficients,
zero = model_zero$coefficients, theta = model_count$theta)
mui <- model_count$fitted
probi <- model_zero$fitted
probi <- probi /
(probi + (1 - probi) * dnbinom(0, size = start$theta, mu = mui))
probi[Y1] <- 0
ll_new <- loglikfun(c(start$count, start$zero, log(start$theta)))
}
}
}
fit <- optim(
par = c(
start$count, start$zero, if(dist == "negbin") log(start$theta) else NULL
),
fn = loglikfun, gr = gradfun,method = method, control = control,
hessian = hessian
)
coefc <- fit$par[1:kx]
names(coefc) <- names(start$count) <- colnames(X)
coefz <- fit$par[(kx + 1):(kx + kz)]
names(coefz) <- names(start$zero) <- colnames(Z)
vc <- tryCatch(
-solve(as.matrix(fit$hessian)), error = function(e) {
warning(e$message, call = FALSE)
k <- nrow(as.matrix(fit$hessian))
return(matrix(NA, k, k))
}
)
if (dist == "negbin") {
np <- kx + kz + 1
theta <- as.vector(exp(fit$par[np]))
SE.logtheta <- as.vector(sqrt(diag(vc)[np]))
vc <- vc[-np, -np, drop = FALSE]
} else {
theta <- NULL
SE.logtheta <- NULL
}
colnames(vc) <- rownames(vc) <- c(
paste("count", colnames(X), sep = "_"),
paste("zero", colnames(Z), sep = "_")
)
mu <- exp(X %*% coefc + offsetx)[, 1]
phi <- linkinv(Z %*% coefz + offsetz)[, 1]
Yhat <- (1 - phi) * mu
res <- sqrt(weights) * (Y - Yhat)
nobs <- sum(weights > 0)
list(
coefficients = list(count = coefc, zero = coefz),
residuals = res,
fitted.values = Yhat,
optim = fit,
method = method,
control = ocontrol,
start = start,
weights = if(identical(as.vector(weights),rep.int(1L,n))) NULL else weights,
offset = list(
count = if (identical(offsetx, rep.int(0, n))) NULL else offsetx,
zero = if (identical(offsetz, rep.int(0, n))) NULL else offsetz
),
n = nobs,
df.null = nobs - 2,
df.residual = nobs - (kx + kz + (dist == "negbin")),
theta = theta,
SE.logtheta = SE.logtheta,
loglik = fit$value,
vcov = vc,
dist = dist,
link = link,
linkinv = linkinv,
converged = fit$convergence < 1,
contrasts = list(
count = attr(X, "contrasts"), zero = attr(Z, "contrasts"))
)
}
predict_zi <- function(object, newdata){
X <- newdata$x
Z <- newdata$z
offsetx <- newdata$offset$count
offsetz <- newdata$offset$zero
if (is.null(offsetx)) offsetx <- rep.int(0, NROW(X))
if (is.null(offsetz)) offsetz <- rep.int(0, NROW(Z))
mu <- exp(X %*% object$coefficients$count + offsetx)[, 1]
phi <- object$linkinv(Z %*% object$coefficients$zero + offsetz)[, 1]
list(mu = mu, phi = phi, y_hat = (1 - phi) * mu)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.