Nothing
zipath <- function(X, ...) UseMethod("zipath")
zipath.default <- function(X, ...) {
if (extends(class(X), "Matrix"))
return(zipath.matrix(X=X, ...))
stop("no method for objects of class ", sQuote(class(X)),
" implemented")
}
zipath.formula <- function(formula, data, weights, offset=NULL, contrasts=NULL, ...){
## call and formula
cl <- match.call()
if(!attr(terms(formula), "intercept"))
stop("non-intercept model is not implemented")
if(missing(data)) data <- environment(formula)
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "na.action", "weights", "offset"), names(mf), 0)
mf <- mf[c(1, m)]
mf$drop.unused.levels <- TRUE
## extended formula processing
if(length(formula[[3]]) > 1 && identical(formula[[3]][[1]], as.name("|")))
{
ff <- formula
formula[[3]][1] <- call("+")
mf$formula <- formula
ffc <- . ~ .
ffz <- ~ .
ffc[[2]] <- ff[[2]]
ffc[[3]] <- ff[[3]][[2]]
ffz[[3]] <- ff[[3]][[3]]
ffz[[2]] <- NULL
} else {
ffz <- ffc <- ff <- formula
ffz[[2]] <- NULL
}
if(inherits(try(terms(ffz), silent = TRUE), "try-error")) {
ffz <- eval(parse(text = sprintf( paste("%s -", deparse(ffc[[2]])), deparse(ffz) )))
}
## call model.frame()
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
## extract terms, model matrices, response
mt <- attr(mf, "terms")
mtX <- terms(ffc, data = data)
X <- model.matrix(mtX, mf)
Xold <- X
### This means X possibly has intercept (1st) column depending on the specifications
mtZ <- terms(ffz, data = data)
mtZ <- terms(update(mtZ, ~ .), data = data)
Z <- model.matrix(mtZ, mf)
Zold <- Z
Y <- model.response(mf, "numeric")
## weights and offset
weights <- model.weights(mf)
offsetx <- model_offset_2(mf, terms = mtX, offset = TRUE)
offsetz <- model_offset_2(mf, terms = mtZ, offset = FALSE)
RET <- zipath_fit(X, Z, Y, weights=weights, offsetx=offsetx, offsetz=offsetz, ...)
RET$call <- match.call()
class(RET) <- "zipath"
RET$terms = list(count = mtX, zero = mtZ, full = mt)
RET$call = cl
RET$formula = ff
RET$levels = .getXlevels(mt, mf)
RET$contrasts = list(count = attr(X, "contrasts"), zero = attr(Z, "contrasts"))
RET$model <- mf
RET
}
zipath.matrix <- function(X, Z, Y, weights, offsetx=NULL, offsetz=NULL, ...){
RET <- zipath_fit(X, Z, Y, weights, offsetx=offsetx, offsetz=offsetz, ...)
RET$call <- match.call()
return(RET)
}
zipath_fit <- function(X, Z, Y, weights, offsetx, offsetz, standardize=TRUE, intercept=TRUE, family = c("poisson", "negbin", "geometric"), link = c("logit", "probit", "cloglog", "cauchit", "log"), penalty = c("enet", "mnet", "snet"), start = NULL, y = TRUE, x = FALSE, nlambda=100, lambda.count=NULL, lambda.zero=NULL, type.path=c("active", "nonactive"), penalty.factor.count=NULL, penalty.factor.zero=NULL, lambda.count.min.ratio=.0001, lambda.zero.min.ratio=.1, alpha.count=1, alpha.zero=alpha.count, gamma.count=3, gamma.zero=gamma.count, rescale=FALSE, init.theta=NULL, theta.fixed=FALSE, EM=TRUE, maxit.em=200, convtype=c("count", "both"), maxit= 1000, maxit.theta =10, reltol = 1e-5, thresh=1e-6, eps.bino=1e-5, shortlist=FALSE, trace=FALSE, ...)
{
family <- match.arg(family)
if(is.null(init.theta) && family=="negbin" && theta.fixed)
stop("missing argument init.theta while family=='negbin' and theta.fixed is TRUE\n")
if(length(gamma.count) > 1 || length(gamma.zero) > 1)
stop("gamma.count or gamma.zero must be a scalar")
if(family=="geometric"){
family <- "negbin"
init.theta <- 1
theta.fixed <- TRUE
}
penalty <- match.arg(penalty)
link <- match.arg(link)
type.path <- match.arg(type.path)
if(type.path=="active")
active <- 1
else active <- 0
#lambda.min.ratio <- lambda.count.min.ratio
## set up likelihood
ziPoisson <- function(parms) {
## count mean
mu <- as.vector(exp(X %*% parms[1:kx] + offsetx))
## binary mean
phi <- as.vector(linkinv(Z %*% parms[(kx+1):(kx+kz)] + offsetz))
## log-likelihood for y = 0 and y >= 1
loglik0 <- log( phi + exp( log(1-phi) - mu ) ) ## -mu = dpois(0, lambda = mu, log = TRUE)
loglik1 <- log(1-phi) + dpois(Y, lambda = mu, log = TRUE)
## collect and return
loglik <- sum(weights[Y0] * loglik0[Y0]) + sum(weights[Y1] * loglik1[Y1])
loglik
}
ziNegBin <- function(parms) {
## count mean
mu <- as.vector(exp(X %*% parms[1:kx] + offsetx))
## binary mean
phi <- as.vector(linkinv(Z %*% parms[(kx+1):(kx+kz)] + offsetz))
## negbin size
theta <- exp(parms[(kx+kz)+1])
## log-likelihood for y = 0 and y >= 1
loglik0 <- log( phi + exp( log(1-phi) + suppressWarnings(dnbinom(0, size = theta, mu = mu, log = TRUE)) ) )
loglik1 <- log(1-phi) + suppressWarnings(dnbinom(Y, size = theta, mu = mu, log = TRUE))
## collect and return
loglik <- sum(weights[Y0] * loglik0[Y0]) + sum(weights[Y1] * loglik1[Y1])
loglik
}
#log negative binomial density
dnbinom1 <- function(x, size, mu){
n <- size
p <- size/(size+mu)
lgamma(x+n) - lgamma(n) - lgamma(x+1) + n*log(p) + x*log(1-p)
}
ziNegBin1 <- function(parms) {
## count mean
mu <- as.vector(exp(X %*% parms[1:kx] + offsetx))
## binary mean
phi <- as.vector(linkinv(Z %*% parms[(kx+1):(kx+kz)] + offsetz))
## negbin size
theta <- exp(parms[(kx+kz)+1])
## log-likelihood for y = 0 and y >= 1
loglik0 <- log( phi + exp( log(1-phi) + dnbinom1(0, size = theta, mu = mu)) )
loglik1 <- log(1-phi) + dnbinom1(Y, size = theta, mu = mu)
## collect and return
loglik <- rep(NA, n)
for(i in 1:n){
if(Y[i]==0) loglik[i] <- weights[i]*loglik0[i]
else
loglik[i] <- weights[i] * loglik1[i]
}
loglik
}
ziGeom1 <- function(parms) ziNegBin1(c(parms, 0))
ziGeom <- function(parms) ziNegBin(c(parms, 0))
ziNegBin2 <- function(parms) ziNegBin(c(parms, log(init.theta)))
pen_zipath <- function(start){
countcoef1 <- start$count[-1]
zerocoef1 <- start$zero[-1]
lc <- length(countcoef1) ### excluding intercept
if(lc > 0){
pen1 <- rep(0, lc)
for(j in 1:lc){
pen1[j] <- pen_eval(countcoef1[j], lone=lambda.count[k]*alpha.count, ltwo=lambda.count[k]*(1-alpha.count), gamma=gamma.count, penalty=penalty)
}
}
else pen1 <- 0
lz <- length(zerocoef1) ### excluding intercept
if(lz > 0){
pen2 <- rep(0, lz)
for(j in 1:lz)
pen2[j] <- pen_eval(zerocoef1[j], lone=lambda.zero[k]*alpha.zero, ltwo=lambda.zero[k]*(1-alpha.zero), gamma=gamma.zero, penalty=penalty)
}
else pen2 <- 0
return(n*(sum(pen1) + sum(pen2)))
}
###check KKT conditions-begin (only for LASSO penalty at the moment)
check_kkt <- function(parms){
for(jj in 1:nlambda){
cat("lambda iteration, jj=", jj, "\n")
cat("count model part coefficients\n")
tmp1 <- gradfun(parms[,jj])
cat("gradients\n")
print(tmp1)
for(j in 1:(kx-1)){
cat("\nvariable j=", j, "lambda.count[jj]=", lambda.count[jj], "coefc[j+1,jj]=", coefc[j+1, jj], "abs(coefc[j+1],jj)=", abs(coefc[j+1,jj]), "\n")
if(abs(coefc[j+1, jj]) > 0)
cat("tmp1[j+1]-n*lambda.count[jj]*sign(coefc[j+1,jj])",
tmp1[j+1]-n*lambda.count[jj]*sign(coefc[j+1,jj]), "\n")
else
cat("abs(tmp1[j+1])-n*lambda.count[jj])",
abs(tmp1[j+1])-n*lambda.count[jj], "\n")
}
cat("\nzero model part coefficients\n")
for(j in 1:(kz-1)){
cat("\nvariable j=", j, "lambda.zero[jj]=", lambda.zero[jj], "coefz[j+1,jj]=", coefz[j+1, jj], "abs(coefz[j+1],jj)=", abs(coefz[j+1,jj]), "\n")
if(abs(coefz[j+1,jj]) > 0)
cat("tmp1[kx+j+1]-n*lambda.zero[jj]*sign(coefz[j+1,jj])",
tmp1[kx+j+1]-n*lambda.zero[jj]*sign(coefz[j+1,jj]), "\n")
else
cat("abs(tmp1[kx+j+1])-n*lambda.zero[jj])",
abs(tmp1[kx+j+1])-n*lambda.zero[jj], "\n")
}
}
}
###check KKT conditions-end (only for count.alpha=zero.alpha=0, and LASSO penalty at the moment)
famtype <- switch(family,
"gaussian"=1,
"binomial"=2,
"poisson"=3,
"negbin"=4)
pentype <- switch(penalty,
"enet"=1,
"mnet"=2,
"snet"=3)
pen_zipath1 <- function(start){
m=length(start$count[-1])
pencount <- 0
if(m > 0)
pencount <- .Fortran("penGLM",
start=as.double(start$count[-1]),
m=as.integer(length(start$count[-1])),
lambda=as.double(lambda.count[k]*penalty.factor.count),
alpha=as.double(alpha.count),
gam=as.double(gamma.count),
penalty=as.integer(pentype),
pen=as.double(0),
PACKAGE="mpath")$pen
m=length(start$zero[-1])
penzero <- 0
if(m > 0)
penzero <- .Fortran("penGLM",
start=as.double(start$zero[-1]),
m=as.integer(length(start$zero[-1])),
lambda=as.double(lambda.zero[k]*penalty.factor.zero),
alpha=as.double(alpha.zero),
gam=as.double(gamma.zero),
penalty=as.integer(pentype),
pen=as.double(0),
PACKAGE="mpath")$pen
return(n*(pencount + penzero))
}
gradPoisson <- function(parms, total=TRUE) {
## count mean
eta <- as.vector(X %*% parms[1:kx] + offsetx)
mu <- exp(eta)
## binary mean
etaz <- as.vector(Z %*% parms[(kx+1):(kx+kz)] + offsetz)
muz <- linkinv(etaz)
## densities at 0
clogdens0 <- -mu
dens0 <- muz * (1 - as.numeric(Y1)) + exp(log(1 - muz) + clogdens0)
## working residuals
wres_count <- ifelse(Y1, Y - mu, -exp(-log(dens0) + log(1 - muz) + clogdens0 + log(mu)))
wres_zero <- ifelse(Y1, -1/(1-muz) * linkobj$mu.eta(etaz),
(linkobj$mu.eta(etaz) - exp(clogdens0) * linkobj$mu.eta(etaz))/dens0)
if(total)
colSums(cbind(wres_count * weights * X, wres_zero * weights * Z))
else cbind(wres_count * weights * X, wres_zero * weights * Z)
}
gradGeom <- function(parms, total=TRUE) {
## count mean
eta <- as.vector(X %*% parms[1:kx] + offsetx)
mu <- exp(eta)
## binary mean
etaz <- as.vector(Z %*% parms[(kx+1):(kx+kz)] + offsetz)
muz <- linkinv(etaz)
## densities at 0
clogdens0 <- dnbinom(0, size = 1, mu = mu, log = TRUE)
dens0 <- muz * (1 - as.numeric(Y1)) + exp(log(1 - muz) + clogdens0)
## working residuals
wres_count <- ifelse(Y1, Y - mu * (Y + 1)/(mu + 1), -exp(-log(dens0) +
log(1 - muz) + clogdens0 - log(mu + 1) + log(mu)))
wres_zero <- ifelse(Y1, -1/(1-muz) * linkobj$mu.eta(etaz),
(linkobj$mu.eta(etaz) - exp(clogdens0) * linkobj$mu.eta(etaz))/dens0)
if(total)
colSums(cbind(wres_count * weights * X, wres_zero * weights * Z))
else cbind(wres_count * weights * X, wres_zero * weights * Z)
}
gradNegBin <- function(parms, total=TRUE) {
## count mean
eta <- as.vector(X %*% parms[1:kx] + offsetx)
mu <- exp(eta)
## binary mean
etaz <- as.vector(Z %*% parms[(kx+1):(kx+kz)] + offsetz)
muz <- linkinv(etaz)
## negbin size
theta <- exp(parms[(kx+kz)+1])
## densities at 0
clogdens0 <- dnbinom(0, size = theta, mu = mu, log = TRUE)
dens0 <- muz * (1 - as.numeric(Y1)) + exp(log(1 - muz) + clogdens0)
## working residuals
wres_count <- ifelse(Y1, Y - mu * (Y + theta)/(mu + theta), -exp(-log(dens0) +
log(1 - muz) + clogdens0 + log(theta) - log(mu + theta) + log(mu)))
wres_zero <- ifelse(Y1, -1/(1-muz) * linkobj$mu.eta(etaz),
(linkobj$mu.eta(etaz) - exp(clogdens0) * linkobj$mu.eta(etaz))/dens0)
wres_theta <- theta * ifelse(Y1, digamma(Y + theta) - digamma(theta) +
log(theta) - log(mu + theta) + 1 - (Y + theta)/(mu + theta),
exp(-log(dens0) + log(1 - muz) + clogdens0) *
(log(theta) - log(mu + theta) + 1 - theta/(mu + theta)))
if(total)
colSums(cbind(wres_count * weights * X, wres_zero * weights * Z, wres_theta))
else cbind(wres_count * weights * X, wres_zero * weights * Z, wres_theta)
}
checkconv <- function(start, start_old, type=c("count","both"), eps=.Machine$double.eps, thresh=.001){
type <- match.arg(type)
checkcount <- .Fortran("checkConvergence",
m = as.integer(length(start$count)),
beta = as.double(start$count),
beta_old = as.double(start_old$count),
eps = as.double(eps),
thresh = as.double(thresh),
converged = as.integer(0),
PACKAGE = "mpath")$converged
if(type=="count") return(checkcount)
else
if(checkcount==0) return(FALSE)
else {
checkzero <- .Fortran("checkConvergence",
m = as.integer(length(start$zero)),
beta = as.double(start$zero),
beta_old = as.double(start_old$zero),
eps = as.double(eps),
thresh = as.double(thresh),
converged = as.integer(0),
PACKAGE = "mpath")$converged
return(checkzero)
}
}
convtype <- match.arg(convtype)
loglikfun <- switch(family,
"poisson" = ziPoisson,
"geometric" = ziGeom,
"negbin" = ziNegBin)
gradfun <- switch(family,
"poisson" = gradPoisson,
"geometric" = gradGeom,
"negbin" = gradNegBin)
## binary link processing
linkstr <- match.arg(link)
linkobj <- make.link(linkstr)
linkinv <- linkobj$linkinv
if(trace) cat("Zero-inflated Count Model\n",
paste("count model:", family, "with log link\n"),
paste("zero-inflation model: binomial with", linkstr, "link\n"), sep = "")
## sanity checks
if(length(Y) < 1) stop("empty model")
if(all(Y > 0)) stop("invalid dependent variable, minimum count is not zero")
if(!isTRUE(all.equal(as.vector(Y), as.integer(round(Y + 0.001)))))
stop("invalid dependent variable, non-integer values")
Y <- as.integer(round(Y + 0.001))
if(any(Y < 0)) stop("invalid dependent variable, negative counts")
if(trace) {
cat("dependent variable:\n")
tab <- table(factor(Y, levels = 0:max(Y)), exclude = NULL)
names(dimnames(tab)) <- NULL
print(tab)
}
## convenience variables
n <- length(Y)
kx <- NCOL(X)
kz <- NCOL(Z)
Xold <- X
Zold <- Z
Xnew <- as.matrix(X[,-1])
Znew <- as.matrix(Z[,-1])
nobs <- n
Y0 <- Y <= 0
Y1 <- Y > 0
## weights and offset
# weights <- model.weights(mf)
if(is.null(weights)) weights <- 1
if(length(weights) == 1) weights <- rep.int(weights, n)
sumw <- sum(weights)
weights <- weights/sumw
weights <- as.vector(weights)
if(is.null(offsetx)) offsetx <- 0
if(length(offsetx) == 1) offsetx <- rep.int(offsetx, n)
offsetx <- as.vector(offsetx)
if(is.null(offsetz)) offsetz <- 0
if(length(offsetz) == 1) offsetz <- rep.int(offsetz, n)
offsetz <- as.vector(offsetz)
### standardize variables
if(standardize){
wt <- weights
one <- rep(1, length(Y))
if(dim(Xnew)[2] > 0){
xx <- Xnew
meanx <- drop(wt %*% Xnew)
Xnew <- scale(Xnew, meanx, FALSE) # centers x
Xnew <- sqrt(wt) * Xnew
normx <- sqrt(drop(one %*% (Xnew^2)))
X[,-1] <- Xnew <- scale(xx, meanx, normx)
}
if(dim(Znew)[2] > 0){
zz <- Znew
meanz <- drop(wt %*% Znew)
Znew <- scale(Znew, meanz, FALSE) # centers x
Znew <- sqrt(wt) * Znew
normz <- sqrt(drop(one %*% (Znew^2)))
Z[,-1] <- Znew <- scale(zz, meanz, normz)
}
}
## starting values
if(!is.null(start)) {
valid <- TRUE
if(!("count" %in% names(start))) {
valid <- FALSE
warning("invalid starting values, count model coefficients not specified")
start$count <- rep.int(0, kx)
}
if(!("zero" %in% names(start))) {
valid <- FALSE
warning("invalid starting values, zero-inflation model coefficients not specified")
start$zero <- rep.int(0, kz)
}
if(length(start$count) != kx) {
valid <- FALSE
warning("invalid starting values, wrong number of count model coefficients")
}
if(length(start$zero) != kz) {
valid <- FALSE
warning("invalid starting values, wrong number of zero-inflation model coefficients")
}
if(family == "negbin") {
if(!("theta" %in% names(start))) start$theta <- 1
start <- list(count = start$count, zero = start$zero, theta = as.vector(start$theta[1]))
} else {
start <- list(count = start$count, zero = start$zero)
}
if(!valid) start <- NULL
}
if(!is.null(lambda.count) && !is.null(lambda.zero)){
if(length(lambda.count) != length(lambda.zero))
stop("length of lambda.count must be the same as lambda.zero\n")
else nlambda <- length(lambda.count)
}
if(family == "negbin") theta <- rep(NA, nlambda)
coefc <- matrix(NA, nrow=dim(X)[2], ncol=nlambda)
coefz <- matrix(NA, nrow=dim(Z)[2], ncol=nlambda)
se <- matrix(NA, nrow=dim(X)[2]+dim(Z)[2]+(family=="negbin"), ncol=nlambda)
ll <- rep(NA, nlambda)
convout <- rep(NA, nlambda)
### How to incorporate offset here?
if(all(diff(weights)==0)) weightsnow <- rep.int(1L, n)
else weightsnow <- weights
fit0 <- zeroinfl(Y~1+offset(offsetx)|1+offset(offsetz), weights=weightsnow, dist=family)
if(family=="negbin" && is.null(init.theta))
init.theta <- fit0$theta
### find solution of intercept model with family = negbin and fixed theta parameter, and replace the estimates for fit0
## ML estimation of theta
if(family=="negbin" && theta.fixed) {
#if(is.null(init.theta)){
# init.theta <- fit0$theta
#}
Xtmp <- X
Ztmp <- Z
X <- as.matrix(X[,1])
Z <- as.matrix(Z[,1])
kx <- 1
kz <- 1
### find the solution of estimates for the intercept-only model with fixed init.theta; what about offset?
fit0.nb <- optim(fn = ziNegBin2, gr = gradfun,
par = c(fit0$coef$count, fit0$coef$zero), #if(dist == "negbin") log(start$theta) else NULL),
method = "Nelder-Mead", hessian = FALSE, control = list(fnscale=-1))
fit0$coefficients <- list(count=fit0.nb$par[1], zero=fit0.nb$par[2])
fit0$theta <- init.theta
X <- Xtmp
Z <- Ztmp
kx <- NCOL(X)
kz <- NCOL(Z)
}
if(is.null(penalty.factor.count)) penalty.factor.count <- rep(1, dim(Xnew)[2])
if(all(penalty.factor.count==0)){
penalty.factor.count <- rep(1, dim(Xnew)[2])
lambda.count <- rep(0, nlambda)
}
if(is.null(penalty.factor.zero)) penalty.factor.zero <- rep(1, dim(Znew)[2])
if(all(penalty.factor.zero==0)){
penalty.factor.zero <- rep(1, dim(Znew)[2])
lambda.zero <- rep(0, nlambda)
}
#for poisson family or negbin using KKT conditions to compute lambda_count and lambda_zero values
if(is.null(lambda.count) || is.null(lambda.zero)){
if(family=="poisson") thetanow <- 10000
else thetanow <- init.theta
#if(family=="negbin")
# if(theta.fixed) thetanow <- init.theta
# else thetanow <- fit0$theta
lmax <- .Fortran("lmax_zipath",
B=as.double(X),
G=as.double(Z),
y=as.double(Y),
y1=as.integer(Y1),
weights=as.double(weights),
n=as.integer(n),
kx=as.integer(kx),
kz=as.integer(kz),
family=as.integer(famtype),
coefc=as.double(fit0$coefficients$count),
coefz=as.double(fit0$coefficients$zero),
alpha_count=as.double(alpha.count),
alpha_zero=as.double(alpha.zero),
penaltyfactor_count=as.double(c(1,penalty.factor.count)),
penaltyfactor_zero=as.double(c(1, penalty.factor.zero)),
theta=as.double(thetanow),
lmax_count=as.double(1),
lmax_zero=as.double(1),
PACKAGE="mpath")
if(kx > 1){
lpath <- seq(log(lmax$lmax_count), log(lambda.count.min.ratio * lmax$lmax_count), length.out=nlambda)
lambda.count <- exp(lpath)
}
else lambda.count <- rep(0, nlambda)
if(kz > 1){
lpath <- seq(log(lmax$lmax_zero), log(lambda.zero.min.ratio * lmax$lmax_zero), length.out=nlambda)
lambda.zero <- exp(lpath)
}
else lambda.zero <- rep(0, nlambda)
}
model_count <- list(coefficients = c(fit0$coefficients$count, rep(0, dim(Xnew)[2])), fitted.values=predict(fit0, type="count"))
model_zero <- list(coefficients = c(fit0$coefficients$zero, rep(0, dim(Znew)[2])), fitted.values=predict(fit0, type="zero"))
start <- list(count = model_count$coefficients, zero = model_zero$coefficients)
vcov <- vector("list", length=nlambda)
if(family == "negbin"){
#if(!theta.fixed)
# start$theta <- fit0$theta
#else start$theta <- init.theta
}
if(is.null(start$theta)) start$theta <- 1
if(active==1 && (kx == 1 || kz == 1))
warnings("type.path='active' is not implemented when count model or zero model is intercept-only, and the model is fitted with type.path='nonactive'.\n")
if(active==1 && kx > 1 && kz > 1)
RET1 <- .Fortran("zipath_active",
x=as.double(Xnew),
z=as.double(Znew),
y=as.double(Y),
y1=as.integer(Y1),
weights=as.double(weights/sum(weights)),
n=as.integer(n),
kx=as.integer(kx-1),
kz=as.integer(kz-1),
start_count=as.double(model_count$coefficients),
start_zero=as.double(model_zero$coefficients),
mustart_count=as.double(model_count$fitted.values),
mustart_zero=as.double(model_zero$fitted.values),
offsetx=as.double(offsetx),
offsetz=as.double(offsetz),
intercept=as.integer(intercept),
nlambda=as.integer(nlambda),
lambda_count=as.double(lambda.count),
lambda_zero=as.double(lambda.zero),
alpha_count=as.double(alpha.count),
alpha_zero=as.double(alpha.zero),
gam_count=as.double(gamma.count),
gam_zero=as.double(gamma.zero),
penaltyfactor_count=as.double(penalty.factor.count),
penaltyfactor_zero=as.double(penalty.factor.zero),
maxit=as.integer(maxit),
eps=as.double(.Machine$double.eps),
family=as.integer(famtype),
penalty=as.integer(pentype),
trace=as.integer(trace),
coefc=as.double(matrix(0, kx, nlambda)),
coefz=as.double(matrix(0, kz, nlambda)),
yhat=as.double(rep(0, n)),
iter=as.integer(maxit.em),
del=as.double(reltol),
rescale=as.integer(rescale),
thresh=as.double(thresh),
epsbino=as.double(eps.bino),
theta_fixed=as.integer(theta.fixed),
maxit_theta=as.integer(maxit.theta),
theta=as.double(start$theta),
thetaout=as.double(rep(start$theta, nlambda)),
PACKAGE="mpath")
else RET1 <- .Fortran("zipath_nonactive",
x=as.double(Xnew),
z=as.double(Znew),
y=as.double(Y),
y1=as.integer(Y1),
weights=as.double(weights/sum(weights)),
n=as.integer(n),
kx=as.integer(kx-1),
kz=as.integer(kz-1),
start_count=as.double(model_count$coefficients),
start_zero=as.double(model_zero$coefficients),
mustart_count=as.double(model_count$fitted.values),
mustart_zero=as.double(model_zero$fitted.values),
offsetx=as.double(offsetx),
offsetz=as.double(offsetz),
intercept=as.integer(intercept),
nlambda=as.integer(nlambda),
lambda_count=as.double(lambda.count),
lambda_zero=as.double(lambda.zero),
alpha_count=as.double(alpha.count),
alpha_zero=as.double(alpha.zero),
gam_count=as.double(gamma.count),
gam_zero=as.double(gamma.zero),
penaltyfactor_count=as.double(penalty.factor.count),
penaltyfactor_zero=as.double(penalty.factor.zero),
maxit=as.integer(maxit),
eps=as.double(.Machine$double.eps),
family=as.integer(famtype),
penalty=as.integer(pentype),
trace=as.integer(trace),
coefc=as.double(matrix(0, kx, nlambda)),
coefz=as.double(matrix(0, kz, nlambda)),
yhat=as.double(rep(0, n)),
iter=as.integer(maxit.em),
del=as.double(reltol),
rescale=as.integer(rescale),
thresh=as.double(thresh),
epsbino=as.double(eps.bino),
theta_fixed=as.integer(theta.fixed),
maxit_theta=as.integer(maxit.theta),
theta=as.double(start$theta),
thetaout=as.double(rep(start$theta, nlambda)),
PACKAGE="mpath")
coefc <- matrix(RET1$coefc, ncol=nlambda)
coefz <- matrix(RET1$coefz, ncol=nlambda)
if(trace && penalty=="enet" && alpha.count==1 && alpha.zero==1){
if(family=="poisson")
tmp <- rbind(coefc, coefz)
else
tmp <- rbind(coefc, coefz, matrix(log(theta),nrow=1))
check_kkt(tmp)
}
theta <- RET1$thetaout
for(k in 1:nlambda)
ll[k] <- loglikfun(c(coefc[,k], coefz[,k], log(theta[k])))
ll <- sumw*ll
if(standardize) {
coefc <- as.matrix(coefc)
if(dim(coefc)[1] > 1){
coefc[-1,] <- coefc[-1,]/normx
coefc[1,] <- coefc[1,] - crossprod(meanx, coefc[-1,])
}
coefz <- as.matrix(coefz)
if(dim(coefz)[1] > 1){
coefz[-1,] <- coefz[-1,]/normz
coefz[1,] <- coefz[1,] - crossprod(meanz, coefz[-1,])
}
}
if(family != "negbin") theta <- NULL
else names(theta) <- lambda.count
rownames(coefc) <- names(start$count) <- colnames(X)
rownames(coefz) <- names(start$zero) <- colnames(Z)
if(nlambda > 1){
if(lambda.count[1] > 0.0001)
colnames(coefc) <- round(lambda.count, digits=4)
else colnames(coefc) <- round(lambda.count, digits=8)
if(lambda.zero[1] > 0.0001)
colnames(coefz) <- round(lambda.zero, digits=4)
else colnames(coefz) <- round(lambda.zero, digits=8)
}
## fitted and residuals
mu <- exp(Xold %*% coefc + offsetx)
phi <- linkinv(Zold %*% coefz + offsetz)
Yhat <- (1-phi) * mu
#res <- sqrt(weights) * (Y - Yhat)
res <- sqrt(weights*sumw) * (Y - Yhat)
## effective observations
nobs <- sum(weights > 0) ## = n - sum(weights == 0)
dfc <- apply(abs(coefc) > 0, 2, sum)
if(family=="negbin") dfc <- 1+dfc
dfz <- apply(abs(coefz) > 0, 2, sum)
aic <- -2*ll + 2*(dfc+dfz)
bic <- -2*ll + log(n)*(dfc+dfz)
if(shortlist)
rval <- list(coefficients = list(count = coefc, zero = coefz),
lambda.count = lambda.count,
lambda.zero = lambda.zero,
alpha.count = alpha.count,
alpha.zero = alpha.zero,
gamma.count = gamma.count,
gamma.zero = gamma.zero,
loglik = ll,
aic = aic,
bic = bic,
family = family,
nlambda = nlambda,
theta = theta)
else
rval <- list(coefficients = list(count = coefc, zero = coefz),
residuals = res,
fitted.values = Yhat,
#satu = model_zero$satu,
nlambda=nlambda,
lambda.count = lambda.count,
lambda.zero = lambda.zero,
alpha.count = alpha.count,
alpha.zero = alpha.zero,
gamma.count = gamma.count,
gamma.zero = gamma.zero,
start = start,
#weights = if(identical(as.vector(weights), rep.int(1L, n))) NULL else weights,
weights = weights*sumw,
offset = list(count = if(identical(offsetx, rep.int(0, n))) NULL else offsetx,
zero = if(identical(offsetz, rep.int(0, n))) NULL else offsetz),
n = nobs,
df.null = nobs - 2,
df.residual = dfc + dfz,
# terms = list(count = mtX, zero = mtZ, full = mt),
theta = theta,
theta.fixed = theta.fixed,
loglik = ll,
aic = aic,
bic = bic,
family = family,
theta.fixed=theta.fixed,
penalty=penalty,
link = linkstr,
linkinv = linkinv)
if(y) rval$y <- Y
if(x) rval$x <- list(count = Xold, zero = Zold)
class(rval) <- "zipath"
return(rval)
}
zipath.control <- function(method = "BFGS", maxit.em=50, maxit = 10000, trace = FALSE, EM = TRUE, start = NULL, ...) {
rval <- list(method = method, maxit.em = maxit.em, maxit = maxit, trace = trace, EM = EM, start = start)
rval <- c(rval, list(...))
rval
}
coef.zipath <- function(object, which=1:object$nlambda, model = c("full", "count", "zero"), ...) {
model <- match.arg(model)
rval <- object$coefficients
rval <- switch(model,
"full" = list(count=rval$count[, which], zero=rval$zero[, which]),
"count" = rval$count[,which],
"zero" = rval$zero[,which])
rval
}
print.zipath <- function(x, digits = max(3, getOption("digits") - 3), ...)
{
cat("\nCall:", deparse(x$call, width.cutoff = floor(getOption("width") * 0.85)), "", sep = "\n")
cat(paste("Count model coefficients (", x$family, " with log link):\n", sep = ""))
print.default(format(x$coefficients$count, digits = digits), print.gap = 2, quote = FALSE)
if(x$family == "negbin"){
cat("\nTheta:", round(x$theta, digits),"\n")} else cat("\n")
cat(paste("\nZero-inflation model coefficients (binomial with ", x$link, " link):\n", sep = ""))
print.default(format(x$coefficients$zero, digits = digits), print.gap = 2, quote = FALSE)
cat("\n")
# }
invisible(x)
}
summary.zipath <- function(object,...)
{
## residuals
object$residuals <- residuals(object, type = "pearson")
if(object$family == "negbin") {
object$coefficients$count <- rbind(object$coefficients$count, "Log(theta)" = log(object$theta))
}
object$fitted.values <- object$terms <- object$model <- object$y <-
object$x <- object$levels <- object$contrasts <- object$start <- NULL
class(object) <- "summary.zipath"
object
}
print.summary.zipath <- function(x, digits = max(3, getOption("digits") - 3), ...)
{
cat("\nCall:", deparse(x$call, width.cutoff = floor(getOption("width") * 0.85)), "", sep = "\n")
cat(paste("\nCount model coefficients (", x$family, " with log link):\n", sep = ""))
printCoefmat(x$coefficients$count, digits = digits, signif.legend = FALSE)
cat(paste("\nZero-inflation model coefficients (binomial with ", x$link, " link):\n", sep = ""))
printCoefmat(x$coefficients$zero, digits = digits, signif.legend = FALSE)
if(x$family == "negbin"){
cat("\nTheta:", round(x$theta, digits),"\n")} else cat("\n")
cat("Log-likelihood:", formatC(x$loglik, digits = digits), "\n")
#cat("Converged:", x$converged, "\n")
invisible(x)
}
### Based on predict.zeroinfl in R package pscl
predict.zipath <- function(object, newdata, which=1:object$nlambda, type = c("response", "prob", "count", "zero", "nonzero"),
na.action = na.pass, at = NULL, ...)
{
type <- match.arg(type)
if(type=="nonzero"){
nbetacount=object$coefficients$count[,which]
nbetazero=object$coefficients$zero[,which]
if(length(which)>1)
return(list(nbetacount=eval(parse(text="glmnet:::nonzeroCoef(nbetacount[-1,,drop=FALSE],bystep=TRUE)")),
nbetazero=eval(parse(text="glmnet:::nonzeroCoef(nbetazero[-1,,drop=FALSE],bystep=TRUE"))))
else return(list(nbetacount=which(abs(nbetacount[-1]) > 0), nbetazero=which(abs(nbetazero[-1]) > 0)))
}
predictzeroinfl1(object, newdata, which, type,
na.action = na.pass, at = at, ...)
}
### based on pscl package predict.zeroinfl function
predictzeroinfl1 <- function(object, newdata, which=1:object$nlambda, type = c("response", "prob", "count", "zero"),
na.action = na.pass, at = NULL, ...)
{
type <- match.arg(type)
object$coefficients$count <- object$coefficients$count[,which]
object$coefficients$zero <- object$coefficients$zero[,which]
object$nlambda <- length(which)
## if no new data supplied
if(missing(newdata)) {
rval <- object$fitted.values[,which]
if(type != "response") {
if(!is.null(object$x)) {
X <- object$x$count
Z <- object$x$zero
} else if(!is.null(object$model)) {
X <- model.matrix(object$terms$count, object$model, contrasts = object$contrasts$count)
Z <- model.matrix(object$terms$zero, object$model, contrasts = object$contrasts$zero)
} else {
stop("predicted probabilities cannot be computed with missing newdata")
}
offsetx <- if(is.null(object$offset$count)) rep.int(0, NROW(X)) else object$offset$count
offsetz <- if(is.null(object$offset$zero)) rep.int(0, NROW(Z)) else object$offset$zero
mu <- exp(X %*% object$coefficients$count + offsetx)
phi <- object$linkinv(Z %*% object$coefficients$zero + offsetz)
}
} else {
mf <- model.frame(delete.response(object$terms$full), newdata, na.action = na.action, xlev = object$levels)
X <- model.matrix(delete.response(object$terms$count), mf, contrasts = object$contrasts$count)
Z <- model.matrix(delete.response(object$terms$zero), mf, contrasts = object$contrasts$zero)
offsetx <- model_offset_2(mf, terms = object$terms$count, offset = FALSE)
offsetz <- model_offset_2(mf, terms = object$terms$zero, offset = FALSE)
if(is.null(offsetx)) offsetx <- rep.int(0, NROW(X))
if(is.null(offsetz)) offsetz <- rep.int(0, NROW(Z))
if(!is.null(object$call$offset)) offsetx <- offsetx + eval(object$call$offset, newdata)
mu <- exp(X %*% object$coefficients$count + offsetx)
phi <- object$linkinv(Z %*% object$coefficients$zero + offsetz)
rval <- (1-phi) * mu
}
## predicted means for count/zero component
if(type == "count") rval <- mu
if(type == "zero") rval <- phi
## predicted probabilities
if(type == "prob") {
if(!is.null(object$y)) y <- object$y
else if(!is.null(object$model)) y <- model.response(object$model)
else stop("predicted probabilities cannot be computed for fits with y = FALSE and model = FALSE")
yUnique <- if(is.null(at)) 0:max(y) else at
nUnique <- length(yUnique)
rval <- vector("list", length=object$nlambda)
for(j in 1:length(rval)){
rval[[j]] <- matrix(NA, nrow = length(y), ncol = nUnique)
}
for(j in 1:object$nlambda){
switch(object$family,
"poisson" = {
rval[[j]][, 1] <- phi[,j] + (1-phi[,j]) * exp(-mu[,j])
for(i in 2:nUnique) rval[[j]][,i] <- (1-phi[,j]) * dpois(yUnique[i], lambda = mu[,j])
},
"negbin" = {
theta <- object$theta[j]
rval[[j]][, 1] <- phi[,j] + (1-phi[,j]) * dnbinom(0, mu = mu[,j], size = theta[j])
for(i in 2:nUnique) rval[[j]][,i] <- (1-phi[,j]) * dnbinom(yUnique[i], mu = mu[,j], size = theta[j])
},
"geometric" = {
rval[[j]][, 1] <- phi[,j] + (1-phi[,j]) * dnbinom(0, mu = mu[,j], size = 1)
for(i in 2:nUnique) rval[[j]][,i] <- (1-phi[,j]) * dnbinom(yUnique[i], mu = mu[,j], size = 1)
})
}
}
rval
}
fitted.zipath <- function(object, ...) {
object$fitted.values
}
residuals.zipath <- function(object, type = c("pearson", "response"), ...) {
type <- match.arg(type)
res <- object$residuals
switch(type,
"response" = {
return(res)
},
"pearson" = {
mu <- predict(object, type = "count")
phi <- predict(object, type = "zero")
theta1 <- switch(object$family,
"poisson" = 0,
"geometric" = 1,
"negbin" = 1/object$theta)
vv <- object$fitted.values * (1 + (phi + theta1) * mu)
return(res/sqrt(vv))
})
}
terms.zipath <- function(x, model = c("count", "zero"), ...) {
x$terms[[match.arg(model)]]
}
model.matrix.zipath <- function(object, model = c("count", "zero"), ...) {
model <- match.arg(model)
if(!is.null(object$x)) rval <- object$x[[model]]
else if(!is.null(object$model)) rval <- model.matrix(object$terms[[model]], object$model, contrasts = object$contrasts[[model]])
else stop("not enough information in fitted model to return model.matrix")
return(rval)
}
### evaluation of penalty function
###consider enet: \lambda_1 |theta| + 1/2 * \lambda_2 theta^2, mnet and snet penalty
pen_eval <- function(theta, lone, ltwo, gamma,
penalty = c("enet", "mnet", "snet")){
if(lone < 0 || ltwo < 0) stop("lone and ltwo should be non-negative values\n")
penalty <- match.arg(penalty)
if(!penalty %in% c("enet", "mnet", "snet"))
stop("penalty type ", penalty, " is not implemented\n")
if(penalty == "mnet"){
if(gamma <= 1) stop("gamma should > 1 for mnet penenaly\n")
if(abs(theta) <= (gamma * lone)) return(lone * abs(theta) - theta^2/(2*gamma) + 0.5*ltwo*theta^2)
else return(0.5*gamma*lone^2 + 0.5*ltwo*theta^2)
}
if(penalty == "snet"){
if(gamma <= 2) stop("gamma should > 2 for snet penalty\n")
if(abs(theta) <= lone) return(lone*abs(theta) + 0.5*ltwo*theta^2)
if(abs(theta) <= (gamma * lone)) return((gamma * lone * abs(theta) - 0.5*(theta^2+lone^2))/(gamma-1) + 0.5*ltwo*theta^2)
return(lone^2*(gamma^2 - 1)/(2*(gamma-1)) + 0.5*ltwo*theta^2)
}
if(penalty == "enet")
return(abs(theta)*lone + 0.5*ltwo*theta^2)
}
### evaluation of second derivative of penalty function
###consider enet: \lambda_1 |theta| + 1/2 * \lambda_2 theta^2, mnet and snet penalty
pen2_eval <- function(theta, lone, ltwo, gamma,
penalty = c("enet", "mnet", "snet")){
if(lone < 0 || ltwo < 0) stop("lone and ltwo should be non-negative values\n")
penalty <- match.arg(penalty)
if(!penalty %in% c("enet", "mnet", "snet"))
stop("penalty type ", penalty, " is not implemented\n")
if(penalty == "mnet"){
if(gamma <= 1) stop("gamma should > 1 for mnet penalty\n")
if(abs(theta) <= (gamma * lone)) return(-1/gamma+ltwo)
else return(ltwo)
}
if(penalty == "snet"){
if(gamma <= 2) stop("gamma should > 2 for snet penalty\n")
if(abs(theta) > lone && abs(theta) < (gamma * lone)) return(-1/(gamma-1) + ltwo)
else return(ltwo)
}
if(penalty == "enet")
return(ltwo)
}
### convert zeroinfl object to class zipath, thus can be used to predict newdata
conv2zipath <- function(object, family=c("poisson", "negbin", "geometric")){
family <- match.arg(family)
if(inherits(object, "zeroinfl")) class(object) <- "zipath"
object$family <- family
object$nlambda <- 1
object$fitted.values <- matrix(object$fitted.values, ncol=1)
namecount <- names(object$coefficients$count)
namezero <- names(object$coefficients$zero)
object$coefficients$count <- matrix(object$coefficients$count, ncol=1)
object$coefficients$zero <- matrix(object$coefficients$zero, ncol=1)
rownames(object$coefficients$count) <- namecount
rownames(object$coefficients$zero) <- namezero
object
}
predprob.zipath <- function(obj, which, at, ...) {
if(length(which) > 1)
stop("length of which should be 1\n")
predict(obj, type = "prob", which=which, at=at, ...)[[1]]
}
hessianReg <- function(x, which, ...)
UseMethod("hessianReg")
hessianReg.zipath <- function(x, which, log=FALSE, ...) {
## extract data
Y <- if(is.null(x$y)) model.response(model.frame(x)) else x$y
X <- model.matrix(x, model = "count")
Z <- model.matrix(x, model = "zero")
beta <- coef(x, model = "count", which=which)
gamma <- coef(x, model = "zero", which=which)
kx1 <- abs(beta) > 0
kx2 <- which(kx1)
kz1 <- abs(gamma) > 0
kz2 <- which(kz1)
X <- X[,kx2]
Z <- Z[,kz2]
if(length(kx2)==1) X <- matrix(X)
if(length(kz2)==1) Z <- matrix(Z)
beta <- beta[kx2]
gamma <- gamma[kz2]
if(x$family=="negbin")
theta <- x$theta[which]
offset <- x$offset
if(is.list(offset)) {
offsetx <- offset$count
offsetz <- offset$zero
} else {
offsetx <- offset
offsetz <- NULL
}
if(is.null(offsetx)) offsetx <- 0
if(is.null(offsetz)) offsetz <- 0
linkobj <- make.link(x$link)
# wts <- weights(x)
weights <- weights(x)
if(is.null(weights)) weights <- 1
Y0 <- Y <= 0
Y1 <- Y > 0
eta <- as.vector(X %*% beta + offsetx)
mu <- exp(eta) ### \mu_i in paper
etaz <- as.vector(Z %*% gamma + offsetz)
muz <- linkobj$linkinv(etaz)
if(x$family=="negbin"){
g1 <- theta/(theta+mu) ### r_i in paper
g1t <- g1^theta ### s_i in paper
g2 <- mu/(theta+mu) ### t_i in paper
### warning: check if this works for other links: muz=exp(etaz)/(1+exp(etaz))?
v <- muz ### p_i in paper
etaz.e <- exp(etaz) ### \xi_i in paper
h <- etaz.e + g1t ### h_i in paper
p <- dim(X)[2]
q <- dim(Z)[2]
rval <- matrix(NA, p+q+1, p+q+1)
### w.r.t (theta,theta)
rv0 <- g1t*theta^3*(log(g1))^2*etaz.e+2*g1t*log(g1)*mu*etaz.e*(log(g1)+1)*theta^2 + mu^2*etaz.e*g1t*(2*log(g1)+1+(log(g1))^2)*theta+mu^2*etaz.e*g1t+mu^2*g1t^2
rv0 <- rv0/(theta*(theta+mu)^2*h^2)
rv1 <- trigamma(Y+theta)-trigamma(theta)+(Y+theta)/((theta+mu)^2)+1/theta*g2-1/(theta+mu)
rval[p+q+1, p+q+1] <- sum(weights[Y0]*rv0[Y0])+sum(weights[Y1]*rv1[Y1])
if(log) rval[p+q+1, p+q+1] <- theta^2*rval[p+q+1, p+q+1] # for log(theta)
### for X coef partial derivative w.r.t. (beta_j, beta_k)
for(j in 1:p)
for(k in j:p){
rv <- ifelse(Y1, -theta*g2^2/mu*(theta+Y),
-theta^2*mu*(-g1t*mu*etaz.e+etaz.e*g1t+g1t^2)/(((theta+mu)*h)^2))
rval[j,k] <- sum(weights*X[,j]*X[,k]*rv)
}
### for X coef and Z coef (beta, gamma)
for(j in 1:p)
for(k in (p+1):(p+q)){
rv0 <- theta*X[,j]*Z[,k-p]*g1^theta*g2*etaz.e/h^2
rval[j,k] <- sum(weights[Y0]*rv0[Y0])
}
### for (beta, theta)
for(j in 1:p){
rv0 <- -(theta^2*g1t*log(g1)*etaz.e+theta*g1t*log(g1)*mu*etaz.e+g1t*theta*mu*etaz.e+g1t*mu*etaz.e+g1t^2*mu)*mu
rv0 <- rv0/(((theta+mu)*h)^2)
rv1 <- -mu*(-Y+mu)/((theta+mu)^2)
rval[j, p+q+1] <- sum(weights[Y0]*rv0[Y0]*X[Y0,j]) + sum(weights[Y1]*rv1[Y1]*X[Y1,j])
if(log) rval[j, p+q+1] <- theta*rval[j, p+q+1] ### if theta was parameterized as log(theta), see gradfun
}
###w.r.t. (gamma_j, gamma_k)
for(j in (p+1):(p+q))
for(k in j:(p+q)){
rv <- ifelse(Y1, -v*(1-v), -v+v^2+etaz.e/h-(etaz.e/h)^2)
rval[j,k] <- sum(weights*Z[,j-p]*Z[,k-p]*rv)
}
###w.r.t. {theta, gamma}
for(k in (p+1):(p+q)){
rv0 <- g1t*(log(g1)+g2)*etaz.e*Z[,k-p]
#rv0 <- g1t*(log(g1)+(1/(theta+mu)-theta/(theta+mu)^2)*(theta+mu))*etaz.e*Z[,k-p]
rv0 <- -rv0/h^2
rval[k,p+q+1] <- sum(weights[Y0]*rv0[Y0])
if(log) rval[k,p+q+1] <- theta * rval[k, p+q+1] ### if theta was parameterized as log(theta)
}
rval[lower.tri(rval)] = t(rval)[lower.tri(rval)]
}
# if(log)
# colnames(rval) <- c(names(coef(x)$count), names(coef(x)$zero), "log(theta)")
# else colnames(rval) <- c(names(coef(x)$count), names(coef(x)$zero), "theta")
#else colnames(rval) <- c(names(coef(x)), "theta")
# rownames(rval) <- colnames(rval)
return(rval)
}
### derived from sandwich function in R package sandwich
sandwichReg <- function(x, breadreg.=breadReg, meatreg.=meatReg, which, log=FALSE, ...){
if(missing(which)) which <- 1:x$nlambda
RET <- vector("list", length=x$nlambda)
for(k in which){
if(is.function(breadReg)) breadreg. <- breadreg.(x, k, log=log)
if(is.function(meatReg)) meatreg. <- meatreg.(x, k, log=log)
n <- x$n
RET[[k]] <- 1/n*(breadreg. %*% meatreg. %*% breadreg.)
}
RET
}
meatReg <- function(x, which, ...)
UseMethod("meatReg")
meatReg.zipath <- function(x, which, log=FALSE, ...){
obj1 <- x
k <- which
count <- as.matrix(coef(x)$count)[,k]
zero <- as.matrix(coef(x)$zero)[,k]
obj1$coefficients <- list(count=matrix(count, ncol=1), zero=matrix(zero, ncol=1))
obj1$nlambda <- 1
if(x$family=="negbin")
obj1$theta <- x$theta[k]
obj1$fitted.values <- x$fitted.values[,k]
grad <- estfunReg(obj1, log=log)
kx1 <- abs(count) > 0
kx2 <- which(kx1)
kz1 <- abs(zero) > 0
kz2 <- which(kz1)
if(x$family=="negbin") npar <- c(kx2, length(kz1)+kz2, dim(grad)[2])
else npar <- c(kx2, length(kz1)+kz2)
grad <- grad[,npar]
n <- NROW(grad)
tmp <- apply(grad, 2, mean)
RET <- 1/n*crossprod(grad) - tmp %o% tmp ### note there is an additional 2nd item for penalized regression. in ordinary regression, the 2nd item=0
# rownames(RET) <- colnames(RET)
return(RET)
}
breadReg <- function(x, which, ...)
UseMethod("breadReg")
breadReg.zipath <- function(x, which, log=FALSE, ...){
family <- x$family
n <- x$n
kx <- dim(x$coefficients$count)[1]
kx1 <- abs(x$coefficients$count[,which]) > 0
kx2 <- which(kx1)
# kz <- dim(x$coefficients$zero)[1]
kz1 <- abs(x$coefficients$zero[,which]) > 0
kz2 <- which(kz1)
sigmax <- rep(0, sum(kx1))
sigmaz <- rep(0, sum(kz1)+(family=="negbin"))
#sigma <- rep(0, sum(kx1)+sum(kz1)+(family=="negbin"))
#sigma <- rep(0, kx+kz+(family=="negbin"))
k <- which
start <- x$coefficient
if(length(kx2) > 1)
for(ss in 2:length(kx2))
sigmax[ss] <- pen2_eval(abs(start$count[kx2[ss],k]), lone=x$lambda.count[k]*x$alpha.count, ltwo=x$lambda.count[k]*(1-x$alpha.count), gamma=x$gamma.count, penalty=x$penalty)
if(length(kz2) > 1)
for(ss in 2:length(kz2))
sigmaz[ss] <- pen2_eval(abs(start$zero[kz2[ss],k]), lone=x$lambda.zero[k]*x$alpha.zero, ltwo=x$lambda.zero[k]*(1-x$alpha.zero), gamma=x$gamma.zero, penalty=x$penalty)
sigma <- c(sigmax, sigmaz)
hes <- hessianReg(x, which, log=log)
s1 <- try(-solve(hes - n*diag(sigma))*n)
return(s1)
}
estfunReg <- function(x, ...)
UseMethod("estfunReg")
estfunReg.zipath <- function(x, which=1, log=FALSE, ...) {
## extract data
if(length(which)!=1) stop("argument which should be one integer only\n")
Y <- if(is.null(x$y)) model.response(model.frame(x)) else x$y
X <- model.matrix(x, model = "count")
Z <- model.matrix(x, model = "zero")
beta <- coef(x, model = "count")
gamma <- coef(x, model = "zero")
if(x$nlambda > 1){
beta <- beta[,which]
gamma <- gamma[,which]
}
if(x$family=="negbin")
theta <- x$theta[which]
offset <- x$offset
if(is.list(offset)) {
offsetx <- offset$count
offsetz <- offset$zero
} else {
offsetx <- offset
offsetz <- NULL
}
if(is.null(offsetx)) offsetx <- 0
if(is.null(offsetz)) offsetz <- 0
linkobj <- make.link(x$link)
wts <- weights(x)
if(is.null(wts)) wts <- 1
Y1 <- Y > 0
eta <- as.vector(X %*% beta + offsetx)
mu <- exp(eta)
etaz <- as.vector(Z %*% gamma + offsetz)
muz <- linkobj$linkinv(etaz)
## density for y = 0
clogdens0 <- switch(x$family,
"poisson" = -mu,
"geometric" = dnbinom(0, size = 1, mu = mu, log = TRUE),
"negbin" = dnbinom(0, size = theta, mu = mu, log = TRUE))
dens0 <- muz * (1 - as.numeric(Y1)) + exp(log(1 - muz) + clogdens0)
## working residuals
wres_count <- switch(x$family,
"poisson" = ifelse(Y1, Y - mu, -exp(-log(dens0) + log(1 - muz) + clogdens0 + log(mu))),
"geometric" = ifelse(Y1, Y - mu * (Y + 1)/(mu + 1), -exp(-log(dens0) +
log(1 - muz) + clogdens0 - log(mu + 1) + log(mu))),
"negbin" = ifelse(Y1, Y - mu * (Y + theta)/(mu + theta), -exp(-log(dens0) +
log(1 - muz) + clogdens0 + log(theta) - log(mu + theta) + log(mu))))
wres_zero <- ifelse(Y1, -1/(1-muz) * linkobj$mu.eta(etaz),
(linkobj$mu.eta(etaz) - exp(clogdens0) * linkobj$mu.eta(etaz))/dens0)
wres_theta <- ifelse(Y1, digamma(Y + theta) - digamma(theta) +
log(theta) - log(mu + theta) + 1 - (Y + theta)/(mu + theta),
exp(-log(dens0) + log(1 - muz) + clogdens0) *
(log(theta) - log(mu + theta) + 1 - theta/(mu + theta)))
## compute gradient from data
if(log)
rval <- cbind(wres_count * wts * X, wres_zero * wts * Z, theta*wres_theta)
else rval <- cbind(wres_count * wts * X, wres_zero * wts * Z, wres_theta)
if(x$family!="negbin")
colnames(rval) <- c(names(coef(x)$count), names(coef(x)$zero))
else{
# if(log)
# colnames(rval) <- c(names(coef(x)$count), names(coef(x)$zero), "log(theta)")
# else colnames(rval) <- c(names(coef(x)$count), names(coef(x)$zero), "theta")
}
# rownames(rval) <- rownames(X)
return(rval)
}
se <- function(x, which, log=TRUE, ...)
UseMethod("se")
se.zipath <- function(x, which, log=TRUE, ...){
if(x$family=="poisson"){
###use a large theta of negbin distribution to approximate poisson distribution
x$family <- "negbin"
x$theta.fixed <- TRUE
x$theta <- rep(10000, x$nlambda)
}
if(identical(as.vector(x$weights), rep.int(1L, x$n))) NULL
else if(all(diff(x$weights)==0)) x$weights <- rep.int(1L, x$n)
else stop("The results may be incorrect if weights are different\n")
if(is.null(which)) stop("which has no numerical value\n")
if(length(which) > 1) stop("which is an integer value\n")
tmp <- sandwichReg(x, which=which, log=log)
tmp0 <- sqrt(diag(tmp[[which]]))
varcount <- which(abs(coef(x)$count[,which]) > 0)
varzero <- which(abs(coef(x)$zero[,which]) > 0)
tmp1 <- rownames(x$coefficient$count)[varcount]
kx <- length(tmp1)
tmp2 <- rownames(x$coefficient$zero)[varzero]
kz <- length(tmp2)
res <- list(count=tmp0[1:kx],zero=tmp0[(kx+1):(kx+kz)])
names(res$count) <- tmp1
names(res$zero) <- tmp2
if(x$family=="negbin" & !x$theta.fixed)
res$theta <- tmp0[length(tmp0)]
res
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.