###Adapted from file MASS/R/negbin.R
glmregNB <- function(formula, data, weights, offset=NULL, nlambda=100, lambda=NULL, lambda.min.ratio=ifelse(nobs<nvars,.05, .001),alpha=1, gamma=3, rescale=TRUE, standardize=TRUE, penalty.factor = rep(1, nvars), thresh=1e-3, maxit.theta=10, maxit=1000, eps=.Machine$double.eps, trace=FALSE, start = NULL, etastart = NULL, mustart = NULL, theta.fixed=FALSE, theta0=NULL, init.theta=NULL, link=log, penalty=c("enet","mnet","snet"), method="glmreg_fit", model=TRUE, x.keep=FALSE, y.keep=TRUE, contrasts=NULL, convex=FALSE, parallel=TRUE, n.cores=2, ...)
{
if(!theta.fixed) init.theta <- theta0[1]
if(theta.fixed)
if(length(theta0)!=nlambda)
stop("length of theta0 must be the same as nlambda if theta.fixed=TRUE\n")
else if(any(theta0 <= 0))
stop("theta0 must be positive\n")
penalty <- match.arg(penalty)
if(penalty=="enet") convex=FALSE
loglik <- function(n, th, mu, y, w)
sum(w*(lgamma(th + y) - lgamma(th) - lgamma(y + 1) + th * log(th) +
y * log(mu + (y == 0)) - (th + y) * log(th + mu)))
link <- substitute(link)
fam0 <- if(missing(init.theta) || is.null(init.theta))
do.call("poisson", list(link = link))
else
do.call("negative.binomial", list(theta = init.theta, link = link))
dots <- list(...)
mf <- Call <- match.call()
m <- match(c("formula", "data", "subset", "weights", "na.action",
"etastart", "mustart", "offset"), names(mf), 0)
mf <- mf[c(1, m)]
mf$drop.unused.levels <- TRUE
mf[[1L]] <- as.name("model.frame")
mf <- eval.parent(mf)
Terms <- attr(mf, "terms")
if(method == "model.frame") return(mf)
Y <- model.response(mf, "numeric")
## null model support
X <- if (!is.empty.model(Terms)) model.matrix(Terms, mf, contrasts) else matrix(,NROW(Y),0)
w <- model.weights(mf)
nm <- dim(X[,-1])
nobs <- n <- nm[1]
nvars <- m <- nm[2]
if(is.null(penalty.factor))
penalty.factor <- rep(1, nvars)
# if(missing(weights)) w <- weights <- rep(1, length(y))
if(!length(w)) w <- weights <- rep(1, nrow(mf))
else w <- weights
if(any(w < 0)) stop("negative weights not allowed")
offset <- model.offset(mf)
## these allow starting values to be expressed in terms of other vars.
mustart <- model.extract(mf, "mustart")
etastart <- model.extract(mf, "etastart")
### thoughts: use glm.nb to estimate intercept-only model which can take care of theta estimate
if(is.null(lambda)){
lmax <- findlam(x=X[,-1], y=Y, weights=weights, family="negbin", theta=NULL, w=NULL, z=NULL, alpha=alpha, penalty.factor=penalty.factor, standardize=standardize)
lpath <- seq(log(lmax), log(lambda.min.ratio * lmax), length.out=nlambda)
lambda <- exp(lpath)
}
else
nlambda <- length(lambda)
if(standardize){
wt <- weights/sum(weights)
one <- rep(1, length(Y))
Xnew <- X[,-1]
if(dim(Xnew)[2] > 0){
xx <- Xnew
meanx <- drop(wt %*% Xnew)
Xnew <- scale(Xnew, meanx, FALSE) # centers x
Xnew <- sqrt(wt) * Xnew
normx <- sqrt(drop(one %*% (Xnew^2)))
X[,-1] <- Xnew <- scale(xx, meanx, normx)
}
}
beta <- matrix(NA, ncol=nlambda, nrow=m)
fitted <- matrix(NA, ncol=nlambda, nrow=n)
pll <- matrix(NA, ncol=nlambda, nrow=maxit)
b0 <- tht <- nulldev <- resdev <- rep(NA, nlambda)
convout <- twologlik <- rep(NA, nlambda)
if(parallel){
warning("trace argument is ignored since parallel=TRUE")
cl <- parallel::makeCluster(n.cores)
registerDoParallel(cl)
fitall <- foreach(k=1:nlambda) %dopar%{
if(!missing(method)) {
if(!exists(method, mode = "function"))
stop("unimplemented method: ", sQuote(method))
glm.fitter <- get(method)
} else {
method <- "glm.fit"
glm.fitter <- stats::glm.fit
}
if(is.null(mustart) || missing(init.theta)){
fit <- glm.nb(Y ~ 1, weights=weights)
mu <- fit$fitted.values
th <- fit$theta
}
else{
mu <- mustart
th <- init.theta
}
if(theta.fixed) th <- theta0[k]
iter <- 0
#d1 <- sqrt(2 * max(1, fit$df.residual))
d2 <- del <- 1
Lm <- loglik(n, th, mu, Y, w)
converged <- FALSE
while((iter <- iter + 1) <= maxit.theta && !converged){
eta <- log(mu)
fit <- glmreg_fit(x=X[,-1], y=Y, weights=w, start = start, etastart=eta, mustart = mu, offset=offset, lambda=lambda[k],alpha=alpha,gamma=gamma,rescale=rescale, standardize=standardize, penalty.factor = penalty.factor, thresh=thresh, maxit=maxit, eps=eps, family="negbin", theta=th, trace=trace, penalty=penalty)
t0 <- th
mu <- fit$fitted.values
if(!theta.fixed){
# th <- theta.ml(Y, mu, sum(w), w, limit=maxit.theta,
th <- theta.ml(Y, mu, sum(w), w,
trace = trace)
}
else th <- theta0[k]
start <- c(fit$b0, fit$beta)
del <- t0 - th
Lm0 <- Lm
#penval <- ifelse(standardize, n*fit$penval, fit$penval)
#Lm <- loglik(n, th, mu, Y, w) - penval
Lm <- loglik(n, th, mu, Y, w)
fit$df.residual <- n - fit$df - 1
d1 <- sqrt(2 * max(1, fit$df.residual))
converged <- abs((Lm0 - Lm)/d1) + abs(del/d2) < 1e-8
}
Lm <- loglik(n, th, mu, Y, w)
fit$converged <- converged
fit$twologlik <- 2 * Lm
fit$th <- th
fit
}
#stopImplicitCluster()
parallel::stopCluster(cl)
for(k in 1:nlambda){
tht[k] <- fitall[[k]]$th
beta[,k] <- fitall[[k]]$beta
b0[k] <- fitall[[k]]$b0
fitted[,k] <- fitall[[k]]$fitted
convout[k] <- fitall[[k]]$converged
twologlik[k] <- fitall[[k]]$twologlik
nulldev[k] <- fitall[[k]]$nulldev
resdev[k] <- fitall[[k]]$resdev
if(trace)
pll[,k] <- fitall[[k]]$pll
}
fit <- fitall[[nlambda]] ### Why the last model?
}else{
k <- 1
while(k <= nlambda){
if(trace) message("loop in lambda:", k, "\n")
if(k==1){
if(trace){ cat("Initial fit family is:")
print(fam0)
}
if(trace) message("Initial fit:")
if(!missing(method)) {
if(!exists(method, mode = "function"))
stop("unimplemented method: ", sQuote(method))
glm.fitter <- get(method)
} else {
method <- "glm.fit"
glm.fitter <- stats::glm.fit
}
if(is.null(mustart) || missing(init.theta)){
fit <- glm.nb(Y ~ 1, weights=weights)
mu <- fit$fitted.values
th <- fit$theta
}
else{
mu <- mustart
th <- init.theta
}
if(trace)
message("Initial value for theta:", signif(th))
}
if(theta.fixed) th <- theta0[k]
iter <- 0
#d1 <- sqrt(2 * max(1, fit$df.residual))
d2 <- del <- 1
Lm <- loglik(n, th, mu, Y, w)
converged <- FALSE
while((iter <- iter + 1) <= maxit.theta && !converged){
eta <- log(mu)
fit <- glmreg_fit(x=X[,-1], y=Y, weights=w, start = start, etastart=eta, mustart = mu, offset=offset, lambda=lambda[k],alpha=alpha,gamma=gamma, rescale=rescale, standardize=standardize, penalty.factor = penalty.factor, thresh=thresh, maxit=maxit, eps=eps, family="negbin", theta=th, trace=trace, penalty=penalty)
t0 <- th
mu <- fit$fitted.values
if(!theta.fixed){
th <- theta.ml(Y, mu, sum(w), w,
trace = trace)
}
else th <- theta0[k]
start <- c(fit$b0, fit$beta)
del <- t0 - th
Lm0 <- Lm
#penval <- ifelse(standardize, n*fit$penval, fit$penval)
#Lm <- loglik(n, th, mu, Y, w) - penval # is penval needed?
Lm <- loglik(n, th, mu, Y, w)
fit$df.residual <- n - fit$df - 1
d1 <- sqrt(2 * max(1, fit$df.residual))
converged <- abs((Lm0 - Lm)/d1) + abs(del/d2) < 1e-8
if(trace) {
Ls <- loglik(n, th, Y, Y, w)
Dev <- 2 * (Ls - Lm)
message("Theta(", iter, ") =", signif(th),
", 2(Ls - Lm) =", signif(Dev))
}
}
if(!is.null(attr(th, "warn"))) fit$th.warn <- attr(th, "warn")
if(trace && iter > maxit.theta) {
warning("alternation limit reached")
fit$th.warn <- gettext("alternation limit reached")
}
tht[k] <- th
beta[,k] <- as.vector(fit$beta)
b0[k] <- fit$b0
Lm <- loglik(n, th, mu, Y, w)
twologlik[k] <- as.vector(2 * Lm)
nulldev[k] <- fit$nulldev
resdev[k] <- fit$resdev
if(trace) pll[,k] <- fit$pll
convout[k] <- converged
fitted[,k] <- fit$fitted.values
k <- k + 1
}
}
class(fit) <- c("glmregNB", "glmreg", "lm")
fit$terms <- Terms
fit$formula <- as.vector(attr(Terms, "formula"))
## make result somewhat reproducible, but not sure what does init.theta mean here!
Call$init.theta <- signif(as.vector(fit$theta), 10)
Call$link <- link
fit$call <- Call
if(model) fit$model <- mf
fit$na.action <- attr(mf, "na.action")
if(x.keep) fit$x <- data[,colnames(data)%in%attr(Terms, "term.labels")]
if(!y.keep) fit$y <- NULL
if(standardize) {
beta <- beta/normx
b0 <- b0 - crossprod(meanx, beta)
}
fit$twologlik <- twologlik
fit$df <- apply(abs(beta) > 0, 2, sum) ### excluding intercept and theta
fit$aic <- -fit$twologlik + 2*(2+fit$df)
fit$bic <- -fit$twologlik + log(n)*(2+fit$df)
fit$fitted.values <- fitted
fit$contrasts <- attr(X, "contrasts")
fit$xlevels <- .getXlevels(Terms, mf)
fit$method <- method
fit$offset <- fit$offset
if(is.null(colnames(X[,-1]))) varnames <- paste("V", 1:ncol(X[,-1]), sep="")
else varnames <- colnames(X[,-1])
dimnames(beta) <- list(varnames, round(lambda, digits=4))
fit$beta <- beta
fit$b0 <- matrix(b0, nrow=1)
fit$lambda <- lambda
fit$nlambda <- nlambda
fit$nulldev <- nulldev
fit$resdev <- resdev
if(trace)
fit$pll <- pll
else fit$pll <- NULL
fit$theta <- as.vector(tht)
fit$converged <- convout
fit
}
summary.glmregNB <- function(object, ...)
{
if(!is.null(object$th.warn))
summ <- object[c("theta", "twologlik", "th.warn")]
else summ <- object[c("theta", "twologlik")]
class(summ) <- "summary.glmregNB"
summ
}
print.summary.glmregNB <- function(x, ...)
{
NextMethod()
cat("\n Theta: ", format(round(x$theta, 3)),
"\n")
if(!is.null(x$th.warn))
cat("Warning while fitting theta:", x$th.warn,"\n")
cat("\n 2 x log-likelihood: ", format(round(x$twologlik, 3)), "\n")
invisible(x)
}
pen_sum <- function(b, n, lambda, alpha, gamma, penalty){
lc <- length(b)
pen1 <- rep(0, lc)
if(lc > 0){
for(j in 1:lc)
pen1[j] <- pen_eval(abs(b[j]), lone=lambda*alpha, ltwo=lambda*(1-alpha), gamma=gamma, penalty=penalty)
}
return(n*(sum(pen1)))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.