MBnegbin | R Documentation |
Family generators for model-based boosting of count data regressions using mboost.
MBbinomial(link = "logit")
MBnegbin(theta = NULL, link = "log",
control = list(reltol = .Machine$double.eps^(1/1.5), maxit = 500))
MBztpoisson(link = "log",
control = list(reltol = .Machine$double.eps^(1/1.5), maxit = 500))
MBztnegbin(theta = NULL, link = "log",
control = list(reltol = .Machine$double.eps^(1/1.5), maxit = 500))
link |
character or object of class |
theta |
numeric or |
control |
list with control parameters passed to
|
The family generators MBbinomial
, MBnegbin
, MBztpoisson
, MBztnegbin
enable
boosting of binary regressions, negative binomial count regressions, zero-truncated
Poisson count regressions, and zero-truncated negative binomial count regressions, respectively.
Family MBbinomial
is comparable
to Binomial
but supports any link function (not just logit and probit).
Family MBnegbin
is comparable to NBinomial
but is typically
much faster because the nuisance parameter theta
is estimated using analytical
gradients (via optim
) and setting better starting values.
MBztpoisson
and MBztnegbin
enable zero-truncated Poisson and negative binomial regressions so that
also the count parts of hurdle models can be easily estimated.
An object of class boost_family_glm
.
mboost
,
glmboost
,
gamboost
,
Binomial
,
Poisson
,
NBinomial
### Negative binomial regression for CrabSatellites ----------------------------
if(require("mboost")) {
## crab satellite data using ordered factors as numeric
data("CrabSatellites", package = "countreg")
CrabSatellites <- transform(CrabSatellites,
color = as.numeric(color),
spine = as.numeric(spine)
)
## comparison of ML and boosting with NBinomial() vs. MBnegbin()
system.time(m0 <- glm.nb(satellites ~ width + color, data = CrabSatellites))
system.time(m1 <- glmboost(satellites ~ width + color, data = CrabSatellites,
family = NBinomial(), control = boost_control(mstop = 500)))
system.time(m2 <- glmboost(satellites ~ width + color, data = CrabSatellites,
family = MBnegbin(), control = boost_control(mstop = 500)))
## note that mstop is _not_ tuned here to (ab)use mboost to get the ML estimator
## compare coefficients
cbind(c(coef(m0), "theta" = m0$theta),
c(coef(m1, off2int = TRUE, which = ""), nuisance(m1)),
c(coef(m1, off2int = TRUE, which = ""), nuisance(m1))
)
}
### Hurdle regression for CrabSatellites using spline terms --------------------
if(require("mboost")) {
## ML estimation
g <- hurdle(satellites ~ width + color, data = CrabSatellites, dist = "negbin")
summary(g)
## boosting of zero hurdle
g0 <- gamboost(factor(satellites > 0) ~ bbs(width) + bbs(color, knots = 3),
data = CrabSatellites, family = MBbinomial())
set.seed(0)
g0cv <- cvrisk(g0)
g0[mstop(g0cv)]
## boosting of count regression
g1 <- gamboost(satellites ~ bbs(width) + bbs(color, knots = 3),
data = subset(CrabSatellites, satellites > 0), family = MBztnegbin())
set.seed(1)
g1cv <- cvrisk(g1)
g1[mstop(g1cv)]
par(mfrow = c(1, 2))
## optimal mstop values
plot(g0cv)
plot(g1cv)
## -> no effects in covariates for count part
## partial effects in zero hurdle
plot(g0)
## -> large effect of width, moderate effect of color with
## width effect almost linear
}
### Hurdle regression for RecreationDemand using linear terms ------------------
## Not run:
library("mboost")
data("RecreationDemand", package = "AER")
### Zero hurdle ##
## ML vs. boosting
z0 <- glm(factor(trips > 0) ~ ., data = RecreationDemand, family = binomial)
z1 <- glmboost(factor(trips > 0) ~ ., data = RecreationDemand, family = MBbinomial(),
control = boost_control(mstop = 5000))
plot(z1)
## tune mstop
set.seed(0)
z1cv <- cvrisk(z1)
z1cv
plot(z1cv)
## very flat (presumably due to separation?)
## -> stop earlier manually
z1[3000]
## compare coefficients
cbind(coef(z0), coef(z1, off2int = TRUE, which = ""))
## -> some shrunken entirely to zero,
## coefficient of variable with separation (userfee) shrunken considerably
### Count (zero-truncated)
## ML and boosting count part
c0 <- zerotrunc(trips ~ ., data = subset(RecreationDemand, trips > 0), dist = "negbin")
c1 <- glmboost(trips ~ ., data = subset(RecreationDemand, trips > 0),
family = MBztnegbin(), control = boost_control(mstop = 5000))
plot(c1)
## tune mstop
set.seed(0)
c1cv <- cvrisk(c1)
c1cv
plot(c1cv)
## use mstop from cvrisk
c1[mstop(c1cv)]
## compare coefficients
cbind(c(coef(c0), "theta" = c0$theta),
c(coef(c1, off2int = TRUE, which = ""), nuisance(c1)))
## -> similar
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.