library(knitr) opts_chunk$set( dev.args=list(family="Palatino")) options(width=68)
To install the stable version of [mcglm
][], use
devtools::install_git()
. For more information, visit mcglm/README.
library(devtools) install_git("wbonat/mcglm")
library(mcglm) packageVersion("mcglm")
The mcglm
package implements the multivariate covariance generalized
linear models (McGLMs) proposed by Bonat and J$\o$rgensen (2016).
In this introductory vignette we employed the mcglm
package for
fitting a set of generalized linear models and compare our results
with the ones obtained by ordinary R
functions like lm
and glm
.
Consider the count data obtained in Dobson (1990).
## Dobson (1990) Page 93: Randomized Controlled Trial : counts <- c(18,17,15,20,10,20,25,13,12) outcome <- gl(3,1,9) treatment <- gl(3,3) print(d.AD <- data.frame(treatment, outcome, counts))
Ordinary analysis using quasi-Poisson model.
fit.glm <- glm(counts ~ outcome + treatment, family = quasipoisson)
The orthodox quasi-Poisson model is obtained by specifying the variance
function as tweedie
and fix the power parameter at $1$.
Since, we are dealing with independent data, the matrix linear predictor
is composed of a diagonal matrix.
require(mcglm) require(Matrix) # Matrix linear predictor Z0 <- mc_id(d.AD) fit.qglm <- mcglm(linear_pred = c(counts ~ outcome + treatment), matrix_pred = list("resp1" = Z0), link = "log", variance = "tweedie", data = d.AD, control_algorithm = list(verbose = FALSE, method = "chaser", tuning = 0.8))
Comparing regression parameter estimates and standard errors.
cbind("GLM" = coef(fit.glm), "McGLM" = coef(fit.qglm, type = "beta")$Estimates) cbind("GLM" = sqrt(diag(vcov(fit.glm))), "McGLM" = coef(fit.qglm, type = "beta", std.error = TRUE)$Std.error)
Consider the example from Venables & Ripley (2002, p.189).
The response variable is continuous for which we can assume the
Gaussian distribution. In this example, we exemplify the use of the
offset
argument.
# Loading the data set utils::data(anorexia, package = "MASS") # GLM fit anorex.1 <- glm(Postwt ~ Prewt + Treat + offset(Prewt), family = gaussian, data = anorexia) # McGLM fit Z0 <- mc_id(anorexia) fit.anorexia <- mcglm(linear_pred = c(Postwt ~ Prewt + Treat), matrix_pred = list(Z0), offset = list(anorexia$Prewt), power_fixed = TRUE, data = anorexia, control_algorithm = list("correct" = TRUE))
Comparing parameter estimates and standard errors.
# Estimates cbind("McGLM" = round(coef(fit.anorexia, type = "beta")$Estimates,5), "GLM" = round(coef(anorex.1),5)) # Standard errors cbind("McGLM" = sqrt(diag(vcov(fit.anorexia))), "GLM" = c(sqrt(diag(vcov(anorex.1))),NA))
Consider the following data set from McCullagh & Nelder (1989, pp.300-2). It is an example of Gamma regression model.
clotting <- data.frame( u = c(5,10,15,20,30,40,60,80,100), lot1 = c(118,58,42,35,27,25,21,19,18), lot2 = c(69,35,26,21,18,16,13,12,12))
Analysis using generalized linear models glm
function in R
.
fit.lot1 <- glm(lot1 ~ log(u), data = clotting, family = Gamma(link = "inverse")) fit.lot2 <- glm(lot2 ~ log(u), data = clotting, family = Gamma(link = "inverse"))
The code below exemplify how to use the control_initial
argument for fixing the power
parameter at $p = 2$.
list_initial = list() list_initial$regression <- list(coef(fit.lot1)) list_initial$power <- list(c(2)) list_initial$tau <- list(summary(fit.lot1)$dispersion) list_initial$rho = 0
The control_initial
argument should be a named list with initial
values for all parameters involved in the model. Note that, in this
example we used the parameter estimates from the glm
fit as initial
values for the regression and dispersion parameters. The power parameter
was fixed at $p = 2$, since we want to fit Gamma regression models.
In that case, we have only one response variable, but the initial value
for correlation parameter $\rho$ should be specified.
Z0 <- mc_id(clotting) fit.lot1.mcglm <- mcglm(linear_pred = c(lot1 ~ log(u)), matrix_pred = list(Z0), link = "inverse", variance = "tweedie", data = clotting, control_initial = list_initial)
Comparing parameter estimates and standard errors.
# Estimates cbind("mcglm" = round(coef(fit.lot1.mcglm, type = "beta")$Estimates,5), "glm" = round(coef(fit.lot1),5)) # Standard errors cbind("mcglm" = sqrt(diag(vcov(fit.lot1.mcglm))), "glm" = c(sqrt(diag(vcov(fit.lot1))),NA))
Initial values for the response variable lot2
list_initial$regression <- list("resp1" = coef(fit.lot2)) list_initial$tau <- list("resp1" = c(var(1/clotting$lot2)))
Note that, since the list_initial
object already have all components
required, we just modify the entries
regression and tau.
fit.lot2.mcglm <- mcglm(linear_pred = c(lot2 ~ log(u)), matrix_pred = list(Z0), link = "inverse", variance = "tweedie", data = clotting, control_initial = list_initial)
Comparing parameter estimates and standard errors.
# Estimates cbind("mcglm" = round(coef(fit.lot2.mcglm, type = "beta")$Estimates,5), "glm" = round(coef(fit.lot2),5)) # Standard errors cbind("mcglm" = sqrt(diag(vcov(fit.lot2.mcglm))), "glm" = c(sqrt(diag(vcov(fit.lot2))),NA))
The main contribution of the mcglm
package is that it easily fits
multivariate regression models. For example, for the clotting
data a
bivariate Gamma model is a suitable choice.
# Initial values list_initial = list() list_initial$regression <- list(coef(fit.lot1), coef(fit.lot2)) list_initial$power <- list(c(2),c(2)) list_initial$tau <- list(c(0.00149), c(0.001276)) list_initial$rho = 0.80 # Matrix linear predictor Z0 <- mc_id(clotting) # Fit bivariate Gamma model fit.joint.mcglm <- mcglm(linear_pred = c(lot1 ~ log(u), lot2 ~ log(u)), matrix_pred = list(Z0, Z0), link = c("inverse", "inverse"), variance = c("tweedie", "tweedie"), data = clotting, control_initial = list_initial, control_algorithm = list("correct" = TRUE, "method" = "chaser", "tuning" = 0.1, "max_iter" = 1000)) summary(fit.joint.mcglm)
We also can easily change the link function. The code below fit a
bivariate Gamma model using the log
link function.
# Initial values list_initial = list() list_initial$regression <- list(c(log(mean(clotting$lot1)),0), c(log(mean(clotting$lot2)),0)) list_initial$power <- list(c(2), c(2)) list_initial$tau <- list(c(0.023), c(0.024)) list_initial$rho = 0 # Fit bivariate Gamma model fit.joint.log <- mcglm(linear_pred = c(lot1 ~ log(u), lot2 ~ log(u)), matrix_pred = list(Z0,Z0), link = c("log", "log"), variance = c("tweedie", "tweedie"), data = clotting, control_initial = list_initial) summary(fit.joint.log)
Consider the example menarche
from the MASS R
package.
require(MASS) data(menarche) data <- data.frame("resp" = menarche$Menarche/menarche$Total, "Ntrial" = menarche$Total, "Age" = menarche$Age)
Logistic regression model.
glm.out = glm(cbind(Menarche, Total-Menarche) ~ Age, family=binomial(logit), data=menarche)
The same fitted by mcglm
function.
# Matrix linear predictor Z0 <- mc_id(data) fit.logit <- mcglm(linear_pred = c(resp ~ Age), matrix_pred = list(Z0), link = "logit", variance = "binomialP", Ntrial = list(data$Ntrial), data = data)
Comparing parameter estimates and standard errors.
# Estimates cbind("GLM" = coef(glm.out), "McGLM" = coef(fit.logit, type = "beta")$Estimates) # Standard error cbind("GLM" = c(sqrt(diag(vcov(glm.out))),NA), "McGLM" = sqrt(diag(vcov(fit.logit))))
We can estimate a more flexible model using the extended binomial variance function.
fit.logit.power <- mcglm(linear_pred = c(resp ~ Age), matrix_pred = list(Z0), link = "logit", variance = "binomialP", Ntrial = list(data$Ntrial), power_fixed = FALSE, data = data) summary(fit.logit.power)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.