aictab | R Documentation |
This function creates a model selection table based on one of the
following information criteria: AIC, AICc, QAIC, QAICc. The table
ranks the models based on the selected information criteria and also
provides delta AIC and Akaike weights. aictab
selects the
appropriate function to create the model selection table based on the
object class. The current version works with lists containing objects
of aov
, betareg
, clm
, clmm
, clogit
,
coxme
, coxph
, fitdist
, fitdistr
, glm
,
glmmTMB
, gls
, gnls
, hurdle
, lavaan
,
lm
, lme
, lmekin
, maxlikeFit
, mer
,
merMod
, lmerModLmerTest
, multinom
, negbin
,
nlme
, nls
, polr
, rlm
, survreg
,
vglm
, and zeroinfl
classes as well as various models of
unmarkedFit
classes but does not yet allow mixing of different
classes.
aictab(cand.set, modnames = NULL, second.ord = TRUE, nobs = NULL,
sort = TRUE, ...)
## S3 method for class 'AICaov.lm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICbetareg'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICsclm.clm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICclmm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICclm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICcoxme'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICcoxph'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICfitdist'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICfitdistr'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICglm.lm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICglmmTMB'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICgls'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICgnls.gls'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AIChurdle'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AIClavaan'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AIClm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AIClme'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AIClmekin'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICmaxlikeFit.list'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICmer'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AIClmerMod'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AIClmerModLmerTest'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICglmerMod'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICnlmerMod'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICmultinom.nnet'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICnegbin.glm.lm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICnlme.lme'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICnls'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICpolr'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICrlm.lm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICsurvreg'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICunmarkedFitOccu'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitColExt'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitOccuRN'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitPCount'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitPCO'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitDS'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitGDS'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitOccuFP'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitMPois'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitGMM'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitGPC'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitOccuMulti'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitOccuMS'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitOccuTTD'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitMMO'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitDSO'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICvglm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICzeroinfl'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
cand.set |
a list storing each of the models in the candidate model set. |
modnames |
a character vector of model names to facilitate the identification of
each model in the model selection table. If |
second.ord |
logical. If |
nobs |
this argument allows to specify a numeric value other than total sample
size to compute the AICc (i.e., |
sort |
logical. If |
c.hat |
value of overdispersion parameter (i.e., variance inflation factor) such
as that obtained from |
... |
additional arguments passed to the function. |
aictab
internally creates a new class for the cand.set
list of candidate models, according to the contents of the list. The
current function is implemented for clogit
, coxme
,
coxph
, fitdist
, fitdistr
, glm
,
glmmTMB
, gls
, gnls
, hurdle
, lavaan
,
lm
, lme
, lmekin
, maxlikeFit
, mer
,
merMod
, lmerModLmerTest
, multinom
, negbin
,
nlme
, nls
, polr
, rlm
, survreg
,
vglm
, and zeroinfl
classes as well as various
unmarkedFit
classes.
The function constructs a model selection table based on one of the four information criteria: AIC, AICc, QAIC, and QAICc.
Ten guidelines for model selection:
1) Carefully construct your candidate model set. Each model should represent a specific (interesting) hypothesis to test.
2) Keep your candidate model set short. It is ill-advised to consider as many models as there are data.
3) Check model fit. Use your global model (most complex model) or subglobal models to determine if the assumptions are valid. If none of your models fit the data well, information criteria will only indicate the most parsimonious of the poor models.
4) Avoid data dredging (i.e., looking for patterns after an initial round of analysis).
5) Avoid overfitting models. You should not estimate too many parameters for the number of observations available in the sample.
6) Be careful of missing values. Remember that values that are missing only for certain variables change the data set and sample size, depending on which variable is included in any given model. I suggest to remove missing cases before starting model selection.
7) Use the same response variable for all models of the candidate model set. It is inappropriate to run some models with a transformed response variable and others with the untransformed variable. A workaround is to use a different link function for some models (e.g., identity vs log link).
8) When dealing with models with overdispersion, use the same value of c-hat for all models in the candidate model set. For binomial models with trials > 1 (i.e., success/trial or cbind(success, failure) syntax) or with Poisson GLM's, you should estimate the c-hat from the most complex model (global model). If c-hat > 1, you should use the same value for each model of the candidate model set (where appropriate) and include it in the count of parameters (K). Similarly, for negative binomial models, you should estimate the dispersion parameter from the global model and use the same value across all models.
9) Burnham and Anderson (2002) recommend to avoid mixing the information-theoretic approach and notions of significance (i.e., P values). It is best to provide estimates and a measure of their precision (standard error, confidence intervals).
10) Determining the ranking of the models is just the first step. Akaike weights sum to 1 for the entire model set and can be interpreted as the weight of evidence in favor of a given model being the best one given the candidate model set considered and the data at hand. Models with large Akaike weights have strong support. Evidence ratios, importance values, and confidence sets for the best model are all measures that assist in interpretation. In cases where the top ranking model has an Akaike weight > 0.9, one can base inference on this single most parsimonious model. When many models rank highly (i.e., delta (Q)AIC(c) < 4), one should model-average effect sizes for the parameters with most support across the entire set of models. Model averaging consists in making inference based on the whole set of candidate models, instead of basing conclusions on a single 'best' model. It is an elegant way of making inference based on the information contained in the entire model set.
aictab
creates an object of class aictab
with the
following components:
Modname |
the name of each model of the candidate model set. |
K |
the number of estimated parameters for each model. |
(Q)AIC(c) |
the information criterion requested for each model (AIC, AICc, QAIC, QAICc). |
Delta_(Q)AIC(c) |
the appropriate delta AIC component depending on the information criteria selected. |
ModelLik |
the relative likelihood of the model given the data (exp(-0.5*delta[i])). This is not to be confused with the likelihood of the parameters given the data. The relative likelihood can then be normalized across all models to get the model probabilities. |
(Q)AIC(c)Wt |
the Akaike weights, also termed "model probabilities" sensu Burnham and Anderson (2002) and Anderson (2008). These measures indicate the level of support (i.e., weight of evidence) in favor of any given model being the most parsimonious among the candidate model set. |
Cum.Wt |
the cumulative Akaike weights. These are only meaningful
if results in table are sorted in decreasing order of Akaike weights
(i.e., |
c.hat |
if c.hat was specified as an argument, it is included in the table. |
LL |
if c.hat = 1 and parameters estimated by maximum likelihood, the log-likelihood of each model. |
Quasi.LL |
if c.hat > 1, the quasi log-likelihood of each model. |
Res.LL |
if parameters are estimated by restricted maximum-likelihood (REML), the restricted log-likelihood of each model. |
Marc J. Mazerolle
Anderson, D. R. (2008) Model-based Inference in the Life Sciences: a primer on evidence. Springer: New York.
Burnham, K. P., Anderson, D. R. (2002) Model Selection and Multimodel Inference: a practical information-theoretic approach. Second edition. Springer: New York.
Burnham, K. P., Anderson, D. R. (2004) Multimodel inference: understanding AIC and BIC in model selection. Sociological Methods and Research 33, 261–304.
Dail, D., Madsen, L. (2011) Models for estimating abundance from repeated counts of an open population. Biometrics 67, 577–587.
MacKenzie, D. I., Nichols, J. D., Lachman, G. B., Droege, S., Royle, J. A., Langtimm, C. A. (2002) Estimating site occupancy rates when detection probabilities are less than one. Ecology 83, 2248–2255.
MacKenzie, D. I., Nichols, J. D., Hines, J. E., Knutson, M. G., Franklin, A. B. (2003) Estimating site occupancy, colonization, and local extinction when a species is detected imperfectly. Ecology 84, 2200–2207.
Mazerolle, M. J. (2006) Improving data analysis in herpetology: using Akaike's Information Criterion (AIC) to assess the strength of biological hypotheses. Amphibia-Reptilia 27, 169–180.
Royle, J. A. (2004) N-mixture models for estimating population size from spatially replicated counts. Biometrics 60, 108–115.
AICc
, aictabCustom
, bictab
,
confset
, c_hat
, evidence
,
importance
, modavg
,
modavgEffect
, modavgShrink
,
modavgPred
##Mazerolle (2006) frog water loss example
data(dry.frog)
##setup a subset of models of Table 1
Cand.models <- list( )
Cand.models[[1]] <- lm(log_Mass_lost ~ Shade + Substrate +
cent_Initial_mass + Initial_mass2,
data = dry.frog)
Cand.models[[2]] <- lm(log_Mass_lost ~ Shade + Substrate +
cent_Initial_mass + Initial_mass2 +
Shade:Substrate, data = dry.frog)
Cand.models[[3]] <- lm(log_Mass_lost ~ cent_Initial_mass +
Initial_mass2, data = dry.frog)
Cand.models[[4]] <- lm(log_Mass_lost ~ Shade + cent_Initial_mass +
Initial_mass2, data = dry.frog)
Cand.models[[5]] <- lm(log_Mass_lost ~ Substrate + cent_Initial_mass +
Initial_mass2, data = dry.frog)
##create a vector of names to trace back models in set
Modnames <- paste("mod", 1:length(Cand.models), sep = " ")
##generate AICc table
aictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE)
##round to 4 digits after decimal point and give log-likelihood
print(aictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE),
digits = 4, LL = TRUE)
## Not run:
##Burnham and Anderson (2002) flour beetle data
data(beetle)
##models as suggested by Burnham and Anderson p. 198
Cand.set <- list( )
Cand.set[[1]] <- glm(Mortality_rate ~ Dose, family =
binomial(link = "logit"), weights = Number_tested,
data = beetle)
Cand.set[[2]] <- glm(Mortality_rate ~ Dose, family =
binomial(link = "probit"), weights = Number_tested,
data = beetle)
Cand.set[[3]] <- glm(Mortality_rate ~ Dose, family =
binomial(link ="cloglog"), weights = Number_tested,
data = beetle)
##check c-hat
c_hat(Cand.set[[1]])
c_hat(Cand.set[[2]])
c_hat(Cand.set[[3]])
##lowest value of c-hat < 1 for these non-nested models, thus use
##c.hat = 1
##set up named list
names(Cand.set) <- c("logit", "probit", "cloglog")
##compare models
##model names will be taken from the list if modnames is not specified
res.table <- aictab(cand.set = Cand.set, second.ord = FALSE)
##note that delta AIC and Akaike weights are identical to Table 4.7
print(res.table, digits = 2, LL = TRUE) #print table with 2 digits and
##print log-likelihood in table
print(res.table, digits = 4, LL = FALSE) #print table with 4 digits and
##do not print log-likelihood
## End(Not run)
##two-way ANOVA with interaction
data(iron)
##full model
m1 <- lm(Iron ~ Pot + Food + Pot:Food, data = iron)
##additive model
m2 <- lm(Iron ~ Pot + Food, data = iron)
##null model
m3 <- lm(Iron ~ 1, data = iron)
##candidate models
Cand.aov <- list(m1, m2, m3)
Cand.names <- c("full", "additive", "null")
aictab(Cand.aov, Cand.names)
##single-season occupancy model example modified from ?occu
## Not run:
require(unmarked)
##single season example modified from ?occu
data(frogs)
pferUMF <- unmarkedFrameOccu(pfer.bin)
##add fake covariates
siteCovs(pferUMF) <- data.frame(sitevar1 = rnorm(numSites(pferUMF)),
sitevar2 = runif(numSites(pferUMF)))
##observation covariates
obsCovs(pferUMF) <- data.frame(obsvar1 = rnorm(numSites(pferUMF) *
obsNum(pferUMF)))
##set up candidate model set
fm1 <- occu(~ obsvar1 ~ sitevar1, pferUMF)
fm2 <- occu(~ 1 ~ sitevar1, pferUMF)
fm3 <- occu(~ obsvar1 ~ sitevar2, pferUMF)
fm4 <- occu(~ 1 ~ sitevar2, pferUMF)
##assemble models in named list (alternative to using 'modnames' argument)
Cand.mods <- list("fm1" = fm1, "fm2" = fm2, "fm3" = fm3, "fm4" = fm4)
##compute table
aictab(cand.set = Cand.mods, second.ord = TRUE)
detach(package:unmarked)
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.