Create Model Selection Tables

Description

This function creates a model selection table based on one of the following information criteria: AIC, AICc, QAIC, QAICc. The table ranks the models based on the selected information criteria and also provides delta AIC and Akaike weights. aictab selects the appropriate function to create the model selection table based on the object class. The current version works with lists containing objects of aov, betareg, clm, clmm, clogit, coxme, coxph, fitdist, fitdistr, glm, gls, gnls, hurdle, lavaan, lm, lme, lmekin, maxlikeFit, mer, merMod, multinom, nlme, nls, polr, rlm, survreg, vglm, and zeroinfl classes as well as various models of unmarkedFit classes but does not yet allow mixing of different classes.

Usage

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
aictab(cand.set, modnames = NULL, second.ord = TRUE, nobs = NULL,
       sort = TRUE, ...)

## S3 method for class 'AICaov.lm'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICbetareg'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICsclm.clm'
aictab(cand.set, modnames = NULL,
       second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICclmm'
aictab(cand.set, modnames = NULL,
       second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICclm'
aictab(cand.set, modnames = NULL,
       second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICcoxme'
aictab(cand.set, modnames = NULL,
       second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICcoxph'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICfitdist'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICfitdistr'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICglm.lm'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)  

## S3 method for class 'AICgls'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICgnls.gls'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AIChurdle'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AIClavaan'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AIClm'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AIClme'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AIClmekin'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICmaxlikeFit.list'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...) 

## S3 method for class 'AICmer'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AIClmerMod'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICglmerMod'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...) 

## S3 method for class 'AICnlmerMod'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICmultinom.nnet'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICnlme.lme'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...) 

## S3 method for class 'AICnls'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICpolr'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICrlm.lm'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICsurvreg'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICunmarkedFitOccu'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitColExt'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitOccuRN'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitPCount'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitPCO'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitDS'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitGDS'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitOccuFP'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitMPois'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitGMM'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitGPC'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICvglm'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICzeroinfl'
aictab(cand.set, modnames = NULL,
        second.ord = TRUE, nobs = NULL, sort = TRUE, ...)

Arguments

cand.set

a list storing each of the models in the candidate model set.

modnames

a character vector of model names to facilitate the identification of each model in the model selection table. If NULL, the function uses the names in the cand.set list of candidate models (i.e., a named list). If no names appear in the list and no character vector is provided, generic names (e.g., Mod1, Mod2) are supplied in the table in the same order as in the list of candidate models.

second.ord

logical. If TRUE, the function returns the second-order Akaike information criterion (i.e., AICc).

nobs

this argument allows to specify a numeric value other than total sample size to compute the AICc (i.e., nobs defaults to total number of observations). This is relevant only for mixed models or various models of unmarkedFit classes where sample size is not straightforward. In such cases, one might use total number of observations or number of independent clusters (e.g., sites) as the value of nobs.

sort

logical. If TRUE, the model selection table is ranked according to the (Q)AIC(c) values.

c.hat

value of overdispersion parameter (i.e., variance inflation factor) such as that obtained from c_hat. Note that values of c.hat different from 1 are only appropriate for binomial GLM's with trials > 1 (i.e., success/trial or cbind(success, failure) syntax), with Poisson GLM's, single-season occupancy models (MacKenzie et al. 2002), dynamic occupancy models (MacKenzie et al. 2003), or N-mixture models (Royle 2004, Dail and Madsen 2011). If c.hat > 1, aictab will return the quasi-likelihood analogue of the information criteria requested and multiply the variance-covariance matrix of the estimates by this value (i.e., SE's are multiplied by sqrt(c.hat)). This option is not supported for generalized linear mixed models of the mer or merMod classes.

...

additional arguments passed to the function.

Details

aictab internally creates a new class for the cand.set list of candidate models, according to the contents of the list. The current function is implemented for clogit, coxme, coxph, fitdist, fitdistr, glm, gls, gnls, hurdle, lavaan, lm, lme, lmekin, maxlikeFit, mer, merMod, multinom, nlme, nls, polr, rlm, survreg, vglm, and zeroinfl classes as well as various unmarkedFit classes. The function constructs a model selection table based on one of the four information criteria: AIC, AICc, QAIC, and QAICc.

Ten guidelines for model selection:

1) Carefully construct your candidate model set. Each model should represent a specific (interesting) hypothesis to test.

2) Keep your candidate model set short. It is ill-advised to consider as many models as there are data.

3) Check model fit. Use your global model (most complex model) or subglobal models to determine if the assumptions are valid. If none of your models fit the data well, information criteria will only indicate the most parsimonious of the poor models.

4) Avoid data dredging (i.e., looking for patterns after an initial round of analysis).

5) Avoid overfitting models. You should not estimate too many parameters for the number of observations available in the sample.

6) Be careful of missing values. Remember that values that are missing only for certain variables change the data set and sample size, depending on which variable is included in any given model. I suggest to remove missing cases before starting model selection.

7) Use the same response variable for all models of the candidate model set. It is inappropriate to run some models with a transformed response variable and others with the untransformed variable. A workaround is to use a different link function for some models (e.g., identity vs log link).

8) When dealing with models with overdispersion, use the same value of c-hat for all models in the candidate model set. For binomial models with trials > 1 (i.e., success/trial or cbind(success, failure) syntax) or with Poisson GLM's, you should estimate the c-hat from the most complex model (global model). If c-hat > 1, you should use the same value for each model of the candidate model set (where appropriate) and include it in the count of parameters (K). Similarly, for negative binomial models, you should estimate the dispersion parameter from the global model and use the same value across all models.

9) Burnham and Anderson (2002) recommend to avoid mixing the information-theoretic approach and notions of significance (i.e., P values). It is best to provide estimates and a measure of their precision (standard error, confidence intervals).

10) Determining the ranking of the models is just the first step. Akaike weights sum to 1 for the entire model set and can be interpreted as the weight of evidence in favor of a given model being the best one given the candidate model set considered and the data at hand. Models with large Akaike weights have strong support. Evidence ratios, importance values, and confidence sets for the best model are all measures that assist in interpretation. In cases where the top ranking model has an Akaike weight > 0.9, one can base inference on this single most parsimonious model. When many models rank highly (i.e., delta (Q)AIC(c) < 4), one should model-average effect sizes for the parameters with most support across the entire set of models. Model averaging consists in making inference based on the whole set of candidate models, instead of basing conclusions on a single 'best' model. It is an elegant way of making inference based on the information contained in the entire model set.

Value

aictab creates an object of class aictab with the following components:

Modname

the names of each model of the candidate model set.

K

the number of estimated parameters for each model.

(Q)AIC(c)

the information criterion requested for each model (AIC, AICc, QAIC, QAICc).

Delta_(Q)AIC(c)

the appropriate delta AIC component depending on the information criteria selected.

ModelLik

the relative likelihood of the model given the data (exp(-0.5*delta[i])). This is not to be confused with the likelihood of the parameters given the data. The relative likelihood can then be normalized across all models to get the model probabilities.

(Q)AIC(c)Wt

the Akaike weights, also termed "model probabilities" sensu Burnham and Anderson (2002) and Anderson (2008). These measures indicate the level of support (i.e., weight of evidence) in favor of any given model being the most parsimonious among the candidate model set.

Cum.Wt

the cumulative Akaike weights. These are only meaningful if results in table are sorted in decreasing order of Akaike weights (i.e., sort = TRUE).

c.hat

if c.hat was specified as an argument, it is included in the table.

LL

if c.hat = 1 and parameters estimated by maximum likelihood, the log-likelihood of each model.

Quasi.LL

if c.hat > 1, the quasi log-likelihood of each model.

Res.LL

if parameters are estimated by restricted maximum-likelihood (REML), the restricted log-likelihood of each model.

Author(s)

Marc J. Mazerolle

References

Anderson, D. R. (2008) Model-based Inference in the Life Sciences: a primer on evidence. Springer: New York.

Burnham, K. P., Anderson, D. R. (2002) Model Selection and Multimodel Inference: a practical information-theoretic approach. Second edition. Springer: New York.

Burnham, K. P., Anderson, D. R. (2004) Multimodel inference: understanding AIC and BIC in model selection. Sociological Methods and Research 33, 261–304.

Dail, D., Madsen, L. (2011) Models for estimating abundance from repeated counts of an open population. Biometrics 67, 577–587.

MacKenzie, D. I., Nichols, J. D., Lachman, G. B., Droege, S., Royle, J. A., Langtimm, C. A. (2002) Estimating site occupancy rates when detection probabilities are less than one. Ecology 83, 2248–2255.

MacKenzie, D. I., Nichols, J. D., Hines, J. E., Knutson, M. G., Franklin, A. B. (2003) Estimating site occupancy, colonization, and local extinction when a species is detected imperfectly. Ecology 84, 2200–2207.

Mazerolle, M. J. (2006) Improving data analysis in herpetology: using Akaike's Information Criterion (AIC) to assess the strength of biological hypotheses. Amphibia-Reptilia 27, 169–180.

Royle, J. A. (2004) N-mixture models for estimating population size from spatially replicated counts. Biometrics 60, 108–115.

See Also

AICc, aictabCustom, bictab, confset, c_hat, evidence, importance, modavg, modavgEffect, modavgShrink, modavgPred

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
##Mazerolle (2006) frog water loss example
data(dry.frog)

##setup a subset of models of Table 1
Cand.models <- list( )
Cand.models[[1]] <- lm(log_Mass_lost ~ Shade + Substrate +
                       cent_Initial_mass + Initial_mass2,
                       data = dry.frog)
Cand.models[[2]] <- lm(log_Mass_lost ~ Shade + Substrate +
                       cent_Initial_mass + Initial_mass2 +
                       Shade:Substrate, data = dry.frog)
Cand.models[[3]] <- lm(log_Mass_lost ~ cent_Initial_mass +
                       Initial_mass2, data = dry.frog)
Cand.models[[4]] <- lm(log_Mass_lost ~ Shade + cent_Initial_mass +
                       Initial_mass2, data = dry.frog)
Cand.models[[5]] <- lm(log_Mass_lost ~ Substrate + cent_Initial_mass +
                       Initial_mass2, data = dry.frog)

##create a vector of names to trace back models in set
Modnames <- paste("mod", 1:length(Cand.models), sep = " ")

##generate AICc table
aictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE)
##round to 4 digits after decimal point and give log-likelihood
print(aictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE),
      digits = 4, LL = TRUE)



## Not run: 
##Burnham and Anderson (2002) flour beetle data
data(beetle)
##models as suggested by Burnham and Anderson p. 198          
Cand.set <- list( )
Cand.set[[1]] <- glm(Mortality_rate ~ Dose, family =
                     binomial(link = "logit"), weights = Number_tested,
                     data = beetle)
Cand.set[[2]] <- glm(Mortality_rate ~ Dose, family =
                     binomial(link = "probit"), weights = Number_tested,
                     data = beetle)
Cand.set[[3]] <- glm(Mortality_rate ~ Dose, family =
                     binomial(link ="cloglog"), weights = Number_tested,
                     data = beetle)

##check c-hat
c_hat(Cand.set[[1]])
c_hat(Cand.set[[2]])
c_hat(Cand.set[[3]])
##lowest value of c-hat < 1 for these non-nested models, thus use
##c.hat = 1 
       
##set up named list
names(Cand.set) <- c("logit", "probit", "cloglog")

##compare models
##model names will be taken from the list if modnames is not specified
res.table <- aictab(cand.set = Cand.set, second.ord = FALSE)
##note that delta AIC and Akaike weights are identical to Table 4.7
print(res.table, digits = 2, LL = TRUE) #print table with 2 digits and
##print log-likelihood in table
print(res.table, digits = 4, LL = FALSE) #print table with 4 digits and
##do not print log-likelihood

## End(Not run)


##two-way ANOVA with interaction
data(iron)
##full model
m1 <- lm(Iron ~ Pot + Food + Pot:Food, data = iron)
##additive model
m2 <- lm(Iron ~ Pot + Food, data = iron)
##null model
m3 <- lm(Iron ~ 1, data = iron)

##candidate models
Cand.aov <- list(m1, m2, m3)
Cand.names <- c("full", "additive", "null")
aictab(Cand.aov, Cand.names)



##single-season occupancy model example modified from ?occu
## Not run: 
require(unmarked)
##single season example modified from ?occu
data(frogs)
pferUMF <- unmarkedFrameOccu(pfer.bin)
##add fake covariates
siteCovs(pferUMF) <- data.frame(sitevar1 = rnorm(numSites(pferUMF)),
                                sitevar2 = runif(numSites(pferUMF))) 
     
##observation covariates
obsCovs(pferUMF) <- data.frame(obsvar1 = rnorm(numSites(pferUMF) *
                                 obsNum(pferUMF))) 

##set up candidate model set
fm1 <- occu(~ obsvar1 ~ sitevar1, pferUMF)
fm2 <- occu(~ 1 ~ sitevar1, pferUMF)
fm3 <- occu(~ obsvar1 ~ sitevar2, pferUMF)
fm4 <- occu(~ 1 ~ sitevar2, pferUMF)

##assemble models in named list (alternative to using 'modnames' argument)
Cand.mods <- list("fm1" = fm1, "fm2" = fm2, "fm3" = fm3, "fm4" = fm4)

##compute table
aictab(cand.set = Cand.mods, second.ord = TRUE)

detach(package:unmarked)

## End(Not run)

Want to suggest features or report bugs for rdrr.io? Use the GitHub issue tracker.